Initial commit
This commit is contained in:
@@ -0,0 +1,441 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "arrow/util/secure_string.h"
|
||||
#include "parquet/exception.h"
|
||||
#include "parquet/schema.h"
|
||||
#include "parquet/types.h"
|
||||
|
||||
namespace parquet {
|
||||
|
||||
static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm =
|
||||
ParquetCipher::AES_GCM_V1;
|
||||
static constexpr int32_t kMaximalAadMetadataLength = 256;
|
||||
static constexpr bool kDefaultEncryptedFooter = true;
|
||||
static constexpr bool kDefaultCheckSignature = true;
|
||||
static constexpr bool kDefaultAllowPlaintextFiles = false;
|
||||
static constexpr int32_t kAadFileUniqueLength = 8;
|
||||
|
||||
class ColumnDecryptionProperties;
|
||||
using ColumnPathToDecryptionPropertiesMap =
|
||||
std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>;
|
||||
|
||||
class ColumnEncryptionProperties;
|
||||
using ColumnPathToEncryptionPropertiesMap =
|
||||
std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>;
|
||||
|
||||
class PARQUET_EXPORT DecryptionKeyRetriever {
|
||||
public:
|
||||
/// \brief Retrieve a key.
|
||||
virtual ::arrow::util::SecureString GetKey(const std::string& key_id) = 0;
|
||||
|
||||
virtual ~DecryptionKeyRetriever() {}
|
||||
};
|
||||
|
||||
/// Simple integer key retriever
|
||||
class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever {
|
||||
public:
|
||||
void PutKey(uint32_t key_id, ::arrow::util::SecureString key);
|
||||
|
||||
::arrow::util::SecureString GetKey(const std::string& key_id_string) override {
|
||||
// key_id_string is string but for IntegerKeyIdRetriever it encodes
|
||||
// a native-endian 32 bit unsigned integer key_id
|
||||
uint32_t key_id;
|
||||
assert(key_id_string.size() == sizeof(key_id));
|
||||
memcpy(&key_id, key_id_string.data(), sizeof(key_id));
|
||||
|
||||
return key_map_.at(key_id);
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<uint32_t, ::arrow::util::SecureString> key_map_;
|
||||
};
|
||||
|
||||
// Simple string key retriever
|
||||
class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever {
|
||||
public:
|
||||
void PutKey(std::string key_id, ::arrow::util::SecureString key);
|
||||
::arrow::util::SecureString GetKey(const std::string& key_id) override;
|
||||
|
||||
private:
|
||||
std::map<std::string, ::arrow::util::SecureString> key_map_;
|
||||
};
|
||||
|
||||
class PARQUET_EXPORT HiddenColumnException : public ParquetException {
|
||||
public:
|
||||
explicit HiddenColumnException(const std::string& columnPath)
|
||||
: ParquetException(columnPath.c_str()) {}
|
||||
};
|
||||
|
||||
class PARQUET_EXPORT KeyAccessDeniedException : public ParquetException {
|
||||
public:
|
||||
explicit KeyAccessDeniedException(const std::string& columnPath)
|
||||
: ParquetException(columnPath.c_str()) {}
|
||||
};
|
||||
|
||||
inline const uint8_t* str2bytes(const std::string& str) {
|
||||
if (str.empty()) return NULLPTR;
|
||||
|
||||
char* cbytes = const_cast<char*>(str.c_str());
|
||||
return reinterpret_cast<const uint8_t*>(cbytes);
|
||||
}
|
||||
|
||||
inline ::arrow::util::span<const uint8_t> str2span(const std::string& str) {
|
||||
if (str.empty()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
return {reinterpret_cast<const uint8_t*>(str.data()), str.size()};
|
||||
}
|
||||
|
||||
class PARQUET_EXPORT ColumnEncryptionProperties {
|
||||
public:
|
||||
class PARQUET_EXPORT Builder {
|
||||
public:
|
||||
/// Convenience builder for encrypted columns.
|
||||
explicit Builder(std::string name) : Builder(std::move(name), true) {}
|
||||
|
||||
/// Convenience builder for encrypted columns.
|
||||
explicit Builder(const schema::ColumnPath& path)
|
||||
: Builder(path.ToDotString(), true) {}
|
||||
|
||||
/// Set a column-specific key.
|
||||
/// If key is not set on an encrypted column, the column will
|
||||
/// be encrypted with the footer key.
|
||||
/// keyBytes Key length must be either 16, 24 or 32 bytes.
|
||||
/// Caller is responsible for wiping out the input key array.
|
||||
Builder* key(::arrow::util::SecureString column_key);
|
||||
|
||||
/// Set a key retrieval metadata.
|
||||
/// use either key_metadata() or key_id(), not both
|
||||
Builder* key_metadata(std::string key_metadata);
|
||||
|
||||
/// A convenience function to set key metadata using a string id.
|
||||
/// Set a key retrieval metadata (converted from String).
|
||||
/// use either key_metadata() or key_id(), not both
|
||||
/// key_id will be converted to metadata (UTF-8 array).
|
||||
Builder* key_id(std::string key_id);
|
||||
|
||||
std::shared_ptr<ColumnEncryptionProperties> build() {
|
||||
return std::shared_ptr<ColumnEncryptionProperties>(
|
||||
new ColumnEncryptionProperties(encrypted_, column_path_, key_, key_metadata_));
|
||||
}
|
||||
|
||||
private:
|
||||
std::string column_path_;
|
||||
bool encrypted_;
|
||||
::arrow::util::SecureString key_;
|
||||
std::string key_metadata_;
|
||||
|
||||
Builder(std::string path, bool encrypted)
|
||||
: column_path_(std::move(path)), encrypted_(encrypted) {}
|
||||
};
|
||||
|
||||
const std::string& column_path() const { return column_path_; }
|
||||
bool is_encrypted() const { return encrypted_; }
|
||||
bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; }
|
||||
const ::arrow::util::SecureString& key() const { return key_; }
|
||||
const std::string& key_metadata() const { return key_metadata_; }
|
||||
|
||||
private:
|
||||
std::string column_path_;
|
||||
bool encrypted_;
|
||||
bool encrypted_with_footer_key_;
|
||||
::arrow::util::SecureString key_;
|
||||
std::string key_metadata_;
|
||||
explicit ColumnEncryptionProperties(bool encrypted, std::string column_path,
|
||||
::arrow::util::SecureString key,
|
||||
std::string key_metadata);
|
||||
};
|
||||
|
||||
class PARQUET_EXPORT ColumnDecryptionProperties {
|
||||
public:
|
||||
class PARQUET_EXPORT Builder {
|
||||
public:
|
||||
explicit Builder(std::string name) : column_path_(std::move(name)) {}
|
||||
|
||||
explicit Builder(const schema::ColumnPath& path) : Builder(path.ToDotString()) {}
|
||||
|
||||
/// Set an explicit column key. If applied on a file that contains
|
||||
/// key metadata for this column the metadata will be ignored,
|
||||
/// the column will be decrypted with this key.
|
||||
/// key length must be either 16, 24 or 32 bytes.
|
||||
Builder* key(::arrow::util::SecureString key);
|
||||
|
||||
std::shared_ptr<ColumnDecryptionProperties> build();
|
||||
|
||||
private:
|
||||
std::string column_path_;
|
||||
::arrow::util::SecureString key_;
|
||||
};
|
||||
|
||||
const std::string& column_path() const { return column_path_; }
|
||||
const ::arrow::util::SecureString& key() const { return key_; }
|
||||
|
||||
private:
|
||||
std::string column_path_;
|
||||
::arrow::util::SecureString key_;
|
||||
|
||||
/// This class is only required for setting explicit column decryption keys -
|
||||
/// to override key retriever (or to provide keys when key metadata and/or
|
||||
/// key retriever are not available)
|
||||
explicit ColumnDecryptionProperties(std::string column_path,
|
||||
::arrow::util::SecureString key);
|
||||
};
|
||||
|
||||
class PARQUET_EXPORT AADPrefixVerifier {
|
||||
public:
|
||||
/// Verifies identity (AAD Prefix) of individual file,
|
||||
/// or of file collection in a data set.
|
||||
/// Throws exception if an AAD prefix is wrong.
|
||||
/// In a data set, AAD Prefixes should be collected,
|
||||
/// and then checked for missing files.
|
||||
virtual void Verify(const std::string& aad_prefix) = 0;
|
||||
virtual ~AADPrefixVerifier() {}
|
||||
};
|
||||
|
||||
class PARQUET_EXPORT FileDecryptionProperties {
|
||||
public:
|
||||
class PARQUET_EXPORT Builder {
|
||||
public:
|
||||
Builder() {
|
||||
check_plaintext_footer_integrity_ = kDefaultCheckSignature;
|
||||
plaintext_files_allowed_ = kDefaultAllowPlaintextFiles;
|
||||
}
|
||||
|
||||
/// Set an explicit footer key. If applied on a file that contains
|
||||
/// footer key metadata the metadata will be ignored, the footer
|
||||
/// will be decrypted/verified with this key.
|
||||
/// If explicit key is not set, footer key will be fetched from
|
||||
/// key retriever.
|
||||
/// With explicit keys or AAD prefix, new encryption properties object must be
|
||||
/// created for each encrypted file.
|
||||
/// Explicit encryption keys (footer and column) are cloned.
|
||||
/// Upon completion of file reading, the cloned encryption keys in the properties
|
||||
/// will be wiped out (array values set to 0).
|
||||
/// Caller is responsible for wiping out the input key array.
|
||||
/// param footerKey Key length must be either 16, 24 or 32 bytes.
|
||||
Builder* footer_key(::arrow::util::SecureString footer_key);
|
||||
|
||||
/// Set explicit column keys (decryption properties).
|
||||
/// Its also possible to set a key retriever on this property object.
|
||||
/// Upon file decryption, availability of explicit keys is checked before
|
||||
/// invocation of the retriever callback.
|
||||
/// If an explicit key is available for a footer or a column,
|
||||
/// its key metadata will be ignored.
|
||||
Builder* column_keys(
|
||||
ColumnPathToDecryptionPropertiesMap column_decryption_properties);
|
||||
|
||||
/// Set a key retriever callback. Its also possible to
|
||||
/// set explicit footer or column keys on this file property object.
|
||||
/// Upon file decryption, availability of explicit keys is checked before
|
||||
/// invocation of the retriever callback.
|
||||
/// If an explicit key is available for a footer or a column,
|
||||
/// its key metadata will be ignored.
|
||||
Builder* key_retriever(std::shared_ptr<DecryptionKeyRetriever> key_retriever);
|
||||
|
||||
/// Skip integrity verification of plaintext footers.
|
||||
/// If not called, integrity of plaintext footers will be checked in runtime,
|
||||
/// and an exception will be thrown in the following situations:
|
||||
/// - footer signing key is not available
|
||||
/// (not passed, or not found by key retriever)
|
||||
/// - footer content and signature don't match
|
||||
Builder* disable_footer_signature_verification() {
|
||||
check_plaintext_footer_integrity_ = false;
|
||||
return this;
|
||||
}
|
||||
|
||||
/// Explicitly supply the file AAD prefix.
|
||||
/// A must when a prefix is used for file encryption, but not stored in file.
|
||||
/// If AAD prefix is stored in file, it will be compared to the explicitly
|
||||
/// supplied value and an exception will be thrown if they differ.
|
||||
Builder* aad_prefix(std::string aad_prefix);
|
||||
|
||||
/// Set callback for verification of AAD Prefixes stored in file.
|
||||
Builder* aad_prefix_verifier(std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier);
|
||||
|
||||
/// By default, reading plaintext (unencrypted) files is not
|
||||
/// allowed when using a decryptor
|
||||
/// - in order to detect files that were not encrypted by mistake.
|
||||
/// However, the default behavior can be overridden by calling this method.
|
||||
/// The caller should use then a different method to ensure encryption
|
||||
/// of files with sensitive data.
|
||||
Builder* plaintext_files_allowed() {
|
||||
plaintext_files_allowed_ = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
std::shared_ptr<FileDecryptionProperties> build() {
|
||||
return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties(
|
||||
footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_,
|
||||
aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_));
|
||||
}
|
||||
|
||||
private:
|
||||
::arrow::util::SecureString footer_key_;
|
||||
std::string aad_prefix_;
|
||||
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
|
||||
ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
|
||||
|
||||
std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
|
||||
bool check_plaintext_footer_integrity_;
|
||||
bool plaintext_files_allowed_;
|
||||
};
|
||||
|
||||
const ::arrow::util::SecureString& column_key(const std::string& column_path) const;
|
||||
|
||||
const ::arrow::util::SecureString& footer_key() const { return footer_key_; }
|
||||
|
||||
const std::string& aad_prefix() const { return aad_prefix_; }
|
||||
|
||||
const std::shared_ptr<DecryptionKeyRetriever>& key_retriever() const {
|
||||
return key_retriever_;
|
||||
}
|
||||
|
||||
bool check_plaintext_footer_integrity() const {
|
||||
return check_plaintext_footer_integrity_;
|
||||
}
|
||||
|
||||
bool plaintext_files_allowed() const { return plaintext_files_allowed_; }
|
||||
|
||||
const std::shared_ptr<AADPrefixVerifier>& aad_prefix_verifier() const {
|
||||
return aad_prefix_verifier_;
|
||||
}
|
||||
|
||||
private:
|
||||
::arrow::util::SecureString footer_key_;
|
||||
std::string aad_prefix_;
|
||||
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
|
||||
ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
|
||||
std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
|
||||
bool check_plaintext_footer_integrity_;
|
||||
bool plaintext_files_allowed_;
|
||||
|
||||
FileDecryptionProperties(
|
||||
::arrow::util::SecureString footer_key,
|
||||
std::shared_ptr<DecryptionKeyRetriever> key_retriever,
|
||||
bool check_plaintext_footer_integrity, std::string aad_prefix,
|
||||
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
|
||||
ColumnPathToDecryptionPropertiesMap column_decryption_properties,
|
||||
bool plaintext_files_allowed);
|
||||
};
|
||||
|
||||
class PARQUET_EXPORT FileEncryptionProperties {
|
||||
public:
|
||||
class PARQUET_EXPORT Builder {
|
||||
public:
|
||||
explicit Builder(::arrow::util::SecureString footer_key)
|
||||
: parquet_cipher_(kDefaultEncryptionAlgorithm),
|
||||
encrypted_footer_(kDefaultEncryptedFooter),
|
||||
footer_key_(std::move(footer_key)) {
|
||||
store_aad_prefix_in_file_ = false;
|
||||
}
|
||||
|
||||
/// Create files with plaintext footer.
|
||||
/// If not called, the files will be created with encrypted footer (default).
|
||||
Builder* set_plaintext_footer() {
|
||||
encrypted_footer_ = false;
|
||||
return this;
|
||||
}
|
||||
|
||||
/// Set encryption algorithm.
|
||||
/// If not called, files will be encrypted with AES_GCM_V1 (default).
|
||||
Builder* algorithm(ParquetCipher::type parquet_cipher) {
|
||||
parquet_cipher_ = parquet_cipher;
|
||||
return this;
|
||||
}
|
||||
|
||||
/// Set a key retrieval metadata (converted from String).
|
||||
/// use either footer_key_metadata or footer_key_id, not both.
|
||||
Builder* footer_key_id(std::string key_id);
|
||||
|
||||
/// Set a key retrieval metadata.
|
||||
/// use either footer_key_metadata or footer_key_id, not both.
|
||||
Builder* footer_key_metadata(std::string footer_key_metadata);
|
||||
|
||||
/// Set the file AAD Prefix.
|
||||
Builder* aad_prefix(std::string aad_prefix);
|
||||
|
||||
/// Skip storing AAD Prefix in file.
|
||||
/// If not called, and if AAD Prefix is set, it will be stored.
|
||||
Builder* disable_aad_prefix_storage();
|
||||
|
||||
/// Set the list of encrypted columns and their properties (keys etc).
|
||||
/// If not called, all columns will be encrypted with the footer key.
|
||||
/// If called, the file columns not in the list will be left unencrypted.
|
||||
Builder* encrypted_columns(ColumnPathToEncryptionPropertiesMap encrypted_columns);
|
||||
|
||||
std::shared_ptr<FileEncryptionProperties> build() {
|
||||
return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties(
|
||||
parquet_cipher_, footer_key_, footer_key_metadata_, encrypted_footer_,
|
||||
aad_prefix_, store_aad_prefix_in_file_, encrypted_columns_));
|
||||
}
|
||||
|
||||
private:
|
||||
ParquetCipher::type parquet_cipher_;
|
||||
bool encrypted_footer_;
|
||||
::arrow::util::SecureString footer_key_;
|
||||
std::string footer_key_metadata_;
|
||||
|
||||
std::string aad_prefix_;
|
||||
bool store_aad_prefix_in_file_;
|
||||
ColumnPathToEncryptionPropertiesMap encrypted_columns_;
|
||||
};
|
||||
|
||||
bool encrypted_footer() const { return encrypted_footer_; }
|
||||
|
||||
EncryptionAlgorithm algorithm() const { return algorithm_; }
|
||||
|
||||
const ::arrow::util::SecureString& footer_key() const { return footer_key_; }
|
||||
|
||||
const std::string& footer_key_metadata() const { return footer_key_metadata_; }
|
||||
|
||||
const std::string& file_aad() const { return file_aad_; }
|
||||
|
||||
std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
|
||||
const std::string& column_path);
|
||||
|
||||
const ColumnPathToEncryptionPropertiesMap& encrypted_columns() const {
|
||||
return encrypted_columns_;
|
||||
}
|
||||
|
||||
private:
|
||||
EncryptionAlgorithm algorithm_;
|
||||
::arrow::util::SecureString footer_key_;
|
||||
std::string footer_key_metadata_;
|
||||
bool encrypted_footer_;
|
||||
std::string file_aad_;
|
||||
std::string aad_prefix_;
|
||||
bool store_aad_prefix_in_file_;
|
||||
ColumnPathToEncryptionPropertiesMap encrypted_columns_;
|
||||
|
||||
FileEncryptionProperties(ParquetCipher::type cipher,
|
||||
::arrow::util::SecureString footer_key,
|
||||
std::string footer_key_metadata, bool encrypted_footer,
|
||||
std::string aad_prefix, bool store_aad_prefix_in_file,
|
||||
ColumnPathToEncryptionPropertiesMap encrypted_columns);
|
||||
};
|
||||
|
||||
} // namespace parquet
|
||||
Reference in New Issue
Block a user