Initial commit
This commit is contained in:
@@ -0,0 +1,152 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "parquet/encryption/encryption.h"
|
||||
#include "parquet/encryption/file_key_wrapper.h"
|
||||
#include "parquet/encryption/key_toolkit.h"
|
||||
#include "parquet/encryption/kms_client_factory.h"
|
||||
#include "parquet/platform.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
/// Default values for the high-level encryption/decryption configuration.
///
/// Declared `inline constexpr` rather than `static constexpr`: in a header,
/// `static` gives every including translation unit its own internal-linkage copy,
/// whereas an inline variable is a single entity shared by all of them.
inline constexpr ParquetCipher::type kDefaultEncryptionAlgorithm =
    ParquetCipher::AES_GCM_V1;
inline constexpr bool kDefaultPlaintextFooter = false;
inline constexpr bool kDefaultDoubleWrapping = true;
inline constexpr double kDefaultCacheLifetimeSeconds = 600;  // 10 minutes
inline constexpr bool kDefaultInternalKeyMaterial = true;
inline constexpr bool kDefaultUniformEncryption = false;
inline constexpr int32_t kDefaultDataKeyLengthBits = 128;
|
||||
|
||||
struct PARQUET_EXPORT EncryptionConfiguration {
|
||||
explicit EncryptionConfiguration(const std::string& footer_key)
|
||||
: footer_key(footer_key) {}
|
||||
|
||||
/// ID of the master key for footer encryption/signing
|
||||
std::string footer_key;
|
||||
|
||||
/// List of columns to encrypt, with column master key IDs (see HIVE-21848).
|
||||
/// Format: "columnKeyID:colName,colName;columnKeyID:colName..."
|
||||
/// Either
|
||||
/// (1) column_keys must be set
|
||||
/// or
|
||||
/// (2) uniform_encryption must be set to true
|
||||
/// If none of (1) and (2) are true, or if both are true, an exception will be
|
||||
/// thrown.
|
||||
std::string column_keys;
|
||||
|
||||
/// Encrypt footer and all columns with the same encryption key.
|
||||
bool uniform_encryption = kDefaultUniformEncryption;
|
||||
|
||||
/// Parquet encryption algorithm. Can be "AES_GCM_V1" (default), or "AES_GCM_CTR_V1".
|
||||
ParquetCipher::type encryption_algorithm = kDefaultEncryptionAlgorithm;
|
||||
|
||||
/// Write files with plaintext footer.
|
||||
/// The default is false - files are written with encrypted footer.
|
||||
bool plaintext_footer = kDefaultPlaintextFooter;
|
||||
|
||||
/// Use double wrapping - where data encryption keys (DEKs) are encrypted with key
|
||||
/// encryption keys (KEKs), which in turn are encrypted with master keys.
|
||||
/// The default is true. If set to false, use single wrapping - where DEKs are
|
||||
/// encrypted directly with master keys.
|
||||
bool double_wrapping = kDefaultDoubleWrapping;
|
||||
|
||||
/// Lifetime of cached entities (key encryption keys, local wrapping keys, KMS client
|
||||
/// objects).
|
||||
/// The default is 600 (10 minutes).
|
||||
double cache_lifetime_seconds = kDefaultCacheLifetimeSeconds;
|
||||
|
||||
/// Store key material inside Parquet file footers; this mode doesn’t produce
|
||||
/// additional files. By default, true. If set to false, key material is stored in
|
||||
/// separate files in the same folder, which enables key rotation for immutable
|
||||
/// Parquet files.
|
||||
bool internal_key_material = kDefaultInternalKeyMaterial;
|
||||
|
||||
/// Length of data encryption keys (DEKs), randomly generated by parquet key
|
||||
/// management tools. Can be 128, 192 or 256 bits.
|
||||
/// The default is 128 bits.
|
||||
int32_t data_key_length_bits = kDefaultDataKeyLengthBits;
|
||||
};
|
||||
|
||||
/// High-level decryption settings for a Parquet file.
struct PARQUET_EXPORT DecryptionConfiguration {
  /// Lifetime, in seconds, of cached entities (key encryption keys, local
  /// wrapping keys, KMS client objects). Defaults to 600 (10 minutes).
  double cache_lifetime_seconds = kDefaultCacheLifetimeSeconds;
};
|
||||
|
||||
/// This is a core class, that translates the parameters of high level encryption (like
|
||||
/// the names of encrypted columns, names of master keys, etc), into parameters of low
|
||||
/// level encryption (like the key metadata, DEK, etc). A factory that produces the low
|
||||
/// level FileEncryptionProperties and FileDecryptionProperties objects, from the high
|
||||
/// level parameters.
|
||||
class PARQUET_EXPORT CryptoFactory {
|
||||
public:
|
||||
/// a KmsClientFactory object must be registered via this method before calling any of
|
||||
/// GetFileEncryptionProperties()/GetFileDecryptionProperties() methods.
|
||||
void RegisterKmsClientFactory(std::shared_ptr<KmsClientFactory> kms_client_factory);
|
||||
|
||||
/// Get the encryption properties for a Parquet file.
|
||||
/// If external key material is used then a file system and path to the
|
||||
/// parquet file must be provided.
|
||||
std::shared_ptr<FileEncryptionProperties> GetFileEncryptionProperties(
|
||||
const KmsConnectionConfig& kms_connection_config,
|
||||
const EncryptionConfiguration& encryption_config, const std::string& file_path = "",
|
||||
const std::shared_ptr<::arrow::fs::FileSystem>& file_system = NULLPTR);
|
||||
|
||||
/// Get decryption properties for a Parquet file.
|
||||
/// If external key material is used then a file system and path to the
|
||||
/// parquet file must be provided.
|
||||
std::shared_ptr<FileDecryptionProperties> GetFileDecryptionProperties(
|
||||
const KmsConnectionConfig& kms_connection_config,
|
||||
const DecryptionConfiguration& decryption_config, const std::string& file_path = "",
|
||||
const std::shared_ptr<::arrow::fs::FileSystem>& file_system = NULLPTR);
|
||||
|
||||
void RemoveCacheEntriesForToken(const std::string& access_token) {
|
||||
key_toolkit_->RemoveCacheEntriesForToken(access_token);
|
||||
}
|
||||
|
||||
void RemoveCacheEntriesForAllTokens() {
|
||||
key_toolkit_->RemoveCacheEntriesForAllTokens();
|
||||
}
|
||||
|
||||
/// Rotates master encryption keys for a Parquet file that uses external key material.
|
||||
/// In single wrapping mode, data encryption keys are decrypted with the old master keys
|
||||
/// and then re-encrypted with new master keys.
|
||||
/// In double wrapping mode, key encryption keys are decrypted with the old master keys
|
||||
/// and then re-encrypted with new master keys.
|
||||
/// This relies on the KMS supporting versioning, such that the old master key is
|
||||
/// used when unwrapping a key, and the latest version is used when wrapping a key.
|
||||
void RotateMasterKeys(const KmsConnectionConfig& kms_connection_config,
|
||||
const std::string& parquet_file_path,
|
||||
const std::shared_ptr<::arrow::fs::FileSystem>& file_system,
|
||||
bool double_wrapping = kDefaultDoubleWrapping,
|
||||
double cache_lifetime_seconds = kDefaultCacheLifetimeSeconds);
|
||||
|
||||
private:
|
||||
ColumnPathToEncryptionPropertiesMap GetColumnEncryptionProperties(
|
||||
int dek_length, const std::string& column_keys, FileKeyWrapper* key_wrapper);
|
||||
|
||||
/// Key utilities object for kms client initialization and cache control
|
||||
std::shared_ptr<KeyToolkit> key_toolkit_ = std::make_shared<KeyToolkit>();
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,441 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "arrow/util/secure_string.h"
|
||||
#include "parquet/exception.h"
|
||||
#include "parquet/schema.h"
|
||||
#include "parquet/types.h"
|
||||
|
||||
namespace parquet {
|
||||
|
||||
/// Defaults and size limits used by the low-level encryption properties.
///
/// Declared `inline constexpr` rather than `static constexpr`: in a header,
/// `static` gives every including translation unit its own internal-linkage copy,
/// whereas an inline variable is a single entity shared by all of them.
inline constexpr ParquetCipher::type kDefaultEncryptionAlgorithm =
    ParquetCipher::AES_GCM_V1;
inline constexpr int32_t kMaximalAadMetadataLength = 256;
inline constexpr bool kDefaultEncryptedFooter = true;
inline constexpr bool kDefaultCheckSignature = true;
inline constexpr bool kDefaultAllowPlaintextFiles = false;
inline constexpr int32_t kAadFileUniqueLength = 8;
|
||||
|
||||
// Forward declarations of the per-column property classes defined further down.
class ColumnDecryptionProperties;
class ColumnEncryptionProperties;

/// Per-column decryption properties, indexed by a std::string key.
using ColumnPathToDecryptionPropertiesMap =
    std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>;

/// Per-column encryption properties, indexed by a std::string key.
using ColumnPathToEncryptionPropertiesMap =
    std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>;
|
||||
|
||||
class PARQUET_EXPORT DecryptionKeyRetriever {
|
||||
public:
|
||||
/// \brief Retrieve a key.
|
||||
virtual ::arrow::util::SecureString GetKey(const std::string& key_id) = 0;
|
||||
|
||||
virtual ~DecryptionKeyRetriever() {}
|
||||
};
|
||||
|
||||
/// Simple integer key retriever
|
||||
class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever {
|
||||
public:
|
||||
void PutKey(uint32_t key_id, ::arrow::util::SecureString key);
|
||||
|
||||
::arrow::util::SecureString GetKey(const std::string& key_id_string) override {
|
||||
// key_id_string is string but for IntegerKeyIdRetriever it encodes
|
||||
// a native-endian 32 bit unsigned integer key_id
|
||||
uint32_t key_id;
|
||||
assert(key_id_string.size() == sizeof(key_id));
|
||||
memcpy(&key_id, key_id_string.data(), sizeof(key_id));
|
||||
|
||||
return key_map_.at(key_id);
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<uint32_t, ::arrow::util::SecureString> key_map_;
|
||||
};
|
||||
|
||||
// Simple string key retriever
|
||||
class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever {
|
||||
public:
|
||||
void PutKey(std::string key_id, ::arrow::util::SecureString key);
|
||||
::arrow::util::SecureString GetKey(const std::string& key_id) override;
|
||||
|
||||
private:
|
||||
std::map<std::string, ::arrow::util::SecureString> key_map_;
|
||||
};
|
||||
|
||||
/// ParquetException subtype constructed from a column path; the path's C string
/// is what gets passed on to the ParquetException constructor.
class PARQUET_EXPORT HiddenColumnException : public ParquetException {
 public:
  /// \param columnPath text handed to ParquetException via c_str()
  explicit HiddenColumnException(const std::string& columnPath)
      : ParquetException(columnPath.c_str()) {}
};
|
||||
|
||||
/// ParquetException subtype constructed from a column path; the path's C string
/// is what gets passed on to the ParquetException constructor.
class PARQUET_EXPORT KeyAccessDeniedException : public ParquetException {
 public:
  /// \param columnPath text handed to ParquetException via c_str()
  explicit KeyAccessDeniedException(const std::string& columnPath)
      : ParquetException(columnPath.c_str()) {}
};
|
||||
|
||||
inline const uint8_t* str2bytes(const std::string& str) {
|
||||
if (str.empty()) return NULLPTR;
|
||||
|
||||
char* cbytes = const_cast<char*>(str.c_str());
|
||||
return reinterpret_cast<const uint8_t*>(cbytes);
|
||||
}
|
||||
|
||||
/// \brief View the characters of a string as a span of bytes.
///
/// \param str the string to view
/// \return a default-constructed span if `str` is empty, otherwise a span over
///         the str.size() bytes starting at str.data() (no copy is made).
inline ::arrow::util::span<const uint8_t> str2span(const std::string& str) {
  if (!str.empty()) {
    const auto* first_byte = reinterpret_cast<const uint8_t*>(str.data());
    return {first_byte, str.size()};
  }

  return {};
}
|
||||
|
||||
class PARQUET_EXPORT ColumnEncryptionProperties {
|
||||
public:
|
||||
class PARQUET_EXPORT Builder {
|
||||
public:
|
||||
/// Convenience builder for encrypted columns.
|
||||
explicit Builder(std::string name) : Builder(std::move(name), true) {}
|
||||
|
||||
/// Convenience builder for encrypted columns.
|
||||
explicit Builder(const schema::ColumnPath& path)
|
||||
: Builder(path.ToDotString(), true) {}
|
||||
|
||||
/// Set a column-specific key.
|
||||
/// If key is not set on an encrypted column, the column will
|
||||
/// be encrypted with the footer key.
|
||||
/// keyBytes Key length must be either 16, 24 or 32 bytes.
|
||||
/// Caller is responsible for wiping out the input key array.
|
||||
Builder* key(::arrow::util::SecureString column_key);
|
||||
|
||||
/// Set a key retrieval metadata.
|
||||
/// use either key_metadata() or key_id(), not both
|
||||
Builder* key_metadata(std::string key_metadata);
|
||||
|
||||
/// A convenience function to set key metadata using a string id.
|
||||
/// Set a key retrieval metadata (converted from String).
|
||||
/// use either key_metadata() or key_id(), not both
|
||||
/// key_id will be converted to metadata (UTF-8 array).
|
||||
Builder* key_id(std::string key_id);
|
||||
|
||||
std::shared_ptr<ColumnEncryptionProperties> build() {
|
||||
return std::shared_ptr<ColumnEncryptionProperties>(
|
||||
new ColumnEncryptionProperties(encrypted_, column_path_, key_, key_metadata_));
|
||||
}
|
||||
|
||||
private:
|
||||
std::string column_path_;
|
||||
bool encrypted_;
|
||||
::arrow::util::SecureString key_;
|
||||
std::string key_metadata_;
|
||||
|
||||
Builder(std::string path, bool encrypted)
|
||||
: column_path_(std::move(path)), encrypted_(encrypted) {}
|
||||
};
|
||||
|
||||
const std::string& column_path() const { return column_path_; }
|
||||
bool is_encrypted() const { return encrypted_; }
|
||||
bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; }
|
||||
const ::arrow::util::SecureString& key() const { return key_; }
|
||||
const std::string& key_metadata() const { return key_metadata_; }
|
||||
|
||||
private:
|
||||
std::string column_path_;
|
||||
bool encrypted_;
|
||||
bool encrypted_with_footer_key_;
|
||||
::arrow::util::SecureString key_;
|
||||
std::string key_metadata_;
|
||||
explicit ColumnEncryptionProperties(bool encrypted, std::string column_path,
|
||||
::arrow::util::SecureString key,
|
||||
std::string key_metadata);
|
||||
};
|
||||
|
||||
class PARQUET_EXPORT ColumnDecryptionProperties {
|
||||
public:
|
||||
class PARQUET_EXPORT Builder {
|
||||
public:
|
||||
explicit Builder(std::string name) : column_path_(std::move(name)) {}
|
||||
|
||||
explicit Builder(const schema::ColumnPath& path) : Builder(path.ToDotString()) {}
|
||||
|
||||
/// Set an explicit column key. If applied on a file that contains
|
||||
/// key metadata for this column the metadata will be ignored,
|
||||
/// the column will be decrypted with this key.
|
||||
/// key length must be either 16, 24 or 32 bytes.
|
||||
Builder* key(::arrow::util::SecureString key);
|
||||
|
||||
std::shared_ptr<ColumnDecryptionProperties> build();
|
||||
|
||||
private:
|
||||
std::string column_path_;
|
||||
::arrow::util::SecureString key_;
|
||||
};
|
||||
|
||||
const std::string& column_path() const { return column_path_; }
|
||||
const ::arrow::util::SecureString& key() const { return key_; }
|
||||
|
||||
private:
|
||||
std::string column_path_;
|
||||
::arrow::util::SecureString key_;
|
||||
|
||||
/// This class is only required for setting explicit column decryption keys -
|
||||
/// to override key retriever (or to provide keys when key metadata and/or
|
||||
/// key retriever are not available)
|
||||
explicit ColumnDecryptionProperties(std::string column_path,
|
||||
::arrow::util::SecureString key);
|
||||
};
|
||||
|
||||
class PARQUET_EXPORT AADPrefixVerifier {
|
||||
public:
|
||||
/// Verifies identity (AAD Prefix) of individual file,
|
||||
/// or of file collection in a data set.
|
||||
/// Throws exception if an AAD prefix is wrong.
|
||||
/// In a data set, AAD Prefixes should be collected,
|
||||
/// and then checked for missing files.
|
||||
virtual void Verify(const std::string& aad_prefix) = 0;
|
||||
virtual ~AADPrefixVerifier() {}
|
||||
};
|
||||
|
||||
class PARQUET_EXPORT FileDecryptionProperties {
|
||||
public:
|
||||
class PARQUET_EXPORT Builder {
|
||||
public:
|
||||
Builder() {
|
||||
check_plaintext_footer_integrity_ = kDefaultCheckSignature;
|
||||
plaintext_files_allowed_ = kDefaultAllowPlaintextFiles;
|
||||
}
|
||||
|
||||
/// Set an explicit footer key. If applied on a file that contains
|
||||
/// footer key metadata the metadata will be ignored, the footer
|
||||
/// will be decrypted/verified with this key.
|
||||
/// If explicit key is not set, footer key will be fetched from
|
||||
/// key retriever.
|
||||
/// With explicit keys or AAD prefix, new encryption properties object must be
|
||||
/// created for each encrypted file.
|
||||
/// Explicit encryption keys (footer and column) are cloned.
|
||||
/// Upon completion of file reading, the cloned encryption keys in the properties
|
||||
/// will be wiped out (array values set to 0).
|
||||
/// Caller is responsible for wiping out the input key array.
|
||||
/// param footerKey Key length must be either 16, 24 or 32 bytes.
|
||||
Builder* footer_key(::arrow::util::SecureString footer_key);
|
||||
|
||||
/// Set explicit column keys (decryption properties).
|
||||
/// Its also possible to set a key retriever on this property object.
|
||||
/// Upon file decryption, availability of explicit keys is checked before
|
||||
/// invocation of the retriever callback.
|
||||
/// If an explicit key is available for a footer or a column,
|
||||
/// its key metadata will be ignored.
|
||||
Builder* column_keys(
|
||||
ColumnPathToDecryptionPropertiesMap column_decryption_properties);
|
||||
|
||||
/// Set a key retriever callback. Its also possible to
|
||||
/// set explicit footer or column keys on this file property object.
|
||||
/// Upon file decryption, availability of explicit keys is checked before
|
||||
/// invocation of the retriever callback.
|
||||
/// If an explicit key is available for a footer or a column,
|
||||
/// its key metadata will be ignored.
|
||||
Builder* key_retriever(std::shared_ptr<DecryptionKeyRetriever> key_retriever);
|
||||
|
||||
/// Skip integrity verification of plaintext footers.
|
||||
/// If not called, integrity of plaintext footers will be checked in runtime,
|
||||
/// and an exception will be thrown in the following situations:
|
||||
/// - footer signing key is not available
|
||||
/// (not passed, or not found by key retriever)
|
||||
/// - footer content and signature don't match
|
||||
Builder* disable_footer_signature_verification() {
|
||||
check_plaintext_footer_integrity_ = false;
|
||||
return this;
|
||||
}
|
||||
|
||||
/// Explicitly supply the file AAD prefix.
|
||||
/// A must when a prefix is used for file encryption, but not stored in file.
|
||||
/// If AAD prefix is stored in file, it will be compared to the explicitly
|
||||
/// supplied value and an exception will be thrown if they differ.
|
||||
Builder* aad_prefix(std::string aad_prefix);
|
||||
|
||||
/// Set callback for verification of AAD Prefixes stored in file.
|
||||
Builder* aad_prefix_verifier(std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier);
|
||||
|
||||
/// By default, reading plaintext (unencrypted) files is not
|
||||
/// allowed when using a decryptor
|
||||
/// - in order to detect files that were not encrypted by mistake.
|
||||
/// However, the default behavior can be overridden by calling this method.
|
||||
/// The caller should use then a different method to ensure encryption
|
||||
/// of files with sensitive data.
|
||||
Builder* plaintext_files_allowed() {
|
||||
plaintext_files_allowed_ = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
std::shared_ptr<FileDecryptionProperties> build() {
|
||||
return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties(
|
||||
footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_,
|
||||
aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_));
|
||||
}
|
||||
|
||||
private:
|
||||
::arrow::util::SecureString footer_key_;
|
||||
std::string aad_prefix_;
|
||||
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
|
||||
ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
|
||||
|
||||
std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
|
||||
bool check_plaintext_footer_integrity_;
|
||||
bool plaintext_files_allowed_;
|
||||
};
|
||||
|
||||
const ::arrow::util::SecureString& column_key(const std::string& column_path) const;
|
||||
|
||||
const ::arrow::util::SecureString& footer_key() const { return footer_key_; }
|
||||
|
||||
const std::string& aad_prefix() const { return aad_prefix_; }
|
||||
|
||||
const std::shared_ptr<DecryptionKeyRetriever>& key_retriever() const {
|
||||
return key_retriever_;
|
||||
}
|
||||
|
||||
bool check_plaintext_footer_integrity() const {
|
||||
return check_plaintext_footer_integrity_;
|
||||
}
|
||||
|
||||
bool plaintext_files_allowed() const { return plaintext_files_allowed_; }
|
||||
|
||||
const std::shared_ptr<AADPrefixVerifier>& aad_prefix_verifier() const {
|
||||
return aad_prefix_verifier_;
|
||||
}
|
||||
|
||||
private:
|
||||
::arrow::util::SecureString footer_key_;
|
||||
std::string aad_prefix_;
|
||||
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
|
||||
ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
|
||||
std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
|
||||
bool check_plaintext_footer_integrity_;
|
||||
bool plaintext_files_allowed_;
|
||||
|
||||
FileDecryptionProperties(
|
||||
::arrow::util::SecureString footer_key,
|
||||
std::shared_ptr<DecryptionKeyRetriever> key_retriever,
|
||||
bool check_plaintext_footer_integrity, std::string aad_prefix,
|
||||
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
|
||||
ColumnPathToDecryptionPropertiesMap column_decryption_properties,
|
||||
bool plaintext_files_allowed);
|
||||
};
|
||||
|
||||
class PARQUET_EXPORT FileEncryptionProperties {
|
||||
public:
|
||||
class PARQUET_EXPORT Builder {
|
||||
public:
|
||||
explicit Builder(::arrow::util::SecureString footer_key)
|
||||
: parquet_cipher_(kDefaultEncryptionAlgorithm),
|
||||
encrypted_footer_(kDefaultEncryptedFooter),
|
||||
footer_key_(std::move(footer_key)) {
|
||||
store_aad_prefix_in_file_ = false;
|
||||
}
|
||||
|
||||
/// Create files with plaintext footer.
|
||||
/// If not called, the files will be created with encrypted footer (default).
|
||||
Builder* set_plaintext_footer() {
|
||||
encrypted_footer_ = false;
|
||||
return this;
|
||||
}
|
||||
|
||||
/// Set encryption algorithm.
|
||||
/// If not called, files will be encrypted with AES_GCM_V1 (default).
|
||||
Builder* algorithm(ParquetCipher::type parquet_cipher) {
|
||||
parquet_cipher_ = parquet_cipher;
|
||||
return this;
|
||||
}
|
||||
|
||||
/// Set a key retrieval metadata (converted from String).
|
||||
/// use either footer_key_metadata or footer_key_id, not both.
|
||||
Builder* footer_key_id(std::string key_id);
|
||||
|
||||
/// Set a key retrieval metadata.
|
||||
/// use either footer_key_metadata or footer_key_id, not both.
|
||||
Builder* footer_key_metadata(std::string footer_key_metadata);
|
||||
|
||||
/// Set the file AAD Prefix.
|
||||
Builder* aad_prefix(std::string aad_prefix);
|
||||
|
||||
/// Skip storing AAD Prefix in file.
|
||||
/// If not called, and if AAD Prefix is set, it will be stored.
|
||||
Builder* disable_aad_prefix_storage();
|
||||
|
||||
/// Set the list of encrypted columns and their properties (keys etc).
|
||||
/// If not called, all columns will be encrypted with the footer key.
|
||||
/// If called, the file columns not in the list will be left unencrypted.
|
||||
Builder* encrypted_columns(ColumnPathToEncryptionPropertiesMap encrypted_columns);
|
||||
|
||||
std::shared_ptr<FileEncryptionProperties> build() {
|
||||
return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties(
|
||||
parquet_cipher_, footer_key_, footer_key_metadata_, encrypted_footer_,
|
||||
aad_prefix_, store_aad_prefix_in_file_, encrypted_columns_));
|
||||
}
|
||||
|
||||
private:
|
||||
ParquetCipher::type parquet_cipher_;
|
||||
bool encrypted_footer_;
|
||||
::arrow::util::SecureString footer_key_;
|
||||
std::string footer_key_metadata_;
|
||||
|
||||
std::string aad_prefix_;
|
||||
bool store_aad_prefix_in_file_;
|
||||
ColumnPathToEncryptionPropertiesMap encrypted_columns_;
|
||||
};
|
||||
|
||||
bool encrypted_footer() const { return encrypted_footer_; }
|
||||
|
||||
EncryptionAlgorithm algorithm() const { return algorithm_; }
|
||||
|
||||
const ::arrow::util::SecureString& footer_key() const { return footer_key_; }
|
||||
|
||||
const std::string& footer_key_metadata() const { return footer_key_metadata_; }
|
||||
|
||||
const std::string& file_aad() const { return file_aad_; }
|
||||
|
||||
std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
|
||||
const std::string& column_path);
|
||||
|
||||
const ColumnPathToEncryptionPropertiesMap& encrypted_columns() const {
|
||||
return encrypted_columns_;
|
||||
}
|
||||
|
||||
private:
|
||||
EncryptionAlgorithm algorithm_;
|
||||
::arrow::util::SecureString footer_key_;
|
||||
std::string footer_key_metadata_;
|
||||
bool encrypted_footer_;
|
||||
std::string file_aad_;
|
||||
std::string aad_prefix_;
|
||||
bool store_aad_prefix_in_file_;
|
||||
ColumnPathToEncryptionPropertiesMap encrypted_columns_;
|
||||
|
||||
FileEncryptionProperties(ParquetCipher::type cipher,
|
||||
::arrow::util::SecureString footer_key,
|
||||
std::string footer_key_metadata, bool encrypted_footer,
|
||||
std::string aad_prefix, bool store_aad_prefix_in_file,
|
||||
ColumnPathToEncryptionPropertiesMap encrypted_columns);
|
||||
};
|
||||
|
||||
} // namespace parquet
|
||||
@@ -0,0 +1,57 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "arrow/filesystem/filesystem.h"
|
||||
#include "parquet/platform.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
/// Stores encryption key material outside the Parquet file, for example in a separate
|
||||
/// small file in the same folder. This is important for “key rotation”, when MEKs have to
|
||||
/// be changed (if compromised; or periodically, just in case) - without modifying the
|
||||
/// Parquet files (often immutable).
|
||||
class PARQUET_EXPORT FileKeyMaterialStore {
|
||||
public:
|
||||
/// Add key material for one encryption key.
|
||||
virtual void AddKeyMaterial(std::string key_id_in_file, std::string key_material) = 0;
|
||||
|
||||
/// Get key material
|
||||
virtual std::string GetKeyMaterial(std::string key_id_in_file) = 0;
|
||||
|
||||
/// After key material was added for all keys in the given Parquet file,
|
||||
/// save material in persistent store.
|
||||
virtual void SaveMaterial() = 0;
|
||||
|
||||
/// Remove key material from persistent store. Used in key rotation.
|
||||
virtual void RemoveMaterial() = 0;
|
||||
|
||||
/// Move key material to another store. Used in key rotation.
|
||||
virtual void MoveMaterialTo(std::shared_ptr<FileKeyMaterialStore> target_key_store) = 0;
|
||||
|
||||
/// Returns the Set of all key IDs in this store (for the given Parquet file)
|
||||
virtual std::vector<std::string> GetKeyIDSet() = 0;
|
||||
|
||||
virtual ~FileKeyMaterialStore() {}
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,96 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "arrow/util/concurrent_map.h"
|
||||
#include "arrow/util/secure_string.h"
|
||||
|
||||
#include "parquet/encryption/encryption.h"
|
||||
#include "parquet/encryption/file_system_key_material_store.h"
|
||||
#include "parquet/encryption/key_material.h"
|
||||
#include "parquet/encryption/key_toolkit.h"
|
||||
#include "parquet/encryption/key_toolkit_internal.h"
|
||||
#include "parquet/encryption/kms_client.h"
|
||||
#include "parquet/platform.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
// This class will retrieve the key from "key metadata", following these steps:
|
||||
// 1. Parse "key metadata" (see structure in KeyMetadata class).
|
||||
// 2. Retrieve "key material" which can be stored inside or outside "key metadata".
|
||||
// 3. Unwrap the "data encryption key" from "key material". There are 2 modes:
|
||||
// 3.1. single wrapping: decrypt the wrapped "data encryption key" directly with "master
|
||||
// encryption key" 3.2. double wrapping: 2 steps: 3.2.1. "key encryption key" is decrypted
|
||||
// with "master encryption key" 3.2.2. "data encryption key" is decrypted with the above
|
||||
// "key encryption key"
|
||||
class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever {
 public:
  /// \brief Constructor taking the KeyToolkit as a shared pointer.
  ///
  /// key_toolkit and kms_connection_config are used to look up the KmsClient in the
  /// cache, or to create it when the cache has no entry for it yet.
  /// cache_lifetime_seconds is the lifetime of the KmsClient inside that cache.
  /// For files that use external key material, both the Parquet file path and the
  /// file system must be specified.
  FileKeyUnwrapper(
      std::shared_ptr<KeyToolkit> key_toolkit,
      const KmsConnectionConfig& kms_connection_config,
      double cache_lifetime_seconds,
      const std::string& file_path = "",
      const std::shared_ptr<::arrow::fs::FileSystem>& file_system = NULLPTR);

  /// \brief Same as above, but the KeyToolkit is passed as a raw pointer.
  FileKeyUnwrapper(
      KeyToolkit* key_toolkit,
      const KmsConnectionConfig& kms_connection_config,
      double cache_lifetime_seconds,
      const std::string& file_path = "",
      const std::shared_ptr<::arrow::fs::FileSystem>& file_system = NULLPTR);

  /// \brief Raw-pointer overload working on an already existing key material store.
  ///
  /// The supplied key_material_store is used instead of creating one from a file
  /// path and file system when it is needed.
  FileKeyUnwrapper(
      KeyToolkit* key_toolkit,
      const KmsConnectionConfig& kms_connection_config,
      double cache_lifetime_seconds,
      std::shared_ptr<FileKeyMaterialStore> key_material_store);

  /// \brief Obtain the data key described by the given key metadata.
  ::arrow::util::SecureString GetKey(const std::string& key_metadata_bytes) override;

  /// \brief Obtain the data key, paired with its master key id, from key material.
  KeyWithMasterId GetDataEncryptionKey(const KeyMaterial& key_material);

 private:
  // Constructor accepting every field at once.
  // NOTE(review): presumably the target of the public constructors - defined out of
  // line, confirm in the implementation file.
  FileKeyUnwrapper(
      std::shared_ptr<KeyToolkit> key_toolkit_owner,
      KeyToolkit* key_toolkit,
      const KmsConnectionConfig& kms_connection_config,
      double cache_lifetime_seconds,
      std::shared_ptr<FileKeyMaterialStore> key_material_store,
      const std::string& file_path,
      const std::shared_ptr<::arrow::fs::FileSystem>& file_system);

  std::shared_ptr<KmsClient> GetKmsClientFromConfigOrKeyMaterial(
      const KeyMaterial& key_material);

  /// Key Encryption Key (KEK) ID -> KEK bytes, for the current token
  std::shared_ptr<::arrow::util::ConcurrentMap<std::string, ::arrow::util::SecureString>>
      kek_per_kek_id_;
  // NOTE(review): presumably keeps the toolkit alive when it was handed over as a
  // shared_ptr, while key_toolkit_ is the pointer actually used - confirm.
  std::shared_ptr<KeyToolkit> key_toolkit_owner_;
  KeyToolkit* key_toolkit_;
  KmsConnectionConfig kms_connection_config_;
  const double cache_entry_lifetime_seconds_;
  std::shared_ptr<FileKeyMaterialStore> key_material_store_;
  const std::string file_path_;
  std::shared_ptr<::arrow::fs::FileSystem> file_system_;
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,84 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "arrow/util/concurrent_map.h"
|
||||
|
||||
#include "parquet/encryption/file_key_material_store.h"
|
||||
#include "parquet/encryption/key_encryption_key.h"
|
||||
#include "parquet/encryption/key_toolkit.h"
|
||||
#include "parquet/encryption/kms_client.h"
|
||||
#include "parquet/platform.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
// This class will generate "key metadata" from "data encryption key" and "master key",
|
||||
// following these steps:
|
||||
// 1. Wrap "data encryption key". There are 2 modes:
|
||||
// 1.1. single wrapping: encrypt "data encryption key" directly with "master encryption
|
||||
// key"
|
||||
// 1.2. double wrapping: 2 steps:
|
||||
// 1.2.1. "key encryption key" is randomized (see KeyEncryptionKey class)
|
||||
// 1.2.2. "data encryption key" is encrypted with the above "key encryption key"
|
||||
// 2. Create "key material" (see structure in KeyMaterial class)
|
||||
// 3. Create "key metadata" with "key material" inside or a reference to outside "key
|
||||
// material" (see structure in KeyMetadata class).
|
||||
class PARQUET_EXPORT FileKeyWrapper {
|
||||
public:
|
||||
static constexpr int kKeyEncryptionKeyLength = 16;
|
||||
static constexpr int kKeyEncryptionKeyIdLength = 16;
|
||||
|
||||
/// key_toolkit and kms_connection_config is to get KmsClient from the cache or create
|
||||
/// KmsClient if it's not in the cache yet. cache_entry_lifetime_seconds is life time of
|
||||
/// KmsClient in the cache. key_material_store is to store "key material" outside
|
||||
/// parquet file, NULL if "key material" is stored inside parquet file.
|
||||
FileKeyWrapper(KeyToolkit* key_toolkit,
|
||||
const KmsConnectionConfig& kms_connection_config,
|
||||
std::shared_ptr<FileKeyMaterialStore> key_material_store,
|
||||
double cache_entry_lifetime_seconds, bool double_wrapping);
|
||||
|
||||
/// Creates key_metadata field for a given data key, via wrapping the key with the
|
||||
/// master key.
|
||||
/// When external key material is used, an identifier is usually generated automatically
|
||||
/// but may be specified explicitly to support key rotation,
|
||||
/// which requires keeping the same identifiers.
|
||||
std::string GetEncryptionKeyMetadata(const ::arrow::util::SecureString& data_key,
|
||||
const std::string& master_key_id,
|
||||
bool is_footer_key,
|
||||
std::string key_id_in_file = "");
|
||||
|
||||
private:
|
||||
KeyEncryptionKey CreateKeyEncryptionKey(const std::string& master_key_id);
|
||||
|
||||
/// A map of Master Encryption Key ID -> KeyEncryptionKey, for the current token
|
||||
std::shared_ptr<::arrow::util::ConcurrentMap<std::string, KeyEncryptionKey>>
|
||||
kek_per_master_key_id_;
|
||||
|
||||
std::shared_ptr<KmsClient> kms_client_;
|
||||
KmsConnectionConfig kms_connection_config_;
|
||||
std::shared_ptr<FileKeyMaterialStore> key_material_store_;
|
||||
const double cache_entry_lifetime_seconds_;
|
||||
const bool double_wrapping_;
|
||||
uint16_t key_counter_;
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,89 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <set>
#include <string>
#include <unordered_map>

#include "arrow/filesystem/filesystem.h"

#include "parquet/encryption/file_key_material_store.h"
#include "parquet/exception.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
/// A FileKeyMaterialStore that stores key material in a file system file in the same
|
||||
/// folder as the Parquet file.
|
||||
class PARQUET_EXPORT FileSystemKeyMaterialStore : public FileKeyMaterialStore {
 public:
  static constexpr const char kKeyMaterialFilePrefix[] = "_KEY_MATERIAL_FOR_";
  static constexpr const char kTempFilePrefix[] = "_TMP";
  static constexpr const char kKeyMaterialFileSuffix[] = ".json";

  FileSystemKeyMaterialStore() {}
  FileSystemKeyMaterialStore(std::string key_material_file_path,
                             std::shared_ptr<::arrow::fs::FileSystem> file_system);

  /// Creates a new file system key material store for a parquet file.
  /// When use_tmp_prefix is true, files are saved with an extra _TMP prefix so they don't
  /// conflict with existing external material files. This is useful during key rotation
  /// so that temporary key material files can be created while using the existing key
  /// material, before moving the key material to the non-temporary location.
  static std::shared_ptr<FileSystemKeyMaterialStore> Make(
      std::string parquet_file_path, std::shared_ptr<::arrow::fs::FileSystem> file_system,
      bool use_tmp_prefix);

  /// Add key material for one encryption key.
  ///
  /// Uses emplace, so an entry that already exists under key_id_in_file is left
  /// untouched.
  void AddKeyMaterial(std::string key_id_in_file, std::string key_material) {
    key_material_map_.emplace(std::move(key_id_in_file), std::move(key_material));
  }

  /// Get the key material registered under key_id_in_file.
  ///
  /// When the in-memory map is empty, LoadKeyMaterialMap() is called first.
  ///
  /// \throws ParquetException if there is no key material for key_id_in_file
  std::string GetKeyMaterial(std::string key_id_in_file) {
    if (key_material_map_.empty()) {
      LoadKeyMaterialMap();
    }
    const auto found = key_material_map_.find(key_id_in_file);
    // An unknown key id must not dereference end(): that is undefined behaviour.
    if (found == key_material_map_.end()) {
      throw ParquetException("No key material found for key id '" + key_id_in_file +
                             "' in key material file '" + key_material_file_path_ + "'");
    }
    return found->second;
  }

  /// After key material was added for all keys in the given Parquet file,
  /// save material in persistent store.
  void SaveMaterial();

  /// Remove key material from persistent store. Used in key rotation.
  void RemoveMaterial();

  /// Move key material to another store. Used in key rotation.
  void MoveMaterialTo(std::shared_ptr<FileKeyMaterialStore> target_key_store);

  /// Returns the Set of all key IDs in this store (for the given Parquet file)
  std::vector<std::string> GetKeyIDSet();

 private:
  std::string GetStorageFilePath() { return key_material_file_path_; }

  std::string BuildKeyMaterialMapJson();
  void LoadKeyMaterialMap();

  std::string key_material_file_path_;
  std::shared_ptr<::arrow::fs::FileSystem> file_system_;
  /// Maps ID of a key in Parquet file and key material
  std::unordered_map<std::string, std::string> key_material_map_;
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,58 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/util/base64.h"
|
||||
#include "arrow/util/secure_string.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
// In the double wrapping mode, each "data encryption key" (DEK) is encrypted with a "key
// encryption key" (KEK), that in turn is encrypted with a "master encryption key" (MEK).
// In a writer process, a random KEK is generated for each MEK ID, and cached in a
// <MEK-ID : KEK> map. This makes it possible to interact with a KMS server only once for
// each MEK, in order to wrap its KEK. "Data encryption key" (DEK) wrapping is performed
// locally, and does not involve an interaction with a KMS server.
|
||||
class KeyEncryptionKey {
|
||||
public:
|
||||
KeyEncryptionKey(::arrow::util::SecureString kek_bytes, std::string kek_id,
|
||||
std::string encoded_wrapped_kek)
|
||||
: kek_bytes_(std::move(kek_bytes)),
|
||||
kek_id_(std::move(kek_id)),
|
||||
encoded_kek_id_(::arrow::util::base64_encode(kek_id_)),
|
||||
encoded_wrapped_kek_(std::move(encoded_wrapped_kek)) {}
|
||||
|
||||
const ::arrow::util::SecureString& kek_bytes() const { return kek_bytes_; }
|
||||
|
||||
const std::string& kek_id() const { return kek_id_; }
|
||||
|
||||
const std::string& encoded_kek_id() const { return encoded_kek_id_; }
|
||||
|
||||
const std::string& encoded_wrapped_kek() const { return encoded_wrapped_kek_; }
|
||||
|
||||
private:
|
||||
::arrow::util::SecureString kek_bytes_;
|
||||
std::string kek_id_;
|
||||
std::string encoded_kek_id_;
|
||||
std::string encoded_wrapped_kek_;
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,129 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "parquet/platform.h"
|
||||
|
||||
// Forward declaration only, so the JSON parser header does not have to be included
// here.
namespace arrow::json::internal {
class ObjectParser;
}  // namespace arrow::json::internal
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
// KeyMaterial class represents the "key material", keeping the information that allows
|
||||
// readers to recover an encryption key (see description of the KeyMetadata class). The
|
||||
// keytools package (PARQUET-1373) implements the "envelope encryption" pattern, in a
|
||||
// "single wrapping" or "double wrapping" mode. In the single wrapping mode, the key
|
||||
// material is generated by encrypting the "data encryption key" (DEK) by a "master key".
|
||||
// In the double wrapping mode, the key material is generated by encrypting the DEK by a
|
||||
// "key encryption key" (KEK), that in turn is encrypted by a "master key".
|
||||
//
|
||||
// Key material is kept in a flat json object, with the following fields:
|
||||
// 1. "keyMaterialType" - a String, with the type of key material. In the current
|
||||
// version, only one value is allowed - "PKMT1" (stands
|
||||
// for "parquet key management tools, version 1"). For external key material storage,
|
||||
// this field is written in both "key metadata" and "key material" jsons. For internal
|
||||
// key material storage, this field is written only once in the common json.
|
||||
// 2. "isFooterKey" - a boolean. If true, means that the material belongs to a file footer
|
||||
// key, and keeps additional information (such as
|
||||
// KMS instance ID and URL). If false, means that the material belongs to a column
|
||||
// key.
|
||||
// 3. "kmsInstanceID" - a String, with the KMS Instance ID. Written only in footer key
|
||||
// material.
|
||||
// 4. "kmsInstanceURL" - a String, with the KMS Instance URL. Written only in footer key
|
||||
// material.
|
||||
// 5. "masterKeyID" - a String, with the ID of the master key used to generate the
|
||||
// material.
|
||||
// 6. "wrappedDEK" - a String, with the wrapped DEK (base64 encoding).
|
||||
// 7. "doubleWrapping" - a boolean. If true, means that the material was generated in
|
||||
// double wrapping mode.
|
||||
// If false - in single wrapping mode.
|
||||
// 8. "keyEncryptionKeyID" - a String, with the ID of the KEK used to generate the
|
||||
// material. Written only in double wrapping mode.
|
||||
// 9. "wrappedKEK" - a String, with the wrapped KEK (base64 encoding). Written only in
|
||||
// double wrapping mode.
|
||||
class PARQUET_EXPORT KeyMaterial {
|
||||
public:
|
||||
// these fields are defined in a specification and should never be changed
|
||||
static constexpr const char kKeyMaterialTypeField[] = "keyMaterialType";
|
||||
static constexpr const char kKeyMaterialType1[] = "PKMT1";
|
||||
|
||||
static constexpr const char kFooterKeyIdInFile[] = "footerKey";
|
||||
static constexpr const char kColumnKeyIdInFilePrefix[] = "columnKey";
|
||||
|
||||
static constexpr const char kIsFooterKeyField[] = "isFooterKey";
|
||||
static constexpr const char kDoubleWrappingField[] = "doubleWrapping";
|
||||
static constexpr const char kKmsInstanceIdField[] = "kmsInstanceID";
|
||||
static constexpr const char kKmsInstanceUrlField[] = "kmsInstanceURL";
|
||||
static constexpr const char kMasterKeyIdField[] = "masterKeyID";
|
||||
static constexpr const char kWrappedDataEncryptionKeyField[] = "wrappedDEK";
|
||||
static constexpr const char kKeyEncryptionKeyIdField[] = "keyEncryptionKeyID";
|
||||
static constexpr const char kWrappedKeyEncryptionKeyField[] = "wrappedKEK";
|
||||
|
||||
public:
|
||||
KeyMaterial() = default;
|
||||
|
||||
static KeyMaterial Parse(const std::string& key_material_string);
|
||||
|
||||
static KeyMaterial Parse(
|
||||
const ::arrow::json::internal::ObjectParser* key_material_json);
|
||||
|
||||
/// This method returns a json string that will be stored either inside a parquet file
|
||||
/// or in a key material store outside the parquet file.
|
||||
static std::string SerializeToJson(bool is_footer_key,
|
||||
const std::string& kms_instance_id,
|
||||
const std::string& kms_instance_url,
|
||||
const std::string& master_key_id,
|
||||
bool is_double_wrapped, const std::string& kek_id,
|
||||
const std::string& encoded_wrapped_kek,
|
||||
const std::string& encoded_wrapped_dek,
|
||||
bool is_internal_storage);
|
||||
|
||||
bool is_footer_key() const { return is_footer_key_; }
|
||||
bool is_double_wrapped() const { return is_double_wrapped_; }
|
||||
const std::string& master_key_id() const { return master_key_id_; }
|
||||
const std::string& wrapped_dek() const { return encoded_wrapped_dek_; }
|
||||
const std::string& kek_id() const { return kek_id_; }
|
||||
const std::string& wrapped_kek() const { return encoded_wrapped_kek_; }
|
||||
const std::string& kms_instance_id() const { return kms_instance_id_; }
|
||||
const std::string& kms_instance_url() const { return kms_instance_url_; }
|
||||
|
||||
private:
|
||||
KeyMaterial(bool is_footer_key, const std::string& kms_instance_id,
|
||||
const std::string& kms_instance_url, const std::string& master_key_id,
|
||||
bool is_double_wrapped, const std::string& kek_id,
|
||||
const std::string& encoded_wrapped_kek,
|
||||
const std::string& encoded_wrapped_dek);
|
||||
|
||||
bool is_footer_key_;
|
||||
std::string kms_instance_id_;
|
||||
std::string kms_instance_url_;
|
||||
std::string master_key_id_;
|
||||
bool is_double_wrapped_;
|
||||
std::string kek_id_;
|
||||
std::string encoded_wrapped_kek_;
|
||||
std::string encoded_wrapped_dek_;
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,91 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <variant>
|
||||
|
||||
#include "parquet/encryption/key_material.h"
|
||||
#include "parquet/exception.h"
|
||||
#include "parquet/platform.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
// Parquet encryption specification defines "key metadata" as an arbitrary byte array,
|
||||
// generated by file writers for each encryption key, and passed to the low level API for
|
||||
// storage in the file footer. The "key metadata" field is made available to file readers
|
||||
// to enable recovery of the key. This interface can be utilized for implementation
|
||||
// of any key management scheme.
|
||||
//
|
||||
// The keytools package (PARQUET-1373) implements one approach, of many possible, to key
|
||||
// management and to generation of the "key metadata" fields. This approach, based on the
|
||||
// "envelope encryption" pattern, allows integration with KMS servers. It keeps the actual
|
||||
// material, required to recover a key, in a "key material" object (see the KeyMaterial
|
||||
// class for details). This class is implemented to support version 1 of the parquet key
|
||||
// management tools specification.
|
||||
//
|
||||
// KeyMetadata writes (and reads) the "key metadata" field as a flat json object,
|
||||
// with the following fields:
|
||||
// 1. "keyMaterialType" - a String, with the type of key material.
|
||||
// 2. "internalStorage" - a boolean. If true, means that "key material" is kept inside the
|
||||
// "key metadata" field. If false, "key material" is kept externally (outside Parquet
|
||||
// files) - in this case, "key metadata" keeps a reference to the external "key material".
|
||||
// 3. "keyReference" - a String, with the reference to the external "key material".
|
||||
// Written only if internalStorage is false.
|
||||
//
|
||||
// If internalStorage is true, "key material" is a part of "key metadata", and the json
|
||||
// keeps additional fields, described in the KeyMaterial class.
|
||||
class PARQUET_EXPORT KeyMetadata {
|
||||
public:
|
||||
static constexpr const char kKeyMaterialInternalStorageField[] = "internalStorage";
|
||||
static constexpr const char kKeyReferenceField[] = "keyReference";
|
||||
|
||||
/// key_metadata_bytes is the key metadata field stored in the parquet file,
|
||||
/// in the serialized json object format.
|
||||
static KeyMetadata Parse(const std::string& key_metadata_bytes);
|
||||
|
||||
static std::string CreateSerializedForExternalMaterial(
|
||||
const std::string& key_reference);
|
||||
|
||||
bool key_material_stored_internally() const { return is_internal_storage_; }
|
||||
|
||||
const KeyMaterial& key_material() const {
|
||||
if (!is_internal_storage_) {
|
||||
throw ParquetException("key material is stored externally.");
|
||||
}
|
||||
return ::std::get<KeyMaterial>(key_material_or_reference_);
|
||||
}
|
||||
|
||||
const std::string& key_reference() const {
|
||||
if (is_internal_storage_) {
|
||||
throw ParquetException("key material is stored internally.");
|
||||
}
|
||||
return ::std::get<std::string>(key_material_or_reference_);
|
||||
}
|
||||
|
||||
private:
|
||||
explicit KeyMetadata(const KeyMaterial& key_material);
|
||||
explicit KeyMetadata(const std::string& key_reference);
|
||||
|
||||
bool is_internal_storage_;
|
||||
/// If is_internal_storage_ is true, KeyMaterial is set,
|
||||
/// else a string referencing to an outside "key material" is set.
|
||||
::std::variant<KeyMaterial, std::string> key_material_or_reference_;
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,106 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "parquet/encryption/key_encryption_key.h"
|
||||
#include "parquet/encryption/kms_client.h"
|
||||
#include "parquet/encryption/kms_client_factory.h"
|
||||
#include "parquet/encryption/two_level_cache_with_expiration.h"
|
||||
#include "parquet/platform.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
// Cache clean period used for key rotation: 60 min * 60 s = 1 hour.
static constexpr uint64_t kCacheCleanPeriodForKeyRotation = 3600;
|
||||
|
||||
// KeyToolkit is a utility that keeps various tools for key management (such as key
|
||||
// rotation, kms client instantiation, cache control, etc), plus a number of auxiliary
|
||||
// classes for internal use.
|
||||
class PARQUET_EXPORT KeyToolkit {
|
||||
public:
|
||||
KeyToolkit() { last_cache_clean_for_key_rotation_time_ = {}; }
|
||||
|
||||
/// KMS client two level cache: token -> KMSInstanceId -> KmsClient
|
||||
TwoLevelCacheWithExpiration<std::shared_ptr<KmsClient>>& kms_client_cache_per_token() {
|
||||
return kms_client_cache_;
|
||||
}
|
||||
/// Key encryption key two level cache for wrapping: token -> MasterEncryptionKeyId ->
|
||||
/// KeyEncryptionKey
|
||||
TwoLevelCacheWithExpiration<KeyEncryptionKey>& kek_write_cache_per_token() {
|
||||
return key_encryption_key_write_cache_;
|
||||
}
|
||||
|
||||
/// Key encryption key two level cache for unwrapping: token -> KeyEncryptionKeyId ->
|
||||
/// KeyEncryptionKeyBytes
|
||||
TwoLevelCacheWithExpiration<::arrow::util::SecureString>& kek_read_cache_per_token() {
|
||||
return key_encryption_key_read_cache_;
|
||||
}
|
||||
|
||||
std::shared_ptr<KmsClient> GetKmsClient(
|
||||
const KmsConnectionConfig& kms_connection_config, double cache_entry_lifetime_ms);
|
||||
|
||||
/// Flush any caches that are tied to the (compromised) access_token
|
||||
void RemoveCacheEntriesForToken(const std::string& access_token);
|
||||
|
||||
void RemoveCacheEntriesForAllTokens();
|
||||
|
||||
void RegisterKmsClientFactory(std::shared_ptr<KmsClientFactory> kms_client_factory) {
|
||||
if (kms_client_factory_ != NULLPTR) {
|
||||
throw ParquetException("KMS client factory has already been registered.");
|
||||
}
|
||||
kms_client_factory_ = std::move(kms_client_factory);
|
||||
}
|
||||
|
||||
/// Key rotation. In the single wrapping mode, decrypts data keys with old master keys,
|
||||
/// then encrypts them with new master keys. In the double wrapping mode, decrypts KEKs
|
||||
/// (key encryption keys) with old master keys, generates new KEKs and encrypts them
|
||||
/// with new master keys. Works only if key material is not stored internally in file
|
||||
/// footers. Not supported in local key wrapping mode. Method can be run by multiple
|
||||
/// threads, but each thread must work on different files.
|
||||
void RotateMasterKeys(const KmsConnectionConfig& kms_connection_config,
|
||||
const std::string& parquet_file_path,
|
||||
const std::shared_ptr<::arrow::fs::FileSystem>& file_system,
|
||||
bool double_wrapping, double cache_lifetime_seconds);
|
||||
|
||||
private:
|
||||
TwoLevelCacheWithExpiration<std::shared_ptr<KmsClient>> kms_client_cache_;
|
||||
TwoLevelCacheWithExpiration<KeyEncryptionKey> key_encryption_key_write_cache_;
|
||||
TwoLevelCacheWithExpiration<::arrow::util::SecureString> key_encryption_key_read_cache_;
|
||||
std::shared_ptr<KmsClientFactory> kms_client_factory_;
|
||||
mutable ::arrow::util::Mutex last_cache_clean_for_key_rotation_time_mutex_;
|
||||
internal::TimePoint last_cache_clean_for_key_rotation_time_;
|
||||
};
|
||||
|
||||
// "data encryption key" and "master key identifier" are paired together as output when
|
||||
// parsing from "key material"
|
||||
class PARQUET_EXPORT KeyWithMasterId {
|
||||
public:
|
||||
KeyWithMasterId(::arrow::util::SecureString key_bytes, std::string master_id)
|
||||
: key_bytes_(std::move(key_bytes)), master_id_(std::move(master_id)) {}
|
||||
|
||||
const ::arrow::util::SecureString& data_key() const { return key_bytes_; }
|
||||
const std::string& master_id() const { return master_id_; }
|
||||
|
||||
private:
|
||||
::arrow::util::SecureString key_bytes_;
|
||||
std::string master_id_;
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,97 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "arrow/util/mutex.h"
|
||||
#include "arrow/util/secure_string.h"
|
||||
|
||||
#include "parquet/exception.h"
|
||||
#include "parquet/platform.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
/// This class wraps the key access token of a KMS server. If your token changes over
|
||||
/// time, you should keep the reference to the KeyAccessToken object and call Refresh()
|
||||
/// method every time you have a new token.
|
||||
class PARQUET_EXPORT KeyAccessToken {
|
||||
public:
|
||||
KeyAccessToken() = default;
|
||||
|
||||
explicit KeyAccessToken(const std::string value) : value_(value) {}
|
||||
|
||||
void Refresh(const std::string& new_value) {
|
||||
auto lock = mutex_.Lock();
|
||||
value_ = new_value;
|
||||
}
|
||||
|
||||
const std::string& value() const {
|
||||
auto lock = mutex_.Lock();
|
||||
return value_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string value_;
|
||||
mutable ::arrow::util::Mutex mutex_;
|
||||
};
|
||||
|
||||
struct PARQUET_EXPORT KmsConnectionConfig {
|
||||
std::string kms_instance_id;
|
||||
std::string kms_instance_url;
|
||||
/// If the access token is changed in the future, you should keep a reference to
|
||||
/// this object and call Refresh() on it whenever there is a new access token.
|
||||
std::shared_ptr<KeyAccessToken> refreshable_key_access_token;
|
||||
std::unordered_map<std::string, std::string> custom_kms_conf;
|
||||
|
||||
KmsConnectionConfig();
|
||||
|
||||
const std::string& key_access_token() const {
|
||||
if (refreshable_key_access_token == NULLPTR ||
|
||||
refreshable_key_access_token->value().empty()) {
|
||||
throw ParquetException("key access token is not set!");
|
||||
}
|
||||
return refreshable_key_access_token->value();
|
||||
}
|
||||
|
||||
void SetDefaultIfEmpty();
|
||||
};
|
||||
|
||||
class PARQUET_EXPORT KmsClient {
|
||||
public:
|
||||
static constexpr const char kKmsInstanceIdDefault[] = "DEFAULT";
|
||||
static constexpr const char kKmsInstanceUrlDefault[] = "DEFAULT";
|
||||
static constexpr const char kKeyAccessTokenDefault[] = "DEFAULT";
|
||||
|
||||
/// \brief Wraps a key.
|
||||
///
|
||||
/// Encrypts it with the master key, encodes the result
|
||||
/// and potentially adds a KMS-specific metadata.
|
||||
virtual std::string WrapKey(const ::arrow::util::SecureString& key_bytes,
|
||||
const std::string& master_key_identifier) = 0;
|
||||
|
||||
/// \brief Decrypts (unwraps) a key with the master key.
|
||||
virtual ::arrow::util::SecureString UnwrapKey(
|
||||
const std::string& wrapped_key, const std::string& master_key_identifier) = 0;
|
||||
|
||||
virtual ~KmsClient() {}
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,38 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "parquet/encryption/kms_client.h"
|
||||
#include "parquet/platform.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
class PARQUET_EXPORT KmsClientFactory {
|
||||
public:
|
||||
explicit KmsClientFactory(bool wrap_locally = false) : wrap_locally_(wrap_locally) {}
|
||||
|
||||
virtual ~KmsClientFactory() = default;
|
||||
|
||||
virtual std::shared_ptr<KmsClient> CreateKmsClient(
|
||||
const KmsConnectionConfig& kms_connection_config) = 0;
|
||||
|
||||
protected:
|
||||
bool wrap_locally_;
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,95 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/util/concurrent_map.h"
|
||||
|
||||
#include "parquet/encryption/kms_client.h"
|
||||
#include "parquet/platform.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
/// This class supports local wrapping mode, master keys will be fetched from the KMS
|
||||
/// server and used to encrypt other keys (data encryption keys or key encryption keys).
|
||||
class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient {
|
||||
public:
|
||||
static constexpr const char kLocalWrapNoKeyVersion[] = "NO_VERSION";
|
||||
|
||||
explicit LocalWrapKmsClient(const KmsConnectionConfig& kms_connection_config);
|
||||
|
||||
std::string WrapKey(const ::arrow::util::SecureString& key_bytes,
|
||||
const std::string& master_key_identifier) override;
|
||||
|
||||
::arrow::util::SecureString UnwrapKey(
|
||||
const std::string& wrapped_key, const std::string& master_key_identifier) override;
|
||||
|
||||
protected:
|
||||
/// Get master key from the remote KMS server.
|
||||
/// Note: this function might be called by multiple threads
|
||||
virtual const ::arrow::util::SecureString& GetMasterKeyFromServer(
|
||||
const std::string& master_key_identifier) = 0;
|
||||
|
||||
private:
|
||||
/// KMS systems wrap keys by encrypting them by master keys, and attaching additional
|
||||
/// information (such as the version number of the masker key) to the result of
|
||||
/// encryption. The master key version is required in key rotation. Currently, the
|
||||
/// local wrapping mode does not support key rotation (because not all KMS systems allow
|
||||
/// to fetch a master key by its ID and version number). Still, the local wrapping mode
|
||||
/// adds a placeholder for the master key version, that will enable support for key
|
||||
/// rotation in this mode in the future, with appropriate KMS systems. This will also
|
||||
/// enable backward compatibility, where future readers will be able to extract master
|
||||
/// key version in the files written by the current code.
|
||||
///
|
||||
/// LocalKeyWrap class writes (and reads) the "key wrap" as a flat json with the
|
||||
/// following fields:
|
||||
/// 1. "masterKeyVersion" - a String, with the master key version. In the current
|
||||
/// version, only one value is allowed - "NO_VERSION".
|
||||
/// 2. "encryptedKey" - a String, with the key encrypted by the master key
|
||||
/// (base64-encoded).
|
||||
class LocalKeyWrap {
|
||||
public:
|
||||
static constexpr const char kLocalWrapKeyVersionField[] = "masterKeyVersion";
|
||||
static constexpr const char kLocalWrapEncryptedKeyField[] = "encryptedKey";
|
||||
|
||||
LocalKeyWrap(std::string master_key_version, std::string encrypted_encoded_key);
|
||||
|
||||
static std::string CreateSerialized(const std::string& encrypted_encoded_key);
|
||||
|
||||
static LocalKeyWrap Parse(const std::string& wrapped_key);
|
||||
|
||||
const std::string& master_key_version() const { return master_key_version_; }
|
||||
|
||||
const std::string& encrypted_encoded_key() const { return encrypted_encoded_key_; }
|
||||
|
||||
private:
|
||||
std::string encrypted_encoded_key_;
|
||||
std::string master_key_version_;
|
||||
};
|
||||
|
||||
const ::arrow::util::SecureString& GetKeyFromServer(const std::string& key_identifier);
|
||||
|
||||
protected:
|
||||
KmsConnectionConfig kms_connection_config_;
|
||||
::arrow::util::ConcurrentMap<std::string, ::arrow::util::SecureString>
|
||||
master_key_cache_;
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,135 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// This header declares helpers shared by the Parquet encryption tests: fixed
// test keys and field names, a temporary data directory, and the FileEncryptor
// and FileDecryptor classes that write and read back encrypted files.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "arrow/filesystem/filesystem.h"
|
||||
#include "arrow/filesystem/localfs.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/util/io_util.h"
|
||||
#include "arrow/util/secure_string.h"
|
||||
|
||||
#include "parquet/encryption/encryption.h"
|
||||
#include "parquet/test_util.h"
|
||||
|
||||
namespace parquet {
|
||||
class ParquetFileReader;
|
||||
namespace encryption::test {
|
||||
|
||||
using ::arrow::internal::TemporaryDir;
|
||||
using ::arrow::util::SecureString;
|
||||
|
||||
constexpr int kFixedLength = 10;
|
||||
|
||||
const SecureString kFooterEncryptionKey("0123456789012345");
|
||||
const SecureString kColumnEncryptionKey1("1234567890123450");
|
||||
const SecureString kColumnEncryptionKey2("1234567890123451");
|
||||
const char kFileName[] = "tester";
|
||||
|
||||
// Get the path of file inside parquet test data directory
|
||||
std::string data_file(const char* file);
|
||||
|
||||
// A temporary directory that contains the encrypted files generated in the tests.
|
||||
extern std::unique_ptr<TemporaryDir> temp_dir;
|
||||
|
||||
// Create a fresh temporary directory whose name starts with
// "parquet-encryption-test-". Returns whatever TemporaryDir::Make() returns.
inline ::arrow::Result<std::unique_ptr<TemporaryDir>> temp_data_dir() {
  const std::string dir_prefix = "parquet-encryption-test-";
  return TemporaryDir::Make(dir_prefix);
}
|
||||
|
||||
// Column names, one per field type.
constexpr char kDoubleFieldName[] = "double_field";
constexpr char kFloatFieldName[] = "float_field";
constexpr char kBooleanFieldName[] = "boolean_field";
constexpr char kInt32FieldName[] = "int32_field";
constexpr char kInt64FieldName[] = "int64_field";
constexpr char kInt96FieldName[] = "int96_field";
constexpr char kByteArrayFieldName[] = "ba_field";
constexpr char kFixedLenByteArrayFieldName[] = "flba_field";

// Master keys and their identifiers. kColumnMasterKeys and kColumnMasterKeyIds
// both have six entries.
constexpr char kFooterMasterKey[] = "0123456789012345";
constexpr char kFooterMasterKeyId[] = "kf";
constexpr const char* kColumnMasterKeys[] = {
    "1234567890123450", "1234567890123451", "1234567890123452",
    "1234567890123453", "1234567890123454", "1234567890123455"};
constexpr const char* kColumnMasterKeyIds[] = {"kc1", "kc2", "kc3",
                                               "kc4", "kc5", "kc6"};

// New master key values used to simulate key rotation
constexpr char kNewFooterMasterKey[] = "9123456789012345";
constexpr const char* kNewColumnMasterKeys[] = {
    "9234567890123450", "9234567890123451", "9234567890123452",
    "9234567890123453", "9234567890123454", "9234567890123455"};
|
||||
|
||||
// The result of this function will be used to set into TestOnlyInMemoryKmsClientFactory
|
||||
// as the key mapping to look at.
|
||||
std::unordered_map<std::string, SecureString> BuildKeyMap(const char* const* column_ids,
|
||||
const char* const* column_keys,
|
||||
const char* footer_id,
|
||||
const char* footer_key);
|
||||
|
||||
// The result of this function will be used to set into EncryptionConfiguration
|
||||
// as column keys.
|
||||
std::string BuildColumnKeyMapping();
|
||||
|
||||
// FileEncryptor and FileDecryptor are helper classes to write/read an encrypted parquet
|
||||
// file corresponding to each pair of FileEncryptionProperties/FileDecryptionProperties.
|
||||
// FileEncryptor writes the file with fixed data values and FileDecryptor reads the file
|
||||
// and verify the correctness of data values.
|
||||
class FileEncryptor {
|
||||
public:
|
||||
FileEncryptor();
|
||||
|
||||
void EncryptFile(
|
||||
std::string file,
|
||||
std::shared_ptr<parquet::FileEncryptionProperties> encryption_configurations);
|
||||
|
||||
private:
|
||||
std::shared_ptr<schema::GroupNode> SetupEncryptionSchema();
|
||||
|
||||
int num_rowgroups_ = 5;
|
||||
int rows_per_rowgroup_ = 50;
|
||||
std::shared_ptr<schema::GroupNode> schema_;
|
||||
};
|
||||
|
||||
class FileDecryptor {
|
||||
public:
|
||||
void DecryptFile(
|
||||
const std::string& file_name,
|
||||
const std::shared_ptr<FileDecryptionProperties>& file_decryption_properties);
|
||||
void DecryptPageIndex(
|
||||
const std::string& file_name,
|
||||
const std::shared_ptr<FileDecryptionProperties>& file_decryption_properties);
|
||||
|
||||
private:
|
||||
void CheckFile(
|
||||
parquet::ParquetFileReader* file_reader,
|
||||
const std::shared_ptr<FileDecryptionProperties>& file_decryption_properties);
|
||||
void CheckPageIndex(
|
||||
parquet::ParquetFileReader* file_reader,
|
||||
const std::shared_ptr<FileDecryptionProperties>& file_decryption_properties);
|
||||
};
|
||||
|
||||
} // namespace encryption::test
|
||||
} // namespace parquet
|
||||
@@ -0,0 +1,101 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "arrow/util/base64.h"
|
||||
|
||||
#include "parquet/encryption/kms_client_factory.h"
|
||||
#include "parquet/encryption/local_wrap_kms_client.h"
|
||||
#include "parquet/platform.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
// This is a mock class, built for testing only. Don't use it as an example of
|
||||
// LocalWrapKmsClient implementation.
|
||||
class TestOnlyLocalWrapInMemoryKms : public LocalWrapKmsClient {
|
||||
public:
|
||||
explicit TestOnlyLocalWrapInMemoryKms(const KmsConnectionConfig& kms_connection_config);
|
||||
|
||||
static void InitializeMasterKeys(
|
||||
const std::unordered_map<std::string, ::arrow::util::SecureString>&
|
||||
master_keys_map);
|
||||
|
||||
protected:
|
||||
const ::arrow::util::SecureString& GetMasterKeyFromServer(
|
||||
const std::string& master_key_identifier) override;
|
||||
|
||||
private:
|
||||
static std::unordered_map<std::string, ::arrow::util::SecureString> master_key_map_;
|
||||
};
|
||||
|
||||
// This is a mock class, built for testing only. Don't use it as an example of KmsClient
|
||||
// implementation.
|
||||
class TestOnlyInServerWrapKms : public KmsClient {
|
||||
public:
|
||||
static void InitializeMasterKeys(
|
||||
const std::unordered_map<std::string, ::arrow::util::SecureString>&
|
||||
master_keys_map);
|
||||
|
||||
std::string WrapKey(const ::arrow::util::SecureString& key_bytes,
|
||||
const std::string& master_key_identifier) override;
|
||||
|
||||
::arrow::util::SecureString UnwrapKey(
|
||||
const std::string& wrapped_key, const std::string& master_key_identifier) override;
|
||||
|
||||
static void StartKeyRotation(
|
||||
const std::unordered_map<std::string, ::arrow::util::SecureString>&
|
||||
new_master_keys_map);
|
||||
static void FinishKeyRotation();
|
||||
|
||||
private:
|
||||
::arrow::util::SecureString GetMasterKeyFromServer(
|
||||
const std::string& master_key_identifier);
|
||||
|
||||
// Different wrapping and unwrapping key maps to imitate versioning
|
||||
// and support key rotation.
|
||||
static std::unordered_map<std::string, ::arrow::util::SecureString>
|
||||
unwrapping_master_key_map_;
|
||||
static std::unordered_map<std::string, ::arrow::util::SecureString>
|
||||
wrapping_master_key_map_;
|
||||
};
|
||||
|
||||
// This is a mock class, built for testing only. Don't use it as an example of
|
||||
// KmsClientFactory implementation.
|
||||
class TestOnlyInMemoryKmsClientFactory : public KmsClientFactory {
|
||||
public:
|
||||
TestOnlyInMemoryKmsClientFactory(
|
||||
bool wrap_locally,
|
||||
const std::unordered_map<std::string, ::arrow::util::SecureString>& master_keys_map)
|
||||
: KmsClientFactory(wrap_locally) {
|
||||
TestOnlyLocalWrapInMemoryKms::InitializeMasterKeys(master_keys_map);
|
||||
TestOnlyInServerWrapKms::InitializeMasterKeys(master_keys_map);
|
||||
}
|
||||
|
||||
std::shared_ptr<KmsClient> CreateKmsClient(
|
||||
const KmsConnectionConfig& kms_connection_config) {
|
||||
if (wrap_locally_) {
|
||||
return std::make_shared<TestOnlyLocalWrapInMemoryKms>(kms_connection_config);
|
||||
} else {
|
||||
return std::make_shared<TestOnlyInServerWrapKms>();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,149 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "arrow/util/concurrent_map.h"
|
||||
#include "arrow/util/mutex.h"
|
||||
|
||||
namespace parquet::encryption {
|
||||
|
||||
using ::arrow::util::ConcurrentMap;
|
||||
|
||||
namespace internal {
|
||||
|
||||
// A system_clock time point whose duration is a double count of seconds.
using TimePoint =
    std::chrono::time_point<std::chrono::system_clock, std::chrono::duration<double>>;

// Current system_clock time, converted to TimePoint.
inline TimePoint CurrentTimePoint() {
  const auto system_now = std::chrono::system_clock::now();
  return system_now;
}
|
||||
|
||||
template <typename E>
|
||||
class ExpiringCacheEntry {
|
||||
public:
|
||||
ExpiringCacheEntry() = default;
|
||||
|
||||
ExpiringCacheEntry(E cached_item, double expiration_interval_seconds)
|
||||
: expiration_timestamp_(CurrentTimePoint() +
|
||||
std::chrono::duration<double>(expiration_interval_seconds)),
|
||||
cached_item_(std::move(cached_item)) {}
|
||||
|
||||
bool IsExpired() const {
|
||||
const auto now = CurrentTimePoint();
|
||||
return (now > expiration_timestamp_);
|
||||
}
|
||||
|
||||
E cached_item() { return cached_item_; }
|
||||
|
||||
private:
|
||||
const TimePoint expiration_timestamp_;
|
||||
E cached_item_;
|
||||
};
|
||||
|
||||
// This class is to avoid the below warning when compiling KeyToolkit class with VS2015
|
||||
// warning C4503: decorated name length exceeded, name was truncated
|
||||
template <typename V>
|
||||
class ExpiringCacheMapEntry {
|
||||
public:
|
||||
ExpiringCacheMapEntry() = default;
|
||||
|
||||
explicit ExpiringCacheMapEntry(
|
||||
std::shared_ptr<ConcurrentMap<std::string, V>> cached_item,
|
||||
double expiration_interval_seconds)
|
||||
: map_cache_(cached_item, expiration_interval_seconds) {}
|
||||
|
||||
bool IsExpired() { return map_cache_.IsExpired(); }
|
||||
|
||||
std::shared_ptr<ConcurrentMap<std::string, V>> cached_item() {
|
||||
return map_cache_.cached_item();
|
||||
}
|
||||
|
||||
private:
|
||||
// ConcurrentMap object may be accessed and modified at many places at the same time,
|
||||
// from multiple threads, or even removed from cache.
|
||||
ExpiringCacheEntry<std::shared_ptr<ConcurrentMap<std::string, V>>> map_cache_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
// Two-level cache with expiration of internal caches according to token lifetime.
|
||||
// External cache is per token, internal is per string key.
|
||||
// Wrapper class around:
|
||||
// std::unordered_map<std::string,
|
||||
// internal::ExpiringCacheEntry<std::unordered_map<std::string, V>>>
|
||||
// This cache is safe to be shared between threads.
|
||||
template <typename V>
|
||||
class TwoLevelCacheWithExpiration {
|
||||
public:
|
||||
TwoLevelCacheWithExpiration() {
|
||||
last_cache_cleanup_timestamp_ = internal::CurrentTimePoint();
|
||||
}
|
||||
|
||||
std::shared_ptr<ConcurrentMap<std::string, V>> GetOrCreateInternalCache(
|
||||
const std::string& access_token, double cache_entry_lifetime_seconds) {
|
||||
auto lock = mutex_.Lock();
|
||||
|
||||
auto external_cache_entry = cache_.find(access_token);
|
||||
if (external_cache_entry == cache_.end() ||
|
||||
external_cache_entry->second.IsExpired()) {
|
||||
cache_.insert({access_token, internal::ExpiringCacheMapEntry<V>(
|
||||
std::make_shared<ConcurrentMap<std::string, V>>(),
|
||||
cache_entry_lifetime_seconds)});
|
||||
}
|
||||
|
||||
return cache_[access_token].cached_item();
|
||||
}
|
||||
|
||||
void CheckCacheForExpiredTokens(double cache_cleanup_period_seconds = 0.0) {
|
||||
auto lock = mutex_.Lock();
|
||||
|
||||
const auto now = internal::CurrentTimePoint();
|
||||
if (now > (last_cache_cleanup_timestamp_ +
|
||||
std::chrono::duration<double>(cache_cleanup_period_seconds))) {
|
||||
RemoveExpiredEntriesNoMutex();
|
||||
last_cache_cleanup_timestamp_ = now;
|
||||
}
|
||||
}
|
||||
|
||||
void Remove(const std::string& access_token) {
|
||||
auto lock = mutex_.Lock();
|
||||
cache_.erase(access_token);
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
auto lock = mutex_.Lock();
|
||||
cache_.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
void RemoveExpiredEntriesNoMutex() {
|
||||
for (auto it = cache_.begin(); it != cache_.end();) {
|
||||
if (it->second.IsExpired()) {
|
||||
it = cache_.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::unordered_map<std::string, internal::ExpiringCacheMapEntry<V>> cache_;
|
||||
internal::TimePoint last_cache_cleanup_timestamp_;
|
||||
::arrow::util::Mutex mutex_;
|
||||
};
|
||||
|
||||
} // namespace parquet::encryption
|
||||
@@ -0,0 +1,28 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace parquet {
|
||||
|
||||
class Decryptor;
|
||||
class Encryptor;
|
||||
|
||||
class InternalFileDecryptor;
|
||||
class InternalFileEncryptor;
|
||||
|
||||
} // namespace parquet
|
||||
Reference in New Issue
Block a user