ggershinsky commented on a change in pull request #8023: URL: https://github.com/apache/arrow/pull/8023#discussion_r492540735
########## File path: cpp/src/parquet/encryption/remote_kms_client.h ########## @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <unordered_map> +#include <vector> + +#include "parquet/encryption/kms_client.h" +#include "parquet/platform.h" + +namespace parquet { +namespace encryption { + +// KMS systems wrap keys by encrypting them by master keys, and attaching additional +// information (such as the version number of the masker key) to the result of encryption. +// The master key version is required in key rotation. Currently, the local wrapping mode +// does not support key rotation (because not all KMS systems allow to fetch a master key +// by its ID and version number). Still, the local wrapping mode adds a placeholder for +// the master key version, that will enable support for key rotation in this mode in the +// future, with appropriate KMS systems. This will also enable backward compatibility, +// where future readers will be able to extract master key version in the files written by +// the current code. +// +// LocalKeyWrap class writes (and reads) the "key wrap" as a flat json with the following +// fields: +// 1. 
"masterKeyVersion" - a String, with the master key version. In the current version, +// only one value is allowed - "NO_VERSION". +// 2. "encryptedKey" - a String, with the key encrypted by the master key +// (base64-encoded). +class PARQUET_EXPORT RemoteKmsClient : public KmsClient { + public: + static constexpr const char kLocalWrapNoKeyVersion[] = "NO_VERSION"; + + void Initialize(const KmsConnectionConfig& kms_connection_config, bool is_wrap_locally); + + std::string WrapKey(const std::string& key_bytes, + const std::string& master_key_identifier) override; + + std::string UnwrapKey(const std::string& wrapped_key, + const std::string& master_key_identifier) override; + + protected: + // Wrap a key with the master key in the remote KMS server. + virtual std::string WrapKeyInServer(const std::string& key_bytes, + const std::string& master_key_identifier) = 0; + + // Unwrap a key with the master key in the remote KMS server. + virtual std::string UnwrapKeyInServer(const std::string& wrapped_key, + const std::string& master_key_identifier) = 0; + + // Get master key from the remote KMS server. + // Required only for local wrapping. No need to implement if KMS supports in-server + // wrapping/unwrapping. Review comment: "Note: this function might be called by multiple threads" ########## File path: cpp/src/parquet/encryption/remote_kms_client.h ########## @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <unordered_map> +#include <vector> + +#include "parquet/encryption/kms_client.h" +#include "parquet/platform.h" + +namespace parquet { +namespace encryption { + +// KMS systems wrap keys by encrypting them by master keys, and attaching additional +// information (such as the version number of the masker key) to the result of encryption. +// The master key version is required in key rotation. Currently, the local wrapping mode +// does not support key rotation (because not all KMS systems allow to fetch a master key +// by its ID and version number). Still, the local wrapping mode adds a placeholder for +// the master key version, that will enable support for key rotation in this mode in the +// future, with appropriate KMS systems. This will also enable backward compatibility, +// where future readers will be able to extract master key version in the files written by +// the current code. +// +// LocalKeyWrap class writes (and reads) the "key wrap" as a flat json with the following +// fields: +// 1. "masterKeyVersion" - a String, with the master key version. In the current version, +// only one value is allowed - "NO_VERSION". +// 2. "encryptedKey" - a String, with the key encrypted by the master key +// (base64-encoded). 
+class PARQUET_EXPORT RemoteKmsClient : public KmsClient { + public: + static constexpr const char kLocalWrapNoKeyVersion[] = "NO_VERSION"; + + void Initialize(const KmsConnectionConfig& kms_connection_config, bool is_wrap_locally); + + std::string WrapKey(const std::string& key_bytes, + const std::string& master_key_identifier) override; + + std::string UnwrapKey(const std::string& wrapped_key, + const std::string& master_key_identifier) override; + + protected: + // Wrap a key with the master key in the remote KMS server. Review comment: Suggest adding this to the comment: "Note: this function might be called by multiple threads" ########## File path: cpp/src/parquet/encryption/remote_kms_client.h ########## @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <unordered_map> +#include <vector> + +#include "parquet/encryption/kms_client.h" +#include "parquet/platform.h" + +namespace parquet { +namespace encryption { + +// KMS systems wrap keys by encrypting them by master keys, and attaching additional +// information (such as the version number of the masker key) to the result of encryption. +// The master key version is required in key rotation. 
Currently, the local wrapping mode +// does not support key rotation (because not all KMS systems allow to fetch a master key +// by its ID and version number). Still, the local wrapping mode adds a placeholder for +// the master key version, that will enable support for key rotation in this mode in the +// future, with appropriate KMS systems. This will also enable backward compatibility, +// where future readers will be able to extract master key version in the files written by +// the current code. +// +// LocalKeyWrap class writes (and reads) the "key wrap" as a flat json with the following +// fields: +// 1. "masterKeyVersion" - a String, with the master key version. In the current version, +// only one value is allowed - "NO_VERSION". +// 2. "encryptedKey" - a String, with the key encrypted by the master key +// (base64-encoded). +class PARQUET_EXPORT RemoteKmsClient : public KmsClient { + public: + static constexpr const char kLocalWrapNoKeyVersion[] = "NO_VERSION"; + + void Initialize(const KmsConnectionConfig& kms_connection_config, bool is_wrap_locally); + + std::string WrapKey(const std::string& key_bytes, + const std::string& master_key_identifier) override; + + std::string UnwrapKey(const std::string& wrapped_key, + const std::string& master_key_identifier) override; + + protected: + // Wrap a key with the master key in the remote KMS server. + virtual std::string WrapKeyInServer(const std::string& key_bytes, + const std::string& master_key_identifier) = 0; + + // Unwrap a key with the master key in the remote KMS server. Review comment: "Note: this function might be called by multiple threads" ########## File path: cpp/src/parquet/encryption/remote_kms_client.h ########## @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <unordered_map> +#include <vector> + +#include "parquet/encryption/kms_client.h" +#include "parquet/platform.h" + +namespace parquet { +namespace encryption { + +// KMS systems wrap keys by encrypting them by master keys, and attaching additional +// information (such as the version number of the master key) to the result of encryption. +// The master key version is required in key rotation. Currently, the local wrapping mode +// does not support key rotation (because not all KMS systems allow fetching a master key +// by its ID and version number). Still, the local wrapping mode adds a placeholder for +// the master key version, that will enable support for key rotation in this mode in the +// future, with appropriate KMS systems. This will also enable backward compatibility, +// where future readers will be able to extract master key version in the files written by +// the current code. +// +// LocalKeyWrap class writes (and reads) the "key wrap" as a flat json with the following +// fields: +// 1. "masterKeyVersion" - a String, with the master key version. In the current version, +// only one value is allowed - "NO_VERSION". +// 2. "encryptedKey" - a String, with the key encrypted by the master key +// (base64-encoded). Review comment: Should this comment (lines 29-44) be moved to the LocalKeyWrap class at line 75?
########## File path: cpp/src/parquet/encryption/kms_client.h ########## @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <string> +#include <unordered_map> +#include <vector> + +#include "parquet/exception.h" +#include "parquet/platform.h" + +namespace parquet { +namespace encryption { + +// This class wraps the key access token of a KMS server. If your token changes over time, +// you should keep the reference to the KeyAccessToken object and call Refresh() method +// every time you have a new token. 
+class PARQUET_EXPORT KeyAccessToken { + public: + KeyAccessToken() = default; + + explicit KeyAccessToken(const std::string value) : value_(value) {} + + void Refresh(const std::string& new_value) { value_ = new_value; } + + const std::string& value() const { return value_; } + + void SetDefaultIfEmpty(); + + private: + std::string value_; +}; + +struct PARQUET_EXPORT KmsConnectionConfig { + std::string kms_instance_id; + std::string kms_instance_url; + std::shared_ptr<KeyAccessToken> refreshable_key_access_token; + std::unordered_map<std::string, std::string> custom_kms_conf; + + const std::string& key_access_token() const { + if (refreshable_key_access_token == NULL || + refreshable_key_access_token->value().empty()) { + throw ParquetException("key access token is not set!"); + } + return refreshable_key_access_token->value(); + } + + void SetDefaultIfEmpty(); +}; + +class PARQUET_EXPORT KmsClient { + public: + static constexpr const char kKmsInstanceIdDefault[] = "DEFAULT"; + static constexpr const char kKmsInstanceUrlDefault[] = "DEFAULT"; + static constexpr const char kKeyAccessTokenDefault[] = "DEFAULT"; + + // Wraps a key - encrypts it with the master key, encodes the result + // and potentially adds a KMS-specific metadata. + virtual std::string WrapKey(const std::string& key_bytes, + const std::string& master_key_identifier) = 0; + + // Decrypts (unwraps) a key with the master key. Review comment: "Note: this function might be called by multiple threads" ########## File path: cpp/src/parquet/encryption/kms_client.h ########## @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <string> +#include <unordered_map> +#include <vector> + +#include "parquet/exception.h" +#include "parquet/platform.h" + +namespace parquet { +namespace encryption { + +// This class wraps the key access token of a KMS server. If your token changes over time, +// you should keep the reference to the KeyAccessToken object and call Refresh() method +// every time you have a new token. +class PARQUET_EXPORT KeyAccessToken { + public: + KeyAccessToken() = default; + + explicit KeyAccessToken(const std::string value) : value_(value) {} + + void Refresh(const std::string& new_value) { value_ = new_value; } + + const std::string& value() const { return value_; } + + void SetDefaultIfEmpty(); + + private: + std::string value_; +}; + +struct PARQUET_EXPORT KmsConnectionConfig { + std::string kms_instance_id; + std::string kms_instance_url; + std::shared_ptr<KeyAccessToken> refreshable_key_access_token; + std::unordered_map<std::string, std::string> custom_kms_conf; + + const std::string& key_access_token() const { + if (refreshable_key_access_token == NULL || + refreshable_key_access_token->value().empty()) { + throw ParquetException("key access token is not set!"); + } + return refreshable_key_access_token->value(); + } + + void SetDefaultIfEmpty(); +}; + +class PARQUET_EXPORT KmsClient { + public: + static constexpr const char kKmsInstanceIdDefault[] = "DEFAULT"; + static constexpr const char kKmsInstanceUrlDefault[] = "DEFAULT"; + static constexpr const char kKeyAccessTokenDefault[] = "DEFAULT"; + + // Wraps a key - encrypts 
it with the master key, encodes the result + // and potentially adds KMS-specific metadata. Review comment: Suggest adding this to the comment: "Note: this function might be called by multiple threads" ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org