ggershinsky commented on a change in pull request #8023:
URL: https://github.com/apache/arrow/pull/8023#discussion_r492540735



##########
File path: cpp/src/parquet/encryption/remote_kms_client.h
##########
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <unordered_map>
+#include <vector>
+
+#include "parquet/encryption/kms_client.h"
+#include "parquet/platform.h"
+
+namespace parquet {
+namespace encryption {
+
+// KMS systems wrap keys by encrypting them by master keys, and attaching 
additional
+// information (such as the version number of the master key) to the result of 
encryption.
+// The master key version is required in  key rotation. Currently, the local 
wrapping mode
+// does not support key rotation (because not all KMS systems allow to fetch a 
master key
+// by its ID and version number). Still, the local wrapping mode adds a 
placeholder for
+// the master key version, that will enable support for key rotation in this 
mode in the
+// future, with appropriate KMS systems. This will also enable backward 
compatibility,
+// where future readers will be able to extract master key version in the 
files written by
+// the current code.
+//
+// LocalKeyWrap class writes (and reads) the "key wrap" as a flat json with 
the following
+// fields:
+// 1. "masterKeyVersion" - a String, with the master key version. In the 
current version,
+// only one value is allowed - "NO_VERSION".
+// 2. "encryptedKey" - a String, with the key encrypted by the master key
+// (base64-encoded).
+class PARQUET_EXPORT RemoteKmsClient : public KmsClient {
+ public:
+  static constexpr const char kLocalWrapNoKeyVersion[] = "NO_VERSION";
+
+  void Initialize(const KmsConnectionConfig& kms_connection_config, bool 
is_wrap_locally);
+
+  std::string WrapKey(const std::string& key_bytes,
+                      const std::string& master_key_identifier) override;
+
+  std::string UnwrapKey(const std::string& wrapped_key,
+                        const std::string& master_key_identifier) override;
+
+ protected:
+  // Wrap a key with the master key in the remote KMS server.
+  virtual std::string WrapKeyInServer(const std::string& key_bytes,
+                                      const std::string& 
master_key_identifier) = 0;
+
+  // Unwrap a key with the master key in the remote KMS server.
+  virtual std::string UnwrapKeyInServer(const std::string& wrapped_key,
+                                        const std::string& 
master_key_identifier) = 0;
+
+  // Get master key from the remote KMS server.
+  // Required only for local wrapping. No need to implement if KMS supports 
in-server
+  // wrapping/unwrapping.

Review comment:
       Suggest adding this to the comment: "Note: this function might be called 
by multiple threads"

##########
File path: cpp/src/parquet/encryption/remote_kms_client.h
##########
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <unordered_map>
+#include <vector>
+
+#include "parquet/encryption/kms_client.h"
+#include "parquet/platform.h"
+
+namespace parquet {
+namespace encryption {
+
+// KMS systems wrap keys by encrypting them by master keys, and attaching 
additional
+// information (such as the version number of the master key) to the result of 
encryption.
+// The master key version is required in  key rotation. Currently, the local 
wrapping mode
+// does not support key rotation (because not all KMS systems allow to fetch a 
master key
+// by its ID and version number). Still, the local wrapping mode adds a 
placeholder for
+// the master key version, that will enable support for key rotation in this 
mode in the
+// future, with appropriate KMS systems. This will also enable backward 
compatibility,
+// where future readers will be able to extract master key version in the 
files written by
+// the current code.
+//
+// LocalKeyWrap class writes (and reads) the "key wrap" as a flat json with 
the following
+// fields:
+// 1. "masterKeyVersion" - a String, with the master key version. In the 
current version,
+// only one value is allowed - "NO_VERSION".
+// 2. "encryptedKey" - a String, with the key encrypted by the master key
+// (base64-encoded).
+class PARQUET_EXPORT RemoteKmsClient : public KmsClient {
+ public:
+  static constexpr const char kLocalWrapNoKeyVersion[] = "NO_VERSION";
+
+  void Initialize(const KmsConnectionConfig& kms_connection_config, bool 
is_wrap_locally);
+
+  std::string WrapKey(const std::string& key_bytes,
+                      const std::string& master_key_identifier) override;
+
+  std::string UnwrapKey(const std::string& wrapped_key,
+                        const std::string& master_key_identifier) override;
+
+ protected:
+  // Wrap a key with the master key in the remote KMS server.

Review comment:
       Suggest adding this to the comment: "Note: this function might be called 
by multiple threads"

##########
File path: cpp/src/parquet/encryption/remote_kms_client.h
##########
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <unordered_map>
+#include <vector>
+
+#include "parquet/encryption/kms_client.h"
+#include "parquet/platform.h"
+
+namespace parquet {
+namespace encryption {
+
+// KMS systems wrap keys by encrypting them by master keys, and attaching 
additional
+// information (such as the version number of the master key) to the result of 
encryption.
+// The master key version is required in  key rotation. Currently, the local 
wrapping mode
+// does not support key rotation (because not all KMS systems allow to fetch a 
master key
+// by its ID and version number). Still, the local wrapping mode adds a 
placeholder for
+// the master key version, that will enable support for key rotation in this 
mode in the
+// future, with appropriate KMS systems. This will also enable backward 
compatibility,
+// where future readers will be able to extract master key version in the 
files written by
+// the current code.
+//
+// LocalKeyWrap class writes (and reads) the "key wrap" as a flat json with 
the following
+// fields:
+// 1. "masterKeyVersion" - a String, with the master key version. In the 
current version,
+// only one value is allowed - "NO_VERSION".
+// 2. "encryptedKey" - a String, with the key encrypted by the master key
+// (base64-encoded).
+class PARQUET_EXPORT RemoteKmsClient : public KmsClient {
+ public:
+  static constexpr const char kLocalWrapNoKeyVersion[] = "NO_VERSION";
+
+  void Initialize(const KmsConnectionConfig& kms_connection_config, bool 
is_wrap_locally);
+
+  std::string WrapKey(const std::string& key_bytes,
+                      const std::string& master_key_identifier) override;
+
+  std::string UnwrapKey(const std::string& wrapped_key,
+                        const std::string& master_key_identifier) override;
+
+ protected:
+  // Wrap a key with the master key in the remote KMS server.
+  virtual std::string WrapKeyInServer(const std::string& key_bytes,
+                                      const std::string& 
master_key_identifier) = 0;
+
+  // Unwrap a key with the master key in the remote KMS server.

Review comment:
       Suggest adding this to the comment: "Note: this function might be called 
by multiple threads"

##########
File path: cpp/src/parquet/encryption/remote_kms_client.h
##########
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <unordered_map>
+#include <vector>
+
+#include "parquet/encryption/kms_client.h"
+#include "parquet/platform.h"
+
+namespace parquet {
+namespace encryption {
+
+// KMS systems wrap keys by encrypting them by master keys, and attaching 
additional
+// information (such as the version number of the master key) to the result of 
encryption.
+// The master key version is required in  key rotation. Currently, the local 
wrapping mode
+// does not support key rotation (because not all KMS systems allow to fetch a 
master key
+// by its ID and version number). Still, the local wrapping mode adds a 
placeholder for
+// the master key version, that will enable support for key rotation in this 
mode in the
+// future, with appropriate KMS systems. This will also enable backward 
compatibility,
+// where future readers will be able to extract master key version in the 
files written by
+// the current code.
+//
+// LocalKeyWrap class writes (and reads) the "key wrap" as a flat json with 
the following
+// fields:
+// 1. "masterKeyVersion" - a String, with the master key version. In the 
current version,
+// only one value is allowed - "NO_VERSION".
+// 2. "encryptedKey" - a String, with the key encrypted by the master key
+// (base64-encoded).

Review comment:
       Should this comment (lines 29-44) be moved to the LocalKeyWrap class at 
line 75?

##########
File path: cpp/src/parquet/encryption/kms_client.h
##########
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "parquet/exception.h"
+#include "parquet/platform.h"
+
+namespace parquet {
+namespace encryption {
+
+// This class wraps the key access token of a KMS server. If your token 
changes over time,
+// you should keep the reference to the KeyAccessToken object and call 
Refresh() method
+// every time you have a new token.
+class PARQUET_EXPORT KeyAccessToken {
+ public:
+  KeyAccessToken() = default;
+
+  explicit KeyAccessToken(const std::string value) : value_(value) {}
+
+  void Refresh(const std::string& new_value) { value_ = new_value; }
+
+  const std::string& value() const { return value_; }
+
+  void SetDefaultIfEmpty();
+
+ private:
+  std::string value_;
+};
+
+struct PARQUET_EXPORT KmsConnectionConfig {
+  std::string kms_instance_id;
+  std::string kms_instance_url;
+  std::shared_ptr<KeyAccessToken> refreshable_key_access_token;
+  std::unordered_map<std::string, std::string> custom_kms_conf;
+
+  const std::string& key_access_token() const {
+    if (refreshable_key_access_token == NULL ||
+        refreshable_key_access_token->value().empty()) {
+      throw ParquetException("key access token is not set!");
+    }
+    return refreshable_key_access_token->value();
+  }
+
+  void SetDefaultIfEmpty();
+};
+
+class PARQUET_EXPORT KmsClient {
+ public:
+  static constexpr const char kKmsInstanceIdDefault[] = "DEFAULT";
+  static constexpr const char kKmsInstanceUrlDefault[] = "DEFAULT";
+  static constexpr const char kKeyAccessTokenDefault[] = "DEFAULT";
+
+  // Wraps a key - encrypts it with the master key, encodes the result
+  // and potentially adds a KMS-specific metadata.
+  virtual std::string WrapKey(const std::string& key_bytes,
+                              const std::string& master_key_identifier) = 0;
+
+  // Decrypts (unwraps) a key with the master key.

Review comment:
       Suggest adding this to the comment: "Note: this function might be called 
by multiple threads"

##########
File path: cpp/src/parquet/encryption/kms_client.h
##########
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "parquet/exception.h"
+#include "parquet/platform.h"
+
+namespace parquet {
+namespace encryption {
+
+// This class wraps the key access token of a KMS server. If your token 
changes over time,
+// you should keep the reference to the KeyAccessToken object and call 
Refresh() method
+// every time you have a new token.
+class PARQUET_EXPORT KeyAccessToken {
+ public:
+  KeyAccessToken() = default;
+
+  explicit KeyAccessToken(const std::string value) : value_(value) {}
+
+  void Refresh(const std::string& new_value) { value_ = new_value; }
+
+  const std::string& value() const { return value_; }
+
+  void SetDefaultIfEmpty();
+
+ private:
+  std::string value_;
+};
+
+struct PARQUET_EXPORT KmsConnectionConfig {
+  std::string kms_instance_id;
+  std::string kms_instance_url;
+  std::shared_ptr<KeyAccessToken> refreshable_key_access_token;
+  std::unordered_map<std::string, std::string> custom_kms_conf;
+
+  const std::string& key_access_token() const {
+    if (refreshable_key_access_token == NULL ||
+        refreshable_key_access_token->value().empty()) {
+      throw ParquetException("key access token is not set!");
+    }
+    return refreshable_key_access_token->value();
+  }
+
+  void SetDefaultIfEmpty();
+};
+
+class PARQUET_EXPORT KmsClient {
+ public:
+  static constexpr const char kKmsInstanceIdDefault[] = "DEFAULT";
+  static constexpr const char kKmsInstanceUrlDefault[] = "DEFAULT";
+  static constexpr const char kKeyAccessTokenDefault[] = "DEFAULT";
+
+  // Wraps a key - encrypts it with the master key, encodes the result
+  // and potentially adds a KMS-specific metadata.

Review comment:
       Suggest adding this to the comment: "Note: this function might be called 
by multiple threads"




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to