kou commented on code in PR #43601:
URL: https://github.com/apache/arrow/pull/43601#discussion_r1792855876


##########
cpp/src/arrow/filesystem/s3_internal.h:
##########
@@ -291,6 +293,46 @@ class ConnectRetryStrategy : public 
Aws::Client::RetryStrategy {
   int32_t max_retry_duration_;
 };
 
+/// \brief calculate the MD5 of the input sse-c key (raw key, not base64 
encoded)
+/// \param sse_customer_key is the input sse key
+/// \return the base64 encoded MD5 for the input key
+inline Result<std::string> CalculateSSECustomerKeyMD5(
+    const std::string& sse_customer_key) {
+  // the key needs to be 256 bits (32 bytes) according to
+  // 
https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html#specifying-s3-c-encryption
+  if (sse_customer_key.length() != 32) {
+    return Status::Invalid("32 bytes sse-c key is expected");
+  }
+
+  // Convert the raw binary key to an Aws::String
+  Aws::String sse_customer_key_aws_string(sse_customer_key.data(),
+                                          sse_customer_key.length());
+
+  // Compute the MD5 hash of the raw binary key
+  Aws::Utils::ByteBuffer sse_customer_key_md5 =
+      Aws::Utils::HashingUtils::CalculateMD5(sse_customer_key_aws_string);
+
+  // Base64-encode the MD5 hash
+  return arrow::util::base64_encode(std::string_view(
+      reinterpret_cast<const char*>(sse_customer_key_md5.GetUnderlyingData()),
+      sse_customer_key_md5.GetLength()));
+}
+
+template <typename S3RequestType>
+Status SetSSECustomerKey(S3RequestType& request, const std::string& 
sse_customer_key) {
+  if (sse_customer_key.empty()) {
+    return Status::OK();  // do nothing if the sse_customer_key is not 
configured
+  }
+#ifndef ARROW_S3_SUPPORT_SSEC
+  return Status::NotImplemented("SSE-C is not supported");
+#endif
+  ARROW_ASSIGN_OR_RAISE(auto md5, 
internal::CalculateSSECustomerKeyMD5(sse_customer_key));
+  request.SetSSECustomerKeyMD5(md5);
+  request.SetSSECustomerKey(arrow::util::base64_encode(sse_customer_key));
+  request.SetSSECustomerAlgorithm("AES256");
+  return Status::OK();

Review Comment:
   
https://github.com/apache/arrow/actions/runs/11247400512/job/31273013174?pr=43601#step:7:6474
   
   ```text
    In file included from D:/a/arrow/arrow/cpp/src/arrow/filesystem/s3fs.cc:128:
   D:/a/arrow/arrow/cpp/src/arrow/filesystem/s3_internal.h: In instantiation of 
'arrow::Status arrow::fs::internal::SetSSECustomerKey(S3RequestType&, const 
string&) [with S3RequestType = Aws::S3::Model::CompleteMultipartUploadRequest; 
std::string = std::__cxx11::basic_string<char>]':
   D:/a/arrow/arrow/cpp/src/arrow/filesystem/s3fs.cc:1796:5:   required from 
here
   D:/a/arrow/arrow/cpp/src/arrow/filesystem/s3_internal.h:330:11: error: 
'class Aws::S3::Model::CompleteMultipartUploadRequest' has no member named 
'SetSSECustomerKeyMD5'
     330 |   request.SetSSECustomerKeyMD5(md5);
         |   ~~~~~~~~^~~~~~~~~~~~~~~~~~~~
   D:/a/arrow/arrow/cpp/src/arrow/filesystem/s3_internal.h:331:11: error: 
'class Aws::S3::Model::CompleteMultipartUploadRequest' has no member named 
'SetSSECustomerKey'
     331 |   
request.SetSSECustomerKey(arrow::util::base64_encode(sse_customer_key));
         |   ~~~~~~~~^~~~~~~~~~~~~~~~~
   D:/a/arrow/arrow/cpp/src/arrow/filesystem/s3_internal.h:332:11: error: 
'class Aws::S3::Model::CompleteMultipartUploadRequest' has no member named 
'SetSSECustomerAlgorithm'
     332 |   request.SetSSECustomerAlgorithm("AES256");
         |   ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~
   make[2]: *** 
[src/arrow/CMakeFiles/arrow_filesystem_static.dir/build.make:177: 
src/arrow/CMakeFiles/arrow_filesystem_static.dir/filesystem/s3fs.cc.obj] Error 1
   make[2]: *** Waiting for unfinished jobs....
   ```
   
   
   ```suggestion
   #ifdef ARROW_S3_SUPPORT_SSEC
     ARROW_ASSIGN_OR_RAISE(auto md5, 
internal::CalculateSSECustomerKeyMD5(sse_customer_key));
     request.SetSSECustomerKeyMD5(md5);
     request.SetSSECustomerKey(arrow::util::base64_encode(sse_customer_key));
     request.SetSSECustomerAlgorithm("AES256");
     return Status::OK();
   #else
     return Status::NotImplemented("SSE-C is not supported");
   #endif
   ```



##########
cpp/src/arrow/filesystem/s3fs.h:
##########
@@ -196,6 +196,23 @@ struct ARROW_EXPORT S3Options {
   /// delay between retries.
   std::shared_ptr<S3RetryStrategy> retry_strategy;
 
+  /// the SSE-C customized key (raw 32 bytes key).
+  std::string sse_customer_key;
+
+  /// Path to a single PEM file holding all TLS CA certificates
+  ///
+  /// If empty, the underlying TLS library's defaults will be used.
+  std::string tls_ca_file_path;
+
+  /// Path to a directory holding TLS CA certificates in individual PEM files
+  /// named along the OpenSSL "hashed" format.
+  ///
+  /// If empty, the underlying TLS library's defaults will be used.
+  std::string tls_ca_dir_path;
+
+  /// Controls whether to verify SSL certificates, Default to true

Review Comment:
   ```suggestion
     /// Controls whether to verify TLS certificates. Defaults to true.
   ```



##########
cpp/src/arrow/filesystem/s3fs.h:
##########
@@ -196,6 +196,23 @@ struct ARROW_EXPORT S3Options {
   /// delay between retries.
   std::shared_ptr<S3RetryStrategy> retry_strategy;
 
+  /// the SSE-C customized key (raw 32 bytes key).
+  std::string sse_customer_key;
+
+  /// Path to a single PEM file holding all TLS CA certificates
+  ///
+  /// If empty, the underlying TLS library's defaults will be used.
+  std::string tls_ca_file_path;
+
+  /// Path to a directory holding TLS CA certificates in individual PEM files
+  /// named along the OpenSSL "hashed" format.
+  ///
+  /// If empty, the underlying TLS library's defaults will be used.
+  std::string tls_ca_dir_path;
+
+  /// Controls whether to verify SSL certificates, Default to true
+  bool tls_verify_certificates = true;

Review Comment:
   Could you update `S3Options::FromUri()` to accept them?
   Could you also add tests for them?
   See also: 
https://github.com/apache/arrow/blob/d4516c5386f84619dfdf2a9f72fed6d7df89704c/cpp/src/arrow/filesystem/s3fs_test.cc#L257-L321
   
   Or how about creating a separated PR that includes only TLS related 
`S3Options` and `FileSystemGlobalOptions` changes?
   FYI: I created https://github.com/apache/arrow/issues/44333 yesterday.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to