kou commented on code in PR #43601:
URL: https://github.com/apache/arrow/pull/43601#discussion_r1758770525


##########
cpp/src/arrow/filesystem/s3_internal.h:
##########
@@ -291,6 +294,42 @@ class ConnectRetryStrategy : public 
Aws::Client::RetryStrategy {
   int32_t max_retry_duration_;
 };
 
+/// \brief calculate the MD5 of the input sse-c key(raw key, not base64 
encoded)
+/// \param sse_customer_key is the input sse key
+/// \return the base64 encoded MD5 for the input key
+inline Result<std::string> CalculateSSECKeyMD5(const std::string& 
sse_customer_key) {
+  // the key needs to be 256 bits(32 bytes)according to
+  // 
https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html#specifying-s3-c-encryption
+  if (sse_customer_key.length() != 32) {
+    return Status::Invalid("32 bytes sse-c key is expected");
+  }
+
+  // Convert the raw binary key to an Aws::String
+  Aws::String sse_customer_key_aws_string(sse_customer_key.c_str(),

Review Comment:
   Can we use `data()` here?
   
   ```suggestion
     Aws::String sse_customer_key_aws_string(sse_customer_key.data(),
   ```



##########
cpp/src/arrow/filesystem/s3_internal.h:
##########
@@ -291,6 +294,42 @@ class ConnectRetryStrategy : public 
Aws::Client::RetryStrategy {
   int32_t max_retry_duration_;
 };
 
+/// \brief calculate the MD5 of the input sse-c key(raw key, not base64 
encoded)
+/// \param sse_customer_key is the input sse key
+/// \return the base64 encoded MD5 for the input key
+inline Result<std::string> CalculateSSECKeyMD5(const std::string& 
sse_customer_key) {
+  // the key needs to be 256 bits(32 bytes)according to

Review Comment:
   ```suggestion
     // the key needs to be 256 bits (32 bytes) according to
   ```



##########
cpp/src/arrow/filesystem/s3fs.cc:
##########
@@ -1769,6 +1781,8 @@ class ObjectOutputStream final : public io::OutputStream {
     S3Model::CompleteMultipartUploadRequest req;
     req.SetBucket(ToAwsString(path_.bucket));
     req.SetKey(ToAwsString(path_.key));
+    RETURN_NOT_OK(SetSSECustomerKey(req, sse_customer_key_));
+

Review Comment:
   ```suggestion
   ```



##########
cpp/src/arrow/filesystem/s3fs.h:
##########
@@ -196,6 +196,9 @@ struct ARROW_EXPORT S3Options {
   /// delay between retries.
   std::shared_ptr<S3RetryStrategy> retry_strategy;
 
+  /// the SSE-C customized key(raw 32 bytes key).

Review Comment:
   ```suggestion
     /// the SSE-C customized key (raw 32 bytes key).
   ```



##########
cpp/src/arrow/filesystem/s3_internal.h:
##########
@@ -291,6 +294,42 @@ class ConnectRetryStrategy : public 
Aws::Client::RetryStrategy {
   int32_t max_retry_duration_;
 };
 
+/// \brief calculate the MD5 of the input sse-c key(raw key, not base64 
encoded)

Review Comment:
   ```suggestion
   /// \brief calculate the MD5 of the input sse-c key (raw key, not base64 
encoded)
   ```



##########
cpp/src/arrow/filesystem/s3_internal.h:
##########
@@ -291,6 +294,42 @@ class ConnectRetryStrategy : public 
Aws::Client::RetryStrategy {
   int32_t max_retry_duration_;
 };
 
+/// \brief calculate the MD5 of the input sse-c key(raw key, not base64 
encoded)
+/// \param sse_customer_key is the input sse key
+/// \return the base64 encoded MD5 for the input key
+inline Result<std::string> CalculateSSECKeyMD5(const std::string& 
sse_customer_key) {
+  // the key needs to be 256 bits(32 bytes)according to
+  // 
https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html#specifying-s3-c-encryption
+  if (sse_customer_key.length() != 32) {
+    return Status::Invalid("32 bytes sse-c key is expected");
+  }
+
+  // Convert the raw binary key to an Aws::String
+  Aws::String sse_customer_key_aws_string(sse_customer_key.c_str(),
+                                          sse_customer_key.length());
+
+  // Compute the MD5 hash of the raw binary key
+  Aws::Utils::ByteBuffer sse_customer_key_md5 =
+      Aws::Utils::HashingUtils::CalculateMD5(sse_customer_key_aws_string);
+
+  // Base64-encode the MD5 hash
+  return arrow::util::base64_encode(std::string_view(
+      reinterpret_cast<const char*>(sse_customer_key_md5.GetUnderlyingData()),
+      sse_customer_key_md5.GetLength()));
+}
+
+template <typename S3RequestType>
+Status SetSSECustomerKey(S3RequestType& request, const std::string& 
sse_customer_key) {
+  if (sse_customer_key.empty()) {
+    return Status::OK();  // do nothing if the sse_customer_key is not 
configured
+  }
+  ARROW_ASSIGN_OR_RAISE(auto result, 
internal::CalculateSSECKeyMD5(sse_customer_key));
+  request.SetSSECustomerKeyMD5(result);

Review Comment:
   ```suggestion
     ARROW_ASSIGN_OR_RAISE(auto md5, 
internal::CalculateSSECKeyMD5(sse_customer_key));
     request.SetSSECustomerKeyMD5(md5);
   ```



##########
cpp/src/arrow/filesystem/s3fs_test.cc:
##########
@@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) {
   }
 }
 
+TEST(CalculateSSECKeyMD5, Sanity) {
+  ASSERT_FALSE(CalculateSSECKeyMD5("").ok());  // invalid length
+  ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890")
+                   .ok());  // invalid length
+  // valid case, with ascii input key
+  ASSERT_OK_AND_ASSIGN(auto result,
+                       CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1"))

Review Comment:
   ```suggestion
     ASSERT_OK_AND_ASSIGN(auto md5,
                          
CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1"))
   ```



##########
cpp/src/arrow/filesystem/s3fs_test.cc:
##########
@@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) {
   }
 }
 
+TEST(CalculateSSECKeyMD5, Sanity) {
+  ASSERT_FALSE(CalculateSSECKeyMD5("").ok());  // invalid length
+  ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890")
+                   .ok());  // invalid length
+  // valid case, with ascii input key
+  ASSERT_OK_AND_ASSIGN(auto result,
+                       CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1"))
+  ASSERT_STREQ(result.c_str(),
+               "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk=");  // valid case
+  // valid case, with some non-ASCII character and a null byte in the 
sse_customer_key

Review Comment:
   Do we need ASCII case?
   It seems that non-ASCII case is only enough.



##########
cpp/src/arrow/filesystem/s3fs.cc:
##########
@@ -1668,6 +1694,8 @@ class ObjectOutputStream final : public io::OutputStream {
     S3Model::CreateMultipartUploadRequest req;
     req.SetBucket(ToAwsString(path_.bucket));
     req.SetKey(ToAwsString(path_.key));
+    RETURN_NOT_OK(SetSSECustomerKey(req, sse_customer_key));
+

Review Comment:
   Could you revert a needless change?



##########
cpp/src/arrow/filesystem/s3fs_test.cc:
##########
@@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) {
   }
 }
 
+TEST(CalculateSSECKeyMD5, Sanity) {
+  ASSERT_FALSE(CalculateSSECKeyMD5("").ok());  // invalid length
+  ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890")
+                   .ok());  // invalid length
+  // valid case, with ascii input key
+  ASSERT_OK_AND_ASSIGN(auto result,
+                       CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1"))
+  ASSERT_STREQ(result.c_str(),
+               "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk=");  // valid case
+  // valid case, with some non-ASCII character and a null byte in the 
sse_customer_key
+  char sse_customer_key[32] = {};
+  sse_customer_key[0] = '\x40';   // '@' character
+  sse_customer_key[1] = '\0';     // null byte
+  sse_customer_key[2] = '\xFF';   // non-ASCII
+  sse_customer_key[31] = '\xFA';  // non-ASCII
+  std::string sse_customer_key_string(sse_customer_key, 
sizeof(sse_customer_key));
+  ASSERT_OK_AND_ASSIGN(result, CalculateSSECKeyMD5(sse_customer_key_string))
+  ASSERT_STREQ(result.c_str(),
+               "ZjdiMTUzNmJhOTYzZDIxMTNiOTZjODRhNzQxY2JhZDY=");  // valid case

Review Comment:
   ```suggestion
     ASSERT_OK_AND_ASSIGN(md5, CalculateSSECKeyMD5(sse_customer_key_string))
     ASSERT_EQ(md5, "ZjdiMTUzNmJhOTYzZDIxMTNiOTZjODRhNzQxY2JhZDY=");  // valid 
case
   ```



##########
cpp/src/arrow/filesystem/s3_internal.h:
##########
@@ -291,6 +294,42 @@ class ConnectRetryStrategy : public 
Aws::Client::RetryStrategy {
   int32_t max_retry_duration_;
 };
 
+/// \brief calculate the MD5 of the input sse-c key(raw key, not base64 
encoded)
+/// \param sse_customer_key is the input sse key
+/// \return the base64 encoded MD5 for the input key
+inline Result<std::string> CalculateSSECKeyMD5(const std::string& 
sse_customer_key) {

Review Comment:
   Could you avoid abbreviating here like others?
   
   ```suggestion
   inline Result<std::string> CalculateSSECustomerKeyMD5(const std::string& 
sse_customer_key) {
   ```
   



##########
cpp/src/arrow/filesystem/s3fs_test.cc:
##########
@@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) {
   }
 }
 
+TEST(CalculateSSECKeyMD5, Sanity) {
+  ASSERT_FALSE(CalculateSSECKeyMD5("").ok());  // invalid length
+  ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890")
+                   .ok());  // invalid length
+  // valid case, with ascii input key
+  ASSERT_OK_AND_ASSIGN(auto result,
+                       CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1"))
+  ASSERT_STREQ(result.c_str(),
+               "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk=");  // valid case

Review Comment:
   ```suggestion
     ASSERT_EQ(md5, "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk=");  // valid 
case
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to