kou commented on code in PR #43601: URL: https://github.com/apache/arrow/pull/43601#discussion_r1758770525
########## cpp/src/arrow/filesystem/s3_internal.h: ########## @@ -291,6 +294,42 @@ class ConnectRetryStrategy : public Aws::Client::RetryStrategy { int32_t max_retry_duration_; }; +/// \brief calculate the MD5 of the input sse-c key(raw key, not base64 encoded) +/// \param sse_customer_key is the input sse key +/// \return the base64 encoded MD5 for the input key +inline Result<std::string> CalculateSSECKeyMD5(const std::string& sse_customer_key) { + // the key needs to be 256 bits(32 bytes)according to + // https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html#specifying-s3-c-encryption + if (sse_customer_key.length() != 32) { + return Status::Invalid("32 bytes sse-c key is expected"); + } + + // Convert the raw binary key to an Aws::String + Aws::String sse_customer_key_aws_string(sse_customer_key.c_str(), Review Comment: Can we use `data()` here? ```suggestion Aws::String sse_customer_key_aws_string(sse_customer_key.data(), ``` ########## cpp/src/arrow/filesystem/s3_internal.h: ########## @@ -291,6 +294,42 @@ class ConnectRetryStrategy : public Aws::Client::RetryStrategy { int32_t max_retry_duration_; }; +/// \brief calculate the MD5 of the input sse-c key(raw key, not base64 encoded) +/// \param sse_customer_key is the input sse key +/// \return the base64 encoded MD5 for the input key +inline Result<std::string> CalculateSSECKeyMD5(const std::string& sse_customer_key) { + // the key needs to be 256 bits(32 bytes)according to Review Comment: ```suggestion // the key needs to be 256 bits (32 bytes) according to ``` ########## cpp/src/arrow/filesystem/s3fs.cc: ########## @@ -1769,6 +1781,8 @@ class ObjectOutputStream final : public io::OutputStream { S3Model::CompleteMultipartUploadRequest req; req.SetBucket(ToAwsString(path_.bucket)); req.SetKey(ToAwsString(path_.key)); + RETURN_NOT_OK(SetSSECustomerKey(req, sse_customer_key_)); + Review Comment: ```suggestion ``` ########## cpp/src/arrow/filesystem/s3fs.h: ########## @@ -196,6 +196,9 @@ struct ARROW_EXPORT S3Options { /// delay between retries. std::shared_ptr<S3RetryStrategy> retry_strategy; + /// the SSE-C customized key(raw 32 bytes key). Review Comment: ```suggestion /// the SSE-C customized key (raw 32 bytes key). ``` ########## cpp/src/arrow/filesystem/s3_internal.h: ########## @@ -291,6 +294,42 @@ class ConnectRetryStrategy : public Aws::Client::RetryStrategy { int32_t max_retry_duration_; }; +/// \brief calculate the MD5 of the input sse-c key(raw key, not base64 encoded) Review Comment: ```suggestion /// \brief calculate the MD5 of the input sse-c key (raw key, not base64 encoded) ``` ########## cpp/src/arrow/filesystem/s3_internal.h: ########## @@ -291,6 +294,42 @@ class ConnectRetryStrategy : public Aws::Client::RetryStrategy { int32_t max_retry_duration_; }; +/// \brief calculate the MD5 of the input sse-c key(raw key, not base64 encoded) +/// \param sse_customer_key is the input sse key +/// \return the base64 encoded MD5 for the input key +inline Result<std::string> CalculateSSECKeyMD5(const std::string& sse_customer_key) { + // the key needs to be 256 bits(32 bytes)according to + // https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html#specifying-s3-c-encryption + if (sse_customer_key.length() != 32) { + return Status::Invalid("32 bytes sse-c key is expected"); + } + + // Convert the raw binary key to an Aws::String + Aws::String sse_customer_key_aws_string(sse_customer_key.c_str(), + sse_customer_key.length()); + + // Compute the MD5 hash of the raw binary key + Aws::Utils::ByteBuffer sse_customer_key_md5 = + Aws::Utils::HashingUtils::CalculateMD5(sse_customer_key_aws_string); + + // Base64-encode the MD5 hash + return arrow::util::base64_encode(std::string_view( + reinterpret_cast<const char*>(sse_customer_key_md5.GetUnderlyingData()), + sse_customer_key_md5.GetLength())); +} + +template <typename S3RequestType> +Status SetSSECustomerKey(S3RequestType& request, const std::string& sse_customer_key) { + if (sse_customer_key.empty()) { + return Status::OK(); // do nothing if the sse_customer_key is not configured + } + ARROW_ASSIGN_OR_RAISE(auto result, internal::CalculateSSECKeyMD5(sse_customer_key)); + request.SetSSECustomerKeyMD5(result); Review Comment: ```suggestion ARROW_ASSIGN_OR_RAISE(auto md5, internal::CalculateSSECKeyMD5(sse_customer_key)); request.SetSSECustomerKeyMD5(md5); ``` ########## cpp/src/arrow/filesystem/s3fs_test.cc: ########## @@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) { } } +TEST(CalculateSSECKeyMD5, Sanity) { + ASSERT_FALSE(CalculateSSECKeyMD5("").ok()); // invalid length + ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890") + .ok()); // invalid length + // valid case, with ascii input key + ASSERT_OK_AND_ASSIGN(auto result, + CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1")) Review Comment: ```suggestion ASSERT_OK_AND_ASSIGN(auto md5, CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1")) ``` ########## cpp/src/arrow/filesystem/s3fs_test.cc: ########## @@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) { } } +TEST(CalculateSSECKeyMD5, Sanity) { + ASSERT_FALSE(CalculateSSECKeyMD5("").ok()); // invalid length + ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890") + .ok()); // invalid length + // valid case, with ascii input key + ASSERT_OK_AND_ASSIGN(auto result, + CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1")) + ASSERT_STREQ(result.c_str(), + "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk="); // valid case + // valid case, with some non-ASCII character and a null byte in the sse_customer_key Review Comment: Do we need ASCII case? It seems that non-ASCII case is only enough. ########## cpp/src/arrow/filesystem/s3fs.cc: ########## @@ -1668,6 +1694,8 @@ class ObjectOutputStream final : public io::OutputStream { S3Model::CreateMultipartUploadRequest req; req.SetBucket(ToAwsString(path_.bucket)); req.SetKey(ToAwsString(path_.key)); + RETURN_NOT_OK(SetSSECustomerKey(req, sse_customer_key)); + Review Comment: Could you revert a needless change? ########## cpp/src/arrow/filesystem/s3fs_test.cc: ########## @@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) { } } +TEST(CalculateSSECKeyMD5, Sanity) { + ASSERT_FALSE(CalculateSSECKeyMD5("").ok()); // invalid length + ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890") + .ok()); // invalid length + // valid case, with ascii input key + ASSERT_OK_AND_ASSIGN(auto result, + CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1")) + ASSERT_STREQ(result.c_str(), + "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk="); // valid case + // valid case, with some non-ASCII character and a null byte in the sse_customer_key + char sse_customer_key[32] = {}; + sse_customer_key[0] = '\x40'; // '@' character + sse_customer_key[1] = '\0'; // null byte + sse_customer_key[2] = '\xFF'; // non-ASCII + sse_customer_key[31] = '\xFA'; // non-ASCII + std::string sse_customer_key_string(sse_customer_key, sizeof(sse_customer_key)); + ASSERT_OK_AND_ASSIGN(result, CalculateSSECKeyMD5(sse_customer_key_string)) + ASSERT_STREQ(result.c_str(), + "ZjdiMTUzNmJhOTYzZDIxMTNiOTZjODRhNzQxY2JhZDY="); // valid case Review Comment: ```suggestion ASSERT_OK_AND_ASSIGN(md5, CalculateSSECKeyMD5(sse_customer_key_string)) ASSERT_EQ(md5, "ZjdiMTUzNmJhOTYzZDIxMTNiOTZjODRhNzQxY2JhZDY="); // valid case ``` ########## cpp/src/arrow/filesystem/s3_internal.h: ########## @@ -291,6 +294,42 @@ class ConnectRetryStrategy : public Aws::Client::RetryStrategy { int32_t max_retry_duration_; }; +/// \brief calculate the MD5 of the input sse-c key(raw key, not base64 encoded) +/// \param sse_customer_key is the input sse key +/// \return the base64 encoded MD5 for the input key +inline Result<std::string> CalculateSSECKeyMD5(const std::string& sse_customer_key) { Review Comment: Could you avoid abbreviating here like others? ```suggestion inline Result<std::string> CalculateSSECustomerKeyMD5(const std::string& sse_customer_key) { ``` ########## cpp/src/arrow/filesystem/s3fs_test.cc: ########## @@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) { } } +TEST(CalculateSSECKeyMD5, Sanity) { + ASSERT_FALSE(CalculateSSECKeyMD5("").ok()); // invalid length + ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890") + .ok()); // invalid length + // valid case, with ascii input key + ASSERT_OK_AND_ASSIGN(auto result, + CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1")) + ASSERT_STREQ(result.c_str(), + "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk="); // valid case Review Comment: ```suggestion ASSERT_EQ(md5, "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk="); // valid case ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org