kou commented on code in PR #43601:
URL: https://github.com/apache/arrow/pull/43601#discussion_r1758770525
##########
cpp/src/arrow/filesystem/s3_internal.h:
##########
@@ -291,6 +294,42 @@ class ConnectRetryStrategy : public
Aws::Client::RetryStrategy {
int32_t max_retry_duration_;
};
+/// \brief calculate the MD5 of the input sse-c key(raw key, not base64
encoded)
+/// \param sse_customer_key is the input sse key
+/// \return the base64 encoded MD5 for the input key
+inline Result<std::string> CalculateSSECKeyMD5(const std::string&
sse_customer_key) {
+ // the key needs to be 256 bits(32 bytes)according to
+ //
https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html#specifying-s3-c-encryption
+ if (sse_customer_key.length() != 32) {
+ return Status::Invalid("32 bytes sse-c key is expected");
+ }
+
+ // Convert the raw binary key to an Aws::String
+ Aws::String sse_customer_key_aws_string(sse_customer_key.c_str(),
Review Comment:
Can we use `data()` here?
```suggestion
Aws::String sse_customer_key_aws_string(sse_customer_key.data(),
```
##########
cpp/src/arrow/filesystem/s3_internal.h:
##########
@@ -291,6 +294,42 @@ class ConnectRetryStrategy : public
Aws::Client::RetryStrategy {
int32_t max_retry_duration_;
};
+/// \brief calculate the MD5 of the input sse-c key(raw key, not base64
encoded)
+/// \param sse_customer_key is the input sse key
+/// \return the base64 encoded MD5 for the input key
+inline Result<std::string> CalculateSSECKeyMD5(const std::string&
sse_customer_key) {
+ // the key needs to be 256 bits(32 bytes)according to
Review Comment:
```suggestion
// the key needs to be 256 bits (32 bytes) according to
```
##########
cpp/src/arrow/filesystem/s3fs.cc:
##########
@@ -1769,6 +1781,8 @@ class ObjectOutputStream final : public io::OutputStream {
S3Model::CompleteMultipartUploadRequest req;
req.SetBucket(ToAwsString(path_.bucket));
req.SetKey(ToAwsString(path_.key));
+ RETURN_NOT_OK(SetSSECustomerKey(req, sse_customer_key_));
+
Review Comment:
```suggestion
```
##########
cpp/src/arrow/filesystem/s3fs.h:
##########
@@ -196,6 +196,9 @@ struct ARROW_EXPORT S3Options {
/// delay between retries.
std::shared_ptr<S3RetryStrategy> retry_strategy;
+ /// the SSE-C customized key(raw 32 bytes key).
Review Comment:
```suggestion
/// the SSE-C customized key (raw 32 bytes key).
```
##########
cpp/src/arrow/filesystem/s3_internal.h:
##########
@@ -291,6 +294,42 @@ class ConnectRetryStrategy : public
Aws::Client::RetryStrategy {
int32_t max_retry_duration_;
};
+/// \brief calculate the MD5 of the input sse-c key(raw key, not base64
encoded)
Review Comment:
```suggestion
/// \brief calculate the MD5 of the input sse-c key (raw key, not base64
encoded)
```
##########
cpp/src/arrow/filesystem/s3_internal.h:
##########
@@ -291,6 +294,42 @@ class ConnectRetryStrategy : public
Aws::Client::RetryStrategy {
int32_t max_retry_duration_;
};
+/// \brief calculate the MD5 of the input sse-c key(raw key, not base64
encoded)
+/// \param sse_customer_key is the input sse key
+/// \return the base64 encoded MD5 for the input key
+inline Result<std::string> CalculateSSECKeyMD5(const std::string&
sse_customer_key) {
+ // the key needs to be 256 bits(32 bytes)according to
+ //
https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html#specifying-s3-c-encryption
+ if (sse_customer_key.length() != 32) {
+ return Status::Invalid("32 bytes sse-c key is expected");
+ }
+
+ // Convert the raw binary key to an Aws::String
+ Aws::String sse_customer_key_aws_string(sse_customer_key.c_str(),
+ sse_customer_key.length());
+
+ // Compute the MD5 hash of the raw binary key
+ Aws::Utils::ByteBuffer sse_customer_key_md5 =
+ Aws::Utils::HashingUtils::CalculateMD5(sse_customer_key_aws_string);
+
+ // Base64-encode the MD5 hash
+ return arrow::util::base64_encode(std::string_view(
+ reinterpret_cast<const char*>(sse_customer_key_md5.GetUnderlyingData()),
+ sse_customer_key_md5.GetLength()));
+}
+
+template <typename S3RequestType>
+Status SetSSECustomerKey(S3RequestType& request, const std::string&
sse_customer_key) {
+ if (sse_customer_key.empty()) {
+ return Status::OK(); // do nothing if the sse_customer_key is not
configured
+ }
+ ARROW_ASSIGN_OR_RAISE(auto result,
internal::CalculateSSECKeyMD5(sse_customer_key));
+ request.SetSSECustomerKeyMD5(result);
Review Comment:
```suggestion
ARROW_ASSIGN_OR_RAISE(auto md5,
internal::CalculateSSECKeyMD5(sse_customer_key));
request.SetSSECustomerKeyMD5(md5);
```
##########
cpp/src/arrow/filesystem/s3fs_test.cc:
##########
@@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) {
}
}
+TEST(CalculateSSECKeyMD5, Sanity) {
+ ASSERT_FALSE(CalculateSSECKeyMD5("").ok()); // invalid length
+ ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890")
+ .ok()); // invalid length
+ // valid case, with ascii input key
+ ASSERT_OK_AND_ASSIGN(auto result,
+ CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1"))
Review Comment:
```suggestion
ASSERT_OK_AND_ASSIGN(auto md5,
CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1"))
```
##########
cpp/src/arrow/filesystem/s3fs_test.cc:
##########
@@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) {
}
}
+TEST(CalculateSSECKeyMD5, Sanity) {
+ ASSERT_FALSE(CalculateSSECKeyMD5("").ok()); // invalid length
+ ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890")
+ .ok()); // invalid length
+ // valid case, with ascii input key
+ ASSERT_OK_AND_ASSIGN(auto result,
+ CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1"))
+ ASSERT_STREQ(result.c_str(),
+ "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk="); // valid case
+ // valid case, with some non-ASCII character and a null byte in the
sse_customer_key
Review Comment:
Do we need ASCII case?
It seems that non-ASCII case is only enough.
##########
cpp/src/arrow/filesystem/s3fs.cc:
##########
@@ -1668,6 +1694,8 @@ class ObjectOutputStream final : public io::OutputStream {
S3Model::CreateMultipartUploadRequest req;
req.SetBucket(ToAwsString(path_.bucket));
req.SetKey(ToAwsString(path_.key));
+ RETURN_NOT_OK(SetSSECustomerKey(req, sse_customer_key));
+
Review Comment:
Could you revert a needless change?
##########
cpp/src/arrow/filesystem/s3fs_test.cc:
##########
@@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) {
}
}
+TEST(CalculateSSECKeyMD5, Sanity) {
+ ASSERT_FALSE(CalculateSSECKeyMD5("").ok()); // invalid length
+ ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890")
+ .ok()); // invalid length
+ // valid case, with ascii input key
+ ASSERT_OK_AND_ASSIGN(auto result,
+ CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1"))
+ ASSERT_STREQ(result.c_str(),
+ "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk="); // valid case
+ // valid case, with some non-ASCII character and a null byte in the
sse_customer_key
+ char sse_customer_key[32] = {};
+ sse_customer_key[0] = '\x40'; // '@' character
+ sse_customer_key[1] = '\0'; // null byte
+ sse_customer_key[2] = '\xFF'; // non-ASCII
+ sse_customer_key[31] = '\xFA'; // non-ASCII
+ std::string sse_customer_key_string(sse_customer_key,
sizeof(sse_customer_key));
+ ASSERT_OK_AND_ASSIGN(result, CalculateSSECKeyMD5(sse_customer_key_string))
+ ASSERT_STREQ(result.c_str(),
+ "ZjdiMTUzNmJhOTYzZDIxMTNiOTZjODRhNzQxY2JhZDY="); // valid case
Review Comment:
```suggestion
ASSERT_OK_AND_ASSIGN(md5, CalculateSSECKeyMD5(sse_customer_key_string))
ASSERT_EQ(md5, "ZjdiMTUzNmJhOTYzZDIxMTNiOTZjODRhNzQxY2JhZDY="); // valid
case
```
##########
cpp/src/arrow/filesystem/s3_internal.h:
##########
@@ -291,6 +294,42 @@ class ConnectRetryStrategy : public
Aws::Client::RetryStrategy {
int32_t max_retry_duration_;
};
+/// \brief calculate the MD5 of the input sse-c key(raw key, not base64
encoded)
+/// \param sse_customer_key is the input sse key
+/// \return the base64 encoded MD5 for the input key
+inline Result<std::string> CalculateSSECKeyMD5(const std::string&
sse_customer_key) {
Review Comment:
Could you avoid abbreviating here like others?
```suggestion
inline Result<std::string> CalculateSSECustomerKeyMD5(const std::string&
sse_customer_key) {
```
##########
cpp/src/arrow/filesystem/s3fs_test.cc:
##########
@@ -1579,5 +1612,26 @@ TEST(S3GlobalOptions, DefaultsLogLevel) {
}
}
+TEST(CalculateSSECKeyMD5, Sanity) {
+ ASSERT_FALSE(CalculateSSECKeyMD5("").ok()); // invalid length
+ ASSERT_FALSE(CalculateSSECKeyMD5("1234567890123456789012345678901234567890")
+ .ok()); // invalid length
+ // valid case, with ascii input key
+ ASSERT_OK_AND_ASSIGN(auto result,
+ CalculateSSECKeyMD5("NLbTMHZn9aCW3Li3ViAdBsoIldPCREw1"))
+ ASSERT_STREQ(result.c_str(),
+ "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk="); // valid case
Review Comment:
```suggestion
ASSERT_EQ(md5, "ZWMxZGUyOTZhMDQwZWJmM2Q4N2E4ZTczZjdlNmI3Mzk="); // valid
case
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]