This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 12f62653c8 GH-45304: [C++][S3] Workaround compatibility issue between
AWS SDK and MinIO (#45310)
12f62653c8 is described below
commit 12f62653c825fbf305bfde61c112d2aa69203c62
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Jan 22 17:43:33 2025 +0100
GH-45304: [C++][S3] Workaround compatibility issue between AWS SDK and
MinIO (#45310)
### Rationale for this change
Some AWS SDK versions produce a faulty chunked encoding when the request body is 0 bytes:
https://github.com/aws/aws-sdk-cpp/issues/3259
### What changes are included in this PR?
Work around the faulty chunked encoding implementation by only setting a body
stream if the body is non-empty.
### Are these changes tested?
Locally for now, but the changes will be picked up by CI (and conda-forge) at some point.
### Are there any user-facing changes?
No.
* GitHub Issue: #45304
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/filesystem/s3fs.cc | 29 +++++++++++++++++------------
cpp/src/arrow/filesystem/s3fs_test.cc | 1 -
2 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index b6a928ecdd..773ef84d24 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -1983,27 +1983,33 @@ class ObjectOutputStream final : public
io::OutputStream {
const void* data, int64_t nbytes, std::shared_ptr<Buffer> owned_buffer =
nullptr) {
req.SetBucket(ToAwsString(path_.bucket));
req.SetKey(ToAwsString(path_.key));
- req.SetBody(std::make_shared<StringViewStream>(data, nbytes));
req.SetContentLength(nbytes);
RETURN_NOT_OK(SetSSECustomerKey(&req, sse_customer_key_));
if (!background_writes_) {
- req.SetBody(std::make_shared<StringViewStream>(data, nbytes));
+ // GH-45304: avoid setting a body stream if length is 0.
+ // This workaround can be removed once we require AWS SDK 1.11.489 or
later.
+ if (nbytes != 0) {
+ req.SetBody(std::make_shared<StringViewStream>(data, nbytes));
+ }
ARROW_ASSIGN_OR_RAISE(auto outcome, TriggerUploadRequest(req, holder_));
RETURN_NOT_OK(sync_result_callback(req, upload_state_, part_number_,
outcome));
} else {
- // If the data isn't owned, make an immutable copy for the lifetime of
the closure
- if (owned_buffer == nullptr) {
- ARROW_ASSIGN_OR_RAISE(owned_buffer, AllocateBuffer(nbytes,
io_context_.pool()));
- memcpy(owned_buffer->mutable_data(), data, nbytes);
- } else {
- DCHECK_EQ(data, owned_buffer->data());
- DCHECK_EQ(nbytes, owned_buffer->size());
+ // (GH-45304: avoid setting a body stream if length is 0, see above)
+ if (nbytes != 0) {
+ // If the data isn't owned, make an immutable copy for the lifetime of
the closure
+ if (owned_buffer == nullptr) {
+ ARROW_ASSIGN_OR_RAISE(owned_buffer, AllocateBuffer(nbytes,
io_context_.pool()));
+ memcpy(owned_buffer->mutable_data(), data, nbytes);
+ } else {
+ DCHECK_EQ(data, owned_buffer->data());
+ DCHECK_EQ(nbytes, owned_buffer->size());
+ }
+ req.SetBody(std::make_shared<StringViewStream>(owned_buffer->data(),
+ owned_buffer->size()));
}
- req.SetBody(
- std::make_shared<StringViewStream>(owned_buffer->data(),
owned_buffer->size()));
{
std::unique_lock<std::mutex> lock(upload_state_->mutex);
@@ -2345,7 +2351,6 @@ class S3FileSystem::Impl : public
std::enable_shared_from_this<S3FileSystem::Imp
req.SetBucket(ToAwsString(bucket));
req.SetKey(ToAwsString(key));
req.SetContentType(kAwsDirectoryContentType);
- req.SetBody(std::make_shared<std::stringstream>(""));
return OutcomeToStatus(
std::forward_as_tuple("When creating key '", key, "' in bucket '",
bucket, "': "),
"PutObject", client_lock.Move()->PutObject(req));
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc
b/cpp/src/arrow/filesystem/s3fs_test.cc
index 3082ecb784..370f3b2685 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -526,7 +526,6 @@ class TestS3FS : public S3TestMixin {
Aws::S3::Model::PutObjectRequest req;
req.SetBucket(ToAwsString("bucket"));
req.SetKey(ToAwsString("emptydir/"));
- req.SetBody(std::make_shared<std::stringstream>(""));
RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
// NOTE: no need to create intermediate "directories" somedir/ and
// somedir/subdir/