This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 12f62653c8 GH-45304: [C++][S3] Workaround compatibility issue between AWS SDK and MinIO (#45310)
12f62653c8 is described below

commit 12f62653c825fbf305bfde61c112d2aa69203c62
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Jan 22 17:43:33 2025 +0100

    GH-45304: [C++][S3] Workaround compatibility issue between AWS SDK and MinIO (#45310)
    
    ### Rationale for this change
    
    Some AWS SDK versions have faulty chunked encoding when the body is 0 bytes:
    https://github.com/aws/aws-sdk-cpp/issues/3259
    
    ### What changes are included in this PR?
    
    Work around the faulty chunked encoding implementation by only setting a body stream if the body is non-empty.
    
    ### Are these changes tested?
    
    Locally for now, but will be picked up by CI (and conda-forge) at some point.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #45304
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/filesystem/s3fs.cc      | 29 +++++++++++++++++------------
 cpp/src/arrow/filesystem/s3fs_test.cc |  1 -
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index b6a928ecdd..773ef84d24 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -1983,27 +1983,33 @@ class ObjectOutputStream final : public 
io::OutputStream {
       const void* data, int64_t nbytes, std::shared_ptr<Buffer> owned_buffer = 
nullptr) {
     req.SetBucket(ToAwsString(path_.bucket));
     req.SetKey(ToAwsString(path_.key));
-    req.SetBody(std::make_shared<StringViewStream>(data, nbytes));
     req.SetContentLength(nbytes);
     RETURN_NOT_OK(SetSSECustomerKey(&req, sse_customer_key_));
 
     if (!background_writes_) {
-      req.SetBody(std::make_shared<StringViewStream>(data, nbytes));
+      // GH-45304: avoid setting a body stream if length is 0.
+      // This workaround can be removed once we require AWS SDK 1.11.489 or 
later.
+      if (nbytes != 0) {
+        req.SetBody(std::make_shared<StringViewStream>(data, nbytes));
+      }
 
       ARROW_ASSIGN_OR_RAISE(auto outcome, TriggerUploadRequest(req, holder_));
 
       RETURN_NOT_OK(sync_result_callback(req, upload_state_, part_number_, 
outcome));
     } else {
-      // If the data isn't owned, make an immutable copy for the lifetime of 
the closure
-      if (owned_buffer == nullptr) {
-        ARROW_ASSIGN_OR_RAISE(owned_buffer, AllocateBuffer(nbytes, 
io_context_.pool()));
-        memcpy(owned_buffer->mutable_data(), data, nbytes);
-      } else {
-        DCHECK_EQ(data, owned_buffer->data());
-        DCHECK_EQ(nbytes, owned_buffer->size());
+      // (GH-45304: avoid setting a body stream if length is 0, see above)
+      if (nbytes != 0) {
+        // If the data isn't owned, make an immutable copy for the lifetime of 
the closure
+        if (owned_buffer == nullptr) {
+          ARROW_ASSIGN_OR_RAISE(owned_buffer, AllocateBuffer(nbytes, 
io_context_.pool()));
+          memcpy(owned_buffer->mutable_data(), data, nbytes);
+        } else {
+          DCHECK_EQ(data, owned_buffer->data());
+          DCHECK_EQ(nbytes, owned_buffer->size());
+        }
+        req.SetBody(std::make_shared<StringViewStream>(owned_buffer->data(),
+                                                       owned_buffer->size()));
       }
-      req.SetBody(
-          std::make_shared<StringViewStream>(owned_buffer->data(), 
owned_buffer->size()));
 
       {
         std::unique_lock<std::mutex> lock(upload_state_->mutex);
@@ -2345,7 +2351,6 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
     req.SetBucket(ToAwsString(bucket));
     req.SetKey(ToAwsString(key));
     req.SetContentType(kAwsDirectoryContentType);
-    req.SetBody(std::make_shared<std::stringstream>(""));
     return OutcomeToStatus(
         std::forward_as_tuple("When creating key '", key, "' in bucket '", 
bucket, "': "),
         "PutObject", client_lock.Move()->PutObject(req));
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc 
b/cpp/src/arrow/filesystem/s3fs_test.cc
index 3082ecb784..370f3b2685 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -526,7 +526,6 @@ class TestS3FS : public S3TestMixin {
     Aws::S3::Model::PutObjectRequest req;
     req.SetBucket(ToAwsString("bucket"));
     req.SetKey(ToAwsString("emptydir/"));
-    req.SetBody(std::make_shared<std::stringstream>(""));
     RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
     // NOTE: no need to create intermediate "directories" somedir/ and
     // somedir/subdir/

Reply via email to