This is an automated email from the ASF dual-hosted git repository. brycemecum pushed a commit to branch maint-19.0.1 in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 4be13f21a3321784ff16d4e93544a85a3d3c0458 Author: Antoine Pitrou <[email protected]> AuthorDate: Wed Jan 22 17:43:33 2025 +0100 GH-45304: [C++][S3] Workaround compatibility issue between AWS SDK and MinIO (#45310) ### Rationale for this change Some AWS SDK versions have faulty chunked encoding when the body is 0 bytes: https://github.com/aws/aws-sdk-cpp/issues/3259 ### What changes are included in this PR? Work around faulty chunked encoding implementation by only setting a body stream if non-empty. ### Are these changes tested? Locally for now, but will be picked by CI (and conda-forge) at some point. ### Are there any user-facing changes? No. * GitHub Issue: #45304 Authored-by: Antoine Pitrou <[email protected]> Signed-off-by: Antoine Pitrou <[email protected]> --- cpp/src/arrow/filesystem/s3fs.cc | 29 +++++++++++++++++------------ cpp/src/arrow/filesystem/s3fs_test.cc | 1 - 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index b6a928ecdd..773ef84d24 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -1983,27 +1983,33 @@ class ObjectOutputStream final : public io::OutputStream { const void* data, int64_t nbytes, std::shared_ptr<Buffer> owned_buffer = nullptr) { req.SetBucket(ToAwsString(path_.bucket)); req.SetKey(ToAwsString(path_.key)); - req.SetBody(std::make_shared<StringViewStream>(data, nbytes)); req.SetContentLength(nbytes); RETURN_NOT_OK(SetSSECustomerKey(&req, sse_customer_key_)); if (!background_writes_) { - req.SetBody(std::make_shared<StringViewStream>(data, nbytes)); + // GH-45304: avoid setting a body stream if length is 0. + // This workaround can be removed once we require AWS SDK 1.11.489 or later. + if (nbytes != 0) { + req.SetBody(std::make_shared<StringViewStream>(data, nbytes)); + } ARROW_ASSIGN_OR_RAISE(auto outcome, TriggerUploadRequest(req, holder_)); RETURN_NOT_OK(sync_result_callback(req, upload_state_, part_number_, outcome)); } else { - // If the data isn't owned, make an immutable copy for the lifetime of the closure - if (owned_buffer == nullptr) { - ARROW_ASSIGN_OR_RAISE(owned_buffer, AllocateBuffer(nbytes, io_context_.pool())); - memcpy(owned_buffer->mutable_data(), data, nbytes); - } else { - DCHECK_EQ(data, owned_buffer->data()); - DCHECK_EQ(nbytes, owned_buffer->size()); + // (GH-45304: avoid setting a body stream if length is 0, see above) + if (nbytes != 0) { + // If the data isn't owned, make an immutable copy for the lifetime of the closure + if (owned_buffer == nullptr) { + ARROW_ASSIGN_OR_RAISE(owned_buffer, AllocateBuffer(nbytes, io_context_.pool())); + memcpy(owned_buffer->mutable_data(), data, nbytes); + } else { + DCHECK_EQ(data, owned_buffer->data()); + DCHECK_EQ(nbytes, owned_buffer->size()); + } + req.SetBody(std::make_shared<StringViewStream>(owned_buffer->data(), + owned_buffer->size())); } - req.SetBody( - std::make_shared<StringViewStream>(owned_buffer->data(), owned_buffer->size())); { std::unique_lock<std::mutex> lock(upload_state_->mutex); @@ -2345,7 +2351,6 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp req.SetBucket(ToAwsString(bucket)); req.SetKey(ToAwsString(key)); req.SetContentType(kAwsDirectoryContentType); - req.SetBody(std::make_shared<std::stringstream>("")); return OutcomeToStatus( std::forward_as_tuple("When creating key '", key, "' in bucket '", bucket, "': "), "PutObject", client_lock.Move()->PutObject(req)); diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index 3082ecb784..370f3b2685 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -526,7 +526,6 @@ class TestS3FS : public S3TestMixin { Aws::S3::Model::PutObjectRequest req; req.SetBucket(ToAwsString("bucket")); req.SetKey(ToAwsString("emptydir/")); - req.SetBody(std::make_shared<std::stringstream>("")); RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req))); // NOTE: no need to create intermediate "directories" somedir/ and // somedir/subdir/
