OliLay commented on code in PR #41564:
URL: https://github.com/apache/arrow/pull/41564#discussion_r1601401047


##########
cpp/src/arrow/filesystem/s3fs.cc:
##########
@@ -1560,34 +1582,47 @@ class ObjectOutputStream final : public io::OutputStream {
         path_(path),
         metadata_(metadata),
         default_metadata_(options.default_metadata),
-        background_writes_(options.background_writes) {}
+        background_writes_(options.background_writes),
+        sanitize_bucket_on_open_(options.sanitize_bucket_on_open) {}
 
   ~ObjectOutputStream() override {
     // For compliance with the rest of the IO stack, Close rather than Abort,
     // even though it may be more expensive.
     io::internal::CloseFromDestructor(this);
   }
 
-  Status Init() {
-    ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock());
+  template <typename ObjectRequest>
+  Status SetMetadataInRequest(ObjectRequest* request) {
+    std::shared_ptr<const KeyValueMetadata> metadata;
 
-    // Initiate the multi-part upload
-    S3Model::CreateMultipartUploadRequest req;
-    req.SetBucket(ToAwsString(path_.bucket));
-    req.SetKey(ToAwsString(path_.key));
     if (metadata_ && metadata_->size() != 0) {
-      RETURN_NOT_OK(SetObjectMetadata(metadata_, &req));
+      metadata = metadata_;
     } else if (default_metadata_ && default_metadata_->size() != 0) {
-      RETURN_NOT_OK(SetObjectMetadata(default_metadata_, &req));
+      metadata = default_metadata_;
     }
 
-    // If we do not set anything then the SDK will default to application/xml
-    // which confuses some tools (https://github.com/apache/arrow/issues/11934)
-    // So we instead default to application/octet-stream which is less misleading
-    if (!req.ContentTypeHasBeenSet()) {
-      req.SetContentType("application/octet-stream");
+    if (metadata == nullptr ||
+        !metadata->Contains(ObjectMetadataSetter<ObjectRequest>::CONTENT_TYPE_KEY)) {
+      // If we do not set anything then the SDK will default to application/xml
+      // which confuses some tools (https://github.com/apache/arrow/issues/11934)
+      // So we instead default to application/octet-stream which is less misleading
+      request->SetContentType("application/octet-stream");
+    } else {
+      RETURN_NOT_OK(SetObjectMetadata(metadata, request));
     }
 
+    return Status::OK();
+  }
+
+  Status CreateMultipartUpload() {
+    ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock());

Review Comment:
   I added a debug assertion to verify that we do not create a multi-part upload 
if the conditions are not met (not enough data or optimization disabled). 
[1e9e3a4](https://github.com/apache/arrow/pull/41564/commits/1e9e3a476874b9d464517f5f8357b370e315fbed)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to