This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 51bc2a61c9 GH-41797: [C++][S3] Remove GetBucketRegion hack for newer 
AWS SDK versions (#41798)
51bc2a61c9 is described below

commit 51bc2a61c90a89a29dacacbada190aa06f232271
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Jun 5 17:57:30 2024 +0200

    GH-41797: [C++][S3] Remove GetBucketRegion hack for newer AWS SDK versions 
(#41798)
    
    ### Rationale for this change
    
    To get the region a S3 bucket resides on, it is required to issue a 
HeadBucket request and parse the response headers for a certain header value.
    
    Unfortunately, the AWS SDK doesn't let us access arbitrary headers on 
successful responses for S3 model requests, which had us implement a workaround 
by calling lower-level SDK APIs.
    
    However, the SDK recently added a `GetBucketRegion` method on 
`HeadBucketRequest`, which obsoletes the need for this workaround. We now use 
this method if the available AWS SDK version is recent enough.
    
    ### Are these changes tested?
    
    By existing tests on the various CI configurations.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #41797
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/filesystem/s3fs.cc | 81 +++++++++++++++++++++++++++++-----------
 1 file changed, 59 insertions(+), 22 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 78e02c31a3..c456be2d0d 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -601,44 +601,81 @@ class S3Client : public Aws::S3::S3Client {
  public:
   using Aws::S3::S3Client::S3Client;
 
+  static inline constexpr auto kBucketRegionHeaderName = "x-amz-bucket-region";
+
+  std::string GetBucketRegionFromHeaders(
+      const Aws::Http::HeaderValueCollection& headers) {
+    const auto it = headers.find(ToAwsString(kBucketRegionHeaderName));
+    if (it != headers.end()) {
+      return std::string(FromAwsString(it->second));
+    }
+    return std::string();
+  }
+
+  template <typename ErrorType>
+  Result<std::string> GetBucketRegionFromError(
+      const std::string& bucket, const Aws::Client::AWSError<ErrorType>& 
error) {
+    std::string region = 
GetBucketRegionFromHeaders(error.GetResponseHeaders());
+    if (!region.empty()) {
+      return region;
+    } else if (error.GetResponseCode() == 
Aws::Http::HttpResponseCode::NOT_FOUND) {
+      return Status::IOError("Bucket '", bucket, "' not found");
+    } else {
+      return ErrorToStatus(
+          std::forward_as_tuple("When resolving region for bucket '", bucket, 
"': "),
+          "HeadBucket", error);
+    }
+  }
+
+#if ARROW_AWS_SDK_VERSION_CHECK(1, 11, 212)
+  // HeadBucketResult::GetBucketRegion appeared in AWS SDK 1.11.212
+  Result<std::string> GetBucketRegion(const std::string& bucket,
+                                      const S3Model::HeadBucketRequest& 
request) {
+    auto outcome = this->HeadBucket(request);
+    if (!outcome.IsSuccess()) {
+      return GetBucketRegionFromError(bucket, outcome.GetError());
+    }
+    auto&& region = std::move(outcome).GetResult().GetBucketRegion();
+    if (region.empty()) {
+      return Status::IOError("When resolving region for bucket '", 
request.GetBucket(),
+                             "': missing 'x-amz-bucket-region' header in 
response");
+    }
+    return region;
+  }
+#else
   // To get a bucket's region, we must extract the "x-amz-bucket-region" header
   // from the response to a HEAD bucket request.
   // Unfortunately, the S3Client APIs don't let us access the headers of 
successful
   // responses.  So we have to cook a AWS request and issue it ourselves.
-
-  Result<std::string> GetBucketRegion(const S3Model::HeadBucketRequest& 
request) {
+  Result<std::string> GetBucketRegion(const std::string& bucket,
+                                      const S3Model::HeadBucketRequest& 
request) {
     auto uri = GeneratePresignedUrl(request.GetBucket(),
                                     /*key=*/"", 
Aws::Http::HttpMethod::HTTP_HEAD);
     // NOTE: The signer region argument isn't passed here, as there's no easy
     // way of computing it (the relevant method is private).
     auto outcome = MakeRequest(uri, request, Aws::Http::HttpMethod::HTTP_HEAD,
                                Aws::Auth::SIGV4_SIGNER);
-    const auto code = outcome.IsSuccess() ? 
outcome.GetResult().GetResponseCode()
-                                          : 
outcome.GetError().GetResponseCode();
-    const auto& headers = outcome.IsSuccess()
-                              ? outcome.GetResult().GetHeaderValueCollection()
-                              : outcome.GetError().GetResponseHeaders();
-
-    const auto it = headers.find(ToAwsString("x-amz-bucket-region"));
-    if (it == headers.end()) {
-      if (code == Aws::Http::HttpResponseCode::NOT_FOUND) {
-        return Status::IOError("Bucket '", request.GetBucket(), "' not found");
-      } else if (!outcome.IsSuccess()) {
-        return ErrorToStatus(std::forward_as_tuple("When resolving region for 
bucket '",
-                                                   request.GetBucket(), "': "),
-                             "HeadBucket", outcome.GetError());
-      } else {
-        return Status::IOError("When resolving region for bucket '", 
request.GetBucket(),
-                               "': missing 'x-amz-bucket-region' header in 
response");
-      }
+    if (!outcome.IsSuccess()) {
+      return GetBucketRegionFromError(bucket, outcome.GetError());
+    }
+    std::string region =
+        
GetBucketRegionFromHeaders(outcome.GetResult().GetHeaderValueCollection());
+    if (!region.empty()) {
+      return region;
+    } else if (outcome.GetResult().GetResponseCode() ==
+               Aws::Http::HttpResponseCode::NOT_FOUND) {
+      return Status::IOError("Bucket '", request.GetBucket(), "' not found");
+    } else {
+      return Status::IOError("When resolving region for bucket '", 
request.GetBucket(),
+                             "': missing 'x-amz-bucket-region' header in 
response");
     }
-    return std::string(FromAwsString(it->second));
   }
+#endif
 
   Result<std::string> GetBucketRegion(const std::string& bucket) {
     S3Model::HeadBucketRequest req;
     req.SetBucket(ToAwsString(bucket));
-    return GetBucketRegion(req);
+    return GetBucketRegion(bucket, req);
   }
 
   S3Model::CompleteMultipartUploadOutcome 
CompleteMultipartUploadWithErrorFixup(

Reply via email to