This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 85b4e6d0af GH-46746: [C++] Assume AWS SDK >= 1.11.0 (#46742)
85b4e6d0af is described below

commit 85b4e6d0af00f795a4530896d5c3779f2e95ccea
Author: Antoine Pitrou <[email protected]>
AuthorDate: Tue Jun 24 11:02:02 2025 +0200

    GH-46746: [C++] Assume AWS SDK >= 1.11.0 (#46742)
    
    ### Rationale for this change
    
    We're currently carrying compatibility code for AWS SDK versions older
    than 1.9.0. However, we almost always bundle our own build of the AWS SDK,
    so we should be able to rely on a more recent version. Version 1.11.0 was
    [released in January 2023](https://github.com/aws/aws-sdk-cpp/releases/tag/1.11.0),
    which was already 2.5 years ago, so this should be a reasonable target.
    
    This will make the S3 filesystem code more maintainable.
    
    ### Are these changes tested?
    
    Yes, on existing CI builds.
    
    ### Are there any user-facing changes?
    
    Users won't be able to build Arrow C++ if the AWS SDK version they have
    installed is older than 1.11.0, unless they pass `-DAWSSDK_SOURCE=BUNDLED`.
    * GitHub Issue: #46746
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake |  7 ++-
 cpp/src/arrow/filesystem/s3_internal.h      | 31 ----------
 cpp/src/arrow/filesystem/s3fs.cc            | 90 +++++------------------------
 3 files changed, 22 insertions(+), 106 deletions(-)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake 
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 7191db7104..14c36af02d 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -5278,7 +5278,12 @@ function(build_awssdk)
 endfunction()
 
 if(ARROW_S3)
-  resolve_dependency(AWSSDK HAVE_ALT TRUE)
+  # Keep this in sync with s3fs.cc
+  resolve_dependency(AWSSDK
+                     HAVE_ALT
+                     TRUE
+                     REQUIRED_VERSION
+                     1.11.0)
 
   message(STATUS "Found AWS SDK headers: ${AWSSDK_INCLUDE_DIR}")
   message(STATUS "Found AWS SDK libraries: ${AWSSDK_LINK_LIBRARIES}")
diff --git a/cpp/src/arrow/filesystem/s3_internal.h 
b/cpp/src/arrow/filesystem/s3_internal.h
index f408b18ebd..cf31bca58c 100644
--- a/cpp/src/arrow/filesystem/s3_internal.h
+++ b/cpp/src/arrow/filesystem/s3_internal.h
@@ -40,27 +40,6 @@
 #include "arrow/util/print_internal.h"
 #include "arrow/util/string.h"
 
-#ifndef ARROW_AWS_SDK_VERSION_CHECK
-// AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7.
-#  if defined(AWS_SDK_VERSION_MAJOR) && defined(AWS_SDK_VERSION_MINOR) && \
-      defined(AWS_SDK_VERSION_PATCH)
-// Redundant "(...)" are for suppressing "Weird number of spaces at
-// line-start. Are you using a 2-space indent? [whitespace/indent]
-// [3]" errors...
-#    define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch)                   
   \
-      ((AWS_SDK_VERSION_MAJOR > (major) ||                                     
   \
-        (AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR > (minor)) 
||  \
-        ((AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR == (minor) 
&& \
-          AWS_SDK_VERSION_PATCH >= (patch)))))
-#  else
-#    define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) 0
-#  endif
-#endif  // !ARROW_AWS_SDK_VERSION_CHECK
-
-#if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 201)
-#  define ARROW_S3_HAS_SSE_CUSTOMER_KEY
-#endif
-
 namespace arrow {
 namespace fs {
 namespace internal {
@@ -350,14 +329,9 @@ inline Result<std::optional<SSECustomerKeyHeaders>> 
GetSSECustomerKeyHeaders(
   if (sse_customer_key.empty()) {
     return std::nullopt;
   }
-#ifdef ARROW_S3_HAS_SSE_CUSTOMER_KEY
   ARROW_ASSIGN_OR_RAISE(auto md5, 
internal::CalculateSSECustomerKeyMD5(sse_customer_key));
   return SSECustomerKeyHeaders{arrow::util::base64_encode(sse_customer_key), 
md5,
                                "AES256"};
-#else
-  return Status::NotImplemented(
-      "SSE customer key not supported by this version of the AWS SDK");
-#endif
 }
 
 template <typename S3RequestType>
@@ -366,16 +340,11 @@ Status SetSSECustomerKey(S3RequestType* request, const 
std::string& sse_customer
   if (!maybe_headers.has_value()) {
     return Status::OK();
   }
-#ifdef ARROW_S3_HAS_SSE_CUSTOMER_KEY
   auto headers = std::move(maybe_headers).value();
   request->SetSSECustomerKey(headers.sse_customer_key);
   request->SetSSECustomerKeyMD5(headers.sse_customer_key_md5);
   request->SetSSECustomerAlgorithm(headers.sse_customer_algorithm);
   return Status::OK();
-#else
-  return Status::NotImplemented(
-      "SSE customer key not supported by this version of the AWS SDK");
-#endif
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index fa22413363..62daab249c 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -55,8 +55,13 @@
 #include <aws/core/utils/logging/ConsoleLogSystem.h>
 #include <aws/core/utils/stream/PreallocatedStreamBuf.h>
 #include <aws/core/utils/xml/XmlSerializer.h>
+#include <aws/crt/io/Bootstrap.h>
+#include <aws/crt/io/EventLoopGroup.h>
+#include <aws/crt/io/HostResolver.h>
 #include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>
 #include <aws/s3/S3Client.h>
+#include <aws/s3/S3ClientConfiguration.h>
+#include <aws/s3/S3EndpointProvider.h>
 #include <aws/s3/S3Errors.h>
 #include <aws/s3/model/AbortMultipartUploadRequest.h>
 #include <aws/s3/model/CompleteMultipartUploadRequest.h>
@@ -78,42 +83,18 @@
 #include <aws/s3/model/PutObjectResult.h>
 #include <aws/s3/model/UploadPartRequest.h>
 
-// AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7.
-#if defined(AWS_SDK_VERSION_MAJOR) && defined(AWS_SDK_VERSION_MINOR) && \
-    defined(AWS_SDK_VERSION_PATCH)
 // Redundant "(...)" are for suppressing "Weird number of spaces at
 // line-start. Are you using a 2-space indent? [whitespace/indent]
 // [3]" errors...
-#  define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch)                     
 \
-    ((AWS_SDK_VERSION_MAJOR > (major) ||                                       
 \
-      (AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR > (minor)) || 
 \
-      ((AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR == (minor) 
&& \
-        AWS_SDK_VERSION_PATCH >= (patch)))))
-#else
-#  define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) 0
-#endif
-
-// This feature is available since 1.9.0 but
-// AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7. So
-// we can't use this feature for [1.9.0,1.9.6]. If it's a problem,
-// please report it to our issue tracker.
-#if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 0)
-#  define ARROW_S3_HAS_CRT
-#endif
-
-#if ARROW_AWS_SDK_VERSION_CHECK(1, 10, 0)
-#  define ARROW_S3_HAS_S3CLIENT_CONFIGURATION
-#endif
-
-#ifdef ARROW_S3_HAS_CRT
-#  include <aws/crt/io/Bootstrap.h>
-#  include <aws/crt/io/EventLoopGroup.h>
-#  include <aws/crt/io/HostResolver.h>
-#endif
-
-#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
-#  include <aws/s3/S3ClientConfiguration.h>
-#  include <aws/s3/S3EndpointProvider.h>
+#define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch)                      \
+  ((AWS_SDK_VERSION_MAJOR > (major) ||                                        \
+    (AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR > (minor)) ||  \
+    ((AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR == (minor) && \
+      AWS_SDK_VERSION_PATCH >= (patch)))))
+
+// Keep this in sync with ThirdpartyToolchain.cmake
+#if !defined(AWS_SDK_VERSION_MAJOR) || !ARROW_AWS_SDK_VERSION_CHECK(1, 11, 0)
+#  error "AWS SDK version 1.11.0 or later is required"
 #endif
 
 #include "arrow/util/windows_fixup.h"
@@ -792,22 +773,6 @@ class S3Client : public Aws::S3::S3Client {
   std::shared_ptr<S3RetryStrategy> s3_retry_strategy_;
 };
 
-// In AWS SDK < 1.8, Aws::Client::ClientConfiguration::followRedirects is a 
bool.
-template <bool Never = false>
-void DisableRedirectsImpl(bool* followRedirects) {
-  *followRedirects = false;
-}
-
-// In AWS SDK >= 1.8, it's a Aws::Client::FollowRedirectsPolicy scoped enum.
-template <typename PolicyEnum, PolicyEnum Never = PolicyEnum::NEVER>
-void DisableRedirectsImpl(PolicyEnum* followRedirects) {
-  *followRedirects = Never;
-}
-
-void DisableRedirects(Aws::Client::ClientConfiguration* c) {
-  DisableRedirectsImpl(&c->followRedirects);
-}
-
 // -----------------------------------------------------------------------
 // S3 client protection against use after finalization
 //
@@ -978,8 +943,6 @@ Result<std::shared_ptr<S3ClientHolder>> GetClientHolder(
 // -----------------------------------------------------------------------
 // S3 client factory: build S3Client from S3Options
 
-#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
-
 // GH-40279: standard initialization of S3Client creates a new 
`S3EndpointProvider`
 // every time. Its construction takes 1ms, which makes instantiating every 
S3Client
 // very costly (see upstream bug report
@@ -1104,8 +1067,6 @@ class EndpointProviderCache {
   std::unordered_map<EndpointConfigKey, CacheValue> cache_;
 };
 
-#endif  // ARROW_S3_HAS_S3CLIENT_CONFIGURATION
-
 class ClientBuilder {
  public:
   explicit ClientBuilder(S3Options options) : options_(std::move(options)) {}
@@ -1188,17 +1149,10 @@ class ClientBuilder {
     const bool use_virtual_addressing =
         options_.endpoint_override.empty() || 
options_.force_virtual_addressing;
 
-#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
     client_config_.useVirtualAddressing = use_virtual_addressing;
     auto endpoint_provider = 
EndpointProviderCache::Instance()->Lookup(client_config_);
     auto client = std::make_shared<S3Client>(credentials_provider_, 
endpoint_provider,
                                              client_config_);
-#else
-    auto client = std::make_shared<S3Client>(
-        credentials_provider_, client_config_,
-        Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
-        use_virtual_addressing);
-#endif
     client->s3_retry_strategy_ = options_.retry_strategy;
     return GetClientHolder(std::move(client));
   }
@@ -1207,11 +1161,7 @@ class ClientBuilder {
 
  protected:
   S3Options options_;
-#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
   Aws::S3::S3ClientConfiguration client_config_;
-#else
-  Aws::Client::ClientConfiguration client_config_;
-#endif
   std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider_;
 };
 
@@ -1275,7 +1225,8 @@ class RegionResolver {
   Status Init() {
     DCHECK(builder_.options().endpoint_override.empty());
     // On Windows with AWS SDK >= 1.8, it is necessary to disable redirects 
(ARROW-10085).
-    DisableRedirects(builder_.mutable_config());
+    builder_.mutable_config()->followRedirects =
+        Aws::Client::FollowRedirectsPolicy::NEVER;
     return builder_.BuildClient().Value(&holder_);
   }
 
@@ -2391,8 +2342,6 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
       req.SetCopySourceSSECustomerKeyMD5(sse_headers.sse_customer_key_md5);
       
req.SetCopySourceSSECustomerAlgorithm(sse_headers.sse_customer_algorithm);
     }
-    // ARROW-13048: Copy source "Must be URL-encoded" according to AWS SDK 
docs.
-    // However at least in 1.8 and 1.9 the SDK URL-encodes the path for you
     req.SetCopySource(src_path.ToAwsString());
     return OutcomeToStatus(
         std::forward_as_tuple("When copying key '", src_path.key, "' in bucket 
'",
@@ -3500,9 +3449,7 @@ struct AwsInstance {
         return;
       }
       GetClientFinalizer()->Finalize();
-#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
       EndpointProviderCache::Instance()->Reset();
-#endif
       Aws::ShutdownAPI(aws_options_);
     }
   }
@@ -3529,7 +3476,6 @@ struct AwsInstance {
 
 #undef LOG_LEVEL_CASE
 
-#ifdef ARROW_S3_HAS_CRT
     aws_options_.ioOptions.clientBootstrap_create_fn =
         [ev_threads = options.num_event_loop_threads]() {
           // 
https://github.com/aws/aws-sdk-cpp/blob/1.11.15/src/aws-cpp-sdk-core/source/Aws.cpp#L65
@@ -3541,18 +3487,14 @@ struct AwsInstance {
           client_bootstrap->EnableBlockingShutdown();
           return client_bootstrap;
         };
-#endif
     aws_options_.loggingOptions.logLevel = aws_log_level;
     // By default the AWS SDK logs to files, log to console instead
     aws_options_.loggingOptions.logger_create_fn = [this] {
       return std::make_shared<Aws::Utils::Logging::ConsoleLogSystem>(
           aws_options_.loggingOptions.logLevel);
     };
-#if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 272)
     // ARROW-18290: escape all special chars for compatibility with non-AWS S3 
backends.
-    // This configuration options is only available with AWS SDK 1.9.272 and 
later.
     aws_options_.httpOptions.compliantRfc3986Encoding = true;
-#endif
     aws_options_.httpOptions.installSigPipeHandler = 
options.install_sigpipe_handler;
     Aws::InitAPI(aws_options_);
   }

Reply via email to