This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 85b4e6d0af GH-46746: [C++] Assume AWS SDK >= 1.11.0 (#46742)
85b4e6d0af is described below
commit 85b4e6d0af00f795a4530896d5c3779f2e95ccea
Author: Antoine Pitrou <[email protected]>
AuthorDate: Tue Jun 24 11:02:02 2025 +0200
GH-46746: [C++] Assume AWS SDK >= 1.11.0 (#46742)
### Rationale for this change
We're currently carrying compatibility code for AWS SDK before 1.9.0.
However, we almost always bundle our own build of the AWS SDK, so we should be
able to rely on a more recent version. Version 1.11.0 was [released in January
2023](https://github.com/aws/aws-sdk-cpp/releases/tag/1.11.0), which is already
2.5 years ago, so this should be a reasonable target.
This will make the S3 filesystem code more maintainable.
### Are these changes tested?
Yes, on existing CI builds.
### Are there any user-facing changes?
Users won't be able to build Arrow C++ if the AWS SDK version they have
installed is older than 1.11.0, unless they pass `-DAWSSDK_SOURCE=BUNDLED`.
* GitHub Issue: #46746
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/cmake_modules/ThirdpartyToolchain.cmake | 7 ++-
cpp/src/arrow/filesystem/s3_internal.h | 31 ----------
cpp/src/arrow/filesystem/s3fs.cc | 90 +++++------------------------
3 files changed, 22 insertions(+), 106 deletions(-)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 7191db7104..14c36af02d 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -5278,7 +5278,12 @@ function(build_awssdk)
endfunction()
if(ARROW_S3)
- resolve_dependency(AWSSDK HAVE_ALT TRUE)
+ # Keep this in sync with s3fs.cc
+ resolve_dependency(AWSSDK
+ HAVE_ALT
+ TRUE
+ REQUIRED_VERSION
+ 1.11.0)
message(STATUS "Found AWS SDK headers: ${AWSSDK_INCLUDE_DIR}")
message(STATUS "Found AWS SDK libraries: ${AWSSDK_LINK_LIBRARIES}")
diff --git a/cpp/src/arrow/filesystem/s3_internal.h
b/cpp/src/arrow/filesystem/s3_internal.h
index f408b18ebd..cf31bca58c 100644
--- a/cpp/src/arrow/filesystem/s3_internal.h
+++ b/cpp/src/arrow/filesystem/s3_internal.h
@@ -40,27 +40,6 @@
#include "arrow/util/print_internal.h"
#include "arrow/util/string.h"
-#ifndef ARROW_AWS_SDK_VERSION_CHECK
-// AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7.
-# if defined(AWS_SDK_VERSION_MAJOR) && defined(AWS_SDK_VERSION_MINOR) && \
- defined(AWS_SDK_VERSION_PATCH)
-// Redundant "(...)" are for suppressing "Weird number of spaces at
-// line-start. Are you using a 2-space indent? [whitespace/indent]
-// [3]" errors...
-# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch)
\
- ((AWS_SDK_VERSION_MAJOR > (major) ||
\
- (AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR > (minor))
|| \
- ((AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR == (minor)
&& \
- AWS_SDK_VERSION_PATCH >= (patch)))))
-# else
-# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) 0
-# endif
-#endif // !ARROW_AWS_SDK_VERSION_CHECK
-
-#if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 201)
-# define ARROW_S3_HAS_SSE_CUSTOMER_KEY
-#endif
-
namespace arrow {
namespace fs {
namespace internal {
@@ -350,14 +329,9 @@ inline Result<std::optional<SSECustomerKeyHeaders>>
GetSSECustomerKeyHeaders(
if (sse_customer_key.empty()) {
return std::nullopt;
}
-#ifdef ARROW_S3_HAS_SSE_CUSTOMER_KEY
ARROW_ASSIGN_OR_RAISE(auto md5,
internal::CalculateSSECustomerKeyMD5(sse_customer_key));
return SSECustomerKeyHeaders{arrow::util::base64_encode(sse_customer_key),
md5,
"AES256"};
-#else
- return Status::NotImplemented(
- "SSE customer key not supported by this version of the AWS SDK");
-#endif
}
template <typename S3RequestType>
@@ -366,16 +340,11 @@ Status SetSSECustomerKey(S3RequestType* request, const
std::string& sse_customer
if (!maybe_headers.has_value()) {
return Status::OK();
}
-#ifdef ARROW_S3_HAS_SSE_CUSTOMER_KEY
auto headers = std::move(maybe_headers).value();
request->SetSSECustomerKey(headers.sse_customer_key);
request->SetSSECustomerKeyMD5(headers.sse_customer_key_md5);
request->SetSSECustomerAlgorithm(headers.sse_customer_algorithm);
return Status::OK();
-#else
- return Status::NotImplemented(
- "SSE customer key not supported by this version of the AWS SDK");
-#endif
}
} // namespace internal
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index fa22413363..62daab249c 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -55,8 +55,13 @@
#include <aws/core/utils/logging/ConsoleLogSystem.h>
#include <aws/core/utils/stream/PreallocatedStreamBuf.h>
#include <aws/core/utils/xml/XmlSerializer.h>
+#include <aws/crt/io/Bootstrap.h>
+#include <aws/crt/io/EventLoopGroup.h>
+#include <aws/crt/io/HostResolver.h>
#include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>
#include <aws/s3/S3Client.h>
+#include <aws/s3/S3ClientConfiguration.h>
+#include <aws/s3/S3EndpointProvider.h>
#include <aws/s3/S3Errors.h>
#include <aws/s3/model/AbortMultipartUploadRequest.h>
#include <aws/s3/model/CompleteMultipartUploadRequest.h>
@@ -78,42 +83,18 @@
#include <aws/s3/model/PutObjectResult.h>
#include <aws/s3/model/UploadPartRequest.h>
-// AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7.
-#if defined(AWS_SDK_VERSION_MAJOR) && defined(AWS_SDK_VERSION_MINOR) && \
- defined(AWS_SDK_VERSION_PATCH)
// Redundant "(...)" are for suppressing "Weird number of spaces at
// line-start. Are you using a 2-space indent? [whitespace/indent]
// [3]" errors...
-# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch)
\
- ((AWS_SDK_VERSION_MAJOR > (major) ||
\
- (AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR > (minor)) ||
\
- ((AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR == (minor)
&& \
- AWS_SDK_VERSION_PATCH >= (patch)))))
-#else
-# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) 0
-#endif
-
-// This feature is available since 1.9.0 but
-// AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7. So
-// we can't use this feature for [1.9.0,1.9.6]. If it's a problem,
-// please report it to our issue tracker.
-#if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 0)
-# define ARROW_S3_HAS_CRT
-#endif
-
-#if ARROW_AWS_SDK_VERSION_CHECK(1, 10, 0)
-# define ARROW_S3_HAS_S3CLIENT_CONFIGURATION
-#endif
-
-#ifdef ARROW_S3_HAS_CRT
-# include <aws/crt/io/Bootstrap.h>
-# include <aws/crt/io/EventLoopGroup.h>
-# include <aws/crt/io/HostResolver.h>
-#endif
-
-#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
-# include <aws/s3/S3ClientConfiguration.h>
-# include <aws/s3/S3EndpointProvider.h>
+#define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) \
+ ((AWS_SDK_VERSION_MAJOR > (major) || \
+ (AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR > (minor)) || \
+ ((AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR == (minor) && \
+ AWS_SDK_VERSION_PATCH >= (patch)))))
+
+// Keep this in sync with ThirdpartyToolchain.cmake
+#if !defined(AWS_SDK_VERSION_MAJOR) || !ARROW_AWS_SDK_VERSION_CHECK(1, 11, 0)
+# error "AWS SDK version 1.11.0 or later is required"
#endif
#include "arrow/util/windows_fixup.h"
@@ -792,22 +773,6 @@ class S3Client : public Aws::S3::S3Client {
std::shared_ptr<S3RetryStrategy> s3_retry_strategy_;
};
-// In AWS SDK < 1.8, Aws::Client::ClientConfiguration::followRedirects is a
bool.
-template <bool Never = false>
-void DisableRedirectsImpl(bool* followRedirects) {
- *followRedirects = false;
-}
-
-// In AWS SDK >= 1.8, it's a Aws::Client::FollowRedirectsPolicy scoped enum.
-template <typename PolicyEnum, PolicyEnum Never = PolicyEnum::NEVER>
-void DisableRedirectsImpl(PolicyEnum* followRedirects) {
- *followRedirects = Never;
-}
-
-void DisableRedirects(Aws::Client::ClientConfiguration* c) {
- DisableRedirectsImpl(&c->followRedirects);
-}
-
// -----------------------------------------------------------------------
// S3 client protection against use after finalization
//
@@ -978,8 +943,6 @@ Result<std::shared_ptr<S3ClientHolder>> GetClientHolder(
// -----------------------------------------------------------------------
// S3 client factory: build S3Client from S3Options
-#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
-
// GH-40279: standard initialization of S3Client creates a new
`S3EndpointProvider`
// every time. Its construction takes 1ms, which makes instantiating every
S3Client
// very costly (see upstream bug report
@@ -1104,8 +1067,6 @@ class EndpointProviderCache {
std::unordered_map<EndpointConfigKey, CacheValue> cache_;
};
-#endif // ARROW_S3_HAS_S3CLIENT_CONFIGURATION
-
class ClientBuilder {
public:
explicit ClientBuilder(S3Options options) : options_(std::move(options)) {}
@@ -1188,17 +1149,10 @@ class ClientBuilder {
const bool use_virtual_addressing =
options_.endpoint_override.empty() ||
options_.force_virtual_addressing;
-#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
client_config_.useVirtualAddressing = use_virtual_addressing;
auto endpoint_provider =
EndpointProviderCache::Instance()->Lookup(client_config_);
auto client = std::make_shared<S3Client>(credentials_provider_,
endpoint_provider,
client_config_);
-#else
- auto client = std::make_shared<S3Client>(
- credentials_provider_, client_config_,
- Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
- use_virtual_addressing);
-#endif
client->s3_retry_strategy_ = options_.retry_strategy;
return GetClientHolder(std::move(client));
}
@@ -1207,11 +1161,7 @@ class ClientBuilder {
protected:
S3Options options_;
-#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
Aws::S3::S3ClientConfiguration client_config_;
-#else
- Aws::Client::ClientConfiguration client_config_;
-#endif
std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider_;
};
@@ -1275,7 +1225,8 @@ class RegionResolver {
Status Init() {
DCHECK(builder_.options().endpoint_override.empty());
// On Windows with AWS SDK >= 1.8, it is necessary to disable redirects
(ARROW-10085).
- DisableRedirects(builder_.mutable_config());
+ builder_.mutable_config()->followRedirects =
+ Aws::Client::FollowRedirectsPolicy::NEVER;
return builder_.BuildClient().Value(&holder_);
}
@@ -2391,8 +2342,6 @@ class S3FileSystem::Impl : public
std::enable_shared_from_this<S3FileSystem::Imp
req.SetCopySourceSSECustomerKeyMD5(sse_headers.sse_customer_key_md5);
req.SetCopySourceSSECustomerAlgorithm(sse_headers.sse_customer_algorithm);
}
- // ARROW-13048: Copy source "Must be URL-encoded" according to AWS SDK
docs.
- // However at least in 1.8 and 1.9 the SDK URL-encodes the path for you
req.SetCopySource(src_path.ToAwsString());
return OutcomeToStatus(
std::forward_as_tuple("When copying key '", src_path.key, "' in bucket
'",
@@ -3500,9 +3449,7 @@ struct AwsInstance {
return;
}
GetClientFinalizer()->Finalize();
-#ifdef ARROW_S3_HAS_S3CLIENT_CONFIGURATION
EndpointProviderCache::Instance()->Reset();
-#endif
Aws::ShutdownAPI(aws_options_);
}
}
@@ -3529,7 +3476,6 @@ struct AwsInstance {
#undef LOG_LEVEL_CASE
-#ifdef ARROW_S3_HAS_CRT
aws_options_.ioOptions.clientBootstrap_create_fn =
[ev_threads = options.num_event_loop_threads]() {
//
https://github.com/aws/aws-sdk-cpp/blob/1.11.15/src/aws-cpp-sdk-core/source/Aws.cpp#L65
@@ -3541,18 +3487,14 @@ struct AwsInstance {
client_bootstrap->EnableBlockingShutdown();
return client_bootstrap;
};
-#endif
aws_options_.loggingOptions.logLevel = aws_log_level;
// By default the AWS SDK logs to files, log to console instead
aws_options_.loggingOptions.logger_create_fn = [this] {
return std::make_shared<Aws::Utils::Logging::ConsoleLogSystem>(
aws_options_.loggingOptions.logLevel);
};
-#if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 272)
// ARROW-18290: escape all special chars for compatibility with non-AWS S3
backends.
- // This configuration options is only available with AWS SDK 1.9.272 and
later.
aws_options_.httpOptions.compliantRfc3986Encoding = true;
-#endif
aws_options_.httpOptions.installSigPipeHandler =
options.install_sigpipe_handler;
Aws::InitAPI(aws_options_);
}