This is an automated email from the ASF dual-hosted git repository.
nicholasjiang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git
The following commit(s) were added to refs/heads/main by this push:
new eb7a720ac [CELEBORN-2259] The S3MultipartUploadHandler uses
fs.s3a.aws.credentials.provider
eb7a720ac is described below
commit eb7a720ac9c60c14c9f2b7091888911429ab1c7b
Author: Dzeri96 <[email protected]>
AuthorDate: Tue Feb 17 16:32:46 2026 +0800
[CELEBORN-2259] The S3MultipartUploadHandler uses
fs.s3a.aws.credentials.provider
### What changes were proposed in this pull request?
The S3 client in `S3MultipartUploadHandler` now uses the dynamic config
`fs.s3a.aws.credentials.provider` to set up its credentials provider chain.
### Why are the changes needed?
Before this, it was only possible to use the hard-coded provider
configuration.
### Does this PR resolve a correctness bug?
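Sort of: previously the handler ignored a configured
`fs.s3a.aws.credentials.provider` and always used its hard-coded provider chain.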
### Does this PR introduce _any_ user-facing change?
Yes, in the sense that `celeborn.hadoop.fs.s3a.aws.credentials.provider`
will now work correctly in `S3MultipartUploadHandler`.
### How was this patch tested?
Unit tests and a manual test.
**Note**: I don't like having to change the class in order to make it
testable, but I'm planning to get rid of this whole logic in another PR, where
we will use the same hadoop-created S3 client everywhere.
Closes #3599 from Dzeri96/CELEBORN-2259-cherrypicked.
Lead-authored-by: Dzeri96 <[email protected]>
Co-authored-by: Filip Darmanovic <[email protected]>
Signed-off-by: SteNicholas <[email protected]>
---
LICENSE-binary | 1 +
NOTICE-binary | 14 ++++++-
multipart-uploader/multipart-uploader-s3/pom.xml | 6 +++
.../apache/celeborn/S3MultipartUploadHandler.java | 29 ++++---------
.../celeborn/S3MultipartUploadHandlerSuiteJ.java | 47 ++++++++++++++++++++++
project/CelebornBuild.scala | 4 +-
6 files changed, 77 insertions(+), 24 deletions(-)
diff --git a/LICENSE-binary b/LICENSE-binary
index 5c29c642b..65e4b7d04 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -204,6 +204,7 @@
This project bundles the following dependencies under the Apache License 2.0
(http://www.apache.org/licenses/LICENSE-2.0.txt):
com.amazonaws:aws-java-sdk-bundle
+com.amazonaws:aws-java-sdk-sts
com.fasterxml.jackson.core:jackson-annotations
com.fasterxml.jackson.core:jackson-core
com.fasterxml.jackson.core:jackson-databind
diff --git a/NOTICE-binary b/NOTICE-binary
index 9942e1440..2118beeb9 100644
--- a/NOTICE-binary
+++ b/NOTICE-binary
@@ -204,5 +204,15 @@ mimepool
Copyright (c) 2018, 2022 Oracle and/or its affiliates.
-aws-java-sdk
-Copyright 2010-2024 Amazon.com, Inc. or its affiliates.
\ No newline at end of file
+aws-java-sdk, aws-java-sdk-sts
+Copyright 2010-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+This product includes software developed by
+Amazon Technologies, Inc (http://www.amazon.com/).
+
+**********************
+THIRD PARTY COMPONENTS
+**********************
+This software includes third party software subject to the following
copyrights:
+- XML parsing and utility functions from JetS3t - Copyright 2006-2009 James
Murty.
+- PKCS#1 PEM encoded private key parsing and utility functions from
oauth.googlecode.com - Copyright 1998-2010 AOL Inc.
\ No newline at end of file
diff --git a/multipart-uploader/multipart-uploader-s3/pom.xml
b/multipart-uploader/multipart-uploader-s3/pom.xml
index 999e9a438..707b11dc2 100644
--- a/multipart-uploader/multipart-uploader-s3/pom.xml
+++ b/multipart-uploader/multipart-uploader-s3/pom.xml
@@ -50,6 +50,12 @@
<artifactId>aws-java-sdk-s3</artifactId>
<version>${aws.version}</version>
</dependency>
+ <!-- Needed for com.amazonaws.auth.WebIdentityTokenCredentialsProvider -->
+ <dependency>
+ <groupId>com.amazonaws</groupId>
+ <artifactId>aws-java-sdk-sts</artifactId>
+ <version>${aws.version}</version>
+ </dependency>
</dependencies>
<build>
diff --git
a/multipart-uploader/multipart-uploader-s3/src/main/java/org/apache/celeborn/S3MultipartUploadHandler.java
b/multipart-uploader/multipart-uploader-s3/src/main/java/org/apache/celeborn/S3MultipartUploadHandler.java
index ab1d8c9a0..555f0695c 100644
---
a/multipart-uploader/multipart-uploader-s3/src/main/java/org/apache/celeborn/S3MultipartUploadHandler.java
+++
b/multipart-uploader/multipart-uploader-s3/src/main/java/org/apache/celeborn/S3MultipartUploadHandler.java
@@ -26,7 +26,6 @@ import java.util.List;
import com.amazonaws.AmazonClientException;
import com.amazonaws.ClientConfiguration;
-import com.amazonaws.auth.EnvironmentVariableCredentialsProvider;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.event.ProgressListener;
import com.amazonaws.retry.PredefinedBackoffStrategies;
@@ -35,23 +34,11 @@ import com.amazonaws.retry.RetryPolicy;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.*;
-import com.amazonaws.services.s3.model.AbortMultipartUploadRequest;
-import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
-import com.amazonaws.services.s3.model.CompleteMultipartUploadResult;
-import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
-import com.amazonaws.services.s3.model.InitiateMultipartUploadResult;
-import com.amazonaws.services.s3.model.ListPartsRequest;
-import com.amazonaws.services.s3.model.PartETag;
-import com.amazonaws.services.s3.model.PartListing;
-import com.amazonaws.services.s3.model.PartSummary;
-import com.amazonaws.services.s3.model.UploadPartRequest;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
import org.apache.hadoop.fs.s3a.Constants;
-import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider;
-import org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider;
-import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider;
+import org.apache.hadoop.fs.s3a.S3AUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -87,12 +74,7 @@ public class S3MultipartUploadHandler implements
MultipartUploadHandler {
this.maxBackoff = maxBackoff;
Configuration conf = hadoopFs.getConf();
- AWSCredentialProviderList providers = new AWSCredentialProviderList();
- providers.add(new TemporaryAWSCredentialsProvider(conf));
- providers.add(
- new SimpleAWSCredentialsProvider(new URI(String.format("s3a://%s",
bucketName)), conf));
- providers.add(new EnvironmentVariableCredentialsProvider());
- providers.add(new IAMInstanceCredentialsProvider());
+ URI binding = new URI(String.format("s3a://%s", bucketName));
RetryPolicy retryPolicy =
new RetryPolicy(
@@ -107,7 +89,7 @@ public class S3MultipartUploadHandler implements
MultipartUploadHandler {
.withMaxErrorRetry(s3MultiplePartUploadMaxRetries);
AmazonS3ClientBuilder builder =
AmazonS3ClientBuilder.standard()
- .withCredentials(providers)
+ .withCredentials(getCredentialsProvider(binding, conf))
.withClientConfiguration(clientConfig);
// for MinIO
String endpoint = conf.get("fs.s3a.endpoint");
@@ -259,4 +241,9 @@ public class S3MultipartUploadHandler implements
MultipartUploadHandler {
s3Client.shutdown();
}
}
+
+ static AWSCredentialProviderList getCredentialsProvider(URI binding,
Configuration conf)
+ throws IOException {
+ return S3AUtils.createAWSCredentialProviderSet(binding, conf);
+ }
}
diff --git
a/multipart-uploader/multipart-uploader-s3/src/test/java/org/apache/celeborn/S3MultipartUploadHandlerSuiteJ.java
b/multipart-uploader/multipart-uploader-s3/src/test/java/org/apache/celeborn/S3MultipartUploadHandlerSuiteJ.java
new file mode 100644
index 000000000..b2f40219f
--- /dev/null
+++
b/multipart-uploader/multipart-uploader-s3/src/test/java/org/apache/celeborn/S3MultipartUploadHandlerSuiteJ.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.celeborn;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
+import org.apache.hadoop.fs.s3a.Constants;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class S3MultipartUploadHandlerSuiteJ {
+
+ @Test
+ public void
testGetCredentialsProviderShouldGiveDefaultProvidersOnEmptyConfig() throws
Exception {
+ Configuration conf = new Configuration();
+ AWSCredentialProviderList providers =
+ S3MultipartUploadHandler.getCredentialsProvider(null, conf);
+ Assert.assertTrue(providers.size() != 0);
+ }
+
+ @Test
+ public void
testGetCredentialsProviderShouldReturnWebIdentityTokenCredentialsProvider()
+ throws Exception {
+ Configuration conf = new Configuration();
+ conf.set(
+ Constants.AWS_CREDENTIALS_PROVIDER,
+ "com.amazonaws.auth.WebIdentityTokenCredentialsProvider");
+ AWSCredentialProviderList providers =
+ S3MultipartUploadHandler.getCredentialsProvider(null, conf);
+ Assert.assertEquals("WebIdentityTokenCredentialsProvider ",
providers.listProviderNames());
+ }
+}
diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala
index 107edabac..d459e71ba 100644
--- a/project/CelebornBuild.scala
+++ b/project/CelebornBuild.scala
@@ -132,6 +132,8 @@ object Dependencies {
val hadoopAws = "org.apache.hadoop" % "hadoop-aws" % hadoopVersion
excludeAll (
ExclusionRule("com.amazonaws", "aws-java-sdk-bundle"))
val awsS3 = "com.amazonaws" % "aws-java-sdk-s3" % awsS3Version
+ // Needed for com.amazonaws.auth.WebIdentityTokenCredentialsProvider
+ val awsSTS = "com.amazonaws" % "aws-java-sdk-sts" % awsS3Version
val commonsCollections = "commons-collections" % "commons-collections" %
commonsCollectionsVersion
val hadoopAliyun = "org.apache.hadoop" % "hadoop-aliyun" % hadoopVersion
val aliyunOss = "com.aliyun.oss" % "aliyun-sdk-oss" % aliyunOssVersion
@@ -599,7 +601,7 @@ object CelebornSpi {
object CeleborMPU {
- lazy val hadoopAwsDependencies = Seq(Dependencies.hadoopAws,
Dependencies.awsS3)
+ lazy val hadoopAwsDependencies = Seq(Dependencies.hadoopAws,
Dependencies.awsS3, Dependencies.awsSTS)
lazy val hadoopAliyunDependencies = Seq(Dependencies.commonsCollections,
Dependencies.hadoopAliyun, Dependencies.aliyunOss)
lazy val celeborMPU = Project("celeborn-multipart-uploader-s3",
file("multipart-uploader/multipart-uploader-s3"))