This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 86f0b3a0896 [feat](catalog) Support passing credentials_provider_type
to BE for S3 access (#59082)
86f0b3a0896 is described below
commit 86f0b3a089615111707bfe3ed2c0bfcf33ad22c7
Author: zy-kkk <[email protected]>
AuthorDate: Thu Dec 18 22:51:31 2025 +0800
[feat](catalog) Support passing credentials_provider_type to BE for S3
access (#59082)
Related PR: #58740
This is the backend change for PR #58740, with additional test cases
added on top of it.
---------
Co-authored-by: Calvin Kirs <[email protected]>
---
be/src/util/s3_util.cpp | 43 +++++++++++---
be/src/util/s3_util.h | 2 +
be/test/io/s3_client_factory_test.cpp | 8 ++-
common/cpp/aws_common.cpp | 29 +++++++++
common/cpp/aws_common.h | 13 +++-
.../cpp/custom_aws_credentials_provider_chain.cpp | 3 +
.../common/AwsCredentialsProviderFactory.java | 2 +-
.../storage/AbstractS3CompatibleProperties.java | 2 +
.../datasource/property/storage/S3Properties.java | 11 +++-
.../property/storage/S3PropertiesTest.java | 9 ++-
regression-test/conf/regression-conf.groovy | 7 +++
.../aws_iam_role_p0/test_tvf_anonymous.groovy | 69 ++++++++++++++++++++++
12 files changed, 182 insertions(+), 16 deletions(-)
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index e73e9c6da6e..0cf608877bb 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -20,7 +20,9 @@
#include <aws/core/auth/AWSAuthSigner.h>
#include <aws/core/auth/AWSCredentials.h>
#include <aws/core/auth/AWSCredentialsProviderChain.h>
+#include <aws/core/auth/STSCredentialsProvider.h>
#include <aws/core/client/DefaultRetryStrategy.h>
+#include <aws/core/platform/Environment.h>
#include <aws/core/utils/logging/LogLevel.h>
#include <aws/core/utils/logging/LogSystemInterface.h>
#include <aws/core/utils/memory/stl/AWSStringStream.h>
@@ -121,6 +123,7 @@ constexpr char S3_NEED_OVERRIDE_ENDPOINT[] =
"AWS_NEED_OVERRIDE_ENDPOINT";
constexpr char S3_ROLE_ARN[] = "AWS_ROLE_ARN";
constexpr char S3_EXTERNAL_ID[] = "AWS_EXTERNAL_ID";
+constexpr char S3_CREDENTIALS_PROVIDER_TYPE[] =
"AWS_CREDENTIALS_PROVIDER_TYPE";
} // namespace
bvar::Adder<int64_t> get_rate_limit_ns("get_rate_limit_ns");
@@ -323,6 +326,28 @@ S3ClientFactory::_get_aws_credentials_provider_v1(const
S3ClientConf& s3_conf) {
return std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>();
}
+std::shared_ptr<Aws::Auth::AWSCredentialsProvider>
S3ClientFactory::_create_credentials_provider(
+ CredProviderType type) {
+ switch (type) {
+ case CredProviderType::Env:
+ return
std::make_shared<Aws::Auth::EnvironmentAWSCredentialsProvider>();
+ case CredProviderType::SystemProperties:
+ return
std::make_shared<Aws::Auth::ProfileConfigFileAWSCredentialsProvider>();
+ case CredProviderType::WebIdentity:
+ return
std::make_shared<Aws::Auth::STSAssumeRoleWebIdentityCredentialsProvider>();
+ case CredProviderType::Container:
+ return std::make_shared<Aws::Auth::TaskRoleCredentialsProvider>(
+
Aws::Environment::GetEnv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI").c_str());
+ case CredProviderType::InstanceProfile:
+ return
std::make_shared<Aws::Auth::InstanceProfileCredentialsProvider>();
+ case CredProviderType::Anonymous:
+ return std::make_shared<Aws::Auth::AnonymousAWSCredentialsProvider>();
+ case CredProviderType::Default:
+ default:
+ return std::make_shared<CustomAwsCredentialsProviderChain>();
+ }
+}
+
std::shared_ptr<Aws::Auth::AWSCredentialsProvider>
S3ClientFactory::_get_aws_credentials_provider_v2(const S3ClientConf& s3_conf)
{
if (!s3_conf.ak.empty() && !s3_conf.sk.empty()) {
@@ -334,11 +359,8 @@ S3ClientFactory::_get_aws_credentials_provider_v2(const
S3ClientConf& s3_conf) {
return
std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(std::move(aws_cred));
}
- if (s3_conf.cred_provider_type == CredProviderType::InstanceProfile) {
- if (s3_conf.role_arn.empty()) {
- return std::make_shared<CustomAwsCredentialsProviderChain>();
- }
-
+ // Handle role_arn for assume role scenario
+ if (!s3_conf.role_arn.empty()) {
Aws::Client::ClientConfiguration clientConfiguration =
S3ClientFactory::getClientConfiguration();
@@ -350,15 +372,16 @@ S3ClientFactory::_get_aws_credentials_provider_v2(const
S3ClientConf& s3_conf) {
clientConfiguration.caFile = _ca_cert_file_path;
}
- auto stsClient = std::make_shared<Aws::STS::STSClient>(
- std::make_shared<CustomAwsCredentialsProviderChain>(),
clientConfiguration);
+ auto baseProvider =
_create_credentials_provider(s3_conf.cred_provider_type);
+ auto stsClient = std::make_shared<Aws::STS::STSClient>(baseProvider,
clientConfiguration);
return std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
s3_conf.role_arn, Aws::String(), s3_conf.external_id,
Aws::Auth::DEFAULT_CREDS_LOAD_FREQ_SECONDS, stsClient);
}
- return std::make_shared<CustomAwsCredentialsProviderChain>();
+ // Return provider based on cred_provider_type
+ return _create_credentials_provider(s3_conf.cred_provider_type);
}
std::shared_ptr<Aws::Auth::AWSCredentialsProvider>
S3ClientFactory::get_aws_credentials_provider(
@@ -489,6 +512,10 @@ Status S3ClientFactory::convert_properties_to_s3_conf(
s3_conf->client_conf.external_id = it->second;
}
+ if (auto it = properties.find(S3_CREDENTIALS_PROVIDER_TYPE); it !=
properties.end()) {
+ s3_conf->client_conf.cred_provider_type =
cred_provider_type_from_string(it->second);
+ }
+
if (auto st = is_s3_conf_valid(s3_conf->client_conf); !st.ok()) {
return st;
}
diff --git a/be/src/util/s3_util.h b/be/src/util/s3_util.h
index 13604b03a36..46282e525df 100644
--- a/be/src/util/s3_util.h
+++ b/be/src/util/s3_util.h
@@ -169,6 +169,8 @@ private:
const S3ClientConf& s3_conf);
std::shared_ptr<Aws::Auth::AWSCredentialsProvider>
_get_aws_credentials_provider_v2(
const S3ClientConf& s3_conf);
+ std::shared_ptr<Aws::Auth::AWSCredentialsProvider>
_create_credentials_provider(
+ CredProviderType type);
std::shared_ptr<Aws::Auth::AWSCredentialsProvider>
get_aws_credentials_provider(
const S3ClientConf& s3_conf);
diff --git a/be/test/io/s3_client_factory_test.cpp
b/be/test/io/s3_client_factory_test.cpp
index 0ad6bcae89e..30df26ce64d 100644
--- a/be/test/io/s3_client_factory_test.cpp
+++ b/be/test/io/s3_client_factory_test.cpp
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+#include <aws/core/auth/AWSCredentialsProviderChain.h>
#include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>
#include <gtest/gtest.h>
@@ -58,9 +59,10 @@ TEST_F(S3ClientFactoryTest, AwsCredentialsProvider) {
{
auto provider_v2 = factory.get_aws_credentials_provider(role_conf1);
- auto custom_chain_v2 =
-
std::dynamic_pointer_cast<CustomAwsCredentialsProviderChain>(provider_v2);
- ASSERT_NE(custom_chain_v2, nullptr);
+ auto instance_profile_v2 =
+
std::dynamic_pointer_cast<Aws::Auth::InstanceProfileCredentialsProvider>(
+ provider_v2);
+ ASSERT_NE(instance_profile_v2, nullptr);
}
{
diff --git a/common/cpp/aws_common.cpp b/common/cpp/aws_common.cpp
index 5c615b843ae..c8f5e4faf47 100644
--- a/common/cpp/aws_common.cpp
+++ b/common/cpp/aws_common.cpp
@@ -37,6 +37,35 @@ CredProviderType
cred_provider_type_from_pb(cloud::CredProviderTypePB cred_provi
}
}
+CredProviderType cred_provider_type_from_string(const std::string& type) {
+ if (type.empty() || type == "DEFAULT") {
+ return CredProviderType::Default;
+ }
+ if (type == "SIMPLE") {
+ return CredProviderType::Simple;
+ }
+ if (type == "INSTANCE_PROFILE") {
+ return CredProviderType::InstanceProfile;
+ }
+ if (type == "ENV") {
+ return CredProviderType::Env;
+ }
+ if (type == "SYSTEM_PROPERTIES") {
+ return CredProviderType::SystemProperties;
+ }
+ if (type == "WEB_IDENTITY") {
+ return CredProviderType::WebIdentity;
+ }
+ if (type == "CONTAINER") {
+ return CredProviderType::Container;
+ }
+ if (type == "ANONYMOUS") {
+ return CredProviderType::Anonymous;
+ }
+ LOG(WARNING) << "Unknown credentials provider type: " << type << ", use
default instead.";
+ return CredProviderType::Default;
+}
+
std::string get_valid_ca_cert_path(const std::vector<std::string>&
ca_cert_file_paths) {
for (const auto& path : ca_cert_file_paths) {
if (std::filesystem::exists(path)) {
diff --git a/common/cpp/aws_common.h b/common/cpp/aws_common.h
index 183a2ba80c5..cc48e23c2f9 100644
--- a/common/cpp/aws_common.h
+++ b/common/cpp/aws_common.h
@@ -23,10 +23,21 @@
namespace doris {
//AWS Credentials Provider Type
- enum class CredProviderType { Default = 0, Simple = 1, InstanceProfile = 2
};
+ enum class CredProviderType {
+ Default = 0,
+ Simple = 1,
+ InstanceProfile = 2,
+ Env = 3,
+ SystemProperties = 4,
+ WebIdentity = 5,
+ Container = 6,
+ Anonymous = 7
+ };
CredProviderType cred_provider_type_from_pb(cloud::CredProviderTypePB
cred_provider_type);
+ CredProviderType cred_provider_type_from_string(const std::string& type);
+
std::string get_valid_ca_cert_path(const std::vector<std::string>&
ca_cert_file_paths);
} // namespace doris
\ No newline at end of file
diff --git a/common/cpp/custom_aws_credentials_provider_chain.cpp
b/common/cpp/custom_aws_credentials_provider_chain.cpp
index b72f97fc0a8..5b8ae485bd8 100644
--- a/common/cpp/custom_aws_credentials_provider_chain.cpp
+++ b/common/cpp/custom_aws_credentials_provider_chain.cpp
@@ -92,6 +92,9 @@
CustomAwsCredentialsProviderChain::CustomAwsCredentialsProviderChain()
AddProvider(Aws::MakeShared<ProcessCredentialsProvider>(DefaultCredentialsProviderChainTag));
AddProvider(Aws::MakeShared<SSOCredentialsProvider>(DefaultCredentialsProviderChainTag));
+
+ AddProvider(
+
Aws::MakeShared<AnonymousAWSCredentialsProvider>(DefaultCredentialsProviderChainTag));
}
CustomAwsCredentialsProviderChain::CustomAwsCredentialsProviderChain(
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/common/AwsCredentialsProviderFactory.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/common/AwsCredentialsProviderFactory.java
index f56aed0533e..5b49795dfa5 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/common/AwsCredentialsProviderFactory.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/common/AwsCredentialsProviderFactory.java
@@ -149,7 +149,7 @@ public final class AwsCredentialsProviderFactory {
if (includeAnonymousInDefault) {
providers.add(AnonymousCredentialsProvider.class.getName());
}
- return String.join("+", providers);
+ return String.join(",", providers);
default:
throw new UnsupportedOperationException(
"AWS SDK V2 does not support credentials provider
mode: " + mode);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
index 4b8997b6d56..9c5c01a2b3a 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
@@ -263,6 +263,8 @@ public abstract class AbstractS3CompatibleProperties
extends StorageProperties i
hadoopStorageConfig.set("fs.s3.impl.disable.cache", "true");
hadoopStorageConfig.set("fs.s3a.impl.disable.cache", "true");
if (StringUtils.isNotBlank(getAccessKey())) {
+ hadoopStorageConfig.set("fs.s3a.aws.credentials.provider",
+ "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider");
hadoopStorageConfig.set("fs.s3a.access.key", getAccessKey());
hadoopStorageConfig.set("fs.s3a.secret.key", getSecretKey());
if (StringUtils.isNotBlank(getSessionToken())) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
index 3452d759395..63cfe478a0e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
@@ -291,6 +291,10 @@ public class S3Properties extends
AbstractS3CompatibleProperties {
if (StringUtils.isNotBlank(s3ExternalId)) {
backendProperties.put("AWS_EXTERNAL_ID", s3ExternalId);
}
+ // Pass credentials provider type to BE
+ if (awsCredentialsProviderMode != null) {
+ backendProperties.put("AWS_CREDENTIALS_PROVIDER_TYPE",
awsCredentialsProviderMode.getMode());
+ }
return backendProperties;
}
@@ -361,6 +365,9 @@ public class S3Properties extends
AbstractS3CompatibleProperties {
@Override
public void initializeHadoopStorageConfig() {
super.initializeHadoopStorageConfig();
+ if (StringUtils.isNotBlank(accessKey)) {
+ return;
+ }
//Set assumed_roles
//@See
https://hadoop.apache.org/docs/r3.4.1/hadoop-aws/tools/hadoop-aws/assumed_roles.html
if (StringUtils.isNotBlank(s3IAMRole)) {
@@ -385,9 +392,9 @@ public class S3Properties extends
AbstractS3CompatibleProperties {
}
if (Config.aws_credentials_provider_version.equalsIgnoreCase("v2")) {
hadoopStorageConfig.set("fs.s3a.aws.credentials.provider",
- AwsCredentialsProviderFactory.createV2(
+ AwsCredentialsProviderFactory.getV2ClassName(
awsCredentialsProviderMode,
- true).getClass().getName());
+ true));
}
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
index 29f9a126ece..755795cca94 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
@@ -268,7 +268,14 @@ public class S3PropertiesTest {
origProps.put("s3.credentials_provider_type", "static");
ExceptionChecker.expectThrowsWithMsg(IllegalArgumentException.class,
"Unsupported AWS credentials provider mode: static", () ->
StorageProperties.createPrimary(origProps));
origProps.put("s3.credentials_provider_type", "anonymous");
- Assertions.assertDoesNotThrow(() ->
StorageProperties.createPrimary(origProps));
+ s3Props = (S3Properties) StorageProperties.createPrimary(origProps);
+ Assertions.assertEquals(AnonymousCredentialsProvider.class.getName(),
s3Props.getHadoopStorageConfig().get("fs.s3a.aws.credentials.provider"));
+ origProps.remove("s3.credentials_provider_type");
+ s3Props = (S3Properties) StorageProperties.createPrimary(origProps);
+ provider = s3Props.getAwsCredentialsProvider();
+ Assertions.assertNotNull(provider);
+ Assertions.assertTrue(provider instanceof AwsCredentialsProviderChain);
+
Assertions.assertEquals("software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider,software.amazon.awssdk.auth.credentials.SystemPropertyCredentialsProvider,software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider,software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider,software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider,software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider",
s3Props.get [...]
}
diff --git a/regression-test/conf/regression-conf.groovy
b/regression-test/conf/regression-conf.groovy
index 0e55c595232..910fcef82b0 100644
--- a/regression-test/conf/regression-conf.groovy
+++ b/regression-test/conf/regression-conf.groovy
@@ -314,3 +314,10 @@ trustStoreType="PKCS12"
trustCert="/your/certificate.crt"
trustCACert="/your/ca.crt"
trustCAKey="/your/certificate.key"
+
+
+enableTestTvfAnonymous="true"
+anymousS3Uri="https://datasets-documentation.s3.eu-west-3.amazonaws.com/aapl_stock.csv"
+anymousS3Region="eu-west-3"
+anymousS3ExpectDataCount="8365"
+awsInstanceProfileRegion="us-east-1"
\ No newline at end of file
diff --git a/regression-test/suites/aws_iam_role_p0/test_tvf_anonymous.groovy
b/regression-test/suites/aws_iam_role_p0/test_tvf_anonymous.groovy
new file mode 100644
index 00000000000..0437f446c26
--- /dev/null
+++ b/regression-test/suites/aws_iam_role_p0/test_tvf_anonymous.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+import com.google.common.base.Strings;
+suite("test_tvf_anonymous") {
+ if
(Strings.isNullOrEmpty(context.config.otherConfigs.get("enableTestTvfAnonymous")))
{
+ return
+ }
+
+
+ def region = context.config.otherConfigs.get("anymousS3Region")
+ def uri = context.config.otherConfigs.get("anymousS3Uri")
+ def expectDataCount =
context.config.otherConfigs.get("anymousS3ExpectDataCount");
+ //aws_credentials_provider_version
+ sql """ ADMIN SET FRONTEND CONFIG
("aws_credentials_provider_version"="v1"); """
+
+ def result = sql """
+ SELECT count(1) FROM S3 (
+ "uri"="${uri}",
+ "format" = "csv",
+ "s3.region" = "${region}",
+ "s3.endpoint" = "https://s3.${region}.amazonaws.com",
+ "column_separator" = "," );
+ """
+
+ def countValue = result[0][0]
+ assertTrue(countValue == expectDataCount.toInteger())
+ sql """ ADMIN SET FRONTEND CONFIG
("aws_credentials_provider_version"="v2"); """
+
+ result = sql """
+ SELECT count(1) FROM S3 (
+ "uri"="${uri}",
+ "format" = "csv",
+ "s3.region" = "${region}",
+ "s3.endpoint" = "https://s3.${region}.amazonaws.com",
+ "column_separator" = "," );
+ """
+
+ countValue = result[0][0]
+ assertTrue(countValue == expectDataCount.toInteger())
+
+ result = sql """
+ SELECT count(1) FROM S3 (
+ "uri"="${uri}",
+ "format" = "csv",
+ "s3.region" = "${region}",
+ "s3.endpoint" = "https://s3.${region}.amazonaws.com",
+ "s3.credentials_provider_type"="ANONYMOUS",
+ "column_separator" = "," );
+ """
+
+ countValue = result[0][0]
+ assertTrue(countValue == expectDataCount.toInteger())
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]