This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 86f0b3a0896 [feat](catalog) Support passing credentials_provider_type to BE for S3 access (#59082)
86f0b3a0896 is described below

commit 86f0b3a089615111707bfe3ed2c0bfcf33ad22c7
Author: zy-kkk <[email protected]>
AuthorDate: Thu Dec 18 22:51:31 2025 +0800

    [feat](catalog) Support passing credentials_provider_type to BE for S3 access (#59082)
    
    #58740
    
    This is the backend change for PR #58740, with additional test cases
    added on top of it.
    
    ---------
    
    Co-authored-by: Calvin Kirs <[email protected]>
---
 be/src/util/s3_util.cpp                            | 43 +++++++++++---
 be/src/util/s3_util.h                              |  2 +
 be/test/io/s3_client_factory_test.cpp              |  8 ++-
 common/cpp/aws_common.cpp                          | 29 +++++++++
 common/cpp/aws_common.h                            | 13 +++-
 .../cpp/custom_aws_credentials_provider_chain.cpp  |  3 +
 .../common/AwsCredentialsProviderFactory.java      |  2 +-
 .../storage/AbstractS3CompatibleProperties.java    |  2 +
 .../datasource/property/storage/S3Properties.java  | 11 +++-
 .../property/storage/S3PropertiesTest.java         |  9 ++-
 regression-test/conf/regression-conf.groovy        |  7 +++
 .../aws_iam_role_p0/test_tvf_anonymous.groovy      | 69 ++++++++++++++++++++++
 12 files changed, 182 insertions(+), 16 deletions(-)

diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index e73e9c6da6e..0cf608877bb 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -20,7 +20,9 @@
 #include <aws/core/auth/AWSAuthSigner.h>
 #include <aws/core/auth/AWSCredentials.h>
 #include <aws/core/auth/AWSCredentialsProviderChain.h>
+#include <aws/core/auth/STSCredentialsProvider.h>
 #include <aws/core/client/DefaultRetryStrategy.h>
+#include <aws/core/platform/Environment.h>
 #include <aws/core/utils/logging/LogLevel.h>
 #include <aws/core/utils/logging/LogSystemInterface.h>
 #include <aws/core/utils/memory/stl/AWSStringStream.h>
@@ -121,6 +123,7 @@ constexpr char S3_NEED_OVERRIDE_ENDPOINT[] = 
"AWS_NEED_OVERRIDE_ENDPOINT";
 
 constexpr char S3_ROLE_ARN[] = "AWS_ROLE_ARN";
 constexpr char S3_EXTERNAL_ID[] = "AWS_EXTERNAL_ID";
+constexpr char S3_CREDENTIALS_PROVIDER_TYPE[] = 
"AWS_CREDENTIALS_PROVIDER_TYPE";
 } // namespace
 
 bvar::Adder<int64_t> get_rate_limit_ns("get_rate_limit_ns");
@@ -323,6 +326,28 @@ S3ClientFactory::_get_aws_credentials_provider_v1(const 
S3ClientConf& s3_conf) {
     return std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>();
 }
 
+std::shared_ptr<Aws::Auth::AWSCredentialsProvider> 
S3ClientFactory::_create_credentials_provider(
+        CredProviderType type) {
+    switch (type) {
+    case CredProviderType::Env:
+        return 
std::make_shared<Aws::Auth::EnvironmentAWSCredentialsProvider>();
+    case CredProviderType::SystemProperties:
+        return 
std::make_shared<Aws::Auth::ProfileConfigFileAWSCredentialsProvider>();
+    case CredProviderType::WebIdentity:
+        return 
std::make_shared<Aws::Auth::STSAssumeRoleWebIdentityCredentialsProvider>();
+    case CredProviderType::Container:
+        return std::make_shared<Aws::Auth::TaskRoleCredentialsProvider>(
+                
Aws::Environment::GetEnv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI").c_str());
+    case CredProviderType::InstanceProfile:
+        return 
std::make_shared<Aws::Auth::InstanceProfileCredentialsProvider>();
+    case CredProviderType::Anonymous:
+        return std::make_shared<Aws::Auth::AnonymousAWSCredentialsProvider>();
+    case CredProviderType::Default:
+    default:
+        return std::make_shared<CustomAwsCredentialsProviderChain>();
+    }
+}
+
 std::shared_ptr<Aws::Auth::AWSCredentialsProvider>
 S3ClientFactory::_get_aws_credentials_provider_v2(const S3ClientConf& s3_conf) 
{
     if (!s3_conf.ak.empty() && !s3_conf.sk.empty()) {
@@ -334,11 +359,8 @@ S3ClientFactory::_get_aws_credentials_provider_v2(const 
S3ClientConf& s3_conf) {
         return 
std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(std::move(aws_cred));
     }
 
-    if (s3_conf.cred_provider_type == CredProviderType::InstanceProfile) {
-        if (s3_conf.role_arn.empty()) {
-            return std::make_shared<CustomAwsCredentialsProviderChain>();
-        }
-
+    // Handle role_arn for assume role scenario
+    if (!s3_conf.role_arn.empty()) {
         Aws::Client::ClientConfiguration clientConfiguration =
                 S3ClientFactory::getClientConfiguration();
 
@@ -350,15 +372,16 @@ S3ClientFactory::_get_aws_credentials_provider_v2(const 
S3ClientConf& s3_conf) {
             clientConfiguration.caFile = _ca_cert_file_path;
         }
 
-        auto stsClient = std::make_shared<Aws::STS::STSClient>(
-                std::make_shared<CustomAwsCredentialsProviderChain>(), 
clientConfiguration);
+        auto baseProvider = 
_create_credentials_provider(s3_conf.cred_provider_type);
+        auto stsClient = std::make_shared<Aws::STS::STSClient>(baseProvider, 
clientConfiguration);
 
         return std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
                 s3_conf.role_arn, Aws::String(), s3_conf.external_id,
                 Aws::Auth::DEFAULT_CREDS_LOAD_FREQ_SECONDS, stsClient);
     }
 
-    return std::make_shared<CustomAwsCredentialsProviderChain>();
+    // Return provider based on cred_provider_type
+    return _create_credentials_provider(s3_conf.cred_provider_type);
 }
 
 std::shared_ptr<Aws::Auth::AWSCredentialsProvider> 
S3ClientFactory::get_aws_credentials_provider(
@@ -489,6 +512,10 @@ Status S3ClientFactory::convert_properties_to_s3_conf(
         s3_conf->client_conf.external_id = it->second;
     }
 
+    if (auto it = properties.find(S3_CREDENTIALS_PROVIDER_TYPE); it != 
properties.end()) {
+        s3_conf->client_conf.cred_provider_type = 
cred_provider_type_from_string(it->second);
+    }
+
     if (auto st = is_s3_conf_valid(s3_conf->client_conf); !st.ok()) {
         return st;
     }
diff --git a/be/src/util/s3_util.h b/be/src/util/s3_util.h
index 13604b03a36..46282e525df 100644
--- a/be/src/util/s3_util.h
+++ b/be/src/util/s3_util.h
@@ -169,6 +169,8 @@ private:
             const S3ClientConf& s3_conf);
     std::shared_ptr<Aws::Auth::AWSCredentialsProvider> 
_get_aws_credentials_provider_v2(
             const S3ClientConf& s3_conf);
+    std::shared_ptr<Aws::Auth::AWSCredentialsProvider> 
_create_credentials_provider(
+            CredProviderType type);
     std::shared_ptr<Aws::Auth::AWSCredentialsProvider> 
get_aws_credentials_provider(
             const S3ClientConf& s3_conf);
 
diff --git a/be/test/io/s3_client_factory_test.cpp 
b/be/test/io/s3_client_factory_test.cpp
index 0ad6bcae89e..30df26ce64d 100644
--- a/be/test/io/s3_client_factory_test.cpp
+++ b/be/test/io/s3_client_factory_test.cpp
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <aws/core/auth/AWSCredentialsProviderChain.h>
 #include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>
 #include <gtest/gtest.h>
 
@@ -58,9 +59,10 @@ TEST_F(S3ClientFactoryTest, AwsCredentialsProvider) {
 
     {
         auto provider_v2 = factory.get_aws_credentials_provider(role_conf1);
-        auto custom_chain_v2 =
-                
std::dynamic_pointer_cast<CustomAwsCredentialsProviderChain>(provider_v2);
-        ASSERT_NE(custom_chain_v2, nullptr);
+        auto instance_profile_v2 =
+                
std::dynamic_pointer_cast<Aws::Auth::InstanceProfileCredentialsProvider>(
+                        provider_v2);
+        ASSERT_NE(instance_profile_v2, nullptr);
     }
 
     {
diff --git a/common/cpp/aws_common.cpp b/common/cpp/aws_common.cpp
index 5c615b843ae..c8f5e4faf47 100644
--- a/common/cpp/aws_common.cpp
+++ b/common/cpp/aws_common.cpp
@@ -37,6 +37,35 @@ CredProviderType 
cred_provider_type_from_pb(cloud::CredProviderTypePB cred_provi
     }
 }
 
+CredProviderType cred_provider_type_from_string(const std::string& type) {
+    if (type.empty() || type == "DEFAULT") {
+        return CredProviderType::Default;
+    }
+    if (type == "SIMPLE") {
+        return CredProviderType::Simple;
+    }
+    if (type == "INSTANCE_PROFILE") {
+        return CredProviderType::InstanceProfile;
+    }
+    if (type == "ENV") {
+        return CredProviderType::Env;
+    }
+    if (type == "SYSTEM_PROPERTIES") {
+        return CredProviderType::SystemProperties;
+    }
+    if (type == "WEB_IDENTITY") {
+        return CredProviderType::WebIdentity;
+    }
+    if (type == "CONTAINER") {
+        return CredProviderType::Container;
+    }
+    if (type == "ANONYMOUS") {
+        return CredProviderType::Anonymous;
+    }
+    LOG(WARNING) << "Unknown credentials provider type: " << type << ", use 
default instead.";
+    return CredProviderType::Default;
+}
+
 std::string get_valid_ca_cert_path(const std::vector<std::string>& 
ca_cert_file_paths) {
     for (const auto& path : ca_cert_file_paths) {
         if (std::filesystem::exists(path)) {
diff --git a/common/cpp/aws_common.h b/common/cpp/aws_common.h
index 183a2ba80c5..cc48e23c2f9 100644
--- a/common/cpp/aws_common.h
+++ b/common/cpp/aws_common.h
@@ -23,10 +23,21 @@
 
 namespace doris {
     //AWS Credentials Provider Type
-    enum class CredProviderType { Default = 0, Simple = 1, InstanceProfile = 2 
};
+    enum class CredProviderType {
+        Default = 0,
+        Simple = 1,
+        InstanceProfile = 2,
+        Env = 3,
+        SystemProperties = 4,
+        WebIdentity = 5,
+        Container = 6,
+        Anonymous = 7
+    };
 
     CredProviderType cred_provider_type_from_pb(cloud::CredProviderTypePB 
cred_provider_type);
 
+    CredProviderType cred_provider_type_from_string(const std::string& type);
+
     std::string get_valid_ca_cert_path(const std::vector<std::string>& 
ca_cert_file_paths);
 
     } // namespace doris
\ No newline at end of file
diff --git a/common/cpp/custom_aws_credentials_provider_chain.cpp 
b/common/cpp/custom_aws_credentials_provider_chain.cpp
index b72f97fc0a8..5b8ae485bd8 100644
--- a/common/cpp/custom_aws_credentials_provider_chain.cpp
+++ b/common/cpp/custom_aws_credentials_provider_chain.cpp
@@ -92,6 +92,9 @@ 
CustomAwsCredentialsProviderChain::CustomAwsCredentialsProviderChain()
     
AddProvider(Aws::MakeShared<ProcessCredentialsProvider>(DefaultCredentialsProviderChainTag));
 
     
AddProvider(Aws::MakeShared<SSOCredentialsProvider>(DefaultCredentialsProviderChainTag));
+
+    AddProvider(
+            
Aws::MakeShared<AnonymousAWSCredentialsProvider>(DefaultCredentialsProviderChainTag));
 }
 
 CustomAwsCredentialsProviderChain::CustomAwsCredentialsProviderChain(
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/common/AwsCredentialsProviderFactory.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/common/AwsCredentialsProviderFactory.java
index f56aed0533e..5b49795dfa5 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/common/AwsCredentialsProviderFactory.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/common/AwsCredentialsProviderFactory.java
@@ -149,7 +149,7 @@ public final class AwsCredentialsProviderFactory {
                 if (includeAnonymousInDefault) {
                     
providers.add(AnonymousCredentialsProvider.class.getName());
                 }
-                return String.join("+", providers);
+                return String.join(",", providers);
             default:
                 throw new UnsupportedOperationException(
                         "AWS SDK V2 does not support credentials provider 
mode: " + mode);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
index 4b8997b6d56..9c5c01a2b3a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/AbstractS3CompatibleProperties.java
@@ -263,6 +263,8 @@ public abstract class AbstractS3CompatibleProperties 
extends StorageProperties i
         hadoopStorageConfig.set("fs.s3.impl.disable.cache", "true");
         hadoopStorageConfig.set("fs.s3a.impl.disable.cache", "true");
         if (StringUtils.isNotBlank(getAccessKey())) {
+            hadoopStorageConfig.set("fs.s3a.aws.credentials.provider",
+                    "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider");
             hadoopStorageConfig.set("fs.s3a.access.key", getAccessKey());
             hadoopStorageConfig.set("fs.s3a.secret.key", getSecretKey());
             if (StringUtils.isNotBlank(getSessionToken())) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
index 3452d759395..63cfe478a0e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java
@@ -291,6 +291,10 @@ public class S3Properties extends 
AbstractS3CompatibleProperties {
         if (StringUtils.isNotBlank(s3ExternalId)) {
             backendProperties.put("AWS_EXTERNAL_ID", s3ExternalId);
         }
+        // Pass credentials provider type to BE
+        if (awsCredentialsProviderMode != null) {
+            backendProperties.put("AWS_CREDENTIALS_PROVIDER_TYPE", 
awsCredentialsProviderMode.getMode());
+        }
         return backendProperties;
     }
 
@@ -361,6 +365,9 @@ public class S3Properties extends 
AbstractS3CompatibleProperties {
     @Override
     public void initializeHadoopStorageConfig() {
         super.initializeHadoopStorageConfig();
+        if (StringUtils.isNotBlank(accessKey)) {
+            return;
+        }
         //Set assumed_roles
         //@See 
https://hadoop.apache.org/docs/r3.4.1/hadoop-aws/tools/hadoop-aws/assumed_roles.html
         if (StringUtils.isNotBlank(s3IAMRole)) {
@@ -385,9 +392,9 @@ public class S3Properties extends 
AbstractS3CompatibleProperties {
         }
         if (Config.aws_credentials_provider_version.equalsIgnoreCase("v2")) {
             hadoopStorageConfig.set("fs.s3a.aws.credentials.provider",
-                    AwsCredentialsProviderFactory.createV2(
+                    AwsCredentialsProviderFactory.getV2ClassName(
                             awsCredentialsProviderMode,
-                            true).getClass().getName());
+                            true));
         }
     }
 
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
index 29f9a126ece..755795cca94 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3PropertiesTest.java
@@ -268,7 +268,14 @@ public class S3PropertiesTest {
         origProps.put("s3.credentials_provider_type", "static");
         ExceptionChecker.expectThrowsWithMsg(IllegalArgumentException.class, 
"Unsupported AWS credentials provider mode: static", () -> 
StorageProperties.createPrimary(origProps));
         origProps.put("s3.credentials_provider_type", "anonymous");
-        Assertions.assertDoesNotThrow(() -> 
StorageProperties.createPrimary(origProps));
+        s3Props = (S3Properties) StorageProperties.createPrimary(origProps);
+        Assertions.assertEquals(AnonymousCredentialsProvider.class.getName(), 
s3Props.getHadoopStorageConfig().get("fs.s3a.aws.credentials.provider"));
+        origProps.remove("s3.credentials_provider_type");
+        s3Props = (S3Properties) StorageProperties.createPrimary(origProps);
+        provider = s3Props.getAwsCredentialsProvider();
+        Assertions.assertNotNull(provider);
+        Assertions.assertTrue(provider instanceof AwsCredentialsProviderChain);
+        
Assertions.assertEquals("software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider,software.amazon.awssdk.auth.credentials.SystemPropertyCredentialsProvider,software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider,software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider,software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider,software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider",
 s3Props.get [...]
 
     }
 
diff --git a/regression-test/conf/regression-conf.groovy 
b/regression-test/conf/regression-conf.groovy
index 0e55c595232..910fcef82b0 100644
--- a/regression-test/conf/regression-conf.groovy
+++ b/regression-test/conf/regression-conf.groovy
@@ -314,3 +314,10 @@ trustStoreType="PKCS12"
 trustCert="/your/certificate.crt"
 trustCACert="/your/ca.crt"
 trustCAKey="/your/certificate.key"
+
+
+enableTestTvfAnonymous="true"
+anymousS3Uri="https://datasets-documentation.s3.eu-west-3.amazonaws.com/aapl_stock.csv"
+anymousS3Region="eu-west-3"
+anymousS3ExpectDataCount="8365"
+awsInstanceProfileRegion="us-east-1"
\ No newline at end of file
diff --git a/regression-test/suites/aws_iam_role_p0/test_tvf_anonymous.groovy 
b/regression-test/suites/aws_iam_role_p0/test_tvf_anonymous.groovy
new file mode 100644
index 00000000000..0437f446c26
--- /dev/null
+++ b/regression-test/suites/aws_iam_role_p0/test_tvf_anonymous.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+import com.google.common.base.Strings;
+suite("test_tvf_anonymous") {
+    if 
(Strings.isNullOrEmpty(context.config.otherConfigs.get("enableTestTvfAnonymous")))
 {
+        return
+    }
+
+    
+    def region = context.config.otherConfigs.get("anymousS3Region")
+    def uri = context.config.otherConfigs.get("anymousS3Uri")
+    def expectDataCount = 
context.config.otherConfigs.get("anymousS3ExpectDataCount");
+    //aws_credentials_provider_version
+    sql """ ADMIN SET FRONTEND CONFIG 
("aws_credentials_provider_version"="v1"); """
+
+    def result = sql """
+        SELECT count(1) FROM S3 (                  
+        "uri"="${uri}",
+         "format" = "csv",     
+          "s3.region" = "${region}",  
+           "s3.endpoint" = "https://s3.${region}.amazonaws.com", 
+           "column_separator" = ","              );
+        """
+
+    def countValue = result[0][0]
+    assertTrue(countValue == expectDataCount.toInteger())
+    sql """ ADMIN SET FRONTEND CONFIG 
("aws_credentials_provider_version"="v2"); """
+
+     result = sql """
+        SELECT count(1) FROM S3 (                  
+        "uri"="${uri}",
+         "format" = "csv",     
+          "s3.region" = "${region}",  
+           "s3.endpoint" = "https://s3.${region}.amazonaws.com", 
+           "column_separator" = ","              );
+        """
+
+     countValue = result[0][0]
+    assertTrue(countValue == expectDataCount.toInteger())
+
+    result = sql """
+        SELECT count(1) FROM S3 (                  
+        "uri"="${uri}",
+         "format" = "csv",     
+          "s3.region" = "${region}",  
+           "s3.endpoint" = "https://s3.${region}.amazonaws.com", 
+           "s3.credentials_provider_type"="ANONYMOUS",
+           "column_separator" = ","              );
+        """
+
+    countValue = result[0][0]
+    assertTrue(countValue == expectDataCount.toInteger())
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to