This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch fix/s3-tvf-anonymous-fallback-public-buckets
in repository https://gitbox.apache.org/repos/asf/doris.git
commit d254a6f597b425e986dcc5e2f64758f7cac94155 Author: Yongqiang YANG <[email protected]> AuthorDate: Wed Feb 4 18:22:15 2026 -0800 [fix](s3) Add anonymous credential fallback for S3 TVF on public buckets When Doris runs on an instance with an IAM role, the default AWS credential chain picks up instance profile credentials before reaching the anonymous fallback. If that role lacks s3:ListBucket on a public bucket, the S3 TVF query fails with 403. Add retry-with-anonymous logic in S3TableValuedFunction: when parseFile() fails with 403 and no explicit credentials (access_key, secret_key, or role_arn) were provided, switch to ANONYMOUS credentials and retry. All three property maps (storageProperties, backendConnectProperties, processedParams) are updated so both FE listing and BE data reading use anonymous access. Co-Authored-By: Claude Opus 4.5 <[email protected]> --- .../doris/tablefunction/S3TableValuedFunction.java | 66 ++++++- .../tablefunction/S3TableValuedFunctionTest.java | 195 +++++++++++++++++++++ 2 files changed, 260 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java index 7d38c50f565..2d62c48b916 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java @@ -21,9 +21,12 @@ import org.apache.doris.analysis.BrokerDesc; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.FeConstants; import org.apache.doris.common.UserException; +import org.apache.doris.datasource.property.storage.S3Properties; import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.thrift.TFileType; +import org.apache.commons.lang3.StringUtils; + import java.util.Map; /** @@ -57,8 +60,69 @@ public class S3TableValuedFunction extends 
ExternalFileTableValuedFunction { // Fixme wait to be done #50320 // FileSystemFactory.get(storageProperties); } else { - parseFile(); + try { + parseFile(); + } catch (AnalysisException e) { + if (shouldRetryWithAnonymous(e)) { + LOG.info("S3 TVF got 403 with no explicit credentials, retrying with anonymous access"); + try { + retryWithAnonymousCredentials(props); + } catch (Exception retryException) { + LOG.warn("S3 TVF anonymous access retry also failed: {}", + retryException.getMessage()); + // If anonymous retry also fails, throw the original error + throw e; + } + } else { + throw e; + } + } + } + } + + /** + * Determines whether a failed parseFile() call should be retried with anonymous credentials. + * Retry is warranted only when: + * - The storage is S3 + * - No explicit credentials (access_key, secret_key, role_arn) were provided + * - The error is a 403 (access denied), likely from instance profile credentials + * lacking permission on a public bucket + */ + private boolean shouldRetryWithAnonymous(AnalysisException e) { + if (!(storageProperties instanceof S3Properties)) { + return false; + } + S3Properties s3Props = (S3Properties) storageProperties; + if (StringUtils.isNotBlank(s3Props.getAccessKey()) || StringUtils.isNotBlank(s3Props.getSecretKey())) { + return false; + } + if (StringUtils.isNotBlank(s3Props.getS3IAMRole())) { + return false; + } + return e.getMessage() != null && e.getMessage().contains("Status Code: 403"); + } + + /** + * Switches all property maps to use anonymous credentials and retries parseFile(). 
+ */ + private void retryWithAnonymousCredentials(Map<String, String> props) throws AnalysisException { + props.put("s3.credentials_provider_type", "ANONYMOUS"); + + try { + this.storageProperties = StorageProperties.createPrimary(props); + } catch (Exception e) { + throw new AnalysisException("Failed to create anonymous storage properties: " + e.getMessage(), e); } + + this.backendConnectProperties.clear(); + this.backendConnectProperties.putAll(storageProperties.getBackendConfigProperties()); + this.backendConnectProperties.put(URI_KEY, filePath); + + this.processedParams.put("s3.credentials_provider_type", "ANONYMOUS"); + + this.fileStatuses.clear(); + + parseFile(); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/tablefunction/S3TableValuedFunctionTest.java b/fe/fe-core/src/test/java/org/apache/doris/tablefunction/S3TableValuedFunctionTest.java new file mode 100644 index 00000000000..9b8fbabb47d --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/tablefunction/S3TableValuedFunctionTest.java @@ -0,0 +1,195 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.tablefunction; + +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.FeConstants; + +import mockit.Mock; +import mockit.MockUp; +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +public class S3TableValuedFunctionTest { + + private Map<String, String> baseS3Props() { + Map<String, String> props = new HashMap<>(); + props.put("uri", "s3://test-bucket/data/file.parquet"); + props.put("s3.region", "us-east-1"); + props.put("format", "parquet"); + return props; + } + + @Test + public void testAnonymousFallbackOn403NoCredentials() throws Exception { + boolean origUnitTest = FeConstants.runningUnitTest; + FeConstants.runningUnitTest = false; + try { + AtomicInteger parseFileCallCount = new AtomicInteger(0); + new MockUp<ExternalFileTableValuedFunction>() { + @Mock + protected void parseFile() throws AnalysisException { + int count = parseFileCallCount.incrementAndGet(); + if (count == 1) { + throw new AnalysisException( + "parse file failed, err: Status Code: 403, AWS Error Code: AccessDenied"); + } + // Second call (anonymous retry) succeeds + } + }; + + S3TableValuedFunction tvf = new S3TableValuedFunction(baseS3Props()); + + // parseFile should have been called twice (original + retry) + Assert.assertEquals(2, parseFileCallCount.get()); + + // Verify processedParams was updated with ANONYMOUS provider + Assert.assertEquals("ANONYMOUS", + tvf.getBrokerDesc().getProperties().get("s3.credentials_provider_type")); + + // Verify backendConnectProperties has ANONYMOUS provider type + Assert.assertEquals("ANONYMOUS", + tvf.getBackendConnectProperties().get("AWS_CREDENTIALS_PROVIDER_TYPE")); + } finally { + FeConstants.runningUnitTest = origUnitTest; + } + } + + @Test + public void testNoFallbackWhenExplicitCredentials() { + boolean origUnitTest = FeConstants.runningUnitTest; + FeConstants.runningUnitTest = false; 
+ try { + new MockUp<ExternalFileTableValuedFunction>() { + @Mock + protected void parseFile() throws AnalysisException { + throw new AnalysisException( + "parse file failed, err: Status Code: 403, AWS Error Code: AccessDenied"); + } + }; + + Map<String, String> props = baseS3Props(); + props.put("s3.access_key", "AKIAIOSFODNN7EXAMPLE"); + props.put("s3.secret_key", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"); + + try { + new S3TableValuedFunction(props); + Assert.fail("Should have thrown AnalysisException"); + } catch (AnalysisException e) { + Assert.assertTrue(e.getMessage().contains("Status Code: 403")); + } + } finally { + FeConstants.runningUnitTest = origUnitTest; + } + } + + @Test + public void testNoFallbackWhenExplicitRoleArn() { + boolean origUnitTest = FeConstants.runningUnitTest; + FeConstants.runningUnitTest = false; + try { + new MockUp<ExternalFileTableValuedFunction>() { + @Mock + protected void parseFile() throws AnalysisException { + throw new AnalysisException( + "parse file failed, err: Status Code: 403, AWS Error Code: AccessDenied"); + } + }; + + Map<String, String> props = baseS3Props(); + props.put("s3.role_arn", "arn:aws:iam::123456789012:role/MyRole"); + + try { + new S3TableValuedFunction(props); + Assert.fail("Should have thrown an exception"); + } catch (AnalysisException e) { + // Expected: 403 error propagated without retry because role_arn is set + Assert.assertTrue(e.getMessage().contains("Status Code: 403")); + } + } finally { + FeConstants.runningUnitTest = origUnitTest; + } + } + + @Test + public void testOriginalErrorThrownWhenBothAttemptsFail() { + boolean origUnitTest = FeConstants.runningUnitTest; + FeConstants.runningUnitTest = false; + try { + AtomicInteger parseFileCallCount = new AtomicInteger(0); + new MockUp<ExternalFileTableValuedFunction>() { + @Mock + protected void parseFile() throws AnalysisException { + int count = parseFileCallCount.incrementAndGet(); + if (count == 1) { + throw new AnalysisException( + "parse 
file failed, err: Status Code: 403, Original error"); + } + // Second call also fails (bucket isn't actually public) + throw new AnalysisException( + "parse file failed, err: Status Code: 403, Anonymous also denied"); + } + }; + + try { + new S3TableValuedFunction(baseS3Props()); + Assert.fail("Should have thrown AnalysisException"); + } catch (AnalysisException e) { + // Should throw the ORIGINAL error, not the retry error + Assert.assertTrue(e.getMessage().contains("Original error")); + Assert.assertFalse(e.getMessage().contains("Anonymous also denied")); + } + + Assert.assertEquals(2, parseFileCallCount.get()); + } finally { + FeConstants.runningUnitTest = origUnitTest; + } + } + + @Test + public void testNoFallbackOnNon403Error() { + boolean origUnitTest = FeConstants.runningUnitTest; + FeConstants.runningUnitTest = false; + try { + AtomicInteger parseFileCallCount = new AtomicInteger(0); + new MockUp<ExternalFileTableValuedFunction>() { + @Mock + protected void parseFile() throws AnalysisException { + parseFileCallCount.incrementAndGet(); + throw new AnalysisException("parse file failed, err: Status Code: 404, Not Found"); + } + }; + + try { + new S3TableValuedFunction(baseS3Props()); + Assert.fail("Should have thrown AnalysisException"); + } catch (AnalysisException e) { + Assert.assertTrue(e.getMessage().contains("Status Code: 404")); + } + + // parseFile should have been called only once (no retry for non-403) + Assert.assertEquals(1, parseFileCallCount.get()); + } finally { + FeConstants.runningUnitTest = origUnitTest; + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
