This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch fix/s3-glob-expansion-safety
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 4d9b83be4d1657329b89cacceb96912f210fc3c7
Author: Yongqiang YANG <[email protected]>
AuthorDate: Fri Mar 27 23:51:57 2026 -0700

    [fix](s3) Add limit-aware brace expansion and fix misleading glob metrics
    
    Prevent OOM from unbounded brace expansion by adding early-stop to
    expandBracePatterns when the expansion exceeds s3_head_request_max_paths.
    Also skip LIST-path metrics logging when the HEAD/getProperties
    optimization was used, avoiding misleading "process 0 elements" logs.
    
    Found via review of #61775.
    
    Co-Authored-By: Claude Opus 4.6 <[email protected]>
---
 .../java/org/apache/doris/common/util/S3Util.java  | 34 ++++++++++++++++--
 .../org/apache/doris/fs/obj/AzureObjStorage.java   | 28 ++++++++-------
 .../java/org/apache/doris/fs/obj/S3ObjStorage.java | 18 +++++-----
 .../org/apache/doris/common/util/S3UtilTest.java   | 41 ++++++++++++++++++++++
 4 files changed, 98 insertions(+), 23 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
index 3e4f4e7a62f..6bb5b7ce029 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
@@ -583,6 +583,15 @@ public class S3Util {
         return chars;
     }
 
+    /**
+     * Exception thrown when brace expansion exceeds the specified limit.
+     */
+    public static class BraceExpansionTooLargeException extends 
RuntimeException {
+        public BraceExpansionTooLargeException(int limit) {
+            super("Brace expansion exceeded limit of " + limit + " paths");
+        }
+    }
+
     /**
      * Expand brace patterns in a path to generate all concrete file paths.
      * Handles nested and multiple brace patterns.
@@ -597,11 +606,30 @@ public class S3Util {
      */
     public static List<String> expandBracePatterns(String pathPattern) {
         List<String> result = new ArrayList<>();
-        expandBracePatternsRecursive(pathPattern, result);
+        expandBracePatternsRecursive(pathPattern, result, 0);
         return result;
     }
 
-    private static void expandBracePatternsRecursive(String pattern, 
List<String> result) {
+    /**
+     * Expand brace patterns with a limit on the number of expanded paths.
+     * Stops expansion early if the limit is exceeded, avoiding large 
allocations.
+     *
+     * @param pathPattern Path with optional brace patterns
+     * @param maxPaths Maximum number of expanded paths allowed (must be > 0)
+     * @return List of expanded concrete paths
+     * @throws BraceExpansionTooLargeException if expansion exceeds maxPaths
+     */
+    public static List<String> expandBracePatterns(String pathPattern, int 
maxPaths) {
+        List<String> result = new ArrayList<>();
+        expandBracePatternsRecursive(pathPattern, result, maxPaths);
+        return result;
+    }
+
+    private static void expandBracePatternsRecursive(String pattern, 
List<String> result, int maxPaths) {
+        if (maxPaths > 0 && result.size() >= maxPaths) {
+            throw new BraceExpansionTooLargeException(maxPaths);
+        }
+
         int braceStart = pattern.indexOf('{');
         if (braceStart == -1) {
             // No more braces, add the pattern as-is
@@ -626,7 +654,7 @@ public class S3Util {
 
         for (String alt : alternatives) {
             // Recursively expand any remaining braces in the suffix
-            expandBracePatternsRecursive(prefix + alt + suffix, result);
+            expandBracePatternsRecursive(prefix + alt + suffix, result, 
maxPaths);
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
index 4929e34e7f5..08f83ea8f60 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
@@ -354,6 +354,7 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
         long elementCnt = 0;
         long matchCnt = 0;
         long startTime = System.nanoTime();
+        boolean usedHeadPath = false;
         Status st = Status.OK;
         try {
             remotePath = 
AzurePropertyUtils.validateAndNormalizeUri(remotePath);
@@ -370,6 +371,7 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
                     && S3Util.isDeterministicPattern(keyPattern)) {
                 Status headStatus = globListByGetProperties(bucket, 
keyPattern, result, fileNameOnly, startTime);
                 if (headStatus != null) {
+                    usedHeadPath = true;
                     return headStatus;
                 }
                 // If headStatus is null, fall through to use listing
@@ -444,11 +446,13 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
             st = new Status(Status.ErrCode.COMMON_ERROR,
                     "errors while glob file " + remotePath + ": " + 
e.getMessage());
         } finally {
-            long endTime = System.nanoTime();
-            long duration = endTime - startTime;
-            LOG.info("process {} elements under prefix {} for {} round, match 
{} elements, take {} micro second",
-                    remotePath, elementCnt, roundCnt, matchCnt,
-                    duration / 1000);
+            if (!usedHeadPath) {
+                long endTime = System.nanoTime();
+                long duration = endTime - startTime;
+                LOG.info("process {} elements under prefix {} for {} round, 
match {} elements, take {} micro second",
+                        remotePath, elementCnt, roundCnt, matchCnt,
+                        duration / 1000);
+            }
         }
         return st;
     }
@@ -468,15 +472,15 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
             List<RemoteFile> result, boolean fileNameOnly, long startTime) {
         try {
             // First expand [...] brackets to {...} braces, then expand {..} 
ranges, then expand braces
+            // Use limit-aware expansion to avoid large allocations before 
checking the limit
             String expandedPattern = S3Util.expandBracketPatterns(keyPattern);
             expandedPattern = S3Util.extendGlobs(expandedPattern);
-            List<String> expandedPaths = 
S3Util.expandBracePatterns(expandedPattern);
-
-            // Fall back to listing if too many paths to avoid overwhelming 
Azure with requests
-            // Controlled by config: s3_head_request_max_paths
-            if (expandedPaths.size() > Config.s3_head_request_max_paths) {
-                LOG.info("Expanded path count {} exceeds limit {}, falling 
back to LIST",
-                        expandedPaths.size(), 
Config.s3_head_request_max_paths);
+            List<String> expandedPaths;
+            try {
+                expandedPaths = S3Util.expandBracePatterns(expandedPattern, 
Config.s3_head_request_max_paths);
+            } catch (S3Util.BraceExpansionTooLargeException e) {
+                LOG.info("Brace expansion exceeded limit {}, falling back to 
LIST",
+                        Config.s3_head_request_max_paths);
                 return null;
             }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index ec827c785a5..bb7cf945a42 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -575,6 +575,7 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
         long startTime = System.nanoTime();
         String currentMaxFile = "";
         boolean hasLimits = fileSizeLimit > 0 || fileNumLimit > 0;
+        boolean usedHeadPath = false;
         String bucket = "";
         String finalPrefix = "";
         try {
@@ -602,6 +603,7 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
                 GlobListResult headResult = globListByHeadRequests(
                         bucket, keyPattern, result, fileNameOnly, startTime);
                 if (headResult != null) {
+                    usedHeadPath = true;
                     return headResult;
                 }
                 // If headResult is null, fall through to use listing
@@ -733,7 +735,7 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
         } finally {
             long endTime = System.nanoTime();
             long duration = endTime - startTime;
-            if (LOG.isDebugEnabled()) {
+            if (!usedHeadPath && LOG.isDebugEnabled()) {
                 LOG.debug("process {} elements under prefix {} for {} round, 
match {} elements, take {} ms",
                         elementCnt, remotePath, roundCnt, matchCnt,
                         duration / 1000 / 1000);
@@ -756,15 +758,15 @@ public class S3ObjStorage implements ObjStorage<S3Client> 
{
             List<RemoteFile> result, boolean fileNameOnly, long startTime) {
         try {
             // First expand [...] brackets to {...} braces, then expand {..} 
ranges, then expand braces
+            // Use limit-aware expansion to avoid large allocations before 
checking the limit
             String expandedPattern = S3Util.expandBracketPatterns(keyPattern);
             expandedPattern = S3Util.extendGlobs(expandedPattern);
-            List<String> expandedPaths = 
S3Util.expandBracePatterns(expandedPattern);
-
-            // Fall back to listing if too many paths to avoid overwhelming S3 
with HEAD requests
-            // Controlled by config: s3_head_request_max_paths
-            if (expandedPaths.size() > Config.s3_head_request_max_paths) {
-                LOG.info("Expanded path count {} exceeds limit {}, falling 
back to LIST",
-                        expandedPaths.size(), 
Config.s3_head_request_max_paths);
+            List<String> expandedPaths;
+            try {
+                expandedPaths = S3Util.expandBracePatterns(expandedPattern, 
Config.s3_head_request_max_paths);
+            } catch (S3Util.BraceExpansionTooLargeException e) {
+                LOG.info("Brace expansion exceeded limit {}, falling back to 
LIST",
+                        Config.s3_head_request_max_paths);
                 return null;
             }
 
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
index 4b976ed86cd..a17f443f080 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
@@ -456,5 +456,46 @@ public class S3UtilTest {
         // Malformed bracket (no closing ]) - [ kept as literal
         Assert.assertEquals("file[abc.csv", 
S3Util.expandBracketPatterns("file[abc.csv"));
     }
+
+    // Tests for limit-aware expandBracePatterns
+
+    @Test
+    public void testExpandBracePatterns_withinLimit() {
+        // Expansion within the limit should succeed
+        List<String> result = S3Util.expandBracePatterns("file{1,2,3}.csv", 
10);
+        Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv", 
"file3.csv"), result);
+    }
+
+    @Test
+    public void testExpandBracePatterns_exactlyAtLimit() {
+        // Expansion exactly at the limit should succeed
+        List<String> result = S3Util.expandBracePatterns("file{1,2,3}.csv", 3);
+        Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv", 
"file3.csv"), result);
+    }
+
+    @Test(expected = S3Util.BraceExpansionTooLargeException.class)
+    public void testExpandBracePatterns_exceedsLimit() {
+        // Expansion exceeding the limit should throw
+        S3Util.expandBracePatterns("file{1,2,3,4,5}.csv", 3);
+    }
+
+    @Test(expected = S3Util.BraceExpansionTooLargeException.class)
+    public void testExpandBracePatterns_oneOverLimit() {
+        // maxPaths+1 items must also throw (boundary case)
+        S3Util.expandBracePatterns("file{1,2,3,4}.csv", 3);
+    }
+
+    @Test(expected = S3Util.BraceExpansionTooLargeException.class)
+    public void testExpandBracePatterns_cartesianExceedsLimit() {
+        // Cartesian product {a,b,c} x {1,2,3} = 9 paths, limit = 5
+        S3Util.expandBracePatterns("dir{a,b,c}/file{1,2,3}.csv", 5);
+    }
+
+    @Test
+    public void testExpandBracePatterns_zeroLimitMeansUnlimited() {
+        // maxPaths=0 means no limit (backward compatibility)
+        List<String> result = 
S3Util.expandBracePatterns("file{1,2,3,4,5}.csv", 0);
+        Assert.assertEquals(5, result.size());
+    }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to