This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new eb70d3de808 branch-4.0: [improvement](tvf load)add the 
data_{1..200}.csv wildcard in tvf load #56705 (#57255)
eb70d3de808 is described below

commit eb70d3de8081e8f378834c0663676518bc9c5330
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Oct 23 14:43:14 2025 +0800

    branch-4.0: [improvement](tvf load)add the data_{1..200}.csv wildcard in 
tvf load #56705 (#57255)
    
    Cherry-picked from #56705
    
    Co-authored-by: Refrain <[email protected]>
---
 .../java/org/apache/doris/common/util/S3Util.java  |  88 +++++
 .../org/apache/doris/fs/obj/AzureObjStorage.java   |   2 +-
 .../java/org/apache/doris/fs/obj/S3ObjStorage.java |   2 +-
 .../apache/doris/fs/remote/dfs/DFSFileSystem.java  |   3 +-
 .../org/apache/doris/common/util/S3UtilTest.java   | 252 +++++++++++++
 .../data/external_table_p0/tvf/hdfs_data_1.txt     |   1 +
 .../data/external_table_p0/tvf/hdfs_data_2.txt     |   1 +
 .../data/external_table_p0/tvf/hdfs_data_3.txt     |   1 +
 .../tvf/test_s3_tvf_number_range.out               |  57 +++
 .../tvf/test_s3_tvf_number_range.groovy            | 398 +++++++++++++++++++++
 10 files changed, 802 insertions(+), 3 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
index 0732cbb4e72..e537d1f47b0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
@@ -58,6 +58,8 @@ import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 public class S3Util {
     private static final Logger LOG = LogManager.getLogger(Util.class);
@@ -300,6 +302,92 @@ public class S3Util {
         return globPattern.substring(0, earliestSpecialCharIndex);
     }
 
public class S3Util {

    /** Matches one non-empty brace group, e.g. the "{1..3,5}" in "file_{1..3,5}.csv". */
    private static final Pattern BRACE_PATTERN = Pattern.compile("\\{([^}]+)\\}");

    /** Matches a numeric range "start..end"; signs are accepted so negative ranges can be detected and skipped. */
    private static final Pattern RANGE_PATTERN = Pattern.compile("^(-?\\d+)\\.\\.(-?\\d+)$");

    // Apply some rules to extend the globs parsing behavior.
    public static String extendGlobs(String pathPattern) {
        return extendGlobNumberRange(pathPattern);
    }

    /**
     * Convert range patterns to brace enumeration patterns for glob matching.
     * Parts containing negative numbers or non-numeric characters are skipped;
     * if no part of a brace group is valid, the group is kept unchanged.
     * e.g. (valid):
     *    -> "file{1..3}" => "file{1,2,3}"
     *    -> "file_{1..3,4,5..6}" => "file_{1,2,3,4,5,6}"
     * e.g. (invalid, left unchanged so the glob matches nothing):
     *    -> "data_{-1..4}.csv" will not load any file
     *    -> "data_{a..4}.csv" will not load any file
     *
     * @param pathPattern Path that may contain {start..end} or mixed {start..end,values} patterns
     * @return Path with ranges converted to comma-separated enumeration
     */
    public static String extendGlobNumberRange(String pathPattern) {
        Matcher braceMatcher = BRACE_PATTERN.matcher(pathPattern);
        StringBuffer result = new StringBuffer();

        while (braceMatcher.find()) {
            String braceContent = braceMatcher.group(1);
            // LinkedHashSet de-duplicates in O(1) while keeping first-seen order
            // (a List.contains() scan would be O(n^2) for wide ranges).
            Set<Integer> numbers = new LinkedHashSet<>();

            for (String part : braceContent.split(",")) {
                part = part.trim();
                Matcher rangeMatcher = RANGE_PATTERN.matcher(part);
                try {
                    if (rangeMatcher.matches()) {
                        int start = Integer.parseInt(rangeMatcher.group(1));
                        int end = Integer.parseInt(rangeMatcher.group(2));

                        // Skip this range if either start or end is negative.
                        if (start < 0 || end < 0) {
                            continue;
                        }

                        // Normalize reversed ranges like "3..1" to "1..3".
                        if (start > end) {
                            int tmp = start;
                            start = end;
                            end = tmp;
                        }
                        // NOTE(review): very wide ranges expand eagerly into memory here;
                        // callers are expected to pass reasonably small ranges.
                        for (int i = start; i <= end; i++) {
                            numbers.add(i);
                        }
                    } else if (part.matches("^\\d+$")) {
                        // A single non-negative number like "4".
                        numbers.add(Integer.parseInt(part));
                    }
                    // Anything else (letters, bare negative value, empty part) is
                    // silently skipped; remaining parts are still processed.
                } catch (NumberFormatException e) {
                    // Digit strings wider than int (e.g. "99999999999999") are treated
                    // as invalid parts instead of propagating an exception.
                    continue;
                }
            }

            String replacement;
            if (numbers.isEmpty()) {
                // No valid numbers found after filtering: keep the original content.
                replacement = "{" + braceContent + "}";
            } else {
                // Build the comma-separated enumeration.
                StringBuilder sb = new StringBuilder("{");
                String sep = "";
                for (int n : numbers) {
                    sb.append(sep).append(n);
                    sep = ",";
                }
                replacement = sb.append("}").toString();
            }
            // quoteReplacement is required: a '$' or '\' inside the brace content would
            // otherwise be interpreted as a group reference by appendReplacement and
            // either corrupt the output or throw IllegalArgumentException.
            braceMatcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
        }
        braceMatcher.appendTail(result);

        return result.toString();
    }
}
+
     // Fast fail validation for S3 endpoint connectivity to avoid retries and 
long waits
     // when network conditions are poor. Validates endpoint format, whitelist, 
security,
     // and tests connection with 10s timeout.
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
index 17f2d3b3439..581307c9204 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
@@ -302,7 +302,7 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
         Status st = Status.OK;
         try {
             S3URI uri = S3URI.create(remotePath, isUsePathStyle, 
forceParsingByStandardUri);
-            String globPath = uri.getKey();
+            String globPath = S3Util.extendGlobs(uri.getKey());
             String bucket = uri.getBucket();
             if (LOG.isDebugEnabled()) {
                 LOG.debug("try to glob list for azure, remote path {}, orig 
{}", globPath, remotePath);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index 9522449c7d9..144ce154b42 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -573,7 +573,7 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
             }
 
             String bucket = uri.getBucket();
-            String globPath = uri.getKey(); // eg: path/to/*.csv
+            String globPath = S3Util.extendGlobs(uri.getKey());
 
             if (LOG.isDebugEnabled()) {
                 LOG.debug("globList globPath:{}, remotePath:{}", globPath, 
remotePath);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java
index a39812c2909..2ee3c156931 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java
@@ -21,6 +21,7 @@ import org.apache.doris.analysis.StorageBackend;
 import org.apache.doris.backup.Status;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.security.authentication.HadoopAuthenticator;
+import org.apache.doris.common.util.S3Util;
 import org.apache.doris.common.util.URI;
 import org.apache.doris.datasource.property.storage.HdfsCompatibleProperties;
 import org.apache.doris.datasource.property.storage.StorageProperties;
@@ -510,7 +511,7 @@ public class DFSFileSystem extends RemoteFileSystem {
     public Status globList(String remotePath, List<RemoteFile> result, boolean 
fileNameOnly) {
         try {
             URI pathUri = URI.create(remotePath);
-            Path pathPattern = new Path(pathUri.getLocation());
+            Path pathPattern = new 
Path(S3Util.extendGlobs(pathUri.getLocation()));
             FileSystem fileSystem = nativeFileSystem(pathPattern);
             FileStatus[] files = 
hdfsProperties.getHadoopAuthenticator().doAs(() -> 
fileSystem.globStatus(pathPattern));
             if (files == null) {
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
new file mode 100644
index 00000000000..23715440e8c
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
@@ -0,0 +1,252 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.util;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class S3UtilTest {
+
+    @Test
+    public void testExtendGlobNumberRange_simpleRange() {
+        // Test simple range expansion {1..3}
+        String input = "file_{1..3}.csv";
+        String expected = "file_{1,2,3}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_reverseRange() {
+        // Test reverse range {3..1}, should normalize to {1,2,3}
+        String input = "file_{3..1}.csv";
+        String expected = "file_{1,2,3}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_singleNumber() {
+        // Test single number range {2..2}
+        String input = "file_{2..2}.csv";
+        String expected = "file_{2}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_mixedRangeAndValues() {
+        // Test mixed range and single values {1..2,3,1..3}
+        String input = "file_{1..2,3,1..3}.csv";
+        String expected = "file_{1,2,3}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_multipleRanges() {
+        // Test multiple ranges in one path {1..2}_{1..2}
+        String input = "file_{1..2}_{1..2}.csv";
+        String expected = "file_{1,2}_{1,2}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_largeRange() {
+        // Test large range {0..9}
+        String input = "file_{0..9}.csv";
+        String expected = "file_{0,1,2,3,4,5,6,7,8,9}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_negativeNumbersFiltered() {
+        // If start or end is negative, the entire range is skipped
+        String input = "file_{-1..2}.csv";
+        String expected = "file_{-1..2}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_allNegativeRange() {
+        // Test all negative range {-3..-1}, should keep original
+        String input = "file_{-3..-1}.csv";
+        String expected = "file_{-3..-1}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_mixedWithNegative() {
+        // The range -1..2 is skipped, only 1..3 is expanded
+        String input = "file_{-1..2,1..3}.csv";
+        String expected = "file_{1,2,3}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_invalidCharacters() {
+        // Test invalid characters {Refrain,1..3}
+        String input = "file_{Refrain,1..3}.csv";
+        String expected = "file_{1,2,3}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_mixedInvalidAndValid() {
+        // Range 3..1 is normalized to 1..3, resulting in {1,2,3}
+        String input = "file_{3..1,2,1..2}.csv";
+        String expected = "file_{1,2,3}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_noRange() {
+        // Test no range pattern
+        String input = "file_123.csv";
+        String expected = "file_123.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_noNumericRange() {
+        // Test no numeric range {a..z}
+        String input = "file_{a..z}.csv";
+        String expected = "file_{a..z}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_emptyBraces() {
+        // Test empty braces {}
+        String input = "file_{}.csv";
+        String expected = "file_{}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_singleValue() {
+        // Test single value in braces {5}
+        String input = "file_{5}.csv";
+        String expected = "file_{5}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_multipleValues() {
+        // Test multiple single values {1,2,3}
+        String input = "file_{1,2,3}.csv";
+        String expected = "file_{1,2,3}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_duplicateRemoval() {
+        // Test duplicate removal {1..3,2..4}
+        String input = "file_{1..3,2..4}.csv";
+        String expected = "file_{1,2,3,4}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_largeNumbers() {
+        // Test large numbers {100..103}
+        String input = "file_{100..103}.csv";
+        String expected = "file_{100,101,102,103}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_zeroPadding() {
+        // Test that zero-padding is not preserved (behavior test)
+        // The function converts to integers, so "01" becomes "1"
+        String input = "file_{01..03}.csv";
+        String expected = "file_{1,2,3}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_complexPath() {
+        // Test complex path with multiple patterns
+        String input = "s3://bucket/data_{0..9}/file_{1..3}.csv";
+        String expected = 
"s3://bucket/data_{0,1,2,3,4,5,6,7,8,9}/file_{1,2,3}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_noBraces() {
+        // Test path without any braces
+        String input = "s3://bucket/data.csv";
+        String expected = "s3://bucket/data.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testExtendGlobNumberRange_specialCase() {
+        // Test special case from PR description {2..4,6}
+        String input = "data_{2..4,6}.csv";
+        String expected = "data_{2,3,4,6}.csv";
+        String result = S3Util.extendGlobNumberRange(input);
+        Assert.assertEquals(expected, result);
+    }
+
+    @Test
+    public void testGetLongestPrefix_withGlobPattern() {
+        // Test getLongestPrefix with glob patterns
+        String input1 = "s3://bucket/path/to/file_{1..3}.csv";
+        String expected1 = "s3://bucket/path/to/file_";
+        String result1 = S3Util.getLongestPrefix(input1);
+        Assert.assertEquals(expected1, result1);
+
+        String input2 = "s3://bucket/path/*/file.csv";
+        String expected2 = "s3://bucket/path/";
+        String result2 = S3Util.getLongestPrefix(input2);
+        Assert.assertEquals(expected2, result2);
+
+        String input3 = "s3://bucket/path/file.csv";
+        String expected3 = "s3://bucket/path/file.csv";
+        String result3 = S3Util.getLongestPrefix(input3);
+        Assert.assertEquals(expected3, result3);
+    }
+
+    @Test
+    public void testExtendGlobs() {
+        // Test extendGlobs method (which currently just calls 
extendGlobNumberRange)
+        String input = "file_{1..3}.csv";
+        String expected = "file_{1,2,3}.csv";
+        String result = S3Util.extendGlobs(input);
+        Assert.assertEquals(expected, result);
+    }
+}
+
diff --git a/regression-test/data/external_table_p0/tvf/hdfs_data_1.txt 
b/regression-test/data/external_table_p0/tvf/hdfs_data_1.txt
new file mode 100644
index 00000000000..cf4ee224105
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/hdfs_data_1.txt
@@ -0,0 +1 @@
+1,1
\ No newline at end of file
diff --git a/regression-test/data/external_table_p0/tvf/hdfs_data_2.txt 
b/regression-test/data/external_table_p0/tvf/hdfs_data_2.txt
new file mode 100644
index 00000000000..e404eb14a53
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/hdfs_data_2.txt
@@ -0,0 +1 @@
+2,2
\ No newline at end of file
diff --git a/regression-test/data/external_table_p0/tvf/hdfs_data_3.txt 
b/regression-test/data/external_table_p0/tvf/hdfs_data_3.txt
new file mode 100644
index 00000000000..746032099e5
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/hdfs_data_3.txt
@@ -0,0 +1 @@
+3,3
\ No newline at end of file
diff --git 
a/regression-test/data/external_table_p0/tvf/test_s3_tvf_number_range.out 
b/regression-test/data/external_table_p0/tvf/test_s3_tvf_number_range.out
new file mode 100644
index 00000000000..631b65bb3aa
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/test_s3_tvf_number_range.out
@@ -0,0 +1,57 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !test1_data --
+1      1
+2      2
+3      3
+
+-- !test2_data --
+1      1
+2      2
+3      3
+
+-- !test3_data --
+2      2
+
+-- !test4_data --
+
+-- !test5_data --
+1      1
+2      2
+3      3
+4      4
+
+-- !test6_data --
+11     11
+12     12
+21     21
+22     22
+
+-- !test7_data --
+1      1
+2      2
+3      3
+
+-- !test8_data --
+1      1
+2      2
+3      3
+
+-- !test9_data --
+1      1
+2      2
+3      3
+
+-- !test10_data --
+1      1
+2      2
+3      3
+
+-- !test11_data --
+1      1
+2      2
+3      3
+
+-- !test12_data --
+1      1
+2      2
+3      3
diff --git 
a/regression-test/suites/external_table_p0/tvf/test_s3_tvf_number_range.groovy 
b/regression-test/suites/external_table_p0/tvf/test_s3_tvf_number_range.groovy
new file mode 100644
index 00000000000..9c86674de35
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/tvf/test_s3_tvf_number_range.groovy
@@ -0,0 +1,398 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_s3_tvf_number_range", "p0,external") {
+
+    String ak = getS3AK()
+    String sk = getS3SK()
+    String s3_endpoint = getS3Endpoint()
+    String region = getS3Region()
+    String bucket = context.config.otherConfigs.get("s3BucketName");
+    String test_table = "test_s3_tvf_number_range_table"
+
+    sql """ DROP TABLE IF EXISTS ${test_table} """
+    
+    sql """
+        CREATE TABLE ${test_table} (
+            a INT,
+            b INT
+        )
+        DUPLICATE KEY(a) 
+        DISTRIBUTED BY HASH(a) BUCKETS 1
+        PROPERTIES("replication_num" = "1");
+    """
+
+    //////////////////////////////////////////////////////////////////////
+    //                              TEST FOR S3 
+    //////////////////////////////////////////////////////////////////////
+
+    // Test 1: Single range expansion {1..3} - should load {1,2,3}
+    try {
+        sql """ INSERT INTO ${test_table}
+                SELECT a, b FROM S3
+                (
+                    "uri" = "s3://${bucket}/load/tvf_data/data_{1..3}.csv",
+                    "format" = "csv",
+                    "column_separator" = ",",
+                    "s3.endpoint" = "${s3_endpoint}",
+                    "s3.region" = "${region}",
+                    "s3.access_key" = "${ak}",
+                    "s3.secret_key" = "${sk}",
+                    "csv_schema" = "a:int;b:int"
+                );
+            """
+        qt_test1_data """ SELECT * FROM ${test_table} """
+        
+    } finally {
+    }
+
+    // Test 2: Single range expansion {3..1} - should load {1,2,3}
+    try {
+        sql """ TRUNCATE TABLE ${test_table} """
+        
+        sql """ INSERT INTO ${test_table}
+                SELECT a, b FROM S3
+                (
+                    "uri" = "s3://${bucket}/load/tvf_data/data_{3..1}.csv",
+                    "format" = "csv",
+                    "column_separator" = ",",
+                    "s3.endpoint" = "${s3_endpoint}",
+                    "s3.region" = "${region}",
+                    "s3.access_key" = "${ak}",
+                    "s3.secret_key" = "${sk}",
+                    "csv_schema" = "a:int;b:int"
+                );
+            """
+        qt_test2_data """ SELECT * FROM ${test_table} """
+        
+    } finally {
+    }
+
+    // Test 3: Single range expansion {2..2} - should load {2}
+    try {
+        sql """ TRUNCATE TABLE ${test_table} """
+
+        sql """ INSERT INTO ${test_table}
+                SELECT a, b FROM S3
+                (
+                    "uri" = "s3://${bucket}/load/tvf_data/data_{2..2}.csv",
+                    "format" = "csv",
+                    "column_separator" = ",",
+                    "s3.endpoint" = "${s3_endpoint}",
+                    "s3.region" = "${region}",
+                    "s3.access_key" = "${ak}",
+                    "s3.secret_key" = "${sk}",
+                    "csv_schema" = "a:int;b:int"
+                );
+            """
+        qt_test3_data """ SELECT * FROM ${test_table} """
+        
+    } finally {
+    }
+
+    // Test 4: Single range expansion {-1..1} - should load 0 files
+    try {
+        sql """ TRUNCATE TABLE ${test_table} """
+
+        sql """ INSERT INTO ${test_table}
+                SELECT a, b FROM S3
+                (
+                    "uri" = "s3://${bucket}/load/tvf_data/data_{-1..1}.csv",
+                    "format" = "csv",
+                    "column_separator" = ",",
+                    "s3.endpoint" = "${s3_endpoint}",
+                    "s3.region" = "${region}",
+                    "s3.access_key" = "${ak}",
+                    "s3.secret_key" = "${sk}",
+                    "csv_schema" = "a:int;b:int"
+                );
+            """
+        qt_test4_data """ SELECT * FROM ${test_table} """
+        
+    } finally {
+    }
+
+    // Test 5: Multiple ranges in one path {1..2}_{1..2} - should load 
{11,12,21,22}
+    try {
+        sql """ TRUNCATE TABLE ${test_table} """
+        
+        sql """ INSERT INTO ${test_table}
+                SELECT a, b FROM S3
+                (
+                    "uri" = 
"s3://${bucket}/load/tvf_data/data_{1..2}_{1..2}.csv",
+                    "format" = "csv",
+                    "column_separator" = ",",
+                    "s3.endpoint" = "${s3_endpoint}",
+                    "s3.region" = "${region}",
+                    "s3.access_key" = "${ak}",
+                    "s3.secret_key" = "${sk}",
+                    "csv_schema" = "a:int;b:int"
+                );
+            """
+        qt_test5_data """ SELECT * FROM ${test_table} """
+        
+    } finally {
+    }
+
+    // Test 6: Multiple ranges in one path build a nums {0..9}{0..9} - should 
load all files in 00~99
+    // the test cover the case : load the files only existing - {11,12,21,22}
+    try {
+        sql """ TRUNCATE TABLE ${test_table} """
+        
+        sql """ INSERT INTO ${test_table}
+                SELECT a, b FROM S3
+                (
+                    "uri" = 
"s3://${bucket}/load/tvf_data/data_{0..9}{0..9}.csv",
+                    "format" = "csv",
+                    "column_separator" = ",",
+                    "s3.endpoint" = "${s3_endpoint}",
+                    "s3.region" = "${region}",
+                    "s3.access_key" = "${ak}",
+                    "s3.secret_key" = "${sk}",
+                    "csv_schema" = "a:int;b:int"
+                );
+            """
+        qt_test6_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+        
+    } finally {
+    }
+
+    // Test 7: Multiple ranges in one path with single num {1..2,3,1..3} - 
should load {1,2,3}
+    try {
+        sql """ TRUNCATE TABLE ${test_table} """
+        
+        sql """ INSERT INTO ${test_table}
+                SELECT a, b FROM S3
+                (
+                    "uri" = 
"s3://${bucket}/load/tvf_data/data_{1..2,3,1..3}.csv",
+                    "format" = "csv",
+                    "column_separator" = ",",
+                    "s3.endpoint" = "${s3_endpoint}",
+                    "s3.region" = "${region}",
+                    "s3.access_key" = "${ak}",
+                    "s3.secret_key" = "${sk}",
+                    "csv_schema" = "a:int;b:int"
+                );
+            """
+        qt_test7_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+        
+    } finally {
+    }
+
+    // Test 8: Multiple ranges in one path with single num {3..1,2,1..2} - 
should load {1,2,3}
+    try {
+        sql """ TRUNCATE TABLE ${test_table} """
+        
+        sql """ INSERT INTO ${test_table}
+                SELECT a, b FROM S3
+                (
+                    "uri" = 
"s3://${bucket}/load/tvf_data/data_{3..1,2,1..2}.csv",
+                    "format" = "csv",
+                    "column_separator" = ",",
+                    "s3.endpoint" = "${s3_endpoint}",
+                    "s3.region" = "${region}",
+                    "s3.access_key" = "${ak}",
+                    "s3.secret_key" = "${sk}",
+                    "csv_schema" = "a:int;b:int"
+                );
+            """
+        qt_test8_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+        
+    } finally {
+    }
+
+    ////////////////////////////////////
+    // Test with invalid character
+    ///////////////////////////////////
+    
+    // Test 9: has negative number {-1..2,1..3} - should load {1,2,3}
+    try {
+        sql """ TRUNCATE TABLE ${test_table} """
+        
+        sql """ INSERT INTO ${test_table}
+                SELECT a, b FROM S3
+                (
+                    "uri" = 
"s3://${bucket}/load/tvf_data/data_{-1..2,1..3}.csv",
+                    "format" = "csv",
+                    "column_separator" = ",",
+                    "s3.endpoint" = "${s3_endpoint}",
+                    "s3.region" = "${region}",
+                    "s3.access_key" = "${ak}",
+                    "s3.secret_key" = "${sk}",
+                    "csv_schema" = "a:int;b:int"
+                );
+            """
+        qt_test9_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+        
+    } finally {
+    }
+
+    // Test 10: has char {Refrain,1..3} - should load {1,2,3}
+    try {
+        sql """ TRUNCATE TABLE ${test_table} """
+        
+        sql """ INSERT INTO ${test_table}
+                SELECT a, b FROM S3
+                (
+                    "uri" = 
"s3://${bucket}/load/tvf_data/data_{Refrain,1..3}.csv",
+                    "format" = "csv",
+                    "column_separator" = ",",
+                    "s3.endpoint" = "${s3_endpoint}",
+                    "s3.region" = "${region}",
+                    "s3.access_key" = "${ak}",
+                    "s3.secret_key" = "${sk}",
+                    "csv_schema" = "a:int;b:int"
+                );
+            """
+        qt_test10_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+        
+    } finally {
+    }
+
+    // Test 11: because BrokerLoad uses the same code path, we just test it 
briefly here
+    // {3..1,2,1..2} - should load {1,2,3}
+    try {
+        sql """ TRUNCATE TABLE ${test_table} """
+        
+        def label = "test_broker_load_number_range_" + 
System.currentTimeMillis()
+        
+        sql """
+            LOAD LABEL ${label} (
+                DATA 
INFILE("s3://${bucket}/load/tvf_data/data_{3..1,2,1..2}.csv")
+                INTO TABLE ${test_table}
+                COLUMNS TERMINATED BY ","
+                FORMAT AS "csv"
+                (a, b)
+            )
+            WITH S3 (
+                "s3.access_key" = "${ak}",
+                "s3.secret_key" = "${sk}",
+                "s3.endpoint" = "${s3_endpoint}",
+                "s3.region" = "${region}"
+            );
+        """
+        
+        // Wait for load to complete
+        def max_try_time = 60
+        def success = false
+        while (max_try_time > 0) {
+            def result = sql """ SHOW LOAD WHERE LABEL = '${label}' ORDER BY 
CreateTime DESC LIMIT 1; """
+            if (result.size() > 0) {
+                def state = result[0][2]  // State column
+                if (state == "FINISHED") {
+                    success = true
+                    break
+                } else if (state == "CANCELLED") {
+                    logger.error("Load job ${label} was cancelled: 
${result[0]}")
+                    break
+                }
+            }
+            Thread.sleep(1000)
+            max_try_time--
+        }
+        
+        if (!success) {
+            def result = sql """ SHOW LOAD WHERE LABEL = '${label}' ORDER BY 
CreateTime DESC LIMIT 1; """
+            logger.error("Load job ${label} failed or timeout. Status: 
${result}")
+        }
+        
+        qt_test11_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+        
+    } finally {
+    }
+
    //////////////////////////////////////////////////////////////////////
    //                              TEST FOR HDFS
    //////////////////////////////////////////////////////////////////////

    // HDFS coverage only runs when the regression environment exposes an
    // HDFS cluster (enableHdfs() reads the suite configuration).
    if (enableHdfs()) {
        try {
            // Get HDFS configuration from the regression framework.
            def hdfsFs = getHdfsFs()
            def hdfsUser = getHdfsUser()
            def hdfsPasswd = getHdfsPasswd()

            // Upload test data files to HDFS.
            // The uploadToHdfs method takes a relative path from regression-test/data/
            // and uploads to HDFS with the same relative path.
            // NOTE(review): the uploaded fixtures end in .txt but the load
            // pattern below references hdfs_data_{1..3}.csv — confirm that
            // uploadToHdfs renames the files or that matching .csv files
            // already exist on the cluster.
            def hdfsPath1 = uploadToHdfs("external_table_p0/tvf/hdfs_data_1.txt")
            def hdfsPath2 = uploadToHdfs("external_table_p0/tvf/hdfs_data_2.txt")
            def hdfsPath3 = uploadToHdfs("external_table_p0/tvf/hdfs_data_3.txt")

            logger.info("Uploaded test files to HDFS: ${hdfsPath1}, ${hdfsPath2}, ${hdfsPath3}")

            // Helper closure: poll a broker-load job (~10s budget) until it
            // reaches a terminal state, then log a diagnostic on failure.
            def check_hdfs_load_result = {checklabel ->
                def max_try_milli_secs = 10000
                def success = false
                // Groovy truthiness: a non-zero int is true, so the loop
                // exits once the 10000ms budget counts down to 0.
                while(max_try_milli_secs) {
                    def result = sql """ SHOW LOAD WHERE LABEL = '${checklabel}' """
                    if (result.size() > 0) {
                        def state = result[0][2]  // State column
                        if (state == "FINISHED") {
                            // Ensure the loaded data is visible before the
                            // caller runs its qt_ verification query.
                            sql "sync"
                            success = true
                            break
                        } else if (state == "CANCELLED") {
                            logger.error("HDFS load job ${checklabel} was cancelled: ${result[0]}")
                            break
                        }
                    }
                     sleep(1000) // wait 1 second every time
                    max_try_milli_secs-=1000
                }

                // Timeout/cancel path: log the last observed status; the
                // caller's qt_ comparison will still flag missing rows.
                if (!success) {
                    def result = sql """ SHOW LOAD WHERE LABEL = '${checklabel}' """
                    logger.error("HDFS load job ${checklabel} failed or timeout. Status: ${result}")
                }
            }

            // Test 12: HDFS Broker Load, single range {1..3} - should load {1,2,3}.
            try {
                sql """ TRUNCATE TABLE ${test_table} """

                def label = "test_hdfs_load_range_" + System.currentTimeMillis()
                // Use the returned HDFS path pattern.
                def hdfsDataPath = "${hdfsFs}/external_table_p0/tvf/hdfs_data_{1..3}.csv"

                sql """
                    LOAD LABEL ${label} (
                        DATA INFILE("${hdfsDataPath}")
                        INTO TABLE ${test_table}
                        COLUMNS TERMINATED BY ","
                        FORMAT AS "csv"
                        (a, b)
                    ) 
                    WITH HDFS (
                        "username" = "${hdfsUser}",
                        "password" = "${hdfsPasswd}",
                        "fs.defaultFS"="${hdfsFs}"
                    );
                """

                check_hdfs_load_result.call(label)
                qt_test12_data """ SELECT * FROM ${test_table} ORDER BY a, b """

            } finally {
            }

        } finally {
        }
    }
+    sql """ DROP TABLE IF EXISTS ${test_table} """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to