This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new eb70d3de808 branch-4.0: [improvement](tvf load)add the
data_{1..200}.csv wildcard in tvf load #56705 (#57255)
eb70d3de808 is described below
commit eb70d3de8081e8f378834c0663676518bc9c5330
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Oct 23 14:43:14 2025 +0800
branch-4.0: [improvement](tvf load)add the data_{1..200}.csv wildcard in
tvf load #56705 (#57255)
Cherry-picked from #56705
Co-authored-by: Refrain <[email protected]>
---
.../java/org/apache/doris/common/util/S3Util.java | 88 +++++
.../org/apache/doris/fs/obj/AzureObjStorage.java | 2 +-
.../java/org/apache/doris/fs/obj/S3ObjStorage.java | 2 +-
.../apache/doris/fs/remote/dfs/DFSFileSystem.java | 3 +-
.../org/apache/doris/common/util/S3UtilTest.java | 252 +++++++++++++
.../data/external_table_p0/tvf/hdfs_data_1.txt | 1 +
.../data/external_table_p0/tvf/hdfs_data_2.txt | 1 +
.../data/external_table_p0/tvf/hdfs_data_3.txt | 1 +
.../tvf/test_s3_tvf_number_range.out | 57 +++
.../tvf/test_s3_tvf_number_range.groovy | 398 +++++++++++++++++++++
10 files changed, 802 insertions(+), 3 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
index 0732cbb4e72..e537d1f47b0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
@@ -58,6 +58,8 @@ import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
public class S3Util {
private static final Logger LOG = LogManager.getLogger(Util.class);
@@ -300,6 +302,92 @@ public class S3Util {
return globPattern.substring(0, earliestSpecialCharIndex);
}
public class S3Util {
    // Patterns are constant — compile once instead of on every call.
    private static final Pattern BRACE_PATTERN = Pattern.compile("\\{([^}]+)\\}");
    private static final Pattern RANGE_PATTERN = Pattern.compile("^(-?\\d+)\\.\\.(-?\\d+)$");

    /**
     * Applies all glob-extension rewrite rules to a path pattern.
     * Currently the only rule is numeric {@code {start..end}} range expansion.
     *
     * @param pathPattern glob path as written by the user
     * @return the path with extended glob syntax rewritten to plain glob syntax
     */
    public static String extendGlobs(String pathPattern) {
        return extendGlobNumberRange(pathPattern);
    }

    /**
     * Converts range patterns to brace enumeration patterns for glob matching,
     * e.g. "file{1..3}" => "file{1,2,3}" and "file_{1..3,4,5..6}" => "file_{1,2,3,4,5,6}".
     *
     * <p>Rules:
     * <ul>
     *   <li>Reversed ranges are normalized ("{3..1}" behaves like "{1..3}") and values
     *       are de-duplicated, preserving first-seen order.</li>
     *   <li>Parts containing negative numbers, non-numeric characters, or digits that
     *       do not fit in an {@code int} are skipped; if every part is skipped the
     *       brace group is kept verbatim, so e.g. "data_{-1..4}.csv" and
     *       "data_{a..4}.csv" will not load any file.</li>
     *   <li>Leading zeros are not preserved: "{01..03}" becomes "{1,2,3}".</li>
     * </ul>
     *
     * <p>NOTE(review): the expansion size is unbounded — a pattern such as
     * "{0..2000000000}" would try to build a huge string; consider capping it.
     *
     * @param pathPattern path that may contain {start..end} or mixed {start..end,values}
     * @return path with ranges converted to comma-separated enumeration
     */
    public static String extendGlobNumberRange(String pathPattern) {
        Matcher braceMatcher = BRACE_PATTERN.matcher(pathPattern);
        StringBuilder result = new StringBuilder();

        while (braceMatcher.find()) {
            String braceContent = braceMatcher.group(1);
            // LinkedHashSet gives O(1) de-duplication while keeping first-seen
            // order (the original List#contains scan was O(n) per element).
            Set<Integer> numbers = new LinkedHashSet<>();

            for (String part : braceContent.split(",")) {
                part = part.trim();
                Matcher rangeMatcher = RANGE_PATTERN.matcher(part);
                try {
                    if (rangeMatcher.matches()) {
                        int start = Integer.parseInt(rangeMatcher.group(1));
                        int end = Integer.parseInt(rangeMatcher.group(2));
                        // A negative bound marks the whole range part as invalid.
                        if (start < 0 || end < 0) {
                            continue;
                        }
                        if (start > end) { // normalize reversed ranges like {3..1}
                            int tmp = start;
                            start = end;
                            end = tmp;
                        }
                        for (int i = start; i <= end; i++) {
                            numbers.add(i);
                        }
                    } else if (part.matches("^\\d+$")) {
                        // Single non-negative value like "4".
                        numbers.add(Integer.parseInt(part));
                    }
                    // Anything else (letters, empty part, "a..4", ...) is skipped
                    // so the remaining parts are still processed.
                } catch (NumberFormatException ignored) {
                    // Digit runs that overflow int (e.g. "{99999999999}") are
                    // treated as an invalid part instead of aborting the rewrite.
                }
            }

            String replacement;
            if (numbers.isEmpty()) {
                // No valid numbers found: keep the brace group exactly as written.
                replacement = "{" + braceContent + "}";
            } else {
                StringBuilder sb = new StringBuilder("{");
                int i = 0;
                for (Integer n : numbers) {
                    if (i++ > 0) {
                        sb.append(',');
                    }
                    sb.append(n);
                }
                replacement = sb.append('}').toString();
            }
            // quoteReplacement is required: braceContent may contain '$' or '\',
            // which appendReplacement would otherwise parse as group references
            // (the previous code threw IllegalArgumentException on "{$x}").
            braceMatcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
        }
        braceMatcher.appendTail(result);

        return result.toString();
    }
}
+
// Fast fail validation for S3 endpoint connectivity to avoid retries and
long waits
// when network conditions are poor. Validates endpoint format, whitelist,
security,
// and tests connection with 10s timeout.
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
index 17f2d3b3439..581307c9204 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
@@ -302,7 +302,7 @@ public class AzureObjStorage implements
ObjStorage<BlobServiceClient> {
Status st = Status.OK;
try {
S3URI uri = S3URI.create(remotePath, isUsePathStyle,
forceParsingByStandardUri);
- String globPath = uri.getKey();
+ String globPath = S3Util.extendGlobs(uri.getKey());
String bucket = uri.getBucket();
if (LOG.isDebugEnabled()) {
LOG.debug("try to glob list for azure, remote path {}, orig
{}", globPath, remotePath);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index 9522449c7d9..144ce154b42 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -573,7 +573,7 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
}
String bucket = uri.getBucket();
- String globPath = uri.getKey(); // eg: path/to/*.csv
+ String globPath = S3Util.extendGlobs(uri.getKey());
if (LOG.isDebugEnabled()) {
LOG.debug("globList globPath:{}, remotePath:{}", globPath,
remotePath);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java
index a39812c2909..2ee3c156931 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java
@@ -21,6 +21,7 @@ import org.apache.doris.analysis.StorageBackend;
import org.apache.doris.backup.Status;
import org.apache.doris.common.UserException;
import org.apache.doris.common.security.authentication.HadoopAuthenticator;
+import org.apache.doris.common.util.S3Util;
import org.apache.doris.common.util.URI;
import org.apache.doris.datasource.property.storage.HdfsCompatibleProperties;
import org.apache.doris.datasource.property.storage.StorageProperties;
@@ -510,7 +511,7 @@ public class DFSFileSystem extends RemoteFileSystem {
public Status globList(String remotePath, List<RemoteFile> result, boolean
fileNameOnly) {
try {
URI pathUri = URI.create(remotePath);
- Path pathPattern = new Path(pathUri.getLocation());
+ Path pathPattern = new
Path(S3Util.extendGlobs(pathUri.getLocation()));
FileSystem fileSystem = nativeFileSystem(pathPattern);
FileStatus[] files =
hdfsProperties.getHadoopAuthenticator().doAs(() ->
fileSystem.globStatus(pathPattern));
if (files == null) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
new file mode 100644
index 00000000000..23715440e8c
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
@@ -0,0 +1,252 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.util;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class S3UtilTest {
+
+ @Test
+ public void testExtendGlobNumberRange_simpleRange() {
+ // Test simple range expansion {1..3}
+ String input = "file_{1..3}.csv";
+ String expected = "file_{1,2,3}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_reverseRange() {
+ // Test reverse range {3..1}, should normalize to {1,2,3}
+ String input = "file_{3..1}.csv";
+ String expected = "file_{1,2,3}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_singleNumber() {
+ // Test single number range {2..2}
+ String input = "file_{2..2}.csv";
+ String expected = "file_{2}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_mixedRangeAndValues() {
+ // Test mixed range and single values {1..2,3,1..3}
+ String input = "file_{1..2,3,1..3}.csv";
+ String expected = "file_{1,2,3}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_multipleRanges() {
+ // Test multiple ranges in one path {1..2}_{1..2}
+ String input = "file_{1..2}_{1..2}.csv";
+ String expected = "file_{1,2}_{1,2}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_largeRange() {
+ // Test large range {0..9}
+ String input = "file_{0..9}.csv";
+ String expected = "file_{0,1,2,3,4,5,6,7,8,9}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_negativeNumbersFiltered() {
+ // If start or end is negative, the entire range is skipped
+ String input = "file_{-1..2}.csv";
+ String expected = "file_{-1..2}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_allNegativeRange() {
+ // Test all negative range {-3..-1}, should keep original
+ String input = "file_{-3..-1}.csv";
+ String expected = "file_{-3..-1}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_mixedWithNegative() {
+ // The range -1..2 is skipped, only 1..3 is expanded
+ String input = "file_{-1..2,1..3}.csv";
+ String expected = "file_{1,2,3}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_invalidCharacters() {
+ // Test invalid characters {Refrain,1..3}
+ String input = "file_{Refrain,1..3}.csv";
+ String expected = "file_{1,2,3}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_mixedInvalidAndValid() {
+ // Range 3..1 is normalized to 1..3, resulting in {1,2,3}
+ String input = "file_{3..1,2,1..2}.csv";
+ String expected = "file_{1,2,3}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_noRange() {
+ // Test no range pattern
+ String input = "file_123.csv";
+ String expected = "file_123.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_noNumericRange() {
+ // Test no numeric range {a..z}
+ String input = "file_{a..z}.csv";
+ String expected = "file_{a..z}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_emptyBraces() {
+ // Test empty braces {}
+ String input = "file_{}.csv";
+ String expected = "file_{}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_singleValue() {
+ // Test single value in braces {5}
+ String input = "file_{5}.csv";
+ String expected = "file_{5}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_multipleValues() {
+ // Test multiple single values {1,2,3}
+ String input = "file_{1,2,3}.csv";
+ String expected = "file_{1,2,3}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_duplicateRemoval() {
+ // Test duplicate removal {1..3,2..4}
+ String input = "file_{1..3,2..4}.csv";
+ String expected = "file_{1,2,3,4}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_largeNumbers() {
+ // Test large numbers {100..103}
+ String input = "file_{100..103}.csv";
+ String expected = "file_{100,101,102,103}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_zeroPadding() {
+ // Test that zero-padding is not preserved (behavior test)
+ // The function converts to integers, so "01" becomes "1"
+ String input = "file_{01..03}.csv";
+ String expected = "file_{1,2,3}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_complexPath() {
+ // Test complex path with multiple patterns
+ String input = "s3://bucket/data_{0..9}/file_{1..3}.csv";
+ String expected =
"s3://bucket/data_{0,1,2,3,4,5,6,7,8,9}/file_{1,2,3}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_noBraces() {
+ // Test path without any braces
+ String input = "s3://bucket/data.csv";
+ String expected = "s3://bucket/data.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testExtendGlobNumberRange_specialCase() {
+ // Test special case from PR description {2..4,6}
+ String input = "data_{2..4,6}.csv";
+ String expected = "data_{2,3,4,6}.csv";
+ String result = S3Util.extendGlobNumberRange(input);
+ Assert.assertEquals(expected, result);
+ }
+
+ @Test
+ public void testGetLongestPrefix_withGlobPattern() {
+ // Test getLongestPrefix with glob patterns
+ String input1 = "s3://bucket/path/to/file_{1..3}.csv";
+ String expected1 = "s3://bucket/path/to/file_";
+ String result1 = S3Util.getLongestPrefix(input1);
+ Assert.assertEquals(expected1, result1);
+
+ String input2 = "s3://bucket/path/*/file.csv";
+ String expected2 = "s3://bucket/path/";
+ String result2 = S3Util.getLongestPrefix(input2);
+ Assert.assertEquals(expected2, result2);
+
+ String input3 = "s3://bucket/path/file.csv";
+ String expected3 = "s3://bucket/path/file.csv";
+ String result3 = S3Util.getLongestPrefix(input3);
+ Assert.assertEquals(expected3, result3);
+ }
+
+ @Test
+ public void testExtendGlobs() {
+ // Test extendGlobs method (which currently just calls
extendGlobNumberRange)
+ String input = "file_{1..3}.csv";
+ String expected = "file_{1,2,3}.csv";
+ String result = S3Util.extendGlobs(input);
+ Assert.assertEquals(expected, result);
+ }
+}
+
diff --git a/regression-test/data/external_table_p0/tvf/hdfs_data_1.txt
b/regression-test/data/external_table_p0/tvf/hdfs_data_1.txt
new file mode 100644
index 00000000000..cf4ee224105
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/hdfs_data_1.txt
@@ -0,0 +1 @@
+1,1
\ No newline at end of file
diff --git a/regression-test/data/external_table_p0/tvf/hdfs_data_2.txt
b/regression-test/data/external_table_p0/tvf/hdfs_data_2.txt
new file mode 100644
index 00000000000..e404eb14a53
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/hdfs_data_2.txt
@@ -0,0 +1 @@
+2,2
\ No newline at end of file
diff --git a/regression-test/data/external_table_p0/tvf/hdfs_data_3.txt
b/regression-test/data/external_table_p0/tvf/hdfs_data_3.txt
new file mode 100644
index 00000000000..746032099e5
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/hdfs_data_3.txt
@@ -0,0 +1 @@
+3,3
\ No newline at end of file
diff --git
a/regression-test/data/external_table_p0/tvf/test_s3_tvf_number_range.out
b/regression-test/data/external_table_p0/tvf/test_s3_tvf_number_range.out
new file mode 100644
index 00000000000..631b65bb3aa
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/test_s3_tvf_number_range.out
@@ -0,0 +1,57 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !test1_data --
+1 1
+2 2
+3 3
+
+-- !test2_data --
+1 1
+2 2
+3 3
+
+-- !test3_data --
+2 2
+
+-- !test4_data --
+
+-- !test5_data --
+1 1
+2 2
+3 3
+4 4
+
+-- !test6_data --
+11 11
+12 12
+21 21
+22 22
+
+-- !test7_data --
+1 1
+2 2
+3 3
+
+-- !test8_data --
+1 1
+2 2
+3 3
+
+-- !test9_data --
+1 1
+2 2
+3 3
+
+-- !test10_data --
+1 1
+2 2
+3 3
+
+-- !test11_data --
+1 1
+2 2
+3 3
+
+-- !test12_data --
+1 1
+2 2
+3 3
diff --git
a/regression-test/suites/external_table_p0/tvf/test_s3_tvf_number_range.groovy
b/regression-test/suites/external_table_p0/tvf/test_s3_tvf_number_range.groovy
new file mode 100644
index 00000000000..9c86674de35
--- /dev/null
+++
b/regression-test/suites/external_table_p0/tvf/test_s3_tvf_number_range.groovy
@@ -0,0 +1,398 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_s3_tvf_number_range", "p0,external") {
+
+ String ak = getS3AK()
+ String sk = getS3SK()
+ String s3_endpoint = getS3Endpoint()
+ String region = getS3Region()
+ String bucket = context.config.otherConfigs.get("s3BucketName");
+ String test_table = "test_s3_tvf_number_range_table"
+
+ sql """ DROP TABLE IF EXISTS ${test_table} """
+
+ sql """
+ CREATE TABLE ${test_table} (
+ a INT,
+ b INT
+ )
+ DUPLICATE KEY(a)
+ DISTRIBUTED BY HASH(a) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ //////////////////////////////////////////////////////////////////////
+ // TEST FOR S3
+ //////////////////////////////////////////////////////////////////////
+
+ // Test 1: Single range expansion {1..3} - should load {1,2,3}
+ try {
+ sql """ INSERT INTO ${test_table}
+ SELECT a, b FROM S3
+ (
+ "uri" = "s3://${bucket}/load/tvf_data/data_{1..3}.csv",
+ "format" = "csv",
+ "column_separator" = ",",
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "csv_schema" = "a:int;b:int"
+ );
+ """
+ qt_test1_data """ SELECT * FROM ${test_table} """
+
+ } finally {
+ }
+
+ // Test 2: Single range expansion {3..1} - should load {1,2,3}
+ try {
+ sql """ TRUNCATE TABLE ${test_table} """
+
+ sql """ INSERT INTO ${test_table}
+ SELECT a, b FROM S3
+ (
+ "uri" = "s3://${bucket}/load/tvf_data/data_{3..1}.csv",
+ "format" = "csv",
+ "column_separator" = ",",
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "csv_schema" = "a:int;b:int"
+ );
+ """
+ qt_test2_data """ SELECT * FROM ${test_table} """
+
+ } finally {
+ }
+
+ // Test 3: Single range expansion {2..2} - should load {2}
+ try {
+ sql """ TRUNCATE TABLE ${test_table} """
+
+ sql """ INSERT INTO ${test_table}
+ SELECT a, b FROM S3
+ (
+ "uri" = "s3://${bucket}/load/tvf_data/data_{2..2}.csv",
+ "format" = "csv",
+ "column_separator" = ",",
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "csv_schema" = "a:int;b:int"
+ );
+ """
+ qt_test3_data """ SELECT * FROM ${test_table} """
+
+ } finally {
+ }
+
+ // Test 4: Single range expansion {-1..1} - should load 0 files
+ try {
+ sql """ TRUNCATE TABLE ${test_table} """
+
+ sql """ INSERT INTO ${test_table}
+ SELECT a, b FROM S3
+ (
+ "uri" = "s3://${bucket}/load/tvf_data/data_{-1..1}.csv",
+ "format" = "csv",
+ "column_separator" = ",",
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "csv_schema" = "a:int;b:int"
+ );
+ """
+ qt_test4_data """ SELECT * FROM ${test_table} """
+
+ } finally {
+ }
+
+ // Test 5: Multiple ranges in one path {1..2}_{1..2} - should load
{11,12,21,22}
+ try {
+ sql """ TRUNCATE TABLE ${test_table} """
+
+ sql """ INSERT INTO ${test_table}
+ SELECT a, b FROM S3
+ (
+ "uri" =
"s3://${bucket}/load/tvf_data/data_{1..2}_{1..2}.csv",
+ "format" = "csv",
+ "column_separator" = ",",
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "csv_schema" = "a:int;b:int"
+ );
+ """
+ qt_test5_data """ SELECT * FROM ${test_table} """
+
+ } finally {
+ }
+
+ // Test 6: Multiple adjacent ranges combining into two-digit numbers {0..9}{0..9} - should
match all files in 00~99
+ // the test cover the case : load the files only existing - {11,12,21,22}
+ try {
+ sql """ TRUNCATE TABLE ${test_table} """
+
+ sql """ INSERT INTO ${test_table}
+ SELECT a, b FROM S3
+ (
+ "uri" =
"s3://${bucket}/load/tvf_data/data_{0..9}{0..9}.csv",
+ "format" = "csv",
+ "column_separator" = ",",
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "csv_schema" = "a:int;b:int"
+ );
+ """
+ qt_test6_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+
+ } finally {
+ }
+
+ // Test 7: Multiple ranges in one path with single num {1..2,3,1..3} -
should load {1,2,3}
+ try {
+ sql """ TRUNCATE TABLE ${test_table} """
+
+ sql """ INSERT INTO ${test_table}
+ SELECT a, b FROM S3
+ (
+ "uri" =
"s3://${bucket}/load/tvf_data/data_{1..2,3,1..3}.csv",
+ "format" = "csv",
+ "column_separator" = ",",
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "csv_schema" = "a:int;b:int"
+ );
+ """
+ qt_test7_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+
+ } finally {
+ }
+
+ // Test 8: Multiple ranges in one path with single num {3..1,2,1..2} -
should load {1,2,3}
+ try {
+ sql """ TRUNCATE TABLE ${test_table} """
+
+ sql """ INSERT INTO ${test_table}
+ SELECT a, b FROM S3
+ (
+ "uri" =
"s3://${bucket}/load/tvf_data/data_{3..1,2,1..2}.csv",
+ "format" = "csv",
+ "column_separator" = ",",
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "csv_schema" = "a:int;b:int"
+ );
+ """
+ qt_test8_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+
+ } finally {
+ }
+
+ ////////////////////////////////////
+ // Test with invalid character
+ ///////////////////////////////////
+
+ // Test 9: has negative number {-1..2,1..3} - should load {1,2,3}
+ try {
+ sql """ TRUNCATE TABLE ${test_table} """
+
+ sql """ INSERT INTO ${test_table}
+ SELECT a, b FROM S3
+ (
+ "uri" =
"s3://${bucket}/load/tvf_data/data_{-1..2,1..3}.csv",
+ "format" = "csv",
+ "column_separator" = ",",
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "csv_schema" = "a:int;b:int"
+ );
+ """
+ qt_test9_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+
+ } finally {
+ }
+
+ // Test 10: has char {Refrain,1..3} - should load {1,2,3}
+ try {
+ sql """ TRUNCATE TABLE ${test_table} """
+
+ sql """ INSERT INTO ${test_table}
+ SELECT a, b FROM S3
+ (
+ "uri" =
"s3://${bucket}/load/tvf_data/data_{Refrain,1..3}.csv",
+ "format" = "csv",
+ "column_separator" = ",",
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "csv_schema" = "a:int;b:int"
+ );
+ """
+ qt_test10_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+
+ } finally {
+ }
+
+ // Test 11: because BrokerLoad uses the same code path, we just test it
briefly here
+ // {3..1,2,1..2} - should load {1,2,3}
+ try {
+ sql """ TRUNCATE TABLE ${test_table} """
+
+ def label = "test_broker_load_number_range_" +
System.currentTimeMillis()
+
+ sql """
+ LOAD LABEL ${label} (
+ DATA
INFILE("s3://${bucket}/load/tvf_data/data_{3..1,2,1..2}.csv")
+ INTO TABLE ${test_table}
+ COLUMNS TERMINATED BY ","
+ FORMAT AS "csv"
+ (a, b)
+ )
+ WITH S3 (
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}"
+ );
+ """
+
+ // Wait for load to complete
+ def max_try_time = 60
+ def success = false
+ while (max_try_time > 0) {
+ def result = sql """ SHOW LOAD WHERE LABEL = '${label}' ORDER BY
CreateTime DESC LIMIT 1; """
+ if (result.size() > 0) {
+ def state = result[0][2] // State column
+ if (state == "FINISHED") {
+ success = true
+ break
+ } else if (state == "CANCELLED") {
+ logger.error("Load job ${label} was cancelled:
${result[0]}")
+ break
+ }
+ }
+ Thread.sleep(1000)
+ max_try_time--
+ }
+
+ if (!success) {
+ def result = sql """ SHOW LOAD WHERE LABEL = '${label}' ORDER BY
CreateTime DESC LIMIT 1; """
+ logger.error("Load job ${label} failed or timeout. Status:
${result}")
+ }
+
+ qt_test11_data """ SELECT * FROM ${test_table} ORDER BY a, b """
+
+ } finally {
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // TEST FOR HDFS
+ //////////////////////////////////////////////////////////////////////
+
+ if (enableHdfs()) {
+ try {
+ // Get HDFS configuration
+ def hdfsFs = getHdfsFs()
+ def hdfsUser = getHdfsUser()
+ def hdfsPasswd = getHdfsPasswd()
+
+ // Upload test data files to HDFS
+ // The uploadToHdfs method takes a relative path from
regression-test/data/
+ // and uploads to HDFS with the same relative path
+ def hdfsPath1 =
uploadToHdfs("external_table_p0/tvf/hdfs_data_1.txt")
+ def hdfsPath2 =
uploadToHdfs("external_table_p0/tvf/hdfs_data_2.txt")
+ def hdfsPath3 =
uploadToHdfs("external_table_p0/tvf/hdfs_data_3.txt")
+
+ logger.info("Uploaded test files to HDFS: ${hdfsPath1},
${hdfsPath2}, ${hdfsPath3}")
+
+ // Helper closure to check load result
+ def check_hdfs_load_result = {checklabel ->
+ def max_try_milli_secs = 10000
+ def success = false
+ while(max_try_milli_secs) {
+ def result = sql """ SHOW LOAD WHERE LABEL =
'${checklabel}' """
+ if (result.size() > 0) {
+ def state = result[0][2] // State column
+ if (state == "FINISHED") {
+ sql "sync"
+ success = true
+ break
+ } else if (state == "CANCELLED") {
+ logger.error("HDFS load job ${checklabel} was
cancelled: ${result[0]}")
+ break
+ }
+ }
+ sleep(1000) // wait 1 second every time
+ max_try_milli_secs-=1000
+ }
+
+ if (!success) {
+ def result = sql """ SHOW LOAD WHERE LABEL =
'${checklabel}' """
+ logger.error("HDFS load job ${checklabel} failed or
timeout. Status: ${result}")
+ }
+ }
+
+ // Test 12: HDFS Broker Load Single range {1..3} - should load
{1,2,3}
+ try {
+ sql """ TRUNCATE TABLE ${test_table} """
+
+ def label = "test_hdfs_load_range_" +
System.currentTimeMillis()
+ // Use the returned HDFS path pattern
+ def hdfsDataPath =
"${hdfsFs}/external_table_p0/tvf/hdfs_data_{1..3}.csv"
+
+ sql """
+ LOAD LABEL ${label} (
+ DATA INFILE("${hdfsDataPath}")
+ INTO TABLE ${test_table}
+ COLUMNS TERMINATED BY ","
+ FORMAT AS "csv"
+ (a, b)
+ )
+ WITH HDFS (
+ "username" = "${hdfsUser}",
+ "password" = "${hdfsPasswd}",
+ "fs.defaultFS"="${hdfsFs}"
+ );
+ """
+
+ check_hdfs_load_result.call(label)
+ qt_test12_data """ SELECT * FROM ${test_table} ORDER BY a, b
"""
+
+ } finally {
+ }
+
+ } finally {
+ }
+ }
+ sql """ DROP TABLE IF EXISTS ${test_table} """
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]