This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new fd9bd87b3c5 branch-4.1: [fix](load) fix broker load silently loaded
only the first file when parsing multiple files path #62969 (#63042)
fd9bd87b3c5 is described below
commit fd9bd87b3c5630bbf8950e34429663036e62d659
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu May 7 14:45:06 2026 +0800
branch-4.1: [fix](load) fix broker load silently loaded only the first file
when parsing multiple files path #62969 (#63042)
Cherry-picked from #62969
Co-authored-by: hui lai <[email protected]>
---
.../antlr4/org/apache/doris/nereids/DorisParser.g4 | 2 +-
.../doris/nereids/parser/LogicalPlanBuilder.java | 2 +-
.../trees/plans/commands/LoadCommandTest.java | 28 +++++++++++
.../test_broker_load_multi_filegroup.out | 2 +
.../test_broker_load_multi_filegroup.groovy | 57 +++++++++++++++++++++-
5 files changed, 88 insertions(+), 3 deletions(-)
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
index a244e7fd7a6..c294f183c7f 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
@@ -1049,7 +1049,7 @@ identityOrFunction
;
dataDesc
- : ((WITH)? mergeType)? DATA INFILE LEFT_PAREN filePaths+=STRING_LITERAL (COMMA filePath+=STRING_LITERAL)* RIGHT_PAREN
+ : ((WITH)? mergeType)? DATA INFILE LEFT_PAREN filePaths+=STRING_LITERAL (COMMA filePaths+=STRING_LITERAL)* RIGHT_PAREN
(negative=NEGATIVE)?
INTO TABLE targetTableName=identifier
(partitionSpec)?
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index 0517bcb75a2..5114766e7bd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -2274,7 +2274,7 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
for (Token filePath : ddc.filePaths) {
multiFilePaths.add(filePath.getText().substring(1, filePath.getText().length() - 1));
}
- List<String> filePaths = ddc.filePath == null ? null : multiFilePaths;
+ List<String> filePaths = multiFilePaths.isEmpty() ? null : multiFilePaths;
List<Expression> colMappings;
if (ddc.columnMapping == null) {
colMappings = null;
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/LoadCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/LoadCommandTest.java
index 50b9da79c5b..0f990e6aba7 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/LoadCommandTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/LoadCommandTest.java
@@ -117,6 +117,34 @@ public class LoadCommandTest extends TestWithFeService {
Assertions.assertTrue(dataDescription.getColumnMappingList().get(1).child(0).getExpressionName().contains("userid_bitmap"));
}
+ @Test
+ public void testLoadCommandWithMultipleFiles() {
+ String loadSql = "LOAD LABEL customer_multiple_files_test( "
+ + " DATA INFILE(\"s3://bucket/customer/part-1\", "
+ + " \"s3://bucket/customer/part-2\", "
+ + " \"s3://bucket/customer/part-3\") "
+ + " INTO TABLE customer"
+ + " ) "
+ + " WITH S3( "
+ + " \"s3.access_key\" = \"AK\", "
+ + " \"s3.secret_key\" = \"SK\", "
+ + " \"s3.endpoint\" = \"cos.ap-beijing.myqcloud.com\", "
+ + " \"s3.region\" = \"ap-beijing\");";
+
+ List<Pair<LogicalPlan, StatementContext>> statements = new NereidsParser().parseMultiple(loadSql);
+ Assertions.assertFalse(statements.isEmpty());
+
+ LoadCommand command = (LoadCommand) statements.get(0).first;
+ List<NereidsDataDescription> dataDescriptions = command.getDataDescriptions();
+ Assertions.assertFalse(dataDescriptions.isEmpty());
+
+ List<String> filePaths = dataDescriptions.get(0).getFilePaths();
+ Assertions.assertEquals(3, filePaths.size());
+ Assertions.assertEquals("s3://bucket/customer/part-1", filePaths.get(0));
+ Assertions.assertEquals("s3://bucket/customer/part-2", filePaths.get(1));
+ Assertions.assertEquals("s3://bucket/customer/part-3", filePaths.get(2));
+ }
+
@Test
public void testLoadCommand() throws Exception {
String loadSql1 = "LOAD LABEL customer_lable_for_test( "
diff --git a/regression-test/data/load_p0/broker_load/test_broker_load_multi_filegroup.out b/regression-test/data/load_p0/broker_load/test_broker_load_multi_filegroup.out
index e408e27718c..e7136e3aed6 100644
--- a/regression-test/data/load_p0/broker_load/test_broker_load_multi_filegroup.out
+++ b/regression-test/data/load_p0/broker_load/test_broker_load_multi_filegroup.out
@@ -5,3 +5,5 @@
-- !pr22666_2 --
100490
+-- !multi_infile_count --
+200000
diff --git a/regression-test/suites/load_p0/broker_load/test_broker_load_multi_filegroup.groovy b/regression-test/suites/load_p0/broker_load/test_broker_load_multi_filegroup.groovy
index 545f7d2ce81..e5ebea8339b 100644
--- a/regression-test/suites/load_p0/broker_load/test_broker_load_multi_filegroup.groovy
+++ b/regression-test/suites/load_p0/broker_load/test_broker_load_multi_filegroup.groovy
@@ -85,5 +85,60 @@ suite("test_broker_load_multi_filegroup", "p0") {
order_qt_pr22666_1 """ select count(*) from ${tbl_22666} where p_brand is
not null limit 10;"""
order_qt_pr22666_2 """ select count(*) from ${tbl_22666} where p_name is
not null limit 10;"""
-}
+ def tbl_multi_infile = "part_multi_infile"
+ sql """drop table if exists ${tbl_multi_infile} force"""
+ sql """
+ CREATE TABLE ${tbl_multi_infile} (
+ p_partkey int NULL,
+ p_name VARCHAR(55) NULL,
+ p_mfgr VARCHAR(25) NULL
+ )ENGINE=OLAP
+ DUPLICATE KEY(`p_partkey`)
+ DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 3
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+ def label_multi_infile = "part_multi_infile_" +
UUID.randomUUID().toString().replace("-", "0")
+ sql """
+ LOAD LABEL ${label_multi_infile} (
+ DATA INFILE(
+ "s3://${s3BucketName}/regression/load/data/part0.parquet",
+ "s3://${s3BucketName}/regression/load/data/part1.parquet"
+ )
+ INTO TABLE ${tbl_multi_infile}
+ FORMAT AS "PARQUET"
+ (p_partkey, p_name, p_mfgr)
+ )
+ WITH S3 (
+ "AWS_ACCESS_KEY" = "${getS3AK()}",
+ "AWS_SECRET_KEY" = "${getS3SK()}",
+ "AWS_ENDPOINT" = "${s3Endpoint}",
+ "AWS_REGION" = "${s3Region}",
+ "provider" = "${getS3Provider()}"
+ );
+ """
+
+ max_try_milli_secs = 600000
+ while (max_try_milli_secs > 0) {
+ def String[][] result = sql """ show load where
label="$label_multi_infile" order by createtime desc limit 1; """
+ logger.info("Load status: " + result[0])
+ if (result[0][2].equals("FINISHED")) {
+ logger.info("Load FINISHED " + label_multi_infile)
+ break;
+ }
+ if (result[0][2].equals("CANCELLED")) {
+ assertTrue(false, "load failed: $result")
+ break;
+ }
+ Thread.sleep(1000)
+ max_try_milli_secs -= 1000
+ if(max_try_milli_secs <= 0) {
+ assertTrue(1 == 2, "load Timeout: $label_multi_infile")
+ }
+ }
+
+ order_qt_multi_infile_count """ select count(*) from
${tbl_multi_infile};"""
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]