This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1553bc61dc8 [fix](outfile) fix analysis error when specifying parquet
schema (#57232)
1553bc61dc8 is described below
commit 1553bc61dc85a98f85e6ee038a30d5531c5f0166
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Wed Oct 22 19:43:54 2025 +0800
[fix](outfile) fix analysis error when specifying parquet schema (#57232)
### What problem does this PR solve?
Related PR: #33016
Introduced from #33016: when specifying the "schema" property in an outfile
clause with parquet format,
it returns the following error:
```
Parquet schema number does not equal to select item number
```
This is because we wrongly analyze `OutfileClause` twice.
---
.../org/apache/doris/analysis/OutFileClause.java | 17 ++--
.../doris/nereids/glue/LogicalPlanAdapter.java | 2 +-
.../glue/translator/PhysicalPlanTranslator.java | 2 +-
.../outfile/test_outfile_parquet_schema.out | 11 +++
.../outfile/test_outfile_parquet_schema.groovy | 93 ++++++++++++++++++++++
5 files changed, 118 insertions(+), 7 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
index a4638c4dfa2..cab36a7bb62 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
@@ -195,7 +195,7 @@ public class OutFileClause {
return parquetSchemas;
}
- public void analyze(List<Expr> resultExprs, List<String> colLabels) throws
UserException {
+ public void analyze(List<Expr> resultExprs, List<String> colLabels,
boolean needFormat) throws UserException {
if (isAnalyzed) {
// If the query stmt is rewritten, the whole stmt will be analyzed
again.
// But some of fields in this OutfileClause has been changed,
@@ -214,10 +214,16 @@ public class OutFileClause {
}
isAnalyzed = true;
- if (isParquetFormat()) {
- analyzeForParquetFormat(resultExprs, colLabels);
- } else if (isOrcFormat()) {
- analyzeForOrcFormat(resultExprs, colLabels);
+ // This analyze() method will be called twice:
+ // one is normal query plan analyze,
+ // the other is when writing success file after outfile is done on FE
side.
+ // In the second time, we do not need to analyze format related things
again.
+ if (needFormat) {
+ if (isParquetFormat()) {
+ analyzeForParquetFormat(resultExprs, colLabels);
+ } else if (isOrcFormat()) {
+ analyzeForOrcFormat(resultExprs, colLabels);
+ }
}
}
@@ -750,3 +756,4 @@ public class OutFileClause {
return sinkOptions;
}
}
+
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java
index e061181b466..a68b0058709 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java
@@ -79,7 +79,7 @@ public class LogicalPlanAdapter extends StatementBase
implements Queriable {
fileSink.getProperties()
);
try {
- outFile.analyze(Lists.newArrayList(), Lists.newArrayList());
+ outFile.analyze(Lists.newArrayList(), Lists.newArrayList(),
false);
} catch (Exception e) {
throw new AnalysisException(e.getMessage(), e.getCause());
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index a6af723fff8..44186fe3711 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -615,7 +615,7 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
// TODO: should not call legacy planner analyze in Nereids
try {
- outFile.analyze(outputExprs, labels);
+ outFile.analyze(outputExprs, labels, true);
} catch (Exception e) {
throw new AnalysisException(e.getMessage(), e.getCause());
}
diff --git
a/regression-test/data/export_p0/outfile/test_outfile_parquet_schema.out
b/regression-test/data/export_p0/outfile/test_outfile_parquet_schema.out
new file mode 100644
index 00000000000..7d849aada57
--- /dev/null
+++ b/regression-test/data/export_p0/outfile/test_outfile_parquet_schema.out
@@ -0,0 +1,11 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select_base1 --
+1 abc
+
+-- !select_tvf --
+1 abc
+
+-- !desc_s3 --
+id int Yes false \N NONE
+name text Yes false \N NONE
+
diff --git
a/regression-test/suites/export_p0/outfile/test_outfile_parquet_schema.groovy
b/regression-test/suites/export_p0/outfile/test_outfile_parquet_schema.groovy
new file mode 100644
index 00000000000..793ce9c4891
--- /dev/null
+++
b/regression-test/suites/export_p0/outfile/test_outfile_parquet_schema.groovy
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_outfile_parquet_schema", "p0") {
+ String ak = getS3AK()
+ String sk = getS3SK()
+ String s3_endpoint = getS3Endpoint()
+ String region = getS3Region()
+ String bucket = getS3BucketName();
+
+ def export_table_name = "outfile_parquet_schema_test"
+ def outFilePath = "${bucket}/outfile/outfile_parquet_schema_test/exp_"
+
+ // create table to export data
+ sql """ DROP TABLE IF EXISTS ${export_table_name} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${export_table_name} (
+ `id` int(11) NULL,
+ `name` string NULL
+ )
+ DISTRIBUTED BY HASH(id) BUCKETS 3
+ PROPERTIES("replication_num" = "1");
+ """
+ // insert data
+ sql """insert into ${export_table_name} values(1, "abc");"""
+
+ def check_outfile_data = { outfile_url, outfile_format ->
+ order_qt_select_tvf """ SELECT * FROM S3 (
+ "uri" =
"http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(),
outfile_url.length() - 1)}0.${outfile_format}",
+ "ACCESS_KEY"= "${ak}",
+ "SECRET_KEY" = "${sk}",
+ "format" = "${outfile_format}",
+ "region" = "${region}"
+ );
+ """
+ }
+
+ def check_outfile_column_name = { outfile_url, outfile_format ->
+ order_qt_desc_s3 """ Desc function S3 (
+ "uri" =
"http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(),
outfile_url.length() - 1)}0.${outfile_format}",
+ "ACCESS_KEY"= "${ak}",
+ "SECRET_KEY" = "${sk}",
+ "format" = "${outfile_format}",
+ "region" = "${region}"
+ );
+ """
+ }
+
+ def test_q1 = { outfile_format ->
+ order_qt_select_base1 """ select * from ${export_table_name} """
+
+ // select ... into outfile ...
+ def res = sql """
+ SELECT id, name FROM ${export_table_name}
+ INTO OUTFILE "s3://${outFilePath}"
+ FORMAT AS ${outfile_format}
+ PROPERTIES (
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.secret_key"="${sk}",
+ "s3.access_key" = "${ak}",
+ "schema" = "required,byte_array,id;required,byte_array,name"
+ );
+ """
+ def outfile_url = res[0][3]
+
+ check_outfile_data(outfile_url, outfile_format)
+ check_outfile_column_name(outfile_url, outfile_format)
+ }
+
+ // test parquet format
+ test_q1("parquet")
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]