This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1553bc61dc8 [fix](outfile) fix analysis error when specifying parquet 
schema (#57232)
1553bc61dc8 is described below

commit 1553bc61dc85a98f85e6ee038a30d5531c5f0166
Author: Mingyu Chen (Rayner) <[email protected]>
AuthorDate: Wed Oct 22 19:43:54 2025 +0800

    [fix](outfile) fix analysis error when specifying parquet schema (#57232)
    
    ### What problem does this PR solve?
    
    Related PR: #33016
    
    Introduced in #33016: when specifying the "schema" property in an
    outfile clause with parquet format,
    it returns the following error:
    ```
    Parquet schema number does not equal to select item number
    ```
    
    This is because we wrongly analyze `OutfileClause` twice.
---
 .../org/apache/doris/analysis/OutFileClause.java   | 17 ++--
 .../doris/nereids/glue/LogicalPlanAdapter.java     |  2 +-
 .../glue/translator/PhysicalPlanTranslator.java    |  2 +-
 .../outfile/test_outfile_parquet_schema.out        | 11 +++
 .../outfile/test_outfile_parquet_schema.groovy     | 93 ++++++++++++++++++++++
 5 files changed, 118 insertions(+), 7 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
index a4638c4dfa2..cab36a7bb62 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
@@ -195,7 +195,7 @@ public class OutFileClause {
         return parquetSchemas;
     }
 
-    public void analyze(List<Expr> resultExprs, List<String> colLabels) throws 
UserException {
+    public void analyze(List<Expr> resultExprs, List<String> colLabels, 
boolean needFormat) throws UserException {
         if (isAnalyzed) {
             // If the query stmt is rewritten, the whole stmt will be analyzed 
again.
             // But some of fields in this OutfileClause has been changed,
@@ -214,10 +214,16 @@ public class OutFileClause {
         }
         isAnalyzed = true;
 
-        if (isParquetFormat()) {
-            analyzeForParquetFormat(resultExprs, colLabels);
-        } else if (isOrcFormat()) {
-            analyzeForOrcFormat(resultExprs, colLabels);
+        // This analyze() method will be called twice:
+        // one is normal query plan analyze,
+        // the other is when writing success file after outfile is done on FE 
side.
+        // In the second time, we do not need to analyze format related things 
again.
+        if (needFormat) {
+            if (isParquetFormat()) {
+                analyzeForParquetFormat(resultExprs, colLabels);
+            } else if (isOrcFormat()) {
+                analyzeForOrcFormat(resultExprs, colLabels);
+            }
         }
     }
 
@@ -750,3 +756,4 @@ public class OutFileClause {
         return sinkOptions;
     }
 }
+
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java
index e061181b466..a68b0058709 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java
@@ -79,7 +79,7 @@ public class LogicalPlanAdapter extends StatementBase 
implements Queriable {
                     fileSink.getProperties()
             );
             try {
-                outFile.analyze(Lists.newArrayList(), Lists.newArrayList());
+                outFile.analyze(Lists.newArrayList(), Lists.newArrayList(), 
false);
             } catch (Exception e) {
                 throw new AnalysisException(e.getMessage(), e.getCause());
             }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index a6af723fff8..44186fe3711 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -615,7 +615,7 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
 
         // TODO: should not call legacy planner analyze in Nereids
         try {
-            outFile.analyze(outputExprs, labels);
+            outFile.analyze(outputExprs, labels, true);
         } catch (Exception e) {
             throw new AnalysisException(e.getMessage(), e.getCause());
         }
diff --git 
a/regression-test/data/export_p0/outfile/test_outfile_parquet_schema.out 
b/regression-test/data/export_p0/outfile/test_outfile_parquet_schema.out
new file mode 100644
index 00000000000..7d849aada57
--- /dev/null
+++ b/regression-test/data/export_p0/outfile/test_outfile_parquet_schema.out
@@ -0,0 +1,11 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select_base1 --
+1      abc
+
+-- !select_tvf --
+1      abc
+
+-- !desc_s3 --
+id     int     Yes     false   \N      NONE
+name   text    Yes     false   \N      NONE
+
diff --git 
a/regression-test/suites/export_p0/outfile/test_outfile_parquet_schema.groovy 
b/regression-test/suites/export_p0/outfile/test_outfile_parquet_schema.groovy
new file mode 100644
index 00000000000..793ce9c4891
--- /dev/null
+++ 
b/regression-test/suites/export_p0/outfile/test_outfile_parquet_schema.groovy
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_outfile_parquet_schema", "p0") {
+    String ak = getS3AK()
+    String sk = getS3SK()
+    String s3_endpoint = getS3Endpoint()
+    String region = getS3Region()
+    String bucket = getS3BucketName();
+
+    def export_table_name = "outfile_parquet_schema_test"
+    def outFilePath = "${bucket}/outfile/outfile_parquet_schema_test/exp_"
+
+    // create table to export data
+    sql """ DROP TABLE IF EXISTS ${export_table_name} """
+    sql """
+        CREATE TABLE IF NOT EXISTS ${export_table_name} (
+        `id` int(11) NULL,
+        `name` string NULL
+        )
+        DISTRIBUTED BY HASH(id) BUCKETS 3
+        PROPERTIES("replication_num" = "1");
+    """
+    // insert data
+    sql """insert into ${export_table_name} values(1, "abc");"""
+
+    def check_outfile_data = { outfile_url, outfile_format ->
+        order_qt_select_tvf """ SELECT * FROM S3 (
+                            "uri" = 
"http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), 
outfile_url.length() - 1)}0.${outfile_format}",
+                            "ACCESS_KEY"= "${ak}",
+                            "SECRET_KEY" = "${sk}",
+                            "format" = "${outfile_format}",
+                            "region" = "${region}"
+                        );
+                        """
+    }
+
+    def check_outfile_column_name = { outfile_url, outfile_format ->
+        order_qt_desc_s3 """ Desc function S3 (
+                    "uri" = 
"http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), 
outfile_url.length() - 1)}0.${outfile_format}",
+                    "ACCESS_KEY"= "${ak}",
+                    "SECRET_KEY" = "${sk}",
+                    "format" = "${outfile_format}",
+                    "region" = "${region}"
+                );
+                """
+    }
+
+    def test_q1 = { outfile_format ->
+        order_qt_select_base1 """ select * from ${export_table_name} """
+
+        // select ... into outfile ...
+        def res = sql """
+            SELECT id, name FROM ${export_table_name}
+            INTO OUTFILE "s3://${outFilePath}"
+            FORMAT AS ${outfile_format}
+            PROPERTIES (
+                "s3.endpoint" = "${s3_endpoint}",
+                "s3.region" = "${region}",
+                "s3.secret_key"="${sk}",
+                "s3.access_key" = "${ak}",
+                "schema" = "required,byte_array,id;required,byte_array,name"
+            );
+        """
+        def outfile_url = res[0][3]
+        
+        check_outfile_data(outfile_url, outfile_format)
+        check_outfile_column_name(outfile_url, outfile_format)
+    }
+
+    // test parquet format
+    test_q1("parquet")
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to