This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7f66be84d5c [fix](Outfile) Infer the column name  if the column is 
expression in `select into outfile` (#25854)
7f66be84d5c is described below

commit 7f66be84d5ccd5fb82b05a3bbd27386667c02b69
Author: Tiewei Fang <43782773+bepppo...@users.noreply.github.com>
AuthorDate: Wed Oct 25 22:49:04 2023 +0800

    [fix](Outfile) Infer the column name  if the column is expression in 
`select into outfile` (#25854)
    
    This pr do two things:
    1. Infer the column name if the column is expression in `select into 
outfile`. The rule for column name generation can be refered in pr: #24990
    2. fix bug that it will core dump if the `_schema` fails to build in the 
open phase in vorc_transformer.cpp
    
    
    TODO:
    1. Support infer the column name if the column is expression in `select 
into outfile` in new optimizer(Nereids).
---
 be/src/vec/runtime/vorc_transformer.cpp            |  11 +-
 be/src/vec/sink/writer/vfile_result_writer.cpp     |   2 +-
 .../java/org/apache/doris/analysis/SelectStmt.java |   3 +-
 .../test_outfile_expr_generate_col_name.out        | 267 +++++++++++++++++++++
 .../test_outfile_expr_generate_col_name.groovy     | 260 ++++++++++++++++++++
 5 files changed, 540 insertions(+), 3 deletions(-)

diff --git a/be/src/vec/runtime/vorc_transformer.cpp 
b/be/src/vec/runtime/vorc_transformer.cpp
index e862624c905..fafe2093752 100644
--- a/be/src/vec/runtime/vorc_transformer.cpp
+++ b/be/src/vec/runtime/vorc_transformer.cpp
@@ -116,7 +116,13 @@ std::unique_ptr<orc::ColumnVectorBatch> 
VOrcTransformer::_create_row_batch(size_
 }
 
 int64_t VOrcTransformer::written_len() {
-    return _output_stream->getLength();
+    // written_len() will be called in VFileResultWriter::_close_file_writer
+    // but _output_stream may be nullptr
+    // because the failure built by _schema in open()
+    if (_output_stream) {
+        return _output_stream->getLength();
+    }
+    return 0;
 }
 
 Status VOrcTransformer::close() {
@@ -127,6 +133,9 @@ Status VOrcTransformer::close() {
             return Status::IOError(e.what());
         }
     }
+    if (_output_stream) {
+        _output_stream->close();
+    }
     return Status::OK();
 }
 
diff --git a/be/src/vec/sink/writer/vfile_result_writer.cpp 
b/be/src/vec/sink/writer/vfile_result_writer.cpp
index 0df4e4e6497..c6d6eb987b1 100644
--- a/be/src/vec/sink/writer/vfile_result_writer.cpp
+++ b/be/src/vec/sink/writer/vfile_result_writer.cpp
@@ -260,12 +260,12 @@ Status 
VFileResultWriter::_create_new_file_if_exceed_size() {
 
 Status VFileResultWriter::_close_file_writer(bool done) {
     if (_vfile_writer) {
-        RETURN_IF_ERROR(_vfile_writer->close());
         // we can not use _current_written_bytes to 
COUNTER_UPDATE(_written_data_bytes, _current_written_bytes)
         // because it will call `write()` function of orc/parquet function in 
`_vfile_writer->close()`
         // and the real written_len will increase
         // and _current_written_bytes will less than 
_vfile_writer->written_len()
         COUNTER_UPDATE(_written_data_bytes, _vfile_writer->written_len());
+        RETURN_IF_ERROR(_vfile_writer->close());
         _vfile_writer.reset(nullptr);
     } else if (_file_writer_impl) {
         RETURN_IF_ERROR(_file_writer_impl->close());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
index 8c40f5230ac..d8f6ac55372 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -545,7 +545,8 @@ public class SelectStmt extends QueryStmt {
                     
resultExprs.add(rewriteQueryExprByMvColumnExpr(item.getExpr(), analyzer));
                     String columnLabel = null;
                     Class<? extends StatementBase> statementClazz = 
analyzer.getRootStatementClazz();
-                    if (statementClazz != null && 
!QueryStmt.class.isAssignableFrom(statementClazz)) {
+                    if (statementClazz != null
+                            && 
(!QueryStmt.class.isAssignableFrom(statementClazz) || hasOutFileClause())) {
                         // Infer column name when item is expr
                         columnLabel = item.toColumnLabel(i);
                     }
diff --git 
a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
 
b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
new file mode 100644
index 00000000000..d1fc5fa6313
--- /dev/null
+++ 
b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
@@ -0,0 +1,267 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select_base --
+1      doris-1 19
+10     \N      \N
+2      doris-2 20
+3      doris-3 21
+4      doris-4 22
+5      doris-5 23
+6      doris-6 24
+7      doris-7 25
+8      doris-8 26
+9      doris-9 27
+
+-- !select_base1 --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !select_tvf --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !desc_s3 --
+__binary_predicate_0   BOOLEAN Yes     false   \N      NONE
+
+-- !select_base1 --
+10
+
+-- !select_tvf --
+10
+
+-- !desc_s3 --
+__max_0        INT     Yes     false   \N      NONE
+
+-- !select_base1 --
+1      id = 1
+10     id not exist
+2      id = 2
+3      id not exist
+4      id not exist
+5      id not exist
+6      id not exist
+7      id not exist
+8      id not exist
+9      id not exist
+
+-- !select_tvf --
+1      id = 1
+10     id not exist
+2      id = 2
+3      id not exist
+4      id not exist
+5      id not exist
+6      id not exist
+7      id not exist
+8      id not exist
+9      id not exist
+
+-- !desc_s3 --
+__case_expr_1  TEXT    Yes     false   \N      NONE
+id     INT     Yes     false   \N      NONE
+
+-- !select_base1 --
+1      1       string  19      false   5       true    1
+10     1       string  \N      false   5       true    1
+2      1       string  20      false   5       true    1
+3      1       string  21      false   5       true    1
+4      1       string  22      false   5       true    1
+5      1       string  23      false   5       true    1
+6      1       string  24      false   5       true    1
+7      1       string  25      false   5       true    1
+8      1       string  26      false   5       true    1
+9      1       string  27      false   5       true    1
+
+-- !select_tvf --
+1      1       string  19      false   5       true    1
+10     1       string  \N      false   5       true    1
+2      1       string  20      false   5       true    1
+3      1       string  21      false   5       true    1
+4      1       string  22      false   5       true    1
+5      1       string  23      false   5       true    1
+6      1       string  24      false   5       true    1
+7      1       string  25      false   5       true    1
+8      1       string  26      false   5       true    1
+9      1       string  27      false   5       true    1
+
+-- !desc_s3 --
+__arithmetic_expr_5    INT     Yes     false   \N      NONE
+__arithmetic_expr_7    BIGINT  Yes     false   \N      NONE
+__binary_predicate_4   BOOLEAN Yes     false   \N      NONE
+__cast_expr_3  BIGINT  Yes     false   \N      NONE
+__in_predicate_6       BOOLEAN Yes     false   \N      NONE
+__literal_1    INT     Yes     false   \N      NONE
+__literal_2    TEXT    Yes     false   \N      NONE
+id     INT     Yes     false   \N      NONE
+
+-- !select_base1 --
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+
+-- !select_tvf --
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+
+-- !desc_s3 --
+__cast_expr_0  TEXT    Yes     false   \N      NONE
+__cast_expr_1  BIGINT  Yes     false   \N      NONE
+__cast_expr_2  TEXT    Yes     false   \N      NONE
+
+-- !select_base1 --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !select_tvf --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !desc_s3 --
+__binary_predicate_0   BOOLEAN Yes     false   \N      NONE
+
+-- !select_base1 --
+10
+
+-- !select_tvf --
+10
+
+-- !desc_s3 --
+__max_0        INT     Yes     false   \N      NONE
+
+-- !select_base1 --
+1      id = 1
+10     id not exist
+2      id = 2
+3      id not exist
+4      id not exist
+5      id not exist
+6      id not exist
+7      id not exist
+8      id not exist
+9      id not exist
+
+-- !select_tvf --
+1      id = 1
+10     id not exist
+2      id = 2
+3      id not exist
+4      id not exist
+5      id not exist
+6      id not exist
+7      id not exist
+8      id not exist
+9      id not exist
+
+-- !desc_s3 --
+__case_expr_1  TEXT    Yes     false   \N      NONE
+id     INT     Yes     false   \N      NONE
+
+-- !select_base1 --
+1      1       string  19      false   5       true    1
+10     1       string  \N      false   5       true    1
+2      1       string  20      false   5       true    1
+3      1       string  21      false   5       true    1
+4      1       string  22      false   5       true    1
+5      1       string  23      false   5       true    1
+6      1       string  24      false   5       true    1
+7      1       string  25      false   5       true    1
+8      1       string  26      false   5       true    1
+9      1       string  27      false   5       true    1
+
+-- !select_tvf --
+1      1       string  19      false   5       true    1
+10     1       string  \N      false   5       true    1
+2      1       string  20      false   5       true    1
+3      1       string  21      false   5       true    1
+4      1       string  22      false   5       true    1
+5      1       string  23      false   5       true    1
+6      1       string  24      false   5       true    1
+7      1       string  25      false   5       true    1
+8      1       string  26      false   5       true    1
+9      1       string  27      false   5       true    1
+
+-- !desc_s3 --
+__arithmetic_expr_5    SMALLINT        Yes     false   \N      NONE
+__arithmetic_expr_7    BIGINT  Yes     false   \N      NONE
+__binary_predicate_4   BOOLEAN Yes     false   \N      NONE
+__cast_expr_3  BIGINT  Yes     false   \N      NONE
+__in_predicate_6       BOOLEAN Yes     false   \N      NONE
+__literal_1    TINYINT Yes     false   \N      NONE
+__literal_2    TEXT    Yes     false   \N      NONE
+id     INT     Yes     false   \N      NONE
+
+-- !select_base1 --
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+
+-- !select_tvf --
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+2566   888     9999
+
+-- !desc_s3 --
+__cast_expr_0  TEXT    Yes     false   \N      NONE
+__cast_expr_1  BIGINT  Yes     false   \N      NONE
+__cast_expr_2  TEXT    Yes     false   \N      NONE
+
diff --git 
a/regression-test/suites/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.groovy
 
b/regression-test/suites/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.groovy
new file mode 100644
index 00000000000..52977c1fd58
--- /dev/null
+++ 
b/regression-test/suites/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.groovy
@@ -0,0 +1,260 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_outfile_expr_generate_col_name", "p0") {
+    // close nereids
+    sql """ set enable_nereids_planner=false """
+
+    String ak = getS3AK()
+    String sk = getS3SK()
+    String s3_endpoint = getS3Endpoint()
+    String region = getS3Region()
+    String bucket = getS3BucketName();
+
+
+    def export_table_name = "outfile_expr_export_test"
+    def outFilePath = "${bucket}/outfile/expr_generate_col_name/exp_"
+
+
+    def create_table = {
+        sql """ DROP TABLE IF EXISTS ${export_table_name} """
+        sql """
+            CREATE TABLE IF NOT EXISTS ${export_table_name} (
+            `id` int(11) NULL,
+            `name` string NULL,
+            `age` int(11) NULL
+            )
+            PARTITION BY RANGE(id)
+            (
+                PARTITION less_than_20 VALUES LESS THAN ("20"),
+                PARTITION between_20_70 VALUES [("20"),("70")),
+                PARTITION more_than_70 VALUES LESS THAN ("151")
+            )
+            DISTRIBUTED BY HASH(id) BUCKETS 3
+            PROPERTIES("replication_num" = "1");
+        """
+    }
+
+    def outfile_to_S3 = { format ->
+        // select ... into outfile ...
+        def res = sql """
+            SELECT * FROM ${export_table_name} t ORDER BY user_id
+            INTO OUTFILE "s3://${outFilePath}"
+            FORMAT AS ${outfile_format}
+            PROPERTIES (
+                "s3.endpoint" = "${s3_endpoint}",
+                "s3.region" = "${region}",
+                "s3.secret_key"="${sk}",
+                "s3.access_key" = "${ak}"
+            );
+        """
+
+        return res[0][3]
+    }
+
+    // create table to export data
+    create_table()
+        
+    // insert data
+    StringBuilder sb = new StringBuilder()
+    int i = 1
+    for (; i < 10; i ++) {
+        sb.append("""
+            (${i}, 'doris-${i}', ${i + 18}),
+        """)
+    }
+    sb.append("""
+            (${i}, NULL, NULL)
+        """)
+    sql """ INSERT INTO ${export_table_name} VALUES
+            ${sb.toString()}
+        """
+    def insert_res = sql "show last insert;"
+    logger.info("insert result: " + insert_res.toString())
+    order_qt_select_base """ SELECT * FROM ${export_table_name} t ORDER BY id; 
"""
+
+
+
+    def check_outfile_data = { outfile_url, outfile_format ->
+        order_qt_select_tvf """ SELECT * FROM S3 (
+                            "uri" = 
"http://${s3_endpoint}${outfile_url.substring(4)}0.${outfile_format}",
+                            "ACCESS_KEY"= "${ak}",
+                            "SECRET_KEY" = "${sk}",
+                            "format" = "${outfile_format}",
+                            "region" = "${region}"
+                        );
+                        """
+    }
+
+    def check_outfile_column_name = { outfile_url, outfile_format ->
+        order_qt_desc_s3 """ Desc function S3 (
+                    "uri" = 
"http://${s3_endpoint}${outfile_url.substring(4)}0.${outfile_format}",
+                    "ACCESS_KEY"= "${ak}",
+                    "SECRET_KEY" = "${sk}",
+                    "format" = "${outfile_format}",
+                    "region" = "${region}"
+                );
+                """
+    }
+
+    def test_q1 = { outfile_format ->
+        order_qt_select_base1 """ select 1>2 from ${export_table_name} """
+
+        // select ... into outfile ...
+        def res = sql """
+            SELECT 1>2 FROM ${export_table_name}
+            INTO OUTFILE "s3://${outFilePath}"
+            FORMAT AS ${outfile_format}
+            PROPERTIES (
+                "s3.endpoint" = "${s3_endpoint}",
+                "s3.region" = "${region}",
+                "s3.secret_key"="${sk}",
+                "s3.access_key" = "${ak}"
+            );
+        """
+        outfile_url = res[0][3]
+        
+        check_outfile_data(outfile_url, outfile_format)
+        check_outfile_column_name(outfile_url, outfile_format)
+    }
+
+    def test_q2 = { outfile_format ->
+        order_qt_select_base1 """ select max(id) from ${export_table_name} """
+
+        // select ... into outfile ...
+        def res = sql """
+            SELECT max(id) FROM ${export_table_name}
+            INTO OUTFILE "s3://${outFilePath}"
+            FORMAT AS ${outfile_format}
+            PROPERTIES (
+                "s3.endpoint" = "${s3_endpoint}",
+                "s3.region" = "${region}",
+                "s3.secret_key"="${sk}",
+                "s3.access_key" = "${ak}"
+            );
+        """
+        outfile_url = res[0][3]
+        
+        check_outfile_data(outfile_url, outfile_format)
+        check_outfile_column_name(outfile_url, outfile_format)
+    }
+
+    def test_q3 = { outfile_format ->
+        order_qt_select_base1 """ select id, case id when 1 then 'id = 1' when 
2 then 'id = 2' else 'id not exist' end from ${export_table_name} """
+
+        // select ... into outfile ...
+        def res = sql """
+            SELECT id, case id when 1 then 'id = 1' when 2 then 'id = 2' else 
'id not exist' end FROM ${export_table_name}
+            INTO OUTFILE "s3://${outFilePath}"
+            FORMAT AS ${outfile_format}
+            PROPERTIES (
+                "s3.endpoint" = "${s3_endpoint}",
+                "s3.region" = "${region}",
+                "s3.secret_key"="${sk}",
+                "s3.access_key" = "${ak}"
+            );
+        """
+        outfile_url = res[0][3]
+        
+        check_outfile_data(outfile_url, outfile_format)
+        check_outfile_column_name(outfile_url, outfile_format)
+    }
+
+    def test_q4 = { outfile_format ->
+        order_qt_select_base1 """ select 
+                                id,
+                                1, 
+                                'string', 
+                                cast (age AS BIGINT),  
+                                1 > 2,
+                                2 + 3,
+                                1 IN (1, 2, 3, 4), 
+                                TRUE | FALSE
+                            from ${export_table_name}
+                        """
+
+        // select ... into outfile ...
+        def res = sql """
+            select 
+                id,
+                1, 
+                'string', 
+                cast (age AS BIGINT),  
+                1 > 2,
+                2 + 3,
+                1 IN (1, 2, 3, 4), 
+                TRUE | FALSE
+            from ${export_table_name}
+            INTO OUTFILE "s3://${outFilePath}"
+            FORMAT AS ${outfile_format}
+            PROPERTIES (
+                "s3.endpoint" = "${s3_endpoint}",
+                "s3.region" = "${region}",
+                "s3.secret_key"="${sk}",
+                "s3.access_key" = "${ak}"
+            );
+        """
+        outfile_url = res[0][3]
+        
+        check_outfile_data(outfile_url, outfile_format)
+        check_outfile_column_name(outfile_url, outfile_format)
+    }
+
+    def test_q5 = { outfile_format ->
+        order_qt_select_base1 """ select cast('2566' as string), cast('888' as 
bigint), cast('9999' as largeint)
+                            from ${export_table_name}
+                        """
+
+        // select ... into outfile ...
+        def res = sql """
+            select cast('2566' as string), cast('888' as bigint), cast('9999' 
as largeint)
+            from ${export_table_name}
+            INTO OUTFILE "s3://${outFilePath}"
+            FORMAT AS ${outfile_format}
+            PROPERTIES (
+                "s3.endpoint" = "${s3_endpoint}",
+                "s3.region" = "${region}",
+                "s3.secret_key"="${sk}",
+                "s3.access_key" = "${ak}"
+            );
+        """
+        outfile_url = res[0][3]
+        
+        check_outfile_data(outfile_url, outfile_format)
+        check_outfile_column_name(outfile_url, outfile_format)
+    }
+
+    // test parquet format
+    test_q1("parquet")
+    test_q2("parquet")
+    test_q3("parquet")
+    test_q4("parquet")
+    test_q5("parquet")
+
+    // test orc format
+    test_q1("orc")
+    test_q2("orc")
+    test_q3("orc")
+    test_q4("orc")
+    test_q5("orc")
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to