This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7f66be84d5c [fix](Outfile) Infer the column name if the column is
expression in `select into outfile` (#25854)
7f66be84d5c is described below
commit 7f66be84d5ccd5fb82b05a3bbd27386667c02b69
Author: Tiewei Fang <[email protected]>
AuthorDate: Wed Oct 25 22:49:04 2023 +0800
[fix](Outfile) Infer the column name if the column is expression in
`select into outfile` (#25854)
This pr do two things:
1. Infer the column name if the column is expression in `select into
outfile`. The rule for column name generation can be refered in pr: #24990
2. fix bug that it will core dump if the `_schema` fails to build in the
open phase in vorc_transformer.cpp
TODO:
1. Support infer the column name if the column is expression in `select
into outfile` in new optimizer(Nereids).
---
be/src/vec/runtime/vorc_transformer.cpp | 11 +-
be/src/vec/sink/writer/vfile_result_writer.cpp | 2 +-
.../java/org/apache/doris/analysis/SelectStmt.java | 3 +-
.../test_outfile_expr_generate_col_name.out | 267 +++++++++++++++++++++
.../test_outfile_expr_generate_col_name.groovy | 260 ++++++++++++++++++++
5 files changed, 540 insertions(+), 3 deletions(-)
diff --git a/be/src/vec/runtime/vorc_transformer.cpp
b/be/src/vec/runtime/vorc_transformer.cpp
index e862624c905..fafe2093752 100644
--- a/be/src/vec/runtime/vorc_transformer.cpp
+++ b/be/src/vec/runtime/vorc_transformer.cpp
@@ -116,7 +116,13 @@ std::unique_ptr<orc::ColumnVectorBatch>
VOrcTransformer::_create_row_batch(size_
}
int64_t VOrcTransformer::written_len() {
- return _output_stream->getLength();
+ // written_len() will be called in VFileResultWriter::_close_file_writer
+ // but _output_stream may be nullptr
+ // because the failure built by _schema in open()
+ if (_output_stream) {
+ return _output_stream->getLength();
+ }
+ return 0;
}
Status VOrcTransformer::close() {
@@ -127,6 +133,9 @@ Status VOrcTransformer::close() {
return Status::IOError(e.what());
}
}
+ if (_output_stream) {
+ _output_stream->close();
+ }
return Status::OK();
}
diff --git a/be/src/vec/sink/writer/vfile_result_writer.cpp
b/be/src/vec/sink/writer/vfile_result_writer.cpp
index 0df4e4e6497..c6d6eb987b1 100644
--- a/be/src/vec/sink/writer/vfile_result_writer.cpp
+++ b/be/src/vec/sink/writer/vfile_result_writer.cpp
@@ -260,12 +260,12 @@ Status
VFileResultWriter::_create_new_file_if_exceed_size() {
Status VFileResultWriter::_close_file_writer(bool done) {
if (_vfile_writer) {
- RETURN_IF_ERROR(_vfile_writer->close());
// we can not use _current_written_bytes to
COUNTER_UPDATE(_written_data_bytes, _current_written_bytes)
// because it will call `write()` function of orc/parquet function in
`_vfile_writer->close()`
// and the real written_len will increase
// and _current_written_bytes will less than
_vfile_writer->written_len()
COUNTER_UPDATE(_written_data_bytes, _vfile_writer->written_len());
+ RETURN_IF_ERROR(_vfile_writer->close());
_vfile_writer.reset(nullptr);
} else if (_file_writer_impl) {
RETURN_IF_ERROR(_file_writer_impl->close());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
index 8c40f5230ac..d8f6ac55372 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -545,7 +545,8 @@ public class SelectStmt extends QueryStmt {
resultExprs.add(rewriteQueryExprByMvColumnExpr(item.getExpr(), analyzer));
String columnLabel = null;
Class<? extends StatementBase> statementClazz =
analyzer.getRootStatementClazz();
- if (statementClazz != null &&
!QueryStmt.class.isAssignableFrom(statementClazz)) {
+ if (statementClazz != null
+ &&
(!QueryStmt.class.isAssignableFrom(statementClazz) || hasOutFileClause())) {
// Infer column name when item is expr
columnLabel = item.toColumnLabel(i);
}
diff --git
a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
new file mode 100644
index 00000000000..d1fc5fa6313
--- /dev/null
+++
b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
@@ -0,0 +1,267 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select_base --
+1 doris-1 19
+10 \N \N
+2 doris-2 20
+3 doris-3 21
+4 doris-4 22
+5 doris-5 23
+6 doris-6 24
+7 doris-7 25
+8 doris-8 26
+9 doris-9 27
+
+-- !select_base1 --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !select_tvf --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !desc_s3 --
+__binary_predicate_0 BOOLEAN Yes false \N NONE
+
+-- !select_base1 --
+10
+
+-- !select_tvf --
+10
+
+-- !desc_s3 --
+__max_0 INT Yes false \N NONE
+
+-- !select_base1 --
+1 id = 1
+10 id not exist
+2 id = 2
+3 id not exist
+4 id not exist
+5 id not exist
+6 id not exist
+7 id not exist
+8 id not exist
+9 id not exist
+
+-- !select_tvf --
+1 id = 1
+10 id not exist
+2 id = 2
+3 id not exist
+4 id not exist
+5 id not exist
+6 id not exist
+7 id not exist
+8 id not exist
+9 id not exist
+
+-- !desc_s3 --
+__case_expr_1 TEXT Yes false \N NONE
+id INT Yes false \N NONE
+
+-- !select_base1 --
+1 1 string 19 false 5 true 1
+10 1 string \N false 5 true 1
+2 1 string 20 false 5 true 1
+3 1 string 21 false 5 true 1
+4 1 string 22 false 5 true 1
+5 1 string 23 false 5 true 1
+6 1 string 24 false 5 true 1
+7 1 string 25 false 5 true 1
+8 1 string 26 false 5 true 1
+9 1 string 27 false 5 true 1
+
+-- !select_tvf --
+1 1 string 19 false 5 true 1
+10 1 string \N false 5 true 1
+2 1 string 20 false 5 true 1
+3 1 string 21 false 5 true 1
+4 1 string 22 false 5 true 1
+5 1 string 23 false 5 true 1
+6 1 string 24 false 5 true 1
+7 1 string 25 false 5 true 1
+8 1 string 26 false 5 true 1
+9 1 string 27 false 5 true 1
+
+-- !desc_s3 --
+__arithmetic_expr_5 INT Yes false \N NONE
+__arithmetic_expr_7 BIGINT Yes false \N NONE
+__binary_predicate_4 BOOLEAN Yes false \N NONE
+__cast_expr_3 BIGINT Yes false \N NONE
+__in_predicate_6 BOOLEAN Yes false \N NONE
+__literal_1 INT Yes false \N NONE
+__literal_2 TEXT Yes false \N NONE
+id INT Yes false \N NONE
+
+-- !select_base1 --
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+
+-- !select_tvf --
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+
+-- !desc_s3 --
+__cast_expr_0 TEXT Yes false \N NONE
+__cast_expr_1 BIGINT Yes false \N NONE
+__cast_expr_2 TEXT Yes false \N NONE
+
+-- !select_base1 --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !select_tvf --
+false
+false
+false
+false
+false
+false
+false
+false
+false
+false
+
+-- !desc_s3 --
+__binary_predicate_0 BOOLEAN Yes false \N NONE
+
+-- !select_base1 --
+10
+
+-- !select_tvf --
+10
+
+-- !desc_s3 --
+__max_0 INT Yes false \N NONE
+
+-- !select_base1 --
+1 id = 1
+10 id not exist
+2 id = 2
+3 id not exist
+4 id not exist
+5 id not exist
+6 id not exist
+7 id not exist
+8 id not exist
+9 id not exist
+
+-- !select_tvf --
+1 id = 1
+10 id not exist
+2 id = 2
+3 id not exist
+4 id not exist
+5 id not exist
+6 id not exist
+7 id not exist
+8 id not exist
+9 id not exist
+
+-- !desc_s3 --
+__case_expr_1 TEXT Yes false \N NONE
+id INT Yes false \N NONE
+
+-- !select_base1 --
+1 1 string 19 false 5 true 1
+10 1 string \N false 5 true 1
+2 1 string 20 false 5 true 1
+3 1 string 21 false 5 true 1
+4 1 string 22 false 5 true 1
+5 1 string 23 false 5 true 1
+6 1 string 24 false 5 true 1
+7 1 string 25 false 5 true 1
+8 1 string 26 false 5 true 1
+9 1 string 27 false 5 true 1
+
+-- !select_tvf --
+1 1 string 19 false 5 true 1
+10 1 string \N false 5 true 1
+2 1 string 20 false 5 true 1
+3 1 string 21 false 5 true 1
+4 1 string 22 false 5 true 1
+5 1 string 23 false 5 true 1
+6 1 string 24 false 5 true 1
+7 1 string 25 false 5 true 1
+8 1 string 26 false 5 true 1
+9 1 string 27 false 5 true 1
+
+-- !desc_s3 --
+__arithmetic_expr_5 SMALLINT Yes false \N NONE
+__arithmetic_expr_7 BIGINT Yes false \N NONE
+__binary_predicate_4 BOOLEAN Yes false \N NONE
+__cast_expr_3 BIGINT Yes false \N NONE
+__in_predicate_6 BOOLEAN Yes false \N NONE
+__literal_1 TINYINT Yes false \N NONE
+__literal_2 TEXT Yes false \N NONE
+id INT Yes false \N NONE
+
+-- !select_base1 --
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+
+-- !select_tvf --
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+2566 888 9999
+
+-- !desc_s3 --
+__cast_expr_0 TEXT Yes false \N NONE
+__cast_expr_1 BIGINT Yes false \N NONE
+__cast_expr_2 TEXT Yes false \N NONE
+
diff --git
a/regression-test/suites/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.groovy
b/regression-test/suites/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.groovy
new file mode 100644
index 00000000000..52977c1fd58
--- /dev/null
+++
b/regression-test/suites/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.groovy
@@ -0,0 +1,260 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_outfile_expr_generate_col_name", "p0") {
+ // close nereids
+ sql """ set enable_nereids_planner=false """
+
+ String ak = getS3AK()
+ String sk = getS3SK()
+ String s3_endpoint = getS3Endpoint()
+ String region = getS3Region()
+ String bucket = getS3BucketName();
+
+
+ def export_table_name = "outfile_expr_export_test"
+ def outFilePath = "${bucket}/outfile/expr_generate_col_name/exp_"
+
+
+ def create_table = {
+ sql """ DROP TABLE IF EXISTS ${export_table_name} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${export_table_name} (
+ `id` int(11) NULL,
+ `name` string NULL,
+ `age` int(11) NULL
+ )
+ PARTITION BY RANGE(id)
+ (
+ PARTITION less_than_20 VALUES LESS THAN ("20"),
+ PARTITION between_20_70 VALUES [("20"),("70")),
+ PARTITION more_than_70 VALUES LESS THAN ("151")
+ )
+ DISTRIBUTED BY HASH(id) BUCKETS 3
+ PROPERTIES("replication_num" = "1");
+ """
+ }
+
+ def outfile_to_S3 = { format ->
+ // select ... into outfile ...
+ def res = sql """
+ SELECT * FROM ${export_table_name} t ORDER BY user_id
+ INTO OUTFILE "s3://${outFilePath}"
+ FORMAT AS ${outfile_format}
+ PROPERTIES (
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.secret_key"="${sk}",
+ "s3.access_key" = "${ak}"
+ );
+ """
+
+ return res[0][3]
+ }
+
+ // create table to export data
+ create_table()
+
+ // insert data
+ StringBuilder sb = new StringBuilder()
+ int i = 1
+ for (; i < 10; i ++) {
+ sb.append("""
+ (${i}, 'doris-${i}', ${i + 18}),
+ """)
+ }
+ sb.append("""
+ (${i}, NULL, NULL)
+ """)
+ sql """ INSERT INTO ${export_table_name} VALUES
+ ${sb.toString()}
+ """
+ def insert_res = sql "show last insert;"
+ logger.info("insert result: " + insert_res.toString())
+ order_qt_select_base """ SELECT * FROM ${export_table_name} t ORDER BY id;
"""
+
+
+
+ def check_outfile_data = { outfile_url, outfile_format ->
+ order_qt_select_tvf """ SELECT * FROM S3 (
+ "uri" =
"http://${s3_endpoint}${outfile_url.substring(4)}0.${outfile_format}",
+ "ACCESS_KEY"= "${ak}",
+ "SECRET_KEY" = "${sk}",
+ "format" = "${outfile_format}",
+ "region" = "${region}"
+ );
+ """
+ }
+
+ def check_outfile_column_name = { outfile_url, outfile_format ->
+ order_qt_desc_s3 """ Desc function S3 (
+ "uri" =
"http://${s3_endpoint}${outfile_url.substring(4)}0.${outfile_format}",
+ "ACCESS_KEY"= "${ak}",
+ "SECRET_KEY" = "${sk}",
+ "format" = "${outfile_format}",
+ "region" = "${region}"
+ );
+ """
+ }
+
+ def test_q1 = { outfile_format ->
+ order_qt_select_base1 """ select 1>2 from ${export_table_name} """
+
+ // select ... into outfile ...
+ def res = sql """
+ SELECT 1>2 FROM ${export_table_name}
+ INTO OUTFILE "s3://${outFilePath}"
+ FORMAT AS ${outfile_format}
+ PROPERTIES (
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.secret_key"="${sk}",
+ "s3.access_key" = "${ak}"
+ );
+ """
+ outfile_url = res[0][3]
+
+ check_outfile_data(outfile_url, outfile_format)
+ check_outfile_column_name(outfile_url, outfile_format)
+ }
+
+ def test_q2 = { outfile_format ->
+ order_qt_select_base1 """ select max(id) from ${export_table_name} """
+
+ // select ... into outfile ...
+ def res = sql """
+ SELECT max(id) FROM ${export_table_name}
+ INTO OUTFILE "s3://${outFilePath}"
+ FORMAT AS ${outfile_format}
+ PROPERTIES (
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.secret_key"="${sk}",
+ "s3.access_key" = "${ak}"
+ );
+ """
+ outfile_url = res[0][3]
+
+ check_outfile_data(outfile_url, outfile_format)
+ check_outfile_column_name(outfile_url, outfile_format)
+ }
+
+ def test_q3 = { outfile_format ->
+ order_qt_select_base1 """ select id, case id when 1 then 'id = 1' when
2 then 'id = 2' else 'id not exist' end from ${export_table_name} """
+
+ // select ... into outfile ...
+ def res = sql """
+ SELECT id, case id when 1 then 'id = 1' when 2 then 'id = 2' else
'id not exist' end FROM ${export_table_name}
+ INTO OUTFILE "s3://${outFilePath}"
+ FORMAT AS ${outfile_format}
+ PROPERTIES (
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.secret_key"="${sk}",
+ "s3.access_key" = "${ak}"
+ );
+ """
+ outfile_url = res[0][3]
+
+ check_outfile_data(outfile_url, outfile_format)
+ check_outfile_column_name(outfile_url, outfile_format)
+ }
+
+ def test_q4 = { outfile_format ->
+ order_qt_select_base1 """ select
+ id,
+ 1,
+ 'string',
+ cast (age AS BIGINT),
+ 1 > 2,
+ 2 + 3,
+ 1 IN (1, 2, 3, 4),
+ TRUE | FALSE
+ from ${export_table_name}
+ """
+
+ // select ... into outfile ...
+ def res = sql """
+ select
+ id,
+ 1,
+ 'string',
+ cast (age AS BIGINT),
+ 1 > 2,
+ 2 + 3,
+ 1 IN (1, 2, 3, 4),
+ TRUE | FALSE
+ from ${export_table_name}
+ INTO OUTFILE "s3://${outFilePath}"
+ FORMAT AS ${outfile_format}
+ PROPERTIES (
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.secret_key"="${sk}",
+ "s3.access_key" = "${ak}"
+ );
+ """
+ outfile_url = res[0][3]
+
+ check_outfile_data(outfile_url, outfile_format)
+ check_outfile_column_name(outfile_url, outfile_format)
+ }
+
+ def test_q5 = { outfile_format ->
+ order_qt_select_base1 """ select cast('2566' as string), cast('888' as
bigint), cast('9999' as largeint)
+ from ${export_table_name}
+ """
+
+ // select ... into outfile ...
+ def res = sql """
+ select cast('2566' as string), cast('888' as bigint), cast('9999'
as largeint)
+ from ${export_table_name}
+ INTO OUTFILE "s3://${outFilePath}"
+ FORMAT AS ${outfile_format}
+ PROPERTIES (
+ "s3.endpoint" = "${s3_endpoint}",
+ "s3.region" = "${region}",
+ "s3.secret_key"="${sk}",
+ "s3.access_key" = "${ak}"
+ );
+ """
+ outfile_url = res[0][3]
+
+ check_outfile_data(outfile_url, outfile_format)
+ check_outfile_column_name(outfile_url, outfile_format)
+ }
+
+ // test parquet format
+ test_q1("parquet")
+ test_q2("parquet")
+ test_q3("parquet")
+ test_q4("parquet")
+ test_q5("parquet")
+
+ // test orc format
+ test_q1("orc")
+ test_q2("orc")
+ test_q3("orc")
+ test_q4("orc")
+ test_q5("orc")
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]