This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 7f66be84d5c [fix](Outfile) Infer the column name if the column is expression in `select into outfile` (#25854) 7f66be84d5c is described below commit 7f66be84d5ccd5fb82b05a3bbd27386667c02b69 Author: Tiewei Fang <43782773+bepppo...@users.noreply.github.com> AuthorDate: Wed Oct 25 22:49:04 2023 +0800 [fix](Outfile) Infer the column name if the column is expression in `select into outfile` (#25854) This pr do two things: 1. Infer the column name if the column is expression in `select into outfile`. The rule for column name generation can be refered in pr: #24990 2. fix bug that it will core dump if the `_schema` fails to build in the open phase in vorc_transformer.cpp TODO: 1. Support infer the column name if the column is expression in `select into outfile` in new optimizer(Nereids). --- be/src/vec/runtime/vorc_transformer.cpp | 11 +- be/src/vec/sink/writer/vfile_result_writer.cpp | 2 +- .../java/org/apache/doris/analysis/SelectStmt.java | 3 +- .../test_outfile_expr_generate_col_name.out | 267 +++++++++++++++++++++ .../test_outfile_expr_generate_col_name.groovy | 260 ++++++++++++++++++++ 5 files changed, 540 insertions(+), 3 deletions(-) diff --git a/be/src/vec/runtime/vorc_transformer.cpp b/be/src/vec/runtime/vorc_transformer.cpp index e862624c905..fafe2093752 100644 --- a/be/src/vec/runtime/vorc_transformer.cpp +++ b/be/src/vec/runtime/vorc_transformer.cpp @@ -116,7 +116,13 @@ std::unique_ptr<orc::ColumnVectorBatch> VOrcTransformer::_create_row_batch(size_ } int64_t VOrcTransformer::written_len() { - return _output_stream->getLength(); + // written_len() will be called in VFileResultWriter::_close_file_writer + // but _output_stream may be nullptr + // because the failure built by _schema in open() + if (_output_stream) { + return _output_stream->getLength(); + } + return 0; } Status VOrcTransformer::close() { @@ -127,6 +133,9 @@ Status VOrcTransformer::close() { return Status::IOError(e.what()); } } + if (_output_stream) { + _output_stream->close(); + } return Status::OK(); } diff --git a/be/src/vec/sink/writer/vfile_result_writer.cpp b/be/src/vec/sink/writer/vfile_result_writer.cpp index 0df4e4e6497..c6d6eb987b1 100644 --- a/be/src/vec/sink/writer/vfile_result_writer.cpp +++ b/be/src/vec/sink/writer/vfile_result_writer.cpp @@ -260,12 +260,12 @@ Status VFileResultWriter::_create_new_file_if_exceed_size() { Status VFileResultWriter::_close_file_writer(bool done) { if (_vfile_writer) { - RETURN_IF_ERROR(_vfile_writer->close()); // we can not use _current_written_bytes to COUNTER_UPDATE(_written_data_bytes, _current_written_bytes) // because it will call `write()` function of orc/parquet function in `_vfile_writer->close()` // and the real written_len will increase // and _current_written_bytes will less than _vfile_writer->written_len() COUNTER_UPDATE(_written_data_bytes, _vfile_writer->written_len()); + RETURN_IF_ERROR(_vfile_writer->close()); _vfile_writer.reset(nullptr); } else if (_file_writer_impl) { RETURN_IF_ERROR(_file_writer_impl->close()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java index 8c40f5230ac..d8f6ac55372 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -545,7 +545,8 @@ public class SelectStmt extends QueryStmt { resultExprs.add(rewriteQueryExprByMvColumnExpr(item.getExpr(), analyzer)); String columnLabel = null; Class<? extends StatementBase> statementClazz = analyzer.getRootStatementClazz(); - if (statementClazz != null && !QueryStmt.class.isAssignableFrom(statementClazz)) { + if (statementClazz != null + && (!QueryStmt.class.isAssignableFrom(statementClazz) || hasOutFileClause())) { // Infer column name when item is expr columnLabel = item.toColumnLabel(i); } diff --git a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out new file mode 100644 index 00000000000..d1fc5fa6313 --- /dev/null +++ b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out @@ -0,0 +1,267 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select_base -- +1 doris-1 19 +10 \N \N +2 doris-2 20 +3 doris-3 21 +4 doris-4 22 +5 doris-5 23 +6 doris-6 24 +7 doris-7 25 +8 doris-8 26 +9 doris-9 27 + +-- !select_base1 -- +false +false +false +false +false +false +false +false +false +false + +-- !select_tvf -- +false +false +false +false +false +false +false +false +false +false + +-- !desc_s3 -- +__binary_predicate_0 BOOLEAN Yes false \N NONE + +-- !select_base1 -- +10 + +-- !select_tvf -- +10 + +-- !desc_s3 -- +__max_0 INT Yes false \N NONE + +-- !select_base1 -- +1 id = 1 +10 id not exist +2 id = 2 +3 id not exist +4 id not exist +5 id not exist +6 id not exist +7 id not exist +8 id not exist +9 id not exist + +-- !select_tvf -- +1 id = 1 +10 id not exist +2 id = 2 +3 id not exist +4 id not exist +5 id not exist +6 id not exist +7 id not exist +8 id not exist +9 id not exist + +-- !desc_s3 -- +__case_expr_1 TEXT Yes false \N NONE +id INT Yes false \N NONE + +-- !select_base1 -- +1 1 string 19 false 5 true 1 +10 1 string \N false 5 true 1 +2 1 string 20 false 5 true 1 +3 1 string 21 false 5 true 1 +4 1 string 22 false 5 true 1 +5 1 string 23 false 5 true 1 +6 1 string 24 false 5 true 1 +7 1 string 25 false 5 true 1 +8 1 string 26 false 5 true 1 +9 1 string 27 false 5 true 1 + +-- !select_tvf -- +1 1 string 19 false 5 true 1 +10 1 string \N false 5 true 1 +2 1 string 20 false 5 true 1 +3 1 string 21 false 5 true 1 +4 1 string 22 false 5 true 1 +5 1 string 23 false 5 true 1 +6 1 string 24 false 5 true 1 +7 1 string 25 false 5 true 1 +8 1 string 26 false 5 true 1 +9 1 string 27 false 5 true 1 + +-- !desc_s3 -- +__arithmetic_expr_5 INT Yes false \N NONE +__arithmetic_expr_7 BIGINT Yes false \N NONE +__binary_predicate_4 BOOLEAN Yes false \N NONE +__cast_expr_3 BIGINT Yes false \N NONE +__in_predicate_6 BOOLEAN Yes false \N NONE +__literal_1 INT Yes false \N NONE +__literal_2 TEXT Yes false \N NONE +id INT Yes false \N NONE + +-- !select_base1 -- +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 + +-- !select_tvf -- +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 + +-- !desc_s3 -- +__cast_expr_0 TEXT Yes false \N NONE +__cast_expr_1 BIGINT Yes false \N NONE +__cast_expr_2 TEXT Yes false \N NONE + +-- !select_base1 -- +false +false +false +false +false +false +false +false +false +false + +-- !select_tvf -- +false +false +false +false +false +false +false +false +false +false + +-- !desc_s3 -- +__binary_predicate_0 BOOLEAN Yes false \N NONE + +-- !select_base1 -- +10 + +-- !select_tvf -- +10 + +-- !desc_s3 -- +__max_0 INT Yes false \N NONE + +-- !select_base1 -- +1 id = 1 +10 id not exist +2 id = 2 +3 id not exist +4 id not exist +5 id not exist +6 id not exist +7 id not exist +8 id not exist +9 id not exist + +-- !select_tvf -- +1 id = 1 +10 id not exist +2 id = 2 +3 id not exist +4 id not exist +5 id not exist +6 id not exist +7 id not exist +8 id not exist +9 id not exist + +-- !desc_s3 -- +__case_expr_1 TEXT Yes false \N NONE +id INT Yes false \N NONE + +-- !select_base1 -- +1 1 string 19 false 5 true 1 +10 1 string \N false 5 true 1 +2 1 string 20 false 5 true 1 +3 1 string 21 false 5 true 1 +4 1 string 22 false 5 true 1 +5 1 string 23 false 5 true 1 +6 1 string 24 false 5 true 1 +7 1 string 25 false 5 true 1 +8 1 string 26 false 5 true 1 +9 1 string 27 false 5 true 1 + +-- !select_tvf -- +1 1 string 19 false 5 true 1 +10 1 string \N false 5 true 1 +2 1 string 20 false 5 true 1 +3 1 string 21 false 5 true 1 +4 1 string 22 false 5 true 1 +5 1 string 23 false 5 true 1 +6 1 string 24 false 5 true 1 +7 1 string 25 false 5 true 1 +8 1 string 26 false 5 true 1 +9 1 string 27 false 5 true 1 + +-- !desc_s3 -- +__arithmetic_expr_5 SMALLINT Yes false \N NONE +__arithmetic_expr_7 BIGINT Yes false \N NONE +__binary_predicate_4 BOOLEAN Yes false \N NONE +__cast_expr_3 BIGINT Yes false \N NONE +__in_predicate_6 BOOLEAN Yes false \N NONE +__literal_1 TINYINT Yes false \N NONE +__literal_2 TEXT Yes false \N NONE +id INT Yes false \N NONE + +-- !select_base1 -- +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 + +-- !select_tvf -- +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 +2566 888 9999 + +-- !desc_s3 -- +__cast_expr_0 TEXT Yes false \N NONE +__cast_expr_1 BIGINT Yes false \N NONE +__cast_expr_2 TEXT Yes false \N NONE + diff --git a/regression-test/suites/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.groovy b/regression-test/suites/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.groovy new file mode 100644 index 00000000000..52977c1fd58 --- /dev/null +++ b/regression-test/suites/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.groovy @@ -0,0 +1,260 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.nio.file.Paths + +suite("test_outfile_expr_generate_col_name", "p0") { + // close nereids + sql """ set enable_nereids_planner=false """ + + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String region = getS3Region() + String bucket = getS3BucketName(); + + + def export_table_name = "outfile_expr_export_test" + def outFilePath = "${bucket}/outfile/expr_generate_col_name/exp_" + + + def create_table = { + sql """ DROP TABLE IF EXISTS ${export_table_name} """ + sql """ + CREATE TABLE IF NOT EXISTS ${export_table_name} ( + `id` int(11) NULL, + `name` string NULL, + `age` int(11) NULL + ) + PARTITION BY RANGE(id) + ( + PARTITION less_than_20 VALUES LESS THAN ("20"), + PARTITION between_20_70 VALUES [("20"),("70")), + PARTITION more_than_70 VALUES LESS THAN ("151") + ) + DISTRIBUTED BY HASH(id) BUCKETS 3 + PROPERTIES("replication_num" = "1"); + """ + } + + def outfile_to_S3 = { format -> + // select ... into outfile ... + def res = sql """ + SELECT * FROM ${export_table_name} t ORDER BY user_id + INTO OUTFILE "s3://${outFilePath}" + FORMAT AS ${outfile_format} + PROPERTIES ( + "s3.endpoint" = "${s3_endpoint}", + "s3.region" = "${region}", + "s3.secret_key"="${sk}", + "s3.access_key" = "${ak}" + ); + """ + + return res[0][3] + } + + // create table to export data + create_table() + + // insert data + StringBuilder sb = new StringBuilder() + int i = 1 + for (; i < 10; i ++) { + sb.append(""" + (${i}, 'doris-${i}', ${i + 18}), + """) + } + sb.append(""" + (${i}, NULL, NULL) + """) + sql """ INSERT INTO ${export_table_name} VALUES + ${sb.toString()} + """ + def insert_res = sql "show last insert;" + logger.info("insert result: " + insert_res.toString()) + order_qt_select_base """ SELECT * FROM ${export_table_name} t ORDER BY id; """ + + + + def check_outfile_data = { outfile_url, outfile_format -> + order_qt_select_tvf """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.${outfile_format}", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "${outfile_format}", + "region" = "${region}" + ); + """ + } + + def check_outfile_column_name = { outfile_url, outfile_format -> + order_qt_desc_s3 """ Desc function S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.${outfile_format}", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "${outfile_format}", + "region" = "${region}" + ); + """ + } + + def test_q1 = { outfile_format -> + order_qt_select_base1 """ select 1>2 from ${export_table_name} """ + + // select ... into outfile ... + def res = sql """ + SELECT 1>2 FROM ${export_table_name} + INTO OUTFILE "s3://${outFilePath}" + FORMAT AS ${outfile_format} + PROPERTIES ( + "s3.endpoint" = "${s3_endpoint}", + "s3.region" = "${region}", + "s3.secret_key"="${sk}", + "s3.access_key" = "${ak}" + ); + """ + outfile_url = res[0][3] + + check_outfile_data(outfile_url, outfile_format) + check_outfile_column_name(outfile_url, outfile_format) + } + + def test_q2 = { outfile_format -> + order_qt_select_base1 """ select max(id) from ${export_table_name} """ + + // select ... into outfile ... + def res = sql """ + SELECT max(id) FROM ${export_table_name} + INTO OUTFILE "s3://${outFilePath}" + FORMAT AS ${outfile_format} + PROPERTIES ( + "s3.endpoint" = "${s3_endpoint}", + "s3.region" = "${region}", + "s3.secret_key"="${sk}", + "s3.access_key" = "${ak}" + ); + """ + outfile_url = res[0][3] + + check_outfile_data(outfile_url, outfile_format) + check_outfile_column_name(outfile_url, outfile_format) + } + + def test_q3 = { outfile_format -> + order_qt_select_base1 """ select id, case id when 1 then 'id = 1' when 2 then 'id = 2' else 'id not exist' end from ${export_table_name} """ + + // select ... into outfile ... + def res = sql """ + SELECT id, case id when 1 then 'id = 1' when 2 then 'id = 2' else 'id not exist' end FROM ${export_table_name} + INTO OUTFILE "s3://${outFilePath}" + FORMAT AS ${outfile_format} + PROPERTIES ( + "s3.endpoint" = "${s3_endpoint}", + "s3.region" = "${region}", + "s3.secret_key"="${sk}", + "s3.access_key" = "${ak}" + ); + """ + outfile_url = res[0][3] + + check_outfile_data(outfile_url, outfile_format) + check_outfile_column_name(outfile_url, outfile_format) + } + + def test_q4 = { outfile_format -> + order_qt_select_base1 """ select + id, + 1, + 'string', + cast (age AS BIGINT), + 1 > 2, + 2 + 3, + 1 IN (1, 2, 3, 4), + TRUE | FALSE + from ${export_table_name} + """ + + // select ... into outfile ... + def res = sql """ + select + id, + 1, + 'string', + cast (age AS BIGINT), + 1 > 2, + 2 + 3, + 1 IN (1, 2, 3, 4), + TRUE | FALSE + from ${export_table_name} + INTO OUTFILE "s3://${outFilePath}" + FORMAT AS ${outfile_format} + PROPERTIES ( + "s3.endpoint" = "${s3_endpoint}", + "s3.region" = "${region}", + "s3.secret_key"="${sk}", + "s3.access_key" = "${ak}" + ); + """ + outfile_url = res[0][3] + + check_outfile_data(outfile_url, outfile_format) + check_outfile_column_name(outfile_url, outfile_format) + } + + def test_q5 = { outfile_format -> + order_qt_select_base1 """ select cast('2566' as string), cast('888' as bigint), cast('9999' as largeint) + from ${export_table_name} + """ + + // select ... into outfile ... + def res = sql """ + select cast('2566' as string), cast('888' as bigint), cast('9999' as largeint) + from ${export_table_name} + INTO OUTFILE "s3://${outFilePath}" + FORMAT AS ${outfile_format} + PROPERTIES ( + "s3.endpoint" = "${s3_endpoint}", + "s3.region" = "${region}", + "s3.secret_key"="${sk}", + "s3.access_key" = "${ak}" + ); + """ + outfile_url = res[0][3] + + check_outfile_data(outfile_url, outfile_format) + check_outfile_column_name(outfile_url, outfile_format) + } + + // test parquet format + test_q1("parquet") + test_q2("parquet") + test_q3("parquet") + test_q4("parquet") + test_q5("parquet") + + // test orc format + test_q1("orc") + test_q2("orc") + test_q3("orc") + test_q4("orc") + test_q5("orc") +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org