This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 5468df60d00 [fix](orc)fix orc reader missing column. (#35737)
5468df60d00 is described below
commit 5468df60d009a003a9e80e1f4082d8c9689fbd4e
Author: daidai <[email protected]>
AuthorDate: Fri May 31 22:56:22 2024 +0800
[fix](orc)fix orc reader missing column. (#35737)
bp #35583
---
be/src/vec/exec/format/orc/vorc_reader.cpp | 10 ++
.../hive/scripts/create_preinstalled_table.hql | 21 +++
.../hive/test_hive_orc_add_column.out | 185 +++++++++++++++++++++
.../hive/test_hive_orc_add_column.groovy | 95 +++++++++++
4 files changed, 311 insertions(+)
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 4a7944defee..8bf7fe3f2d6 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -778,6 +778,15 @@ Status OrcReader::set_fill_columns(
if (iter == predicate_columns.end()) {
_lazy_read_ctx.missing_columns.emplace(kv.first, kv.second);
} else {
+ // Check filters on a missing column: missing column == xx, missing
column is null, missing column is not null.
+ if (_slot_id_to_filter_conjuncts->find(iter->second.second) !=
+ _slot_id_to_filter_conjuncts->end()) {
+ for (auto& ctx :
_slot_id_to_filter_conjuncts->find(iter->second.second)->second) {
+ _filter_conjuncts.emplace_back(ctx);
+ }
+ }
+
+ // predicate_missing_columns is VLiteral. To fill in default values
for missing columns.
_lazy_read_ctx.predicate_missing_columns.emplace(kv.first,
kv.second);
_lazy_read_ctx.all_predicate_col_ids.emplace_back(iter->second.first);
}
@@ -1615,6 +1624,7 @@ Status OrcReader::get_next_block(Block* block, size_t*
read_rows, bool* eof) {
}
RETURN_IF_CATCH_EXCEPTION(
Block::filter_block_internal(block, columns_to_filter,
result_filter));
+ // _not_single_slot_filter_conjuncts check: missing column1 ==
missing column2, missing column == existing column ...
if (!_not_single_slot_filter_conjuncts.empty()) {
static_cast<void>(_convert_dict_cols_to_string_cols(block,
&batch_vec));
RETURN_IF_CATCH_EXCEPTION(
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index 765958527aa..ad6d326823b 100644
---
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -601,6 +601,27 @@ CREATE TABLE `unsupported_type_table`(
k6 int
);
+set hive.stats.column.autogather=false;
+
+CREATE TABLE `test_hive_orc_add_column`(
+ id int,
+ col1 int
+)
+stored as orc;
+insert into `test_hive_orc_add_column` values(1,2);
+insert into `test_hive_orc_add_column` values(3,4),(4,6);
+alter table `test_hive_orc_add_column` ADD COLUMNS (col2 int);
+insert into `test_hive_orc_add_column` values(7,8,9);
+insert into `test_hive_orc_add_column` values(10,11,null);
+insert into `test_hive_orc_add_column` values(12,13,null);
+insert into `test_hive_orc_add_column` values(14,15,16);
+alter table `test_hive_orc_add_column` ADD COLUMNS (col3 int,col4 string);
+insert into `test_hive_orc_add_column` values(17,18,19,20,"hello world");
+insert into `test_hive_orc_add_column` values(21,22,23,24,"cywcywcyw");
+insert into `test_hive_orc_add_column` values(25,26,null,null,null);
+insert into `test_hive_orc_add_column` values(27,28,29,null,null);
+insert into `test_hive_orc_add_column` values(30,31,32,33,null);
+
CREATE TABLE `schema_evo_test_text`(
id int,
name string
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_orc_add_column.out
b/regression-test/data/external_table_p0/hive/test_hive_orc_add_column.out
new file mode 100644
index 00000000000..d2691568e5f
--- /dev/null
+++ b/regression-test/data/external_table_p0/hive/test_hive_orc_add_column.out
@@ -0,0 +1,185 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !orc_add_col1 --
+1 2 \N \N \N
+3 4 \N \N \N
+4 6 \N \N \N
+7 8 9 \N \N
+10 11 \N \N \N
+12 13 \N \N \N
+14 15 16 \N \N
+17 18 19 20 hello world
+21 22 23 24 cywcywcyw
+25 26 \N \N \N
+27 28 29 \N \N
+30 31 32 33 \N
+
+-- !orc_add_col2 --
+
+-- !orc_add_col3 --
+
+-- !orc_add_col4 --
+1 2 \N \N \N
+3 4 \N \N \N
+4 6 \N \N \N
+10 11 \N \N \N
+12 13 \N \N \N
+25 26 \N \N \N
+
+-- !orc_add_col5 --
+\N
+\N
+\N
+\N
+\N
+\N
+
+-- !orc_add_col6 --
+1 2 \N \N \N
+3 4 \N \N \N
+4 6 \N \N \N
+7 8 9 \N \N
+10 11 \N \N \N
+12 13 \N \N \N
+14 15 16 \N \N
+25 26 \N \N \N
+27 28 29 \N \N
+
+-- !orc_add_col7 --
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+
+-- !orc_add_col8 --
+1 2 \N \N \N
+3 4 \N \N \N
+4 6 \N \N \N
+7 8 9 \N \N
+10 11 \N \N \N
+12 13 \N \N \N
+14 15 16 \N \N
+25 26 \N \N \N
+27 28 29 \N \N
+30 31 32 33 \N
+
+-- !orc_add_col9 --
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+
+-- !orc_add_col10 --
+1 2 \N \N \N
+3 4 \N \N \N
+4 6 \N \N \N
+7 8 9 \N \N
+10 11 \N \N \N
+12 13 \N \N \N
+14 15 16 \N \N
+17 18 19 20 hello world
+21 22 23 24 cywcywcyw
+25 26 \N \N \N
+27 28 29 \N \N
+30 31 32 33 \N
+
+-- !orc_add_col11 --
+2
+4
+6
+8
+11
+13
+15
+18
+22
+26
+28
+31
+
+-- !orc_add_col12 --
+7 8 9 \N \N
+14 15 16 \N \N
+17 18 19 20 hello world
+21 22 23 24 cywcywcyw
+27 28 29 \N \N
+30 31 32 33 \N
+
+-- !orc_add_col13 --
+9
+16
+19
+23
+29
+32
+
+-- !orc_add_col14 --
+17 18 19 20 hello world
+21 22 23 24 cywcywcyw
+30 31 32 33 \N
+
+-- !orc_add_col15 --
+20
+24
+33
+
+-- !orc_add_col16 --
+17 18 19 20 hello world
+21 22 23 24 cywcywcyw
+
+-- !orc_add_col17 --
+cywcywcyw
+hello world
+
+-- !orc_add_col18 --
+7 8 9 \N \N
+
+-- !orc_add_col19 --
+
+-- !orc_add_col20 --
+7 8 9 \N \N
+14 15 16 \N \N
+17 18 19 20 hello world
+21 22 23 24 cywcywcyw
+27 28 29 \N \N
+30 31 32 33 \N
+
+-- !orc_add_col21 --
+7 8 9 \N \N
+14 15 16 \N \N
+17 18 19 20 hello world
+21 22 23 24 cywcywcyw
+27 28 29 \N \N
+30 31 32 33 \N
+
+-- !orc_add_col22 --
+
+-- !orc_add_col23 --
+30 31 32 33 \N
+
+-- !orc_add_col24 --
+
+-- !orc_add_col25 --
+17 18 19 20 hello world
+21 22 23 24 cywcywcyw
+30 31 32 33 \N
+
+-- !orc_add_col26 --
+
+-- !orc_add_col27 --
+21 22 23 24 cywcywcyw
+
+-- !orc_add_col28 --
+17 18 19 20 hello world
+21 22 23 24 cywcywcyw
+
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_orc_add_column.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_orc_add_column.groovy
new file mode 100644
index 00000000000..c3ab409743b
--- /dev/null
+++
b/regression-test/suites/external_table_p0/hive/test_hive_orc_add_column.groovy
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_orc_add_column",
"all_types,p0,external,hive,external_docker,external_docker_hive") {
+
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("diable Hive test.")
+ return;
+ }
+
+ try {
+ String hms_port = context.config.otherConfigs.get("hms_port")
+ String catalog_name = "hive_test_orc_add_column"
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """create catalog if not exists ${catalog_name} properties (
+ "type"="hms",
+ 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
+ );"""
+ sql """use `${catalog_name}`.`default`"""
+
+
+ qt_orc_add_col1 """select * from test_hive_orc_add_column order by id
;"""
+ qt_orc_add_col2 """select * from test_hive_orc_add_column where col1
is null order by id ;"""
+ qt_orc_add_col3 """select col1 from test_hive_orc_add_column where
col1 is null;"""
+ qt_orc_add_col4 """select * from test_hive_orc_add_column where col2
is null order by id ;"""
+ qt_orc_add_col5 """select col2 from test_hive_orc_add_column where
col2 is null;"""
+ qt_orc_add_col6 """select * from test_hive_orc_add_column where col3
is null order by id ;"""
+ qt_orc_add_col7 """select col3 from test_hive_orc_add_column where
col3 is null;"""
+ qt_orc_add_col8 """select * from test_hive_orc_add_column where col4
is null order by id ;"""
+ qt_orc_add_col9 """select col4 from test_hive_orc_add_column where
col4 is null;"""
+ qt_orc_add_col10 """select * from test_hive_orc_add_column where col1
is not null order by id ;"""
+ qt_orc_add_col11 """select col1 from test_hive_orc_add_column where
col1 is not null order by col1;"""
+ qt_orc_add_col12 """select * from test_hive_orc_add_column where col2
is not null order by id ;"""
+ qt_orc_add_col13 """select col2 from test_hive_orc_add_column where
col2 is not null order by col2;"""
+ qt_orc_add_col14 """select * from test_hive_orc_add_column where col3
is not null order by id ;"""
+ qt_orc_add_col15 """select col3 from test_hive_orc_add_column where
col3 is not null order by col3;"""
+ qt_orc_add_col16 """select * from test_hive_orc_add_column where col4
is not null order by id ;"""
+ qt_orc_add_col17 """select col4 from test_hive_orc_add_column where
col4 is not null order by col4;"""
+ qt_orc_add_col18 """select * from test_hive_orc_add_column where col2
= 9 order by id ;"""
+ qt_orc_add_col19 """select * from test_hive_orc_add_column where col2
= 190 order by id ;"""
+ qt_orc_add_col20 """select * from test_hive_orc_add_column where col2
- col1 = 1 order by id ;"""
+ qt_orc_add_col21 """select * from test_hive_orc_add_column where col2
- id = 2 order by id ;"""
+ qt_orc_add_col22 """select * from test_hive_orc_add_column where col2
- id = 3 order by id ;"""
+ qt_orc_add_col23 """select * from test_hive_orc_add_column where col3
= 33 order by id ;"""
+ qt_orc_add_col24 """select * from test_hive_orc_add_column where col3
= 330 order by id ;"""
+ qt_orc_add_col25 """select * from test_hive_orc_add_column where col3
- col1 = 2 order by id ;"""
+ qt_orc_add_col26 """select * from test_hive_orc_add_column where col3
- id != 3 order by id ;"""
+ qt_orc_add_col27 """select * from test_hive_orc_add_column where col1
+ col2 + col3 = 23*3 order by id ;"""
+ qt_orc_add_col28 """select * from test_hive_orc_add_column where col1
+ col2 + col3 != 32*3 order by id ; """
+
+
+
+ sql """drop catalog if exists ${catalog_name}"""
+
+ } finally {
+ }
+
+}
+
+
+// CREATE TABLE `test_hive_orc_add_column`(
+// id int,
+// col1 int
+// )
+// stored as orc;
+// insert into `test_hive_orc_add_column` values(1,2);
+// insert into `test_hive_orc_add_column` values(3,4),(4,6);
+// alter table `test_hive_orc_add_column` ADD COLUMNS(col2 int);
+// insert into `test_hive_orc_add_column` values(7,8,9);
+// insert into `test_hive_orc_add_column` values(10,11,null);
+// insert into `test_hive_orc_add_column` values(12,13,null);
+// insert into `test_hive_orc_add_column` values(14,15,16);
+// alter table `test_hive_orc_add_column` ADD COLUMNS(col3 int,col4 string);
+// insert into `test_hive_orc_add_column` values(17,18,19,20,"hello world");
+// insert into `test_hive_orc_add_column` values(21,22,23,24,"cywcywcyw");
+// insert into `test_hive_orc_add_column` values(25,26,null,null,null);
+// insert into `test_hive_orc_add_column` values(27,28,29,null,null);
+// insert into `test_hive_orc_add_column` values(30,31,32,33,null);
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]