This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch dev_syxj_2 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 97821d4b4a3f630fcfe3029ecdbb403f8fe0b04c Author: chenqi <[email protected]> AuthorDate: Thu Aug 17 13:56:29 2023 +0800 [Fix](orc-reader) Fix filling partition or missing column used incorrect row count. --- be/src/vec/exec/format/orc/vorc_reader.cpp | 14 +++--- .../hive/test_external_catalog_hive_partition.out | 50 ++++++++++++++++++++++ .../test_external_catalog_hive_partition.groovy | 2 + 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index c93c7a7590..01ebe8d1de 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -1433,8 +1433,10 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { } *read_rows = rr; - RETURN_IF_ERROR(_fill_partition_columns(block, rr, _lazy_read_ctx.partition_columns)); - RETURN_IF_ERROR(_fill_missing_columns(block, rr, _lazy_read_ctx.missing_columns)); + RETURN_IF_ERROR(_fill_partition_columns(block, _batch->numElements, + _lazy_read_ctx.partition_columns)); + RETURN_IF_ERROR( + _fill_missing_columns(block, _batch->numElements, _lazy_read_ctx.missing_columns)); if (block->rows() == 0) { *eof = true; @@ -1507,16 +1509,18 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { } *read_rows = rr; + RETURN_IF_ERROR(_fill_partition_columns(block, _batch->numElements, + _lazy_read_ctx.partition_columns)); RETURN_IF_ERROR( - _fill_partition_columns(block, *read_rows, _lazy_read_ctx.partition_columns)); - RETURN_IF_ERROR(_fill_missing_columns(block, *read_rows, _lazy_read_ctx.missing_columns)); + _fill_missing_columns(block, _batch->numElements, _lazy_read_ctx.missing_columns)); if (block->rows() == 0) { + _convert_dict_cols_to_string_cols(block, nullptr); *eof = true; return Status::OK(); } - _build_delete_row_filter(block, rr); + _build_delete_row_filter(block, _batch->numElements); std::vector<uint32_t> columns_to_filter; int column_to_keep = block->columns(); diff --git a/regression-test/data/external_table_emr_p2/hive/test_external_catalog_hive_partition.out b/regression-test/data/external_table_emr_p2/hive/test_external_catalog_hive_partition.out index 5608999eb5..c823189e68 100644 --- a/regression-test/data/external_table_emr_p2/hive/test_external_catalog_hive_partition.out +++ b/regression-test/data/external_table_emr_p2/hive/test_external_catalog_hive_partition.out @@ -23,6 +23,31 @@ -- !q06 -- 2023-01-03T00:00 100 0.3 test3 +-- !q07 -- +1994 50063846 1820677 +1995 58220229 1820677 +1995 66859335 1820677 +1997 77350500 1820677 +1995 98899109 1820677 +1996 122310373 1820677 +1996 138664326 1820677 +1995 145803300 1820677 +1998 187514084 1820677 +1994 197627203 1820677 +1993 216217095 1820677 +1997 260737890 1820677 +1998 279581856 1820677 +1992 296560224 1820677 +1993 306190854 1820677 +1997 329189126 1820677 +1992 389043491 1820677 +1997 435247522 1820677 +1998 449388167 1820677 +1994 526241665 1820677 +1998 533034534 1820677 +1996 576018657 1820677 +1997 582732039 1820677 + -- !q01 -- 0.1 test1 2023-01-01T00:00 \N 0.2 test2 2023-01-02T00:00 \N @@ -47,6 +72,31 @@ -- !q06 -- 2023-01-03T00:00 100 0.3 test3 +-- !q07 -- +1994 50063846 1820677 +1995 58220229 1820677 +1995 66859335 1820677 +1997 77350500 1820677 +1995 98899109 1820677 +1996 122310373 1820677 +1996 138664326 1820677 +1995 145803300 1820677 +1998 187514084 1820677 +1994 197627203 1820677 +1993 216217095 1820677 +1997 260737890 1820677 +1998 279581856 1820677 +1992 296560224 1820677 +1993 306190854 1820677 +1997 329189126 1820677 +1992 389043491 1820677 +1997 435247522 1820677 +1998 449388167 1820677 +1994 526241665 1820677 +1998 533034534 1820677 +1996 576018657 1820677 +1997 582732039 1820677 + -- !q01 -- 0.1 test1 2023-01-01T00:00 \N 0.2 test2 2023-01-02T00:00 \N diff --git a/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_hive_partition.groovy b/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_hive_partition.groovy index fc6e7fbc23..642121c22e 100644 --- a/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_hive_partition.groovy +++ b/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_hive_partition.groovy @@ -39,6 +39,7 @@ suite("test_external_catalog_hive_partition", "p2") { qt_q04 """ select * from multi_catalog.parquet_partitioned_columns order by t_float """ qt_q05 """ select * from multi_catalog.parquet_partitioned_columns where t_int is null order by t_float """ qt_q06 """ select * from multi_catalog.parquet_partitioned_columns where t_int is not null order by t_float """ + qt_q07 """ select o_orderyear, o_orderkey, o_custkey from multi_catalog.orders_par_parquet where o_custkey=1820677 order by o_orderkey """ } // test orc format def q01_orc = { @@ -48,6 +49,7 @@ suite("test_external_catalog_hive_partition", "p2") { qt_q04 """ select * from multi_catalog.orc_partitioned_columns order by t_float """ qt_q05 """ select * from multi_catalog.orc_partitioned_columns where t_int is null order by t_float """ qt_q06 """ select * from multi_catalog.orc_partitioned_columns where t_int is not null order by t_float """ + qt_q07 """ select o_orderyear, o_orderkey, o_custkey from multi_catalog.orders_par_orc where o_custkey=1820677 order by o_orderkey """ } // test text format def q01_text = { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
