This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch dev_syxj_2
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 97821d4b4a3f630fcfe3029ecdbb403f8fe0b04c
Author: chenqi <[email protected]>
AuthorDate: Thu Aug 17 13:56:29 2023 +0800

    [Fix](orc-reader) Fix filling partition or missing columns using an incorrect row count.
---
 be/src/vec/exec/format/orc/vorc_reader.cpp         | 14 +++---
 .../hive/test_external_catalog_hive_partition.out  | 50 ++++++++++++++++++++++
 .../test_external_catalog_hive_partition.groovy    |  2 +
 3 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index c93c7a7590..01ebe8d1de 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -1433,8 +1433,10 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
         }
         *read_rows = rr;
 
-        RETURN_IF_ERROR(_fill_partition_columns(block, rr, _lazy_read_ctx.partition_columns));
-        RETURN_IF_ERROR(_fill_missing_columns(block, rr, _lazy_read_ctx.missing_columns));
+        RETURN_IF_ERROR(_fill_partition_columns(block, _batch->numElements,
+                                                _lazy_read_ctx.partition_columns));
+        RETURN_IF_ERROR(
+                _fill_missing_columns(block, _batch->numElements, _lazy_read_ctx.missing_columns));
 
         if (block->rows() == 0) {
             *eof = true;
@@ -1507,16 +1509,18 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
         }
         *read_rows = rr;
 
+        RETURN_IF_ERROR(_fill_partition_columns(block, _batch->numElements,
+                                                _lazy_read_ctx.partition_columns));
         RETURN_IF_ERROR(
-                _fill_partition_columns(block, *read_rows, _lazy_read_ctx.partition_columns));
-        RETURN_IF_ERROR(_fill_missing_columns(block, *read_rows, _lazy_read_ctx.missing_columns));
+                _fill_missing_columns(block, _batch->numElements, _lazy_read_ctx.missing_columns));
 
         if (block->rows() == 0) {
+            _convert_dict_cols_to_string_cols(block, nullptr);
             *eof = true;
             return Status::OK();
         }
 
-        _build_delete_row_filter(block, rr);
+        _build_delete_row_filter(block, _batch->numElements);
 
         std::vector<uint32_t> columns_to_filter;
         int column_to_keep = block->columns();
diff --git a/regression-test/data/external_table_emr_p2/hive/test_external_catalog_hive_partition.out b/regression-test/data/external_table_emr_p2/hive/test_external_catalog_hive_partition.out
index 5608999eb5..c823189e68 100644
--- a/regression-test/data/external_table_emr_p2/hive/test_external_catalog_hive_partition.out
+++ b/regression-test/data/external_table_emr_p2/hive/test_external_catalog_hive_partition.out
@@ -23,6 +23,31 @@
 -- !q06 --
 2023-01-03T00:00       100     0.3     test3
 
+-- !q07 --
+1994   50063846        1820677
+1995   58220229        1820677
+1995   66859335        1820677
+1997   77350500        1820677
+1995   98899109        1820677
+1996   122310373       1820677
+1996   138664326       1820677
+1995   145803300       1820677
+1998   187514084       1820677
+1994   197627203       1820677
+1993   216217095       1820677
+1997   260737890       1820677
+1998   279581856       1820677
+1992   296560224       1820677
+1993   306190854       1820677
+1997   329189126       1820677
+1992   389043491       1820677
+1997   435247522       1820677
+1998   449388167       1820677
+1994   526241665       1820677
+1998   533034534       1820677
+1996   576018657       1820677
+1997   582732039       1820677
+
 -- !q01 --
 0.1    test1   2023-01-01T00:00        \N
 0.2    test2   2023-01-02T00:00        \N
@@ -47,6 +72,31 @@
 -- !q06 --
 2023-01-03T00:00       100     0.3     test3
 
+-- !q07 --
+1994   50063846        1820677
+1995   58220229        1820677
+1995   66859335        1820677
+1997   77350500        1820677
+1995   98899109        1820677
+1996   122310373       1820677
+1996   138664326       1820677
+1995   145803300       1820677
+1998   187514084       1820677
+1994   197627203       1820677
+1993   216217095       1820677
+1997   260737890       1820677
+1998   279581856       1820677
+1992   296560224       1820677
+1993   306190854       1820677
+1997   329189126       1820677
+1992   389043491       1820677
+1997   435247522       1820677
+1998   449388167       1820677
+1994   526241665       1820677
+1998   533034534       1820677
+1996   576018657       1820677
+1997   582732039       1820677
+
 -- !q01 --
 0.1    test1   2023-01-01T00:00        \N
 0.2    test2   2023-01-02T00:00        \N
diff --git a/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_hive_partition.groovy b/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_hive_partition.groovy
index fc6e7fbc23..642121c22e 100644
--- a/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_hive_partition.groovy
+++ b/regression-test/suites/external_table_emr_p2/hive/test_external_catalog_hive_partition.groovy
@@ -39,6 +39,7 @@ suite("test_external_catalog_hive_partition", "p2") {
             qt_q04 """ select * from multi_catalog.parquet_partitioned_columns order by t_float """
             qt_q05 """ select * from multi_catalog.parquet_partitioned_columns where t_int is null order by t_float """
             qt_q06 """ select * from multi_catalog.parquet_partitioned_columns where t_int is not null order by t_float """
+            qt_q07 """ select  o_orderyear, o_orderkey, o_custkey from multi_catalog.orders_par_parquet where o_custkey=1820677 order by o_orderkey """
         }
         // test orc format
         def q01_orc = {
@@ -48,6 +49,7 @@ suite("test_external_catalog_hive_partition", "p2") {
             qt_q04 """ select * from multi_catalog.orc_partitioned_columns order by t_float """
             qt_q05 """ select * from multi_catalog.orc_partitioned_columns where t_int is null order by t_float """
             qt_q06 """ select * from multi_catalog.orc_partitioned_columns where t_int is not null order by t_float """
+            qt_q07 """ select  o_orderyear, o_orderkey, o_custkey from multi_catalog.orders_par_orc where o_custkey=1820677 order by o_orderkey """
         }
         // test text format
         def q01_text = {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to