This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new eb99e4270d2 [Fix](parquet_reader) Fix dict filtering not working with 
plain dict encoding in parquet reader. (#28290)
eb99e4270d2 is described below

commit eb99e4270d21a321401377ef5d78a98185aec131
Author: Qi Chen <[email protected]>
AuthorDate: Fri Dec 15 09:27:02 2023 +0800

    [Fix](parquet_reader) Fix dict filtering not working with plain dict 
encoding in parquet reader. (#28290)
---
 .../exec/format/parquet/vparquet_column_reader.cpp |  5 +-
 .../exec/format/parquet/vparquet_group_reader.cpp  | 14 +++-
 .../hive/test_hive_parquet_alter_column.out        | 78 +++++++++++-----------
 3 files changed, 54 insertions(+), 43 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index d6f7d746bcd..99d6f6b4e85 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -482,7 +482,8 @@ Status ScalarColumnReader::read_column_data(ColumnPtr& 
doris_column, DataTypePtr
                                             ColumnSelectVector& select_vector, 
size_t batch_size,
                                             size_t* read_rows, bool* eof, bool 
is_dict_filter) {
     bool need_convert = false;
-    auto& parquet_physical_type = _chunk_meta.meta_data.type;
+    auto parquet_physical_type =
+            !is_dict_filter ? _chunk_meta.meta_data.type : 
tparquet::Type::INT32;
     auto& show_type = _field_schema->type.type;
 
     ColumnPtr src_column = ParquetConvert::get_column(parquet_physical_type, 
show_type,
@@ -759,4 +760,4 @@ Status StructColumnReader::read_column_data(ColumnPtr& 
doris_column, DataTypePtr
     return Status::OK();
 }
 
-}; // namespace doris::vectorized
\ No newline at end of file
+}; // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 8360ab23e07..3784c054b8c 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -176,8 +176,18 @@ Status RowGroupReader::init(
 
 bool RowGroupReader::_can_filter_by_dict(int slot_id,
                                          const tparquet::ColumnMetaData& 
column_metadata) {
-    if (column_metadata.encodings[0] != tparquet::Encoding::RLE_DICTIONARY ||
-        column_metadata.type != tparquet::Type::BYTE_ARRAY) {
+    SlotDescriptor* slot = nullptr;
+    const std::vector<SlotDescriptor*>& slots = _tuple_descriptor->slots();
+    for (auto each : slots) {
+        if (each->id() == slot_id) {
+            slot = each;
+            break;
+        }
+    }
+    if (!slot->type().is_string_type()) {
+        return false;
+    }
+    if (column_metadata.type != tparquet::Type::BYTE_ARRAY) {
         return false;
     }
 
diff --git 
a/regression-test/data/external_table_p2/hive/test_hive_parquet_alter_column.out
 
b/regression-test/data/external_table_p2/hive/test_hive_parquet_alter_column.out
index cca084ff0f6..5c0d1228d5a 100644
--- 
a/regression-test/data/external_table_p2/hive/test_hive_parquet_alter_column.out
+++ 
b/regression-test/data/external_table_p2/hive/test_hive_parquet_alter_column.out
@@ -638,13 +638,13 @@ col_timestamp     TEXT    Yes     true    \N
 col_decimal    TEXT    Yes     true    \N      
 
 -- !show --
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
 
 -- !order --
 -1
@@ -667,14 +667,14 @@ col_decimal       TEXT    Yes     true    \N
 -20000000
 
 -- !order --
-10.500000
-10.500000
-10.500000
+1.05E1
+1.05E1
+1.05E1
 
 -- !order --
-20.750000
-20.750000
-20.750000
+2.075E1
+2.075E1
+2.075E1
 
 -- !order --
 false
@@ -727,13 +727,13 @@ col_timestamp     CHAR(10)        Yes     true    \N
 col_decimal    CHAR(10)        Yes     true    \N      
 
 -- !show --
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
 
 -- !order --
 -1
@@ -756,14 +756,14 @@ col_decimal       CHAR(10)        Yes     true    \N
 -20000000
 
 -- !order --
-10.500000
-10.500000
-10.500000
+1.05E1
+1.05E1
+1.05E1
 
 -- !order --
-20.750000
-20.750000
-20.750000
+2.075E1
+2.075E1
+2.075E1
 
 -- !order --
 false
@@ -816,13 +816,13 @@ col_timestamp     VARCHAR(20)     Yes     true    \N
 col_decimal    VARCHAR(20)     Yes     true    \N      
 
 -- !show --
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
--1     -200    -10     -20000000       20.577700       30.750000       false   
First   A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
+-1     -200    -10     -20000000       2.05777E1       3.075E1 false   First   
A       ADC     2023-10-06��    2023-10-09 17:15:00��   1238.45
 
 -- !order --
 -1
@@ -845,14 +845,14 @@ col_decimal       VARCHAR(20)     Yes     true    \N
 -20000000
 
 -- !order --
-10.500000
-10.500000
-10.500000
+1.05E1
+1.05E1
+1.05E1
 
 -- !order --
-20.750000
-20.750000
-20.750000
+2.075E1
+2.075E1
+2.075E1
 
 -- !order --
 false


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to