This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new eb99e4270d2 [Fix](parquet_reader) Fix dict filtering doesn't work with
plain dict encoding in parquet reader. (#28290)
eb99e4270d2 is described below
commit eb99e4270d21a321401377ef5d78a98185aec131
Author: Qi Chen <[email protected]>
AuthorDate: Fri Dec 15 09:27:02 2023 +0800
[Fix](parquet_reader) Fix dict filtering doesn't work with plain dict
encoding in parquet reader. (#28290)
---
.../exec/format/parquet/vparquet_column_reader.cpp | 5 +-
.../exec/format/parquet/vparquet_group_reader.cpp | 14 +++-
.../hive/test_hive_parquet_alter_column.out | 78 +++++++++++-----------
3 files changed, 54 insertions(+), 43 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index d6f7d746bcd..99d6f6b4e85 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -482,7 +482,8 @@ Status ScalarColumnReader::read_column_data(ColumnPtr&
doris_column, DataTypePtr
ColumnSelectVector& select_vector,
size_t batch_size,
size_t* read_rows, bool* eof, bool
is_dict_filter) {
bool need_convert = false;
- auto& parquet_physical_type = _chunk_meta.meta_data.type;
+ auto parquet_physical_type =
+ !is_dict_filter ? _chunk_meta.meta_data.type :
tparquet::Type::INT32;
auto& show_type = _field_schema->type.type;
ColumnPtr src_column = ParquetConvert::get_column(parquet_physical_type,
show_type,
@@ -759,4 +760,4 @@ Status StructColumnReader::read_column_data(ColumnPtr&
doris_column, DataTypePtr
return Status::OK();
}
-}; // namespace doris::vectorized
\ No newline at end of file
+}; // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 8360ab23e07..3784c054b8c 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -176,8 +176,18 @@ Status RowGroupReader::init(
bool RowGroupReader::_can_filter_by_dict(int slot_id,
const tparquet::ColumnMetaData&
column_metadata) {
- if (column_metadata.encodings[0] != tparquet::Encoding::RLE_DICTIONARY ||
- column_metadata.type != tparquet::Type::BYTE_ARRAY) {
+ SlotDescriptor* slot = nullptr;
+ const std::vector<SlotDescriptor*>& slots = _tuple_descriptor->slots();
+ for (auto each : slots) {
+ if (each->id() == slot_id) {
+ slot = each;
+ break;
+ }
+ }
+ if (!slot->type().is_string_type()) {
+ return false;
+ }
+ if (column_metadata.type != tparquet::Type::BYTE_ARRAY) {
return false;
}
diff --git
a/regression-test/data/external_table_p2/hive/test_hive_parquet_alter_column.out
b/regression-test/data/external_table_p2/hive/test_hive_parquet_alter_column.out
index cca084ff0f6..5c0d1228d5a 100644
---
a/regression-test/data/external_table_p2/hive/test_hive_parquet_alter_column.out
+++
b/regression-test/data/external_table_p2/hive/test_hive_parquet_alter_column.out
@@ -638,13 +638,13 @@ col_timestamp TEXT Yes true \N
col_decimal TEXT Yes true \N
-- !show --
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
-- !order --
-1
@@ -667,14 +667,14 @@ col_decimal TEXT Yes true \N
-20000000
-- !order --
-10.500000
-10.500000
-10.500000
+1.05E1
+1.05E1
+1.05E1
-- !order --
-20.750000
-20.750000
-20.750000
+2.075E1
+2.075E1
+2.075E1
-- !order --
false
@@ -727,13 +727,13 @@ col_timestamp CHAR(10) Yes true \N
col_decimal CHAR(10) Yes true \N
-- !show --
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
-- !order --
-1
@@ -756,14 +756,14 @@ col_decimal CHAR(10) Yes true \N
-20000000
-- !order --
-10.500000
-10.500000
-10.500000
+1.05E1
+1.05E1
+1.05E1
-- !order --
-20.750000
-20.750000
-20.750000
+2.075E1
+2.075E1
+2.075E1
-- !order --
false
@@ -816,13 +816,13 @@ col_timestamp VARCHAR(20) Yes true \N
col_decimal VARCHAR(20) Yes true \N
-- !show --
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
--1 -200 -10 -20000000 20.577700 30.750000 false
First A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
+-1 -200 -10 -20000000 2.05777E1 3.075E1 false First
A ADC 2023-10-06�� 2023-10-09 17:15:00�� 1238.45
-- !order --
-1
@@ -845,14 +845,14 @@ col_decimal VARCHAR(20) Yes true \N
-20000000
-- !order --
-10.500000
-10.500000
-10.500000
+1.05E1
+1.05E1
+1.05E1
-- !order --
-20.750000
-20.750000
-20.750000
+2.075E1
+2.075E1
+2.075E1
-- !order --
false
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]