This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 92203ea0f09 branch-3.1:[External](parquet) pass non predicates column's offset index to RowGroupReader #55795 (#57409)
92203ea0f09 is described below
commit 92203ea0f0970de7dc6bbed0ccfa74c2b4173296
Author: daidai <[email protected]>
AuthorDate: Thu Oct 30 17:45:42 2025 +0800
branch-3.1:[External](parquet) pass non predicates column's offset index to RowGroupReader #55795 (#57409)
bp #55795
---
be/src/vec/exec/format/parquet/vparquet_reader.cpp | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 36f8028225d..6d97b570a98 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -809,17 +809,32 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
for (size_t idx = 0; idx < _read_table_columns.size(); idx++) {
const auto& read_table_col = _read_table_columns[idx];
const auto& read_file_col = _read_file_columns[idx];
- auto conjunct_iter = _colname_to_value_range->find(read_table_col);
- if (_colname_to_value_range->end() == conjunct_iter) {
+ if (!_colname_to_slot_id->contains(read_table_col)) {
+ // equal delete may add column to read_table_col, but this column no slot_id.
continue;
}
+
int parquet_col_id =
_file_metadata->schema().get_column(read_file_col)->physical_column_index;
if (parquet_col_id < 0) {
// complex type, not support page index yet.
continue;
}
+
auto& chunk = row_group.columns[parquet_col_id];
+ if (chunk.offset_index_length == 0) {
+ continue;
+ }
+
+ tparquet::OffsetIndex offset_index;
+ RETURN_IF_ERROR(page_index.parse_offset_index(chunk, off_index_buff.data(), &offset_index));
+ _col_offsets[parquet_col_id] = offset_index;
+
+ auto conjunct_iter = _colname_to_value_range->find(read_table_col);
+ if (_colname_to_value_range->end() == conjunct_iter) {
+ continue;
+ }
+
if (chunk.column_index_offset == 0 && chunk.column_index_length == 0) {
continue;
}
@@ -837,8 +852,6 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
if (skipped_page_range.empty()) {
continue;
}
- tparquet::OffsetIndex offset_index;
- RETURN_IF_ERROR(page_index.parse_offset_index(chunk, off_index_buff.data(), &offset_index));
for (int page_id : skipped_page_range) {
RowRange skipped_row_range;
RETURN_IF_ERROR(page_index.create_skipped_row_range(offset_index, row_group.num_rows,
@@ -846,7 +859,6 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
// use the union row range
skipped_row_ranges.emplace_back(skipped_row_range);
}
- _col_offsets[parquet_col_id] = offset_index;
}
if (skipped_row_ranges.empty()) {
read_whole_row_group();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]