This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 21a9e98f744 branch-4.0: [External](parquet) pass non-predicate
columns' offset index to RowGroupReader #55795 (#57270)
21a9e98f744 is described below
commit 21a9e98f7449062ab787dc4807ccab3ad78fe29c
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Oct 24 12:01:11 2025 +0800
branch-4.0: [External](parquet) pass non-predicate columns' offset index
to RowGroupReader #55795 (#57270)
Cherry-picked from #55795
Co-authored-by: Lijia Liu <[email protected]>
Co-authored-by: liutang123 <[email protected]>
---
be/src/vec/exec/format/parquet/vparquet_reader.cpp | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 787161efc22..2e0328314f6 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -1081,10 +1081,6 @@ Status ParquetReader::_process_page_index(const
tparquet::RowGroup& row_group,
continue;
}
auto slot_id = _colname_to_slot_id->at(read_table_col);
- if (!_push_down_simple_expr.contains(slot_id)) {
- continue;
- }
- const auto& push_down_expr = _push_down_simple_expr[slot_id];
int parquet_col_id =
_file_metadata->schema().get_column(read_file_col)->physical_column_index;
@@ -1093,6 +1089,19 @@ Status ParquetReader::_process_page_index(const
tparquet::RowGroup& row_group,
continue;
}
auto& chunk = row_group.columns[parquet_col_id];
+
+ if (chunk.offset_index_length == 0) {
+ continue;
+ }
+ tparquet::OffsetIndex offset_index;
+ RETURN_IF_ERROR(page_index.parse_offset_index(chunk,
off_index_buff.data(), &offset_index));
+ _col_offsets[parquet_col_id] = offset_index;
+
+ if (!_push_down_simple_expr.contains(slot_id)) {
+ continue;
+ }
+ const auto& push_down_expr = _push_down_simple_expr[slot_id];
+
if (chunk.column_index_offset == 0 && chunk.column_index_length == 0) {
continue;
}
@@ -1132,8 +1141,6 @@ Status ParquetReader::_process_page_index(const
tparquet::RowGroup& row_group,
if (skipped_page_range.empty()) {
continue;
}
- tparquet::OffsetIndex offset_index;
- RETURN_IF_ERROR(page_index.parse_offset_index(chunk,
off_index_buff.data(), &offset_index));
for (int page_id : skipped_page_range) {
RowRange skipped_row_range;
RETURN_IF_ERROR(page_index.create_skipped_row_range(offset_index,
row_group.num_rows,
@@ -1141,7 +1148,6 @@ Status ParquetReader::_process_page_index(const
tparquet::RowGroup& row_group,
// use the union row range
skipped_row_ranges.emplace_back(skipped_row_range);
}
- _col_offsets[parquet_col_id] = offset_index;
}
if (skipped_row_ranges.empty()) {
read_whole_row_group();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]