This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 21a9e98f744 branch-4.0: [External](parquet) pass non predicates column's offset index to RowGroupReader #55795 (#57270)
21a9e98f744 is described below

commit 21a9e98f7449062ab787dc4807ccab3ad78fe29c
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Oct 24 12:01:11 2025 +0800

    branch-4.0: [External](parquet) pass non predicates column's offset index to RowGroupReader #55795 (#57270)
    
    Cherry-picked from #55795
    
    Co-authored-by: Lijia Liu <[email protected]>
    Co-authored-by: liutang123 <[email protected]>
---
 be/src/vec/exec/format/parquet/vparquet_reader.cpp | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 787161efc22..2e0328314f6 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -1081,10 +1081,6 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
             continue;
         }
         auto slot_id = _colname_to_slot_id->at(read_table_col);
-        if (!_push_down_simple_expr.contains(slot_id)) {
-            continue;
-        }
-        const auto& push_down_expr = _push_down_simple_expr[slot_id];
 
         int parquet_col_id =
                 
_file_metadata->schema().get_column(read_file_col)->physical_column_index;
@@ -1093,6 +1089,19 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
             continue;
         }
         auto& chunk = row_group.columns[parquet_col_id];
+
+        if (chunk.offset_index_length == 0) {
+            continue;
+        }
+        tparquet::OffsetIndex offset_index;
+        RETURN_IF_ERROR(page_index.parse_offset_index(chunk, 
off_index_buff.data(), &offset_index));
+        _col_offsets[parquet_col_id] = offset_index;
+
+        if (!_push_down_simple_expr.contains(slot_id)) {
+            continue;
+        }
+        const auto& push_down_expr = _push_down_simple_expr[slot_id];
+
         if (chunk.column_index_offset == 0 && chunk.column_index_length == 0) {
             continue;
         }
@@ -1132,8 +1141,6 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
         if (skipped_page_range.empty()) {
             continue;
         }
-        tparquet::OffsetIndex offset_index;
-        RETURN_IF_ERROR(page_index.parse_offset_index(chunk, 
off_index_buff.data(), &offset_index));
         for (int page_id : skipped_page_range) {
             RowRange skipped_row_range;
             RETURN_IF_ERROR(page_index.create_skipped_row_range(offset_index, 
row_group.num_rows,
@@ -1141,7 +1148,6 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
             // use the union row range
             skipped_row_ranges.emplace_back(skipped_row_range);
         }
-        _col_offsets[parquet_col_id] = offset_index;
     }
     if (skipped_row_ranges.empty()) {
         read_whole_row_group();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to