This is an automated email from the ASF dual-hosted git repository.

huajianlan pushed a commit to branch nested_column_prune
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 4f4b423b6ad3d3c2990a070206254e0fb299f057
Author: kakachen <[email protected]>
AuthorDate: Fri Oct 31 16:52:18 2025 +0800

    Use the selected column IDs when generating Parquet merge I/O ranges.
---
 be/src/vec/exec/format/parquet/vparquet_reader.cpp | 43 ++++++++++++----------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index e47a0eddaac..9a9b6c302e1 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -796,27 +796,30 @@ std::vector<io::PrefetchRange> ParquetReader::_generate_random_access_ranges(
     size_t total_io_size = 0;
     std::function<void(const FieldSchema*, const tparquet::RowGroup&)> 
scalar_range =
             [&](const FieldSchema* field, const tparquet::RowGroup& row_group) 
{
-                if (field->data_type->get_primitive_type() == TYPE_ARRAY) {
-                    scalar_range(&field->children[0], row_group);
-                } else if (field->data_type->get_primitive_type() == TYPE_MAP) 
{
-                    scalar_range(&field->children[0], row_group);
-                    scalar_range(&field->children[1], row_group);
-                } else if (field->data_type->get_primitive_type() == 
TYPE_STRUCT) {
-                    for (int i = 0; i < field->children.size(); ++i) {
-                        scalar_range(&field->children[i], row_group);
+                if (_column_ids.empty() ||
+                    _column_ids.find(field->get_column_id()) != 
_column_ids.end()) {
+                    if (field->data_type->get_primitive_type() == TYPE_ARRAY) {
+                        scalar_range(&field->children[0], row_group);
+                    } else if (field->data_type->get_primitive_type() == 
TYPE_MAP) {
+                        scalar_range(&field->children[0], row_group);
+                        scalar_range(&field->children[1], row_group);
+                    } else if (field->data_type->get_primitive_type() == 
TYPE_STRUCT) {
+                        for (int i = 0; i < field->children.size(); ++i) {
+                            scalar_range(&field->children[i], row_group);
+                        }
+                    } else {
+                        const tparquet::ColumnChunk& chunk =
+                                
row_group.columns[field->physical_column_index];
+                        auto& chunk_meta = chunk.meta_data;
+                        int64_t chunk_start = has_dict_page(chunk_meta)
+                                                      ? 
chunk_meta.dictionary_page_offset
+                                                      : 
chunk_meta.data_page_offset;
+                        int64_t chunk_end = chunk_start + 
chunk_meta.total_compressed_size;
+                        DCHECK_GE(chunk_start, last_chunk_end);
+                        result.emplace_back(chunk_start, chunk_end);
+                        total_io_size += chunk_meta.total_compressed_size;
+                        last_chunk_end = chunk_end;
                     }
-                } else {
-                    const tparquet::ColumnChunk& chunk =
-                            row_group.columns[field->physical_column_index];
-                    auto& chunk_meta = chunk.meta_data;
-                    int64_t chunk_start = has_dict_page(chunk_meta)
-                                                  ? 
chunk_meta.dictionary_page_offset
-                                                  : 
chunk_meta.data_page_offset;
-                    int64_t chunk_end = chunk_start + 
chunk_meta.total_compressed_size;
-                    DCHECK_GE(chunk_start, last_chunk_end);
-                    result.emplace_back(chunk_start, chunk_end);
-                    total_io_size += chunk_meta.total_compressed_size;
-                    last_chunk_end = chunk_end;
                 }
             };
     const tparquet::RowGroup& row_group = 
_t_metadata->row_groups[group.row_group_id];


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to