This is an automated email from the ASF dual-hosted git repository. huajianlan pushed a commit to branch nested_column_prune in repository https://gitbox.apache.org/repos/asf/doris.git
commit 4f4b423b6ad3d3c2990a070206254e0fb299f057 Author: kakachen <[email protected]> AuthorDate: Fri Oct 31 16:52:18 2025 +0800 use selected column ids for parquet merge io ranges. --- be/src/vec/exec/format/parquet/vparquet_reader.cpp | 43 ++++++++++++---------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index e47a0eddaac..9a9b6c302e1 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -796,27 +796,30 @@ std::vector<io::PrefetchRange> ParquetReader::_generate_random_access_ranges( size_t total_io_size = 0; std::function<void(const FieldSchema*, const tparquet::RowGroup&)> scalar_range = [&](const FieldSchema* field, const tparquet::RowGroup& row_group) { - if (field->data_type->get_primitive_type() == TYPE_ARRAY) { - scalar_range(&field->children[0], row_group); - } else if (field->data_type->get_primitive_type() == TYPE_MAP) { - scalar_range(&field->children[0], row_group); - scalar_range(&field->children[1], row_group); - } else if (field->data_type->get_primitive_type() == TYPE_STRUCT) { - for (int i = 0; i < field->children.size(); ++i) { - scalar_range(&field->children[i], row_group); + if (_column_ids.empty() || + _column_ids.find(field->get_column_id()) != _column_ids.end()) { + if (field->data_type->get_primitive_type() == TYPE_ARRAY) { + scalar_range(&field->children[0], row_group); + } else if (field->data_type->get_primitive_type() == TYPE_MAP) { + scalar_range(&field->children[0], row_group); + scalar_range(&field->children[1], row_group); + } else if (field->data_type->get_primitive_type() == TYPE_STRUCT) { + for (int i = 0; i < field->children.size(); ++i) { + scalar_range(&field->children[i], row_group); + } + } else { + const tparquet::ColumnChunk& chunk = + row_group.columns[field->physical_column_index]; + auto& chunk_meta = chunk.meta_data; + int64_t chunk_start = has_dict_page(chunk_meta) + ? chunk_meta.dictionary_page_offset + : chunk_meta.data_page_offset; + int64_t chunk_end = chunk_start + chunk_meta.total_compressed_size; + DCHECK_GE(chunk_start, last_chunk_end); + result.emplace_back(chunk_start, chunk_end); + total_io_size += chunk_meta.total_compressed_size; + last_chunk_end = chunk_end; } - } else { - const tparquet::ColumnChunk& chunk = - row_group.columns[field->physical_column_index]; - auto& chunk_meta = chunk.meta_data; - int64_t chunk_start = has_dict_page(chunk_meta) - ? chunk_meta.dictionary_page_offset - : chunk_meta.data_page_offset; - int64_t chunk_end = chunk_start + chunk_meta.total_compressed_size; - DCHECK_GE(chunk_start, last_chunk_end); - result.emplace_back(chunk_start, chunk_end); - total_io_size += chunk_meta.total_compressed_size; - last_chunk_end = chunk_end; } }; const tparquet::RowGroup& row_group = _t_metadata->row_groups[group.row_group_id]; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
