This is an automated email from the ASF dual-hosted git repository.
kakachen pushed a commit to branch nested_column_prune
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/nested_column_prune by this
push:
new 9628af43edb use selected column ids for parquet merge io ranges.
9628af43edb is described below
commit 9628af43edb08fcef591bc1d1b14969c4b78de07
Author: kakachen <[email protected]>
AuthorDate: Fri Oct 31 16:52:18 2025 +0800
use selected column ids for parquet merge io ranges.
---
be/src/vec/exec/format/parquet/vparquet_reader.cpp | 43 ++++++++++++----------
1 file changed, 23 insertions(+), 20 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index e47a0eddaac..9a9b6c302e1 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -796,27 +796,30 @@ std::vector<io::PrefetchRange> ParquetReader::_generate_random_access_ranges(
size_t total_io_size = 0;
std::function<void(const FieldSchema*, const tparquet::RowGroup&)>
scalar_range =
[&](const FieldSchema* field, const tparquet::RowGroup& row_group)
{
- if (field->data_type->get_primitive_type() == TYPE_ARRAY) {
- scalar_range(&field->children[0], row_group);
- } else if (field->data_type->get_primitive_type() == TYPE_MAP)
{
- scalar_range(&field->children[0], row_group);
- scalar_range(&field->children[1], row_group);
- } else if (field->data_type->get_primitive_type() ==
TYPE_STRUCT) {
- for (int i = 0; i < field->children.size(); ++i) {
- scalar_range(&field->children[i], row_group);
+ if (_column_ids.empty() ||
+ _column_ids.find(field->get_column_id()) !=
_column_ids.end()) {
+ if (field->data_type->get_primitive_type() == TYPE_ARRAY) {
+ scalar_range(&field->children[0], row_group);
+ } else if (field->data_type->get_primitive_type() ==
TYPE_MAP) {
+ scalar_range(&field->children[0], row_group);
+ scalar_range(&field->children[1], row_group);
+ } else if (field->data_type->get_primitive_type() ==
TYPE_STRUCT) {
+ for (int i = 0; i < field->children.size(); ++i) {
+ scalar_range(&field->children[i], row_group);
+ }
+ } else {
+ const tparquet::ColumnChunk& chunk =
+
row_group.columns[field->physical_column_index];
+ auto& chunk_meta = chunk.meta_data;
+ int64_t chunk_start = has_dict_page(chunk_meta)
+ ?
chunk_meta.dictionary_page_offset
+ :
chunk_meta.data_page_offset;
+ int64_t chunk_end = chunk_start +
chunk_meta.total_compressed_size;
+ DCHECK_GE(chunk_start, last_chunk_end);
+ result.emplace_back(chunk_start, chunk_end);
+ total_io_size += chunk_meta.total_compressed_size;
+ last_chunk_end = chunk_end;
}
- } else {
- const tparquet::ColumnChunk& chunk =
- row_group.columns[field->physical_column_index];
- auto& chunk_meta = chunk.meta_data;
- int64_t chunk_start = has_dict_page(chunk_meta)
- ?
chunk_meta.dictionary_page_offset
- :
chunk_meta.data_page_offset;
- int64_t chunk_end = chunk_start +
chunk_meta.total_compressed_size;
- DCHECK_GE(chunk_start, last_chunk_end);
- result.emplace_back(chunk_start, chunk_end);
- total_io_size += chunk_meta.total_compressed_size;
- last_chunk_end = chunk_end;
}
};
const tparquet::RowGroup& row_group =
_t_metadata->row_groups[group.row_group_id];
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]