mrhhsg commented on code in PR #63389:
URL: https://github.com/apache/doris/pull/63389#discussion_r3272297851
##########
be/src/format/parquet/vparquet_reader.cpp:
##########
@@ -1443,6 +1577,76 @@ Status ParquetReader::_process_column_stat_filter(
return Status::OK();
}
+Status ParquetReader::_process_expr_zonemap_filter(const tparquet::RowGroup&
row_group,
+ bool* filter_group) {
+ DORIS_CHECK(filter_group != nullptr);
+ if (!config::enable_expr_zonemap_filter || _zonemap_filter_conjuncts ==
nullptr ||
+ _zonemap_filter_conjuncts->empty() || !_enable_filter_by_min_max) {
+ return Status::OK();
+ }
+
+ std::set<int> column_ids;
+ for (const auto& conjunct : *_zonemap_filter_conjuncts) {
+ if (conjunct->root() != nullptr) {
+ conjunct->root()->collect_slot_column_ids(column_ids);
+ }
+ }
+ if (column_ids.empty()) {
+ return Status::OK();
+ }
+
+ ZoneMapEvalContext ctx;
+ for (const int cid : column_ids) {
+ if (cid < 0 || cid >= _tuple_descriptor->slots().size()) {
+ continue;
+ }
+ auto* slot = _tuple_descriptor->slots()[cid];
+ ZoneMapEvalContext::SlotZoneMap slot_zone_map;
+ slot_zone_map.data_type = slot->type();
+ if (!_table_info_node_ptr->children_column_exists(slot->col_name())) {
+ ctx.slots.emplace(cid, std::move(slot_zone_map));
+ continue;
+ }
+ const auto& file_col_name =
+
_table_info_node_ptr->children_file_column_name(slot->col_name());
+ const FieldSchema* col_schema =
_file_metadata->schema().get_column(file_col_name);
+ int parquet_col_id = col_schema->physical_column_index;
+ const auto& meta_data = row_group.columns[parquet_col_id].meta_data;
+
Review Comment:
已处理:row-group expr zonemap 路径现在会在访问 `row_group.columns[parquet_col_id]` 之前检查
`parquet_col_id < 0`;对复杂类型父列,按“无 zonemap 统计信息”做保守 fallback,不再访问负下标。
同时补充了测试用例
`ParquetExprTest.test_expr_zonemap_row_group_filter_skips_complex_parent_column`
来覆盖该场景,并已运行:
- `./run-be-ut.sh --run --filter=ParquetExprTest.test_expr_zonemap_*`
- `build-support/clang-format.sh`
- `build-support/check-format.sh`
- `git diff --check`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]