This is an automated email from the ASF dual-hosted git repository.
gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 892d0b7fa72 [refactor](be) Remove redundant remaining conjunct roots
(#63525)
892d0b7fa72 is described below
commit 892d0b7fa724fb494ef75fd07518d88416c6ce64
Author: Jerry Hu <[email protected]>
AuthorDate: Mon May 25 17:42:17 2026 +0800
[refactor](be) Remove redundant remaining conjunct roots (#63525)
Problem Summary: SegmentIterator kept a separate
`remaining_conjunct_roots` list in addition to
`common_expr_ctxs_push_down`. Both represented the same residual common
expression state, so index pruning paths had to keep them synchronized
and could drift when an expression was removed after index evaluation.
This change removes the duplicated `remaining_conjunct_roots` plumbing
and uses `common_expr_ctxs_push_down` as the single source of residual
common expression state for condition cache decisions, lazy
materialization column extraction, and expression execution checks.
---
be/src/exec/scan/olap_scanner.cpp | 12 ------------
be/src/storage/iterators.h | 2 --
be/src/storage/rowset/beta_rowset_reader.cpp | 1 -
be/src/storage/rowset/rowset_reader_context.h | 1 -
be/src/storage/segment/segment_iterator.cpp | 26 +++++++-------------------
be/src/storage/segment/segment_iterator.h | 4 +---
be/src/storage/tablet/tablet_reader.cpp | 1 -
be/src/storage/tablet/tablet_reader.h | 1 -
8 files changed, 8 insertions(+), 40 deletions(-)
diff --git a/be/src/exec/scan/olap_scanner.cpp
b/be/src/exec/scan/olap_scanner.cpp
index 3cfc9e22f9c..8656576dbeb 100644
--- a/be/src/exec/scan/olap_scanner.cpp
+++ b/be/src/exec/scan/olap_scanner.cpp
@@ -89,7 +89,6 @@ OlapScanner::OlapScanner(ScanLocalStateBase* parent,
OlapScanner::Params&& param
.rs_splits {},
.return_columns {},
.output_columns {},
- .remaining_conjunct_roots {},
.common_expr_ctxs_push_down {},
.topn_filter_source_node_ids {},
.key_group_cluster_key_idxes {},
@@ -316,17 +315,6 @@ Status OlapScanner::_init_tablet_reader_params(
_tablet_reader_params.push_down_agg_type_opt =
_local_state->get_push_down_agg_type();
- // TODO: If a new runtime filter arrives after `_conjuncts` move to
`_common_expr_ctxs_push_down`,
- if (_common_expr_ctxs_push_down.empty()) {
- for (auto& conjunct : _conjuncts) {
-
_tablet_reader_params.remaining_conjunct_roots.emplace_back(conjunct->root());
- }
- } else {
- for (auto& ctx : _common_expr_ctxs_push_down) {
-
_tablet_reader_params.remaining_conjunct_roots.emplace_back(ctx->root());
- }
- }
-
_tablet_reader_params.common_expr_ctxs_push_down =
_common_expr_ctxs_push_down;
_tablet_reader_params.virtual_column_exprs = _virtual_column_exprs;
_tablet_reader_params.vir_cid_to_idx_in_block = _vir_cid_to_idx_in_block;
diff --git a/be/src/storage/iterators.h b/be/src/storage/iterators.h
index 192c17c29f0..ff2d4836015 100644
--- a/be/src/storage/iterators.h
+++ b/be/src/storage/iterators.h
@@ -124,8 +124,6 @@ public:
// columns for orderby keys
std::vector<uint32_t>* read_orderby_key_columns = nullptr;
io::IOContext io_ctx;
- VExpr* remaining_vconjunct_root = nullptr;
- std::vector<VExprSPtr> remaining_conjunct_roots;
VExprContextSPtrs common_expr_ctxs_push_down;
const std::set<int32_t>* output_columns = nullptr;
// runtime state
diff --git a/be/src/storage/rowset/beta_rowset_reader.cpp
b/be/src/storage/rowset/beta_rowset_reader.cpp
index 3784d5360e6..9aa51eb4723 100644
--- a/be/src/storage/rowset/beta_rowset_reader.cpp
+++ b/be/src/storage/rowset/beta_rowset_reader.cpp
@@ -99,7 +99,6 @@ Status
BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
_read_options.preferred_block_size_bytes =
read_context->preferred_block_size_bytes;
_read_options.stats = _stats;
_read_options.push_down_agg_type_opt =
_read_context->push_down_agg_type_opt;
- _read_options.remaining_conjunct_roots =
_read_context->remaining_conjunct_roots;
_read_options.common_expr_ctxs_push_down =
_read_context->common_expr_ctxs_push_down;
_read_options.virtual_column_exprs = _read_context->virtual_column_exprs;
diff --git a/be/src/storage/rowset/rowset_reader_context.h
b/be/src/storage/rowset/rowset_reader_context.h
index f78774aef86..dc25c3105d0 100644
--- a/be/src/storage/rowset/rowset_reader_context.h
+++ b/be/src/storage/rowset/rowset_reader_context.h
@@ -66,7 +66,6 @@ struct RowsetReaderContext {
const DeleteHandler* delete_handler = nullptr;
OlapReaderStatistics* stats = nullptr;
RuntimeState* runtime_state = nullptr;
- std::vector<VExprSPtr> remaining_conjunct_roots;
VExprContextSPtrs common_expr_ctxs_push_down;
bool use_page_cache = false;
int sequence_id_idx = -1;
diff --git a/be/src/storage/segment/segment_iterator.cpp
b/be/src/storage/segment/segment_iterator.cpp
index 4c232a913b1..737db88ff1d 100644
--- a/be/src/storage/segment/segment_iterator.cpp
+++ b/be/src/storage/segment/segment_iterator.cpp
@@ -235,8 +235,7 @@ SegmentIterator::~SegmentIterator() = default;
void SegmentIterator::_init_row_bitmap_by_condition_cache() {
// Only dispose need column predicate and expr cal in condition cache
- if (!_col_predicates.empty() ||
- (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty())) {
+ if (!_col_predicates.empty() || !_common_expr_ctxs_push_down.empty()) {
if (_opts.condition_cache_digest) {
auto* condition_cache = ConditionCache::instance();
ConditionCache::CacheKey cache_key(_opts.rowset_id, _segment->id(),
@@ -522,8 +521,6 @@ Status SegmentIterator::_init_impl(const
StorageReadOptions& opts) {
_initial_block_row_max = _opts.block_row_max;
_block_size_predictor = _make_block_size_predictor();
- _remaining_conjunct_roots = opts.remaining_conjunct_roots;
-
if (_schema->rowid_col_idx() > 0) {
_record_rowids = true;
}
@@ -580,7 +577,6 @@ Status SegmentIterator::_init_impl(const
StorageReadOptions& opts) {
RETURN_IF_ERROR(init_iterators());
RETURN_IF_ERROR(_construct_compound_expr_context());
- _enable_common_expr_pushdown = !_common_expr_ctxs_push_down.empty();
VLOG_DEBUG << fmt::format(
"Segment iterator init, virtual_column_exprs size: {}, "
"_vir_cid_to_idx_in_block size: {}, common_expr_pushdown size: {}",
@@ -597,7 +593,7 @@ void SegmentIterator::_initialize_predicate_results() {
_column_predicate_index_exec_status[cid][pred] = false;
}
- _calculate_expr_in_remaining_conjunct_root();
+ _calculate_common_expr_index_exec_status();
}
Status SegmentIterator::init_iterators() {
@@ -928,12 +924,6 @@ Status
SegmentIterator::_get_row_ranges_by_column_conditions() {
(*it)->root().get());
if (result != nullptr) {
_row_bitmap &= *result->get_data_bitmap();
- auto root = (*it)->root();
- auto iter_find =
std::find(_remaining_conjunct_roots.begin(),
-
_remaining_conjunct_roots.end(), root);
- if (iter_find != _remaining_conjunct_roots.end()) {
- _remaining_conjunct_roots.erase(iter_find);
- }
it = _common_expr_ctxs_push_down.erase(it);
}
} else {
@@ -1404,8 +1394,6 @@ Status SegmentIterator::_apply_index_expr() {
++it;
}
}
- // TODOļ¼Do we need to remove these expr root from
_remaining_conjunct_roots?
-
return Status::OK();
}
@@ -2065,9 +2053,9 @@ Status SegmentIterator::_vec_init_lazy_materialization() {
// Step2: extract columns that can execute expr context
_is_common_expr_column.resize(_schema->columns().size(), false);
- if (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty()) {
- for (auto expr : _remaining_conjunct_roots) {
- RETURN_IF_ERROR(_extract_common_expr_columns(expr));
+ if (!_common_expr_ctxs_push_down.empty()) {
+ for (const auto& expr_ctx : _common_expr_ctxs_push_down) {
+ RETURN_IF_ERROR(_extract_common_expr_columns(expr_ctx->root()));
}
if (!_common_expr_columns.empty()) {
_is_need_expr_eval = true;
@@ -3238,7 +3226,7 @@ Status SegmentIterator::_process_common_expr(uint16_t*
sel_rowid_idx, uint16_t&
Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx,
uint16_t& selected_size,
Block* block) {
SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns);
- DCHECK(!_remaining_conjunct_roots.empty());
+ DCHECK(!_common_expr_ctxs_push_down.empty());
DCHECK(block->rows() != 0);
int prev_columns = block->columns();
uint16_t original_size = selected_size;
@@ -3435,7 +3423,7 @@ Status
SegmentIterator::_construct_compound_expr_context() {
return Status::OK();
}
-void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
+void SegmentIterator::_calculate_common_expr_index_exec_status() {
for (const auto& root_expr_ctx : _common_expr_ctxs_push_down) {
const auto& root_expr = root_expr_ctx->root();
if (root_expr == nullptr) {
diff --git a/be/src/storage/segment/segment_iterator.h
b/be/src/storage/segment/segment_iterator.h
index 60dc043ad76..a54aae7db89 100644
--- a/be/src/storage/segment/segment_iterator.h
+++ b/be/src/storage/segment/segment_iterator.h
@@ -327,7 +327,7 @@ private:
bool _check_all_conditions_passed_inverted_index_for_column(ColumnId cid,
bool
default_return = false);
- void _calculate_expr_in_remaining_conjunct_root();
+ void _calculate_common_expr_index_exec_status();
Status _process_eof(Block* block);
@@ -420,8 +420,6 @@ private:
// make a copy of `_opts.column_predicates` in order to make local changes
std::vector<std::shared_ptr<ColumnPredicate>> _col_predicates;
VExprContextSPtrs _common_expr_ctxs_push_down;
- bool _enable_common_expr_pushdown = false;
- std::vector<VExprSPtr> _remaining_conjunct_roots;
std::set<ColumnId> _not_apply_index_pred;
// row schema of the key to seek
diff --git a/be/src/storage/tablet/tablet_reader.cpp
b/be/src/storage/tablet/tablet_reader.cpp
index fb71c6f9541..ffb98c46981 100644
--- a/be/src/storage/tablet/tablet_reader.cpp
+++ b/be/src/storage/tablet/tablet_reader.cpp
@@ -179,7 +179,6 @@ Status TabletReader::_capture_rs_readers(const
ReaderParams& read_params) {
_reader_context.record_rowids = read_params.record_rowids;
_reader_context.rowid_conversion = read_params.rowid_conversion;
_reader_context.is_key_column_group = read_params.is_key_column_group;
- _reader_context.remaining_conjunct_roots =
read_params.remaining_conjunct_roots;
_reader_context.common_expr_ctxs_push_down =
read_params.common_expr_ctxs_push_down;
_reader_context.output_columns = &read_params.output_columns;
_reader_context.push_down_agg_type_opt =
read_params.push_down_agg_type_opt;
diff --git a/be/src/storage/tablet/tablet_reader.h
b/be/src/storage/tablet/tablet_reader.h
index d12fc6ccd6d..6bf57dbe441 100644
--- a/be/src/storage/tablet/tablet_reader.h
+++ b/be/src/storage/tablet/tablet_reader.h
@@ -166,7 +166,6 @@ public:
std::vector<ColumnId>* origin_return_columns = nullptr;
std::unordered_set<uint32_t>* tablet_columns_convert_to_null_set =
nullptr;
TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE;
- std::vector<VExprSPtr> remaining_conjunct_roots;
VExprContextSPtrs common_expr_ctxs_push_down;
// used for compaction to record row ids
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]