This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch tpc_preview2
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/tpc_preview2 by this push:
new 51716c93cb6 attach always true counter to column predicate
51716c93cb6 is described below
commit 51716c93cb6f7630a01fc9355cef544962597f1e
Author: BiteTheDDDDt <[email protected]>
AuthorDate: Thu Nov 27 18:26:30 2025 +0800
attach always true counter to column predicate
update
---
be/src/exec/olap_common.h | 29 ++++++++++++++--------
be/src/olap/bitmap_filter_predicate.h | 2 +-
be/src/olap/column_predicate.h | 21 +++++++++++++---
be/src/olap/comparison_predicate.h | 19 +++++++-------
be/src/olap/filter_olap_param.h | 8 +++++-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 3 ++-
be/src/olap/tablet_reader.cpp | 4 +--
be/src/pipeline/exec/scan_operator.cpp | 16 +++++++++---
be/src/vec/exprs/vruntimefilter_wrapper.h | 3 +++
9 files changed, 73 insertions(+), 32 deletions(-)
diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index 5d9f3741c51..8cb0311b326 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -210,7 +210,8 @@ public:
if (null_pred.condition_values.size() != 0) {
filters.emplace_back(_column_name, null_pred,
_runtime_filter_id,
_predicate_filtered_rows_counter,
- _predicate_input_rows_counter);
+ _predicate_input_rows_counter,
+ _predicate_always_true_rows_counter);
return;
}
@@ -223,9 +224,9 @@ public:
}
if (low.condition_values.size() != 0) {
- filters.emplace_back(_column_name, low, _runtime_filter_id,
- _predicate_filtered_rows_counter,
- _predicate_input_rows_counter);
+ filters.emplace_back(
+ _column_name, low, _runtime_filter_id,
_predicate_filtered_rows_counter,
+ _predicate_input_rows_counter,
_predicate_always_true_rows_counter);
}
TCondition high;
@@ -237,9 +238,9 @@ public:
}
if (high.condition_values.size() != 0) {
- filters.emplace_back(_column_name, high, _runtime_filter_id,
- _predicate_filtered_rows_counter,
- _predicate_input_rows_counter);
+ filters.emplace_back(
+ _column_name, high, _runtime_filter_id,
_predicate_filtered_rows_counter,
+ _predicate_input_rows_counter,
_predicate_always_true_rows_counter);
}
} else {
// 3. convert to is null and is not null filter condition
@@ -254,7 +255,8 @@ public:
if (null_pred.condition_values.size() != 0) {
filters.emplace_back(_column_name, null_pred,
_runtime_filter_id,
_predicate_filtered_rows_counter,
- _predicate_input_rows_counter);
+ _predicate_input_rows_counter,
+ _predicate_always_true_rows_counter);
}
}
}
@@ -271,7 +273,8 @@ public:
if (condition.condition_values.size() != 0) {
filters.emplace_back(_column_name, condition, _runtime_filter_id,
- _predicate_filtered_rows_counter,
_predicate_input_rows_counter);
+ _predicate_filtered_rows_counter,
_predicate_input_rows_counter,
+ _predicate_always_true_rows_counter);
}
}
@@ -311,7 +314,8 @@ public:
void attach_profile_counter(
int runtime_filter_id,
std::shared_ptr<RuntimeProfile::Counter>
predicate_filtered_rows_counter,
- std::shared_ptr<RuntimeProfile::Counter>
predicate_input_rows_counter) {
+ std::shared_ptr<RuntimeProfile::Counter>
predicate_input_rows_counter,
+ std::shared_ptr<RuntimeProfile::Counter>
predicate_always_true_rows_counter) {
DCHECK(predicate_filtered_rows_counter != nullptr);
DCHECK(predicate_input_rows_counter != nullptr);
@@ -323,6 +327,9 @@ public:
if (predicate_input_rows_counter != nullptr) {
_predicate_input_rows_counter = predicate_input_rows_counter;
}
+ if (predicate_always_true_rows_counter != nullptr) {
+ _predicate_always_true_rows_counter =
predicate_always_true_rows_counter;
+ }
}
int precision() const { return _precision; }
@@ -397,6 +404,8 @@ private:
std::make_shared<RuntimeProfile::Counter>(TUnit::UNIT, 0);
std::shared_ptr<RuntimeProfile::Counter> _predicate_input_rows_counter =
std::make_shared<RuntimeProfile::Counter>(TUnit::UNIT, 0);
+ std::shared_ptr<RuntimeProfile::Counter>
_predicate_always_true_rows_counter =
+ std::make_shared<RuntimeProfile::Counter>(TUnit::UNIT, 0);
};
template <>
const typename ColumnValueRange<TYPE_FLOAT>::CppType
ColumnValueRange<TYPE_FLOAT>::TYPE_MIN;
diff --git a/be/src/olap/bitmap_filter_predicate.h b/be/src/olap/bitmap_filter_predicate.h
index 9bf44354a98..48b26c69359 100644
--- a/be/src/olap/bitmap_filter_predicate.h
+++ b/be/src/olap/bitmap_filter_predicate.h
@@ -111,7 +111,7 @@ uint16_t BitmapFilterColumnPredicate<T>::_evaluate_inner(const vectorized::IColu
} else {
new_size = evaluate<false>(column, nullptr, sel, size);
}
- update_filter_info(size - new_size, size);
+ update_filter_info(size - new_size, size, 0);
return new_size;
}
} //namespace doris
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 431c039dfc1..ff569c0a173 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -190,6 +190,7 @@ public:
Defer defer([&] { try_reset_judge_selectivity(); });
if (always_true()) {
+ update_filter_info(0, 0, size);
return size;
}
@@ -197,7 +198,7 @@ public:
if (_can_ignore()) {
do_judge_selectivity(size - new_size, size);
}
- update_filter_info(size - new_size, size);
+ update_filter_info(size - new_size, size, 0);
return new_size;
}
virtual void evaluate_and(const vectorized::IColumn& column, const
uint16_t* sel, uint16_t size,
@@ -276,7 +277,8 @@ public:
void attach_profile_counter(
int filter_id, std::shared_ptr<RuntimeProfile::Counter>
predicate_filtered_rows_counter,
- std::shared_ptr<RuntimeProfile::Counter>
predicate_input_rows_counter) {
+ std::shared_ptr<RuntimeProfile::Counter>
predicate_input_rows_counter,
+ std::shared_ptr<RuntimeProfile::Counter>
predicate_always_true_rows_counter) {
_runtime_filter_id = filter_id;
DCHECK(predicate_filtered_rows_counter != nullptr);
DCHECK(predicate_input_rows_counter != nullptr);
@@ -287,12 +289,22 @@ public:
if (predicate_input_rows_counter != nullptr) {
_predicate_input_rows_counter = predicate_input_rows_counter;
}
+ if (predicate_always_true_rows_counter != nullptr) {
+ _predicate_always_true_rows_counter =
predicate_always_true_rows_counter;
+ }
}
/// TODO: Currently we only record statistics for runtime filters, in the
future we should record for all predicates
- void update_filter_info(int64_t filter_rows, int64_t input_rows) const {
+ void update_filter_info(int64_t filter_rows, int64_t input_rows,
+ int64_t always_true_rows) const {
+ if (_predicate_input_rows_counter == nullptr ||
+ _predicate_filtered_rows_counter == nullptr ||
+ _predicate_always_true_rows_counter == nullptr) {
+ throw Exception(INTERNAL_ERROR, "Predicate profile counters are
not initialized");
+ }
COUNTER_UPDATE(_predicate_input_rows_counter, input_rows);
COUNTER_UPDATE(_predicate_filtered_rows_counter, filter_rows);
+ COUNTER_UPDATE(_predicate_always_true_rows_counter, always_true_rows);
}
static std::string pred_type_string(PredicateType type) {
@@ -384,9 +396,10 @@ protected:
std::shared_ptr<RuntimeProfile::Counter> _predicate_filtered_rows_counter =
std::make_shared<RuntimeProfile::Counter>(TUnit::UNIT, 0);
-
std::shared_ptr<RuntimeProfile::Counter> _predicate_input_rows_counter =
std::make_shared<RuntimeProfile::Counter>(TUnit::UNIT, 0);
+ std::shared_ptr<RuntimeProfile::Counter>
_predicate_always_true_rows_counter =
+ std::make_shared<RuntimeProfile::Counter>(TUnit::UNIT, 0);
};
} //namespace doris
diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h
index f89437bdf95..f798317cbc8 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -349,8 +349,8 @@ public:
}
template <bool is_and>
- void __attribute__((flatten))
- _evaluate_vec_internal(const vectorized::IColumn& column, uint16_t size,
bool* flags) const {
+ void __attribute__((flatten)) _evaluate_vec_internal(const
vectorized::IColumn& column,
+ uint16_t size, bool*
flags) const {
uint16_t current_evaluated_rows = 0;
uint16_t current_passed_rows = 0;
if (_can_ignore()) {
@@ -368,8 +368,8 @@ public:
// so reference here is safe.
//
https://stackoverflow.com/questions/14688285/c-local-variable-destruction-order
Defer defer([&]() {
- update_filter_info(current_evaluated_rows - current_passed_rows,
- current_evaluated_rows);
+ update_filter_info(current_evaluated_rows - current_passed_rows,
current_evaluated_rows,
+ 0);
try_reset_judge_selectivity();
});
@@ -578,9 +578,10 @@ private:
}
template <bool is_nullable, bool is_and, typename TArray, typename TValue>
- void __attribute__((flatten))
- _base_loop_vec(uint16_t size, bool* __restrict bflags, const uint8_t*
__restrict null_map,
- const TArray* __restrict data_array, const TValue& value)
const {
+ void __attribute__((flatten)) _base_loop_vec(uint16_t size, bool*
__restrict bflags,
+ const uint8_t* __restrict
null_map,
+ const TArray* __restrict
data_array,
+ const TValue& value) const {
//uint8_t helps compiler to generate vectorized code
auto* flags = reinterpret_cast<uint8_t*>(bflags);
if constexpr (is_and) {
@@ -695,8 +696,8 @@ private:
}
}
- int32_t __attribute__((flatten))
- _find_code_from_dictionary_column(const vectorized::ColumnDictI32& column)
const {
+ int32_t __attribute__((flatten)) _find_code_from_dictionary_column(
+ const vectorized::ColumnDictI32& column) const {
int32_t code = 0;
if (_segment_id_to_cached_code.if_contains(
column.get_rowset_segment_id(),
diff --git a/be/src/olap/filter_olap_param.h b/be/src/olap/filter_olap_param.h
index 272fee63fb5..0818c088523 100644
--- a/be/src/olap/filter_olap_param.h
+++ b/be/src/olap/filter_olap_param.h
@@ -25,7 +25,8 @@ template <typename T>
struct FilterOlapParam {
FilterOlapParam(std::string column_name, T filter, int runtime_filter_id,
std::shared_ptr<RuntimeProfile::Counter> filtered_counter,
- std::shared_ptr<RuntimeProfile::Counter> input_counter)
+ std::shared_ptr<RuntimeProfile::Counter> input_counter,
+ std::shared_ptr<RuntimeProfile::Counter>
always_true_counter)
: column_name(std::move(column_name)),
filter(std::move(filter)),
runtime_filter_id(runtime_filter_id) {
@@ -37,6 +38,9 @@ struct FilterOlapParam {
if (input_counter != nullptr) {
input_rows_counter = input_counter;
}
+ if (always_true_counter != nullptr) {
+ always_true_rows_counter = always_true_counter;
+ }
}
std::string column_name;
@@ -46,6 +50,8 @@ struct FilterOlapParam {
std::make_shared<RuntimeProfile::Counter>(TUnit::UNIT, 0);
std::shared_ptr<RuntimeProfile::Counter> input_rows_counter =
std::make_shared<RuntimeProfile::Counter>(TUnit::UNIT, 0);
+ std::shared_ptr<RuntimeProfile::Counter> always_true_rows_counter =
+ std::make_shared<RuntimeProfile::Counter>(TUnit::UNIT, 0);
};
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e92f4c37ab4..5d882766a5a 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -2254,7 +2254,8 @@ uint16_t SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_
for (const auto& pred : _pre_eval_block_predicate) {
if (!pred->always_true()) {
all_pred_always_true = false;
- break;
+ } else {
+ pred->update_filter_info(0, 0, selected_size);
}
}
if (all_pred_always_true) {
diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp
index 3dc120ec4c4..56f0a3070bd 100644
--- a/be/src/olap/tablet_reader.cpp
+++ b/be/src/olap/tablet_reader.cpp
@@ -527,7 +527,7 @@ Status TabletReader::_init_conditions_param(const ReaderParams& read_params) {
for (const auto& param : params) {
ColumnPredicate* predicate =
_parse_to_predicate({param.column_name, param.filter});
predicate->attach_profile_counter(param.runtime_filter_id,
param.filtered_rows_counter,
- param.input_rows_counter);
+ param.input_rows_counter,
param.always_true_rows_counter);
predicates.emplace_back(predicate);
}
};
@@ -545,7 +545,7 @@ Status TabletReader::_init_conditions_param(const ReaderParams& read_params) {
// record condition value into predicate_params in order to pushdown
segment_iterator,
// _gen_predicate_result_sign will build predicate result unique sign
with condition value
predicate->attach_profile_counter(param.runtime_filter_id,
param.filtered_rows_counter,
- param.input_rows_counter);
+ param.input_rows_counter,
param.always_true_rows_counter);
predicates.emplace_back(predicate);
}
parse_and_emplace_predicates(read_params.bloom_filters);
diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp
index 886d2f919e5..9b1228ace13 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -335,7 +335,8 @@ Status ScanLocalState<Derived>::_normalize_predicate(
value_range.attach_profile_counter(
rf_expr->filter_id(),
rf_expr->predicate_filtered_rows_counter(),
-
rf_expr->predicate_input_rows_counter());
+
rf_expr->predicate_input_rows_counter(),
+
rf_expr->predicate_always_true_rows_counter());
}
}};
RETURN_IF_PUSH_DOWN(_normalize_in_and_eq_predicate(
@@ -402,7 +403,8 @@ Status ScanLocalState<Derived>::_normalize_bloom_filter(vectorized::VExpr* expr,
_filter_predicates.bloom_filters.emplace_back(
slot->col_name(), expr->get_bloom_filter_func(),
rf_expr->filter_id(),
rf_expr->predicate_filtered_rows_counter(),
- rf_expr->predicate_input_rows_counter());
+ rf_expr->predicate_input_rows_counter(),
+ rf_expr->predicate_always_true_rows_counter());
*pdt = temp_pdt;
}
}
@@ -422,7 +424,8 @@ Status ScanLocalState<Derived>::_normalize_bitmap_filter(vectorized::VExpr* expr
_filter_predicates.bitmap_filters.emplace_back(
slot->col_name(), expr->get_bitmap_filter_func(),
rf_expr->filter_id(),
rf_expr->predicate_filtered_rows_counter(),
- rf_expr->predicate_input_rows_counter());
+ rf_expr->predicate_input_rows_counter(),
+ rf_expr->predicate_always_true_rows_counter());
*pdt = temp_pdt;
}
}
@@ -634,16 +637,21 @@ Status ScanLocalState<Derived>::_normalize_in_and_eq_predicate(vectorized::VExpr
int runtime_filter_id = -1;
std::shared_ptr<RuntimeProfile::Counter>
predicate_filtered_rows_counter = nullptr;
std::shared_ptr<RuntimeProfile::Counter>
predicate_input_rows_counter = nullptr;
+ std::shared_ptr<RuntimeProfile::Counter>
predicate_always_true_rows_counter =
+ nullptr;
if (expr_ctx->root()->is_rf_wrapper()) {
auto* rf_expr =
assert_cast<vectorized::VRuntimeFilterWrapper*>(expr_ctx->root().get());
runtime_filter_id = rf_expr->filter_id();
predicate_filtered_rows_counter =
rf_expr->predicate_filtered_rows_counter();
predicate_input_rows_counter =
rf_expr->predicate_input_rows_counter();
+ predicate_always_true_rows_counter =
+ rf_expr->predicate_always_true_rows_counter();
}
_filter_predicates.in_filters.emplace_back(
slot->col_name(), expr->get_set_func(),
runtime_filter_id,
- predicate_filtered_rows_counter,
predicate_input_rows_counter);
+ predicate_filtered_rows_counter,
predicate_input_rows_counter,
+ predicate_always_true_rows_counter);
*pdt = PushDownType::ACCEPTABLE;
return Status::OK();
}
diff --git a/be/src/vec/exprs/vruntimefilter_wrapper.h b/be/src/vec/exprs/vruntimefilter_wrapper.h
index f4eed29210f..3535898915b 100644
--- a/be/src/vec/exprs/vruntimefilter_wrapper.h
+++ b/be/src/vec/exprs/vruntimefilter_wrapper.h
@@ -124,6 +124,9 @@ public:
std::shared_ptr<RuntimeProfile::Counter> predicate_input_rows_counter()
const {
return _rf_input_rows;
}
+ std::shared_ptr<RuntimeProfile::Counter>
predicate_always_true_rows_counter() const {
+ return _always_true_filter_rows;
+ }
private:
void reset_judge_selectivity() const {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]