This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8a274d7851 [feature-wip](new-scan) refactor some interface about
predicate push down in scan node (#12527)
8a274d7851 is described below
commit 8a274d7851b2007c03d31a60b8fedbb329dc8f82
Author: Mingyu Chen <[email protected]>
AuthorDate: Tue Sep 13 10:25:13 2022 +0800
[feature-wip](new-scan) refactor some interface about predicate push down
in scan node (#12527)
This PR introduces a new enum type `PushDownType`:
```
enum class PushDownType {
// The predicate can not be pushed down to data source
UNACCEPTABLE,
// The predicate can be pushed down to data source
// and the data source can fully evaluate it
ACCEPTABLE,
// The predicate can be pushed down to data source
// but the data source can not fully evaluate it.
PARTIAL_ACCEPTABLE
};
```
Derived classes of VScanNode can override the following methods to determine
whether to accept
a binary/in/bloom filter/is null predicate:
```
PushDownType _should_push_down_binary_predicate();
PushDownType _should_push_down_in_predicate();
PushDownType _should_push_down_function_filter();
PushDownType _should_push_down_bloom_filter();
PushDownType _should_push_down_is_null_predicate();
```
---
be/src/vec/exec/scan/new_olap_scan_node.cpp | 77 ++-------
be/src/vec/exec/scan/new_olap_scan_node.h | 14 +-
be/src/vec/exec/scan/vscan_node.cpp | 259 ++++++++++++++++++----------
be/src/vec/exec/scan/vscan_node.h | 59 +++++--
4 files changed, 224 insertions(+), 185 deletions(-)
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp
b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index d4b1b6a7d7..973e6c23ee 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -125,7 +125,7 @@ static std::string olap_filters_to_string(const
std::vector<doris::TCondition>&
filters_string += "[";
for (auto it = filters.cbegin(); it != filters.cend(); it++) {
if (it != filters.cbegin()) {
- filters_string += ",";
+ filters_string += ", ";
}
filters_string += olap_filter_to_string(*it);
}
@@ -181,6 +181,12 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() {
}
}
+ // Append value ranges in "_not_in_value_ranges"
+ for (auto& range : _not_in_value_ranges) {
+ std::visit([&](auto&& the_range) {
the_range.to_in_condition(_olap_filters, false); },
+ range);
+ }
+
_runtime_profile->add_info_string("PushDownPredicates",
olap_filters_to_string(_olap_filters));
_runtime_profile->add_info_string("KeyRanges", _scan_keys.debug_string());
VLOG_CRITICAL << _scan_keys.debug_string();
@@ -188,67 +194,12 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() {
return Status::OK();
}
-bool NewOlapScanNode::_should_push_down_binary_predicate(
- VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef*
constant_val,
- int* slot_ref_child, const std::function<bool(const std::string&)>&
fn_checker) {
- if (!fn_checker(fn_call->fn().name.function_name)) {
- return false;
- }
-
- const auto& children = fn_call->children();
- DCHECK(children.size() == 2);
- for (size_t i = 0; i < children.size(); i++) {
- if (VExpr::expr_without_cast(children[i])->node_type() !=
TExprNodeType::SLOT_REF) {
- // not a slot ref(column)
- continue;
- }
- if (!children[1 - i]->is_constant()) {
- // only handle constant value
- return false;
- } else {
- if (const ColumnConst* const_column =
check_and_get_column<ColumnConst>(
- children[1 - i]->get_const_col(expr_ctx)->column_ptr))
{
- *slot_ref_child = i;
- *constant_val = const_column->get_data_at(0);
- } else {
- return false;
- }
- }
- }
- return true;
-}
-
-bool NewOlapScanNode::_should_push_down_in_predicate(VInPredicate* pred,
VExprContext* expr_ctx,
- bool is_not_in) {
- if (pred->is_not_in() != is_not_in) {
- return false;
- }
- InState* state = reinterpret_cast<InState*>(
- expr_ctx->fn_context(pred->fn_context_index())
- ->get_function_state(FunctionContext::FRAGMENT_LOCAL));
- HybridSetBase* set = state->hybrid_set.get();
-
- // if there are too many elements in InPredicate, exceed the limit,
- // we will not push any condition of this column to storage engine.
- // because too many conditions pushed down to storage engine may even
- // slow down the query process.
- // ATTN: This is just an experience value. You may need to try
- // different thresholds to improve performance.
- if (set->size() > _max_pushdown_conditions_per_column) {
- VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit
"
- << _max_pushdown_conditions_per_column;
- return false;
- }
- return true;
-}
-
-bool NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall*
fn_call,
- VExprContext* expr_ctx,
- StringVal*
constant_str,
-
doris_udf::FunctionContext** fn_ctx) {
+VScanNode::PushDownType NewOlapScanNode::_should_push_down_function_filter(
+ VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringVal*
constant_str,
+ doris_udf::FunctionContext** fn_ctx) {
// Now only `like` function filters is supported to push down
if (fn_call->fn().name.function_name != "like") {
- return false;
+ return PushDownType::UNACCEPTABLE;
}
const auto& children = fn_call->children();
@@ -262,19 +213,19 @@ bool
NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_cal
}
if (!children[1 - i]->is_constant()) {
// only handle constant value
- return false;
+ return PushDownType::UNACCEPTABLE;
} else {
DCHECK(children[1 - i]->type().is_string_type());
if (const ColumnConst* const_column =
check_and_get_column<ColumnConst>(
children[1 - i]->get_const_col(expr_ctx)->column_ptr))
{
*constant_str = const_column->get_data_at(0).to_string_val();
} else {
- return false;
+ return PushDownType::UNACCEPTABLE;
}
}
}
*fn_ctx = func_cxt;
- return true;
+ return PushDownType::ACCEPTABLE;
}
// PlanFragmentExecutor will call this method to set scan range
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h
b/be/src/vec/exec/scan/new_olap_scan_node.h
index a721737585..a922b009b9 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.h
+++ b/be/src/vec/exec/scan/new_olap_scan_node.h
@@ -38,17 +38,13 @@ protected:
Status _process_conjuncts() override;
bool _is_key_column(const std::string& col_name) override;
- bool _should_push_down_binary_predicate(
- VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef*
constant_val,
- int* slot_ref_child,
- const std::function<bool(const std::string&)>& fn_checker)
override;
+ PushDownType _should_push_down_function_filter(VectorizedFnCall* fn_call,
+ VExprContext* expr_ctx,
StringVal* constant_str,
+
doris_udf::FunctionContext** fn_ctx) override;
- bool _should_push_down_in_predicate(VInPredicate* in_pred, VExprContext*
expr_ctx,
- bool is_not_in) override;
+ PushDownType _should_push_down_bloom_filter() override { return
PushDownType::ACCEPTABLE; }
- bool _should_push_down_function_filter(VectorizedFnCall* fn_call,
VExprContext* expr_ctx,
- StringVal* constant_str,
- doris_udf::FunctionContext**
fn_ctx) override;
+ PushDownType _should_push_down_is_null_predicate() override { return
PushDownType::ACCEPTABLE; }
Status _init_scanners(std::list<VScanner*>* scanners) override;
diff --git a/be/src/vec/exec/scan/vscan_node.cpp
b/be/src/vec/exec/scan/vscan_node.cpp
index 0dd48d58b4..23661f7c5b 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -30,11 +30,11 @@
namespace doris::vectorized {
-#define RETURN_IF_PUSH_DOWN(stmt) \
- if (!push_down) { \
- stmt; \
- } else { \
- return; \
+#define RETURN_IF_PUSH_DOWN(stmt) \
+ if (pdt == PushDownType::UNACCEPTABLE) { \
+ stmt; \
+ } else { \
+ return; \
}
static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) {
@@ -403,43 +403,44 @@ VExpr* VScanNode::_normalize_predicate(VExpr*
conjunct_expr_root) {
if (conjunct_expr_root != nullptr) {
if (is_leaf(conjunct_expr_root)) {
auto impl = conjunct_expr_root->get_impl();
+ // If impl is not null, which means this a conjuncts from runtime
filter.
VExpr* cur_expr = impl ? const_cast<VExpr*>(impl) :
conjunct_expr_root;
SlotDescriptor* slot;
ColumnValueRangeType* range = nullptr;
- bool push_down = false;
- _eval_const_conjuncts(cur_expr, *(_vconjunct_ctx_ptr.get()),
&push_down);
- if (!push_down &&
+ PushDownType pdt = PushDownType::UNACCEPTABLE;
+ _eval_const_conjuncts(cur_expr, *(_vconjunct_ctx_ptr.get()), &pdt);
+ if (pdt == PushDownType::UNACCEPTABLE &&
(_is_predicate_acting_on_slot(cur_expr, in_predicate_checker,
&slot, &range) ||
_is_predicate_acting_on_slot(cur_expr, eq_predicate_checker,
&slot, &range))) {
std::visit(
[&](auto& value_range) {
RETURN_IF_PUSH_DOWN(_normalize_in_and_eq_predicate(
cur_expr, *(_vconjunct_ctx_ptr.get()),
slot, value_range,
- &push_down));
+ &pdt));
RETURN_IF_PUSH_DOWN(_normalize_not_in_and_not_eq_predicate(
cur_expr, *(_vconjunct_ctx_ptr.get()),
slot, value_range,
- &push_down));
+ &pdt));
RETURN_IF_PUSH_DOWN(_normalize_is_null_predicate(
cur_expr, *(_vconjunct_ctx_ptr.get()),
slot, value_range,
- &push_down));
+ &pdt));
RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate(
cur_expr, *(_vconjunct_ctx_ptr.get()),
slot, value_range,
- &push_down));
+ &pdt));
if (_is_key_column(slot->col_name())) {
RETURN_IF_PUSH_DOWN(_normalize_bloom_filter(
- cur_expr, *(_vconjunct_ctx_ptr.get()),
slot, &push_down));
+ cur_expr, *(_vconjunct_ctx_ptr.get()),
slot, &pdt));
if (_state->enable_function_pushdown()) {
RETURN_IF_PUSH_DOWN(_normalize_function_filters(
- cur_expr,
*(_vconjunct_ctx_ptr.get()), slot,
- &push_down));
+ cur_expr,
*(_vconjunct_ctx_ptr.get()), slot, &pdt));
}
}
},
*range);
}
- if (push_down && _is_key_column(slot->col_name())) {
+ if (pdt == PushDownType::ACCEPTABLE &&
_is_key_column(slot->col_name())) {
return nullptr;
} else {
+ // for PARTIAL_ACCEPTABLE and UNACCEPTABLE, do not remove expr
from the tree
return conjunct_expr_root;
}
} else {
@@ -464,17 +465,20 @@ VExpr* VScanNode::_normalize_predicate(VExpr*
conjunct_expr_root) {
}
Status VScanNode::_normalize_bloom_filter(VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot,
- bool* push_down) {
+ PushDownType* pdt) {
if (TExprNodeType::BLOOM_PRED == expr->node_type()) {
DCHECK(expr->children().size() == 1);
- _bloom_filters_push_down.emplace_back(slot->col_name(),
expr->get_bloom_filter_func());
- *push_down = true;
+ PushDownType temp_pdt = _should_push_down_bloom_filter();
+ if (temp_pdt != PushDownType::UNACCEPTABLE) {
+ _bloom_filters_push_down.emplace_back(slot->col_name(),
expr->get_bloom_filter_func());
+ *pdt = temp_pdt;
+ }
}
return Status::OK();
}
Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext*
expr_ctx,
- SlotDescriptor* slot, bool*
push_down) {
+ SlotDescriptor* slot,
PushDownType* pdt) {
bool opposite = false;
VExpr* fn_expr = expr;
if (TExprNodeType::COMPOUND_PRED == expr->node_type() &&
@@ -486,11 +490,12 @@ Status VScanNode::_normalize_function_filters(VExpr*
expr, VExprContext* expr_ct
if (TExprNodeType::FUNCTION_CALL == fn_expr->node_type()) {
doris_udf::FunctionContext* fn_ctx = nullptr;
StringVal val;
- if
(_should_push_down_function_filter(reinterpret_cast<VectorizedFnCall*>(fn_expr),
- expr_ctx, &val, &fn_ctx)) {
+ PushDownType temp_pdt = _should_push_down_function_filter(
+ reinterpret_cast<VectorizedFnCall*>(fn_expr), expr_ctx, &val,
&fn_ctx);
+ if (temp_pdt != PushDownType::UNACCEPTABLE) {
std::string col = slot->col_name();
_push_down_functions.emplace_back(opposite, col, fn_ctx, val);
- *push_down = true;
+ *pdt = temp_pdt;
}
}
return Status::OK();
@@ -523,14 +528,14 @@ bool VScanNode::_is_predicate_acting_on_slot(
return true;
}
-void VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx,
bool* push_down) {
+void VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx,
PushDownType* pdt) {
char* constant_val = nullptr;
if (vexpr->is_constant()) {
if (const ColumnConst* const_column =
check_and_get_column<ColumnConst>(vexpr->get_const_col(expr_ctx)->column_ptr)) {
constant_val =
const_cast<char*>(const_column->get_data_at(0).data);
if (constant_val == nullptr ||
*reinterpret_cast<bool*>(constant_val) == false) {
- *push_down = true;
+ *pdt = PushDownType::ACCEPTABLE;
_eos = true;
}
} else {
@@ -543,14 +548,14 @@ void VScanNode::_eval_const_conjuncts(VExpr* vexpr,
VExprContext* expr_ctx, bool
template <PrimitiveType T>
Status VScanNode::_normalize_in_and_eq_predicate(VExpr* expr, VExprContext*
expr_ctx,
SlotDescriptor* slot,
ColumnValueRange<T>& range,
- bool* push_down) {
+ PushDownType* pdt) {
auto temp_range =
ColumnValueRange<T>::create_empty_column_value_range(slot->type().precision,
slot->type().scale);
- bool effect = false;
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
if (TExprNodeType::IN_PRED == expr->node_type()) {
VInPredicate* pred = static_cast<VInPredicate*>(expr);
- if (!_should_push_down_in_predicate(pred, expr_ctx, false)) {
+ PushDownType temp_pdt = _should_push_down_in_predicate(pred, expr_ctx,
false);
+ if (temp_pdt == PushDownType::UNACCEPTABLE) {
return Status::OK();
}
@@ -573,43 +578,45 @@ Status VScanNode::_normalize_in_and_eq_predicate(VExpr*
expr, VExprContext* expr
fn_name,
!state->hybrid_set->is_date_v2()));
iter->next();
}
-
range.intersection(temp_range);
- effect = true;
+ *pdt = temp_pdt;
} else if (TExprNodeType::BINARY_PRED == expr->node_type()) {
DCHECK(expr->children().size() == 2);
auto eq_checker = [](const std::string& fn_name) { return fn_name ==
"eq"; };
StringRef value;
int slot_ref_child = -1;
- if
(_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr),
expr_ctx,
- &value, &slot_ref_child,
eq_checker)) {
- DCHECK(slot_ref_child >= 0);
- // where A = nullptr should return empty result set
- auto fn_name = std::string("");
- if (value.data != nullptr) {
- if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T ==
TYPE_STRING ||
- T == TYPE_HLL) {
- auto val = StringValue(value.data, value.size);
- RETURN_IF_ERROR(_change_value_range<true>(
- temp_range, reinterpret_cast<void*>(&val),
- ColumnValueRange<T>::add_fixed_value_range,
fn_name));
- } else {
- RETURN_IF_ERROR(_change_value_range<true>(
- temp_range,
reinterpret_cast<void*>(const_cast<char*>(value.data)),
- ColumnValueRange<T>::add_fixed_value_range,
fn_name));
- }
- range.intersection(temp_range);
- effect = true;
+
+ PushDownType temp_pdt =
+
_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr),
+ expr_ctx, &value,
&slot_ref_child, eq_checker);
+ if (temp_pdt == PushDownType::UNACCEPTABLE) {
+ return Status::OK();
+ }
+ DCHECK(slot_ref_child >= 0);
+ // where A = nullptr should return empty result set
+ auto fn_name = std::string("");
+ if (value.data != nullptr) {
+ if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T ==
TYPE_STRING ||
+ T == TYPE_HLL) {
+ auto val = StringValue(value.data, value.size);
+ RETURN_IF_ERROR(_change_value_range<true>(
+ temp_range, reinterpret_cast<void*>(&val),
+ ColumnValueRange<T>::add_fixed_value_range, fn_name));
+ } else {
+ RETURN_IF_ERROR(_change_value_range<true>(
+ temp_range,
reinterpret_cast<void*>(const_cast<char*>(value.data)),
+ ColumnValueRange<T>::add_fixed_value_range, fn_name));
}
+ range.intersection(temp_range);
}
+ *pdt = temp_pdt;
}
// exceed limit, no conditions will be pushed down to storage engine.
if (range.get_fixed_value_size() > _max_pushdown_conditions_per_column) {
range.set_whole_value_range();
- } else {
- *push_down = effect;
+ *pdt = PushDownType::UNACCEPTABLE;
}
return Status::OK();
}
@@ -618,14 +625,15 @@ template <PrimitiveType T>
Status VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr,
VExprContext* expr_ctx,
SlotDescriptor* slot,
ColumnValueRange<T>&
range,
- bool* push_down) {
+ PushDownType* pdt) {
bool is_fixed_range = range.is_fixed_value_range();
auto not_in_range =
ColumnValueRange<T>::create_empty_column_value_range(range.column_name());
- bool effect = false;
+ PushDownType temp_pdt = PushDownType::UNACCEPTABLE;
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
if (TExprNodeType::IN_PRED == expr->node_type()) {
VInPredicate* pred = static_cast<VInPredicate*>(expr);
- if (!_should_push_down_in_predicate(pred, expr_ctx, true)) {
+ if ((temp_pdt = _should_push_down_in_predicate(pred, expr_ctx, true))
==
+ PushDownType::UNACCEPTABLE) {
return Status::OK();
}
@@ -652,55 +660,56 @@ Status
VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprConte
}
iter->next();
}
- effect = true;
} else if (TExprNodeType::BINARY_PRED == expr->node_type()) {
DCHECK(expr->children().size() == 2);
auto ne_checker = [](const std::string& fn_name) { return fn_name ==
"ne"; };
StringRef value;
int slot_ref_child = -1;
- if
(_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr),
expr_ctx,
- &value, &slot_ref_child,
ne_checker)) {
- DCHECK(slot_ref_child >= 0);
- // where A = nullptr should return empty result set
- if (value.data != nullptr) {
- auto fn_name = std::string("");
- if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T ==
TYPE_STRING ||
- T == TYPE_HLL) {
- auto val = StringValue(value.data, value.size);
- if (is_fixed_range) {
- RETURN_IF_ERROR(_change_value_range<true>(
- range, reinterpret_cast<void*>(&val),
- ColumnValueRange<T>::remove_fixed_value_range,
fn_name));
- } else {
- RETURN_IF_ERROR(_change_value_range<true>(
- not_in_range, reinterpret_cast<void*>(&val),
- ColumnValueRange<T>::add_fixed_value_range,
fn_name));
- }
+ if ((temp_pdt = _should_push_down_binary_predicate(
+ reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx,
&value, &slot_ref_child,
+ ne_checker)) == PushDownType::UNACCEPTABLE) {
+ return Status::OK();
+ }
+
+ DCHECK(slot_ref_child >= 0);
+ // where A = nullptr should return empty result set
+ if (value.data != nullptr) {
+ auto fn_name = std::string("");
+ if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T ==
TYPE_STRING ||
+ T == TYPE_HLL) {
+ auto val = StringValue(value.data, value.size);
+ if (is_fixed_range) {
+ RETURN_IF_ERROR(_change_value_range<true>(
+ range, reinterpret_cast<void*>(&val),
+ ColumnValueRange<T>::remove_fixed_value_range,
fn_name));
} else {
- if (is_fixed_range) {
- RETURN_IF_ERROR(_change_value_range<true>(
- range,
reinterpret_cast<void*>(const_cast<char*>(value.data)),
- ColumnValueRange<T>::remove_fixed_value_range,
fn_name));
- } else {
- RETURN_IF_ERROR(_change_value_range<true>(
- not_in_range,
-
reinterpret_cast<void*>(const_cast<char*>(value.data)),
- ColumnValueRange<T>::add_fixed_value_range,
fn_name));
- }
+ RETURN_IF_ERROR(_change_value_range<true>(
+ not_in_range, reinterpret_cast<void*>(&val),
+ ColumnValueRange<T>::add_fixed_value_range,
fn_name));
+ }
+ } else {
+ if (is_fixed_range) {
+ RETURN_IF_ERROR(_change_value_range<true>(
+ range,
reinterpret_cast<void*>(const_cast<char*>(value.data)),
+ ColumnValueRange<T>::remove_fixed_value_range,
fn_name));
+ } else {
+ RETURN_IF_ERROR(_change_value_range<true>(
+ not_in_range,
reinterpret_cast<void*>(const_cast<char*>(value.data)),
+ ColumnValueRange<T>::add_fixed_value_range,
fn_name));
}
- effect = true;
}
}
+ } else {
+ return Status::OK();
}
if (is_fixed_range ||
not_in_range.get_fixed_value_size() <=
_max_pushdown_conditions_per_column) {
if (!is_fixed_range) {
- // push down not in condition to storage engine
- not_in_range.to_in_condition(_olap_filters, false);
+ _not_in_value_ranges.push_back(not_in_range);
}
- *push_down = effect;
+ *pdt = temp_pdt;
}
return Status::OK();
}
@@ -708,21 +717,26 @@ Status
VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprConte
template <PrimitiveType T>
Status VScanNode::_normalize_is_null_predicate(VExpr* expr, VExprContext*
expr_ctx,
SlotDescriptor* slot,
ColumnValueRange<T>& range,
- bool* push_down) {
+ PushDownType* pdt) {
+ PushDownType temp_pdt = _should_push_down_is_null_predicate();
+ if (temp_pdt == PushDownType::UNACCEPTABLE) {
+ return Status::OK();
+ }
+
if (TExprNodeType::FUNCTION_CALL == expr->node_type()) {
if (reinterpret_cast<VectorizedFnCall*>(expr)->fn().name.function_name
== "is_null_pred") {
auto temp_range =
ColumnValueRange<T>::create_empty_column_value_range(
slot->type().precision, slot->type().scale);
temp_range.set_contain_null(true);
range.intersection(temp_range);
- *push_down = true;
+ *pdt = temp_pdt;
} else if
(reinterpret_cast<VectorizedFnCall*>(expr)->fn().name.function_name ==
"is_not_null_pred") {
auto temp_range =
ColumnValueRange<T>::create_empty_column_value_range(
slot->type().precision, slot->type().scale);
temp_range.set_contain_null(false);
range.intersection(temp_range);
- *push_down = true;
+ *pdt = temp_pdt;
}
}
return Status::OK();
@@ -731,7 +745,7 @@ Status VScanNode::_normalize_is_null_predicate(VExpr* expr,
VExprContext* expr_c
template <PrimitiveType T>
Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext*
expr_ctx,
SlotDescriptor* slot,
- ColumnValueRange<T>&
range, bool* push_down) {
+ ColumnValueRange<T>&
range, PushDownType* pdt) {
if (TExprNodeType::BINARY_PRED == expr->node_type()) {
DCHECK(expr->children().size() == 2);
@@ -740,15 +754,16 @@ Status
VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* e
};
StringRef value;
int slot_ref_child = -1;
- if
(_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr),
expr_ctx,
- &value, &slot_ref_child,
noneq_checker)) {
+ PushDownType temp_pdt = _should_push_down_binary_predicate(
+ reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx, &value,
&slot_ref_child,
+ noneq_checker);
+ if (temp_pdt != PushDownType::UNACCEPTABLE) {
DCHECK(slot_ref_child >= 0);
const std::string& fn_name =
reinterpret_cast<VectorizedFnCall*>(expr)->fn().name.function_name;
// where A = nullptr should return empty result set
if (value.data != nullptr) {
- *push_down = true;
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T ==
TYPE_STRING ||
T == TYPE_HLL) {
auto val = StringValue(value.data, value.size);
@@ -760,6 +775,7 @@ Status VScanNode::_normalize_noneq_binary_predicate(VExpr*
expr, VExprContext* e
range,
reinterpret_cast<void*>(const_cast<char*>(value.data)),
ColumnValueRange<T>::add_value_range, fn_name,
true, slot_ref_child));
}
+ *pdt = temp_pdt;
}
}
}
@@ -906,4 +922,59 @@ Status VScanNode::clone_vconjunct_ctx(VExprContext**
_vconjunct_ctx) {
return Status::OK();
}
+VScanNode::PushDownType VScanNode::_should_push_down_binary_predicate(
+ VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef*
constant_val,
+ int* slot_ref_child, const std::function<bool(const std::string&)>&
fn_checker) {
+ if (!fn_checker(fn_call->fn().name.function_name)) {
+ return PushDownType::UNACCEPTABLE;
+ }
+
+ const auto& children = fn_call->children();
+ DCHECK(children.size() == 2);
+ for (size_t i = 0; i < children.size(); i++) {
+ if (VExpr::expr_without_cast(children[i])->node_type() !=
TExprNodeType::SLOT_REF) {
+ // not a slot ref(column)
+ continue;
+ }
+ if (!children[1 - i]->is_constant()) {
+ // only handle constant value
+ return PushDownType::UNACCEPTABLE;
+ } else {
+ if (const ColumnConst* const_column =
check_and_get_column<ColumnConst>(
+ children[1 - i]->get_const_col(expr_ctx)->column_ptr))
{
+ *slot_ref_child = i;
+ *constant_val = const_column->get_data_at(0);
+ } else {
+ return PushDownType::UNACCEPTABLE;
+ }
+ }
+ }
+ return PushDownType::ACCEPTABLE;
+}
+
+VScanNode::PushDownType
VScanNode::_should_push_down_in_predicate(VInPredicate* pred,
+
VExprContext* expr_ctx,
+ bool
is_not_in) {
+ if (pred->is_not_in() != is_not_in) {
+ return PushDownType::UNACCEPTABLE;
+ }
+ InState* state = reinterpret_cast<InState*>(
+ expr_ctx->fn_context(pred->fn_context_index())
+ ->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+ HybridSetBase* set = state->hybrid_set.get();
+
+ // if there are too many elements in InPredicate, exceed the limit,
+ // we will not push any condition of this column to storage engine.
+ // because too many conditions pushed down to storage engine may even
+ // slow down the query process.
+ // ATTN: This is just an experience value. You may need to try
+ // different thresholds to improve performance.
+ if (set->size() > _max_pushdown_conditions_per_column) {
+ VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit
"
+ << _max_pushdown_conditions_per_column;
+ return PushDownType::UNACCEPTABLE;
+ }
+ return PushDownType::ACCEPTABLE;
+}
+
} // namespace doris::vectorized
diff --git a/be/src/vec/exec/scan/vscan_node.h
b/be/src/vec/exec/scan/vscan_node.h
index a37074948a..33386ffdab 100644
--- a/be/src/vec/exec/scan/vscan_node.h
+++ b/be/src/vec/exec/scan/vscan_node.h
@@ -73,6 +73,17 @@ public:
const TupleDescriptor* input_tuple_desc() const { return
_input_tuple_desc; }
const TupleDescriptor* output_tuple_desc() const { return
_output_tuple_desc; }
+ enum class PushDownType {
+ // The predicate can not be pushed down to data source
+ UNACCEPTABLE,
+ // The predicate can be pushed down to data source
+ // and the data source can fully evaludate it
+ ACCEPTABLE,
+ // The predicate can be pushed down to data source
+ // but the data source can not fully evaluate it.
+ PARTIAL_ACCEPTABLE
+ };
+
protected:
// Different data sources register different profiles by implementing this
method
virtual Status _init_profile();
@@ -105,21 +116,24 @@ protected:
// 2. in/not in predicate
// 3. function predicate
// TODO: these interfaces should be change to become more common.
- virtual bool _should_push_down_binary_predicate(
+ virtual PushDownType _should_push_down_binary_predicate(
VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef*
constant_val,
- int* slot_ref_child, const std::function<bool(const
std::string&)>& fn_checker) {
- return false;
- }
+ int* slot_ref_child, const std::function<bool(const
std::string&)>& fn_checker);
- virtual bool _should_push_down_in_predicate(VInPredicate* in_pred,
VExprContext* expr_ctx,
- bool is_not_in) {
- return false;
+ virtual PushDownType _should_push_down_in_predicate(VInPredicate* in_pred,
+ VExprContext*
expr_ctx, bool is_not_in);
+
+ virtual PushDownType _should_push_down_function_filter(VectorizedFnCall*
fn_call,
+ VExprContext*
expr_ctx,
+ StringVal*
constant_str,
+
doris_udf::FunctionContext** fn_ctx) {
+ return PushDownType::UNACCEPTABLE;
}
- virtual bool _should_push_down_function_filter(VectorizedFnCall* fn_call,
- VExprContext* expr_ctx,
StringVal* constant_str,
-
doris_udf::FunctionContext** fn_ctx) {
- return false;
+ virtual PushDownType _should_push_down_bloom_filter() { return
PushDownType::UNACCEPTABLE; }
+
+ virtual PushDownType _should_push_down_is_null_predicate() {
+ return PushDownType::UNACCEPTABLE;
}
// Return true if it is a key column.
@@ -175,11 +189,18 @@ protected:
std::vector<FunctionFilter> _push_down_functions;
// slot id -> ColumnValueRange
- // Parsed from conjunts
+ // Parsed from conjuncts
phmap::flat_hash_map<int, std::pair<SlotDescriptor*, ColumnValueRangeType>>
_slot_id_to_value_range;
// column -> ColumnValueRange
std::map<std::string, ColumnValueRangeType> _colname_to_value_range;
+ // We use _colname_to_value_range to store a column and its conresponding
value ranges.
+ // But if a col is with value range, eg: 1 < col < 10, which is
"!is_fixed_range",
+ // in this case we can not merge "1 < col < 10" with "col not in (2)".
+ // So we have to save "col not in (2)" to another structure:
"_not_in_value_ranges".
+ // When the data source try to use the value ranges, it should use both
ranges in
+ // "_colname_to_value_range" and in "_not_in_value_ranges"
+ std::vector<ColumnValueRangeType> _not_in_value_ranges;
bool _need_agg_finalize = true;
@@ -233,13 +254,13 @@ private:
Status _normalize_conjuncts();
VExpr* _normalize_predicate(VExpr* conjunct_expr_root);
- void _eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, bool*
push_down);
+ void _eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx,
PushDownType* pdt);
Status _normalize_bloom_filter(VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot,
- bool* push_down);
+ PushDownType* pdt);
Status _normalize_function_filters(VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot,
- bool* push_down);
+ PushDownType* pdt);
bool _is_predicate_acting_on_slot(VExpr* expr,
const std::function<bool(const
std::vector<VExpr*>&,
@@ -249,21 +270,21 @@ private:
template <PrimitiveType T>
Status _normalize_in_and_eq_predicate(vectorized::VExpr* expr,
VExprContext* expr_ctx,
SlotDescriptor* slot,
ColumnValueRange<T>& range,
- bool* push_down);
+ PushDownType* pdt);
template <PrimitiveType T>
Status _normalize_not_in_and_not_eq_predicate(vectorized::VExpr* expr,
VExprContext* expr_ctx,
SlotDescriptor* slot,
ColumnValueRange<T>& range,
- bool* push_down);
+ PushDownType* pdt);
template <PrimitiveType T>
Status _normalize_noneq_binary_predicate(vectorized::VExpr* expr,
VExprContext* expr_ctx,
SlotDescriptor* slot,
ColumnValueRange<T>& range,
- bool* push_down);
+ PushDownType* pdt);
template <PrimitiveType T>
Status _normalize_is_null_predicate(vectorized::VExpr* expr, VExprContext*
expr_ctx,
SlotDescriptor* slot,
ColumnValueRange<T>& range,
- bool* push_down);
+ PushDownType* pdt);
template <bool IsFixed, PrimitiveType PrimitiveType, typename
ChangeFixedValueRangeFunc>
static Status _change_value_range(ColumnValueRange<PrimitiveType>& range,
void* value,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]