hubgeter commented on code in PR #54240:
URL: https://github.com/apache/doris/pull/54240#discussion_r2253062253
##########
be/src/vec/exprs/vtopn_pred.h:
##########
@@ -114,6 +115,50 @@ class VTopNPred : public VExpr {
const std::string& expr_name() const override { return _expr_name; }
+ bool has_value() const { return _predicate->has_value(); }
+
+ VExprSPtr get_binary_expr() const {
+ VExprSPtr root;
+
+ {
+ TFunction fn;
+ TFunctionName fn_name;
+ fn_name.__set_db_name("");
+ fn_name.__set_function_name(_predicate->is_asc() ? "le" : "ge");
+ fn.__set_name(fn_name);
+ fn.__set_binary_type(TFunctionBinaryType::BUILTIN);
+ std::vector<TTypeDesc> arg_types;
+ arg_types.push_back(create_type_desc(PrimitiveType::TYPE_INT));
Review Comment:
I may have written this part incorrectly.
##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -970,38 +976,61 @@ Status OrcReader::set_fill_columns(
// std::unordered_map<column_name, std::pair<col_id, slot_id>>
std::unordered_map<std::string, std::pair<uint32_t, int>>
predicate_table_columns;
std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
- if (auto* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+ if (expr->is_slot_ref()) {
+ VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
auto expr_name = slot_ref->expr_name();
predicate_table_columns.emplace(
expr_name, std::make_pair(slot_ref->column_id(),
slot_ref->slot_id()));
if (slot_ref->column_id() == 0) {
_lazy_read_ctx.resize_first_column = false;
}
return;
- } else if (auto* runtime_filter =
typeid_cast<VRuntimeFilterWrapper*>(expr)) {
- auto* filter_impl =
const_cast<VExpr*>(runtime_filter->get_impl().get());
- if (auto* bloom_predicate =
typeid_cast<VBloomPredicate*>(filter_impl)) {
- for (const auto& child : bloom_predicate->children()) {
+ }
+ for (auto& child : expr->children()) {
+ visit_slot(child.get());
+ }
+ };
+
+ for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+ auto expr = conjunct->root();
+
+ if (VRuntimeFilterWrapper* runtime_filter =
+ typeid_cast<VRuntimeFilterWrapper*>(expr.get())) {
+ auto filter_impl = runtime_filter->get_impl();
+ if (VBloomPredicate* bloom_predicate =
+ typeid_cast<VBloomPredicate*>(filter_impl.get())) {
+ for (auto& child : bloom_predicate->children()) {
visit_slot(child.get());
}
- } else if (auto* in_predicate =
typeid_cast<VInPredicate*>(filter_impl)) {
- if (!in_predicate->children().empty()) {
+ } else if (VDirectInPredicate* in_predicate =
+
typeid_cast<VDirectInPredicate*>(filter_impl.get())) {
+ if (in_predicate->get_num_children() > 0) {
+ expr = runtime_filter->get_impl();
+
visit_slot(in_predicate->children()[0].get());
}
} else {
- for (const auto& child : filter_impl->children()) {
+ for (auto& child : filter_impl->children()) {
visit_slot(child.get());
}
}
- } else {
- for (const auto& child : expr->children()) {
- visit_slot(child.get());
+ } else if (VTopNPred* topn_pred = typeid_cast<VTopNPred*>(
+ expr.get())) { // top runtime filter : only le &&
ge.
+ if (topn_pred->has_value()) {
+ expr = topn_pred->get_binary_expr();
Review Comment:
Although `VTopNPred` expresses the concept of `greater than or equal` /
`less than or equal`, it does not directly wrap a `binary_expr` the way
`VRuntimeFilterWrapper` does.
To avoid adding many extra case checks when deciding whether an expr can be
pushed down to filter stripes, it is converted to a more general
`binary_expr`.
##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -970,38 +976,61 @@ Status OrcReader::set_fill_columns(
// std::unordered_map<column_name, std::pair<col_id, slot_id>>
std::unordered_map<std::string, std::pair<uint32_t, int>>
predicate_table_columns;
std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
- if (auto* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+ if (expr->is_slot_ref()) {
+ VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
auto expr_name = slot_ref->expr_name();
predicate_table_columns.emplace(
expr_name, std::make_pair(slot_ref->column_id(),
slot_ref->slot_id()));
if (slot_ref->column_id() == 0) {
_lazy_read_ctx.resize_first_column = false;
}
return;
- } else if (auto* runtime_filter =
typeid_cast<VRuntimeFilterWrapper*>(expr)) {
- auto* filter_impl =
const_cast<VExpr*>(runtime_filter->get_impl().get());
- if (auto* bloom_predicate =
typeid_cast<VBloomPredicate*>(filter_impl)) {
- for (const auto& child : bloom_predicate->children()) {
+ }
+ for (auto& child : expr->children()) {
+ visit_slot(child.get());
+ }
+ };
+
+ for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+ auto expr = conjunct->root();
+
+ if (VRuntimeFilterWrapper* runtime_filter =
+ typeid_cast<VRuntimeFilterWrapper*>(expr.get())) {
+ auto filter_impl = runtime_filter->get_impl();
+ if (VBloomPredicate* bloom_predicate =
+ typeid_cast<VBloomPredicate*>(filter_impl.get())) {
+ for (auto& child : bloom_predicate->children()) {
visit_slot(child.get());
}
- } else if (auto* in_predicate =
typeid_cast<VInPredicate*>(filter_impl)) {
- if (!in_predicate->children().empty()) {
+ } else if (VDirectInPredicate* in_predicate =
Review Comment:
`runtime_filter_consumer.cpp`:
```cpp
Status
RuntimeFilterConsumer::_get_push_exprs(std::vector<vectorized::VRuntimeFilterPtr>&
container,
const TExpr& probe_expr) {
case RuntimeFilterType::IN_FILTER: {
auto in_pred = vectorized::VDirectInPredicate::create_shared(node,
_wrapper->hybrid_set());
auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared(
node, in_pred,
get_in_list_ignore_thredhold(_wrapper->hybrid_set()->size()),
null_aware, _wrapper->filter_id());
container.push_back(wrapper);
break;
}
}
```
##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -970,38 +976,61 @@ Status OrcReader::set_fill_columns(
// std::unordered_map<column_name, std::pair<col_id, slot_id>>
std::unordered_map<std::string, std::pair<uint32_t, int>>
predicate_table_columns;
std::function<void(VExpr * expr)> visit_slot = [&](VExpr* expr) {
- if (auto* slot_ref = typeid_cast<VSlotRef*>(expr)) {
+ if (expr->is_slot_ref()) {
+ VSlotRef* slot_ref = static_cast<VSlotRef*>(expr);
auto expr_name = slot_ref->expr_name();
predicate_table_columns.emplace(
expr_name, std::make_pair(slot_ref->column_id(),
slot_ref->slot_id()));
if (slot_ref->column_id() == 0) {
_lazy_read_ctx.resize_first_column = false;
}
return;
- } else if (auto* runtime_filter =
typeid_cast<VRuntimeFilterWrapper*>(expr)) {
- auto* filter_impl =
const_cast<VExpr*>(runtime_filter->get_impl().get());
- if (auto* bloom_predicate =
typeid_cast<VBloomPredicate*>(filter_impl)) {
- for (const auto& child : bloom_predicate->children()) {
+ }
+ for (auto& child : expr->children()) {
+ visit_slot(child.get());
+ }
+ };
+
+ for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+ auto expr = conjunct->root();
+
+ if (VRuntimeFilterWrapper* runtime_filter =
Review Comment:
The purpose of `visit_slot` is to place all slots in the conjuncts into
`predicate_table_columns` for lazy materialization.
`_check_expr_can_push_down` checks whether a predicate can be pushed down
to filter stripes/row groups. It doesn't necessarily traverse all slots, so
the two can't be written in the same loop.
In addition, a `VTopNPred` may be empty during `visit_slot` and then
have a value during `_check_expr_can_push_down`. Therefore, I separated
`visit_slot` from the type checking.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]