xiaokang commented on code in PR #38908:
URL: https://github.com/apache/doris/pull/38908#discussion_r1717819551
##########
cloud/src/recycler/hdfs_accessor.h:
##########
@@ -17,6 +17,7 @@
#pragma once
+#include <stdarg.h>
#ifdef USE_HADOOP_HDFS
Review Comment:
unrelated change
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -977,74 +794,20 @@ bool
SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
return true;
}
-Status SegmentIterator::_apply_inverted_index_except_leafnode_of_andnode(
- ColumnPredicate* pred, roaring::Roaring* output_result) {
- RETURN_IF_ERROR(pred->evaluate(_storage_name_and_type[pred->column_id()],
-
_inverted_index_iterators[pred->column_id()].get(), num_rows(),
- output_result));
- return Status::OK();
-}
-
-Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
- for (auto* pred : _col_preds_except_leafnode_of_andnode) {
- auto column_id = pred->column_id();
- auto pred_type = pred->type();
- bool is_support = pred_type == PredicateType::EQ || pred_type ==
PredicateType::NE ||
- pred_type == PredicateType::LT || pred_type ==
PredicateType::LE ||
- pred_type == PredicateType::GT || pred_type ==
PredicateType::GE ||
- pred_type == PredicateType::MATCH ||
- pred_type == PredicateType::IN_LIST ||
- pred_type == PredicateType::NOT_IN_LIST;
- if (!is_support) {
- continue;
- }
-
- bool can_apply_by_inverted_index =
_check_apply_by_inverted_index(pred, true);
- roaring::Roaring bitmap = _row_bitmap;
- Status res = Status::OK();
- if (can_apply_by_inverted_index) {
- res = _apply_inverted_index_except_leafnode_of_andnode(pred,
&bitmap);
- } else {
- continue;
- }
-
- bool need_remaining_after_evaluate =
_column_has_fulltext_index(column_id) &&
-
PredicateTypeTraits::is_equal_or_list(pred_type);
- if (!res.ok()) {
- if (_downgrade_without_index(res, need_remaining_after_evaluate)) {
- // downgrade without index query
- _not_apply_index_pred.insert(column_id);
+Status SegmentIterator::_apply_index_expr() {
+ for (const auto& expr_ctx : _common_expr_ctxs_push_down) {
+ if (Status st = expr_ctx->evaluate_inverted_index(num_rows());
!st.ok()) {
+ if (_downgrade_without_index(st) || st.code() ==
ErrorCode::NOT_IMPLEMENTED_ERROR) {
Review Comment:
Can you move `st.code() == ErrorCode::NOT_IMPLEMENTED_ERROR` into
`_downgrade_without_index()`?
##########
be/src/vec/exprs/vectorized_fn_call.cpp:
##########
@@ -143,37 +139,61 @@ void VectorizedFnCall::close(VExprContext* context,
FunctionContext::FunctionSta
VExpr::close(context, scope);
}
-Status VectorizedFnCall::eval_inverted_index(
- VExprContext* context,
- const std::unordered_map<ColumnId,
std::pair<vectorized::IndexFieldNameAndTypePair,
-
segment_v2::InvertedIndexIterator*>>&
- colid_to_inverted_index_iter,
- uint32_t num_rows, roaring::Roaring* bitmap) const {
+Status VectorizedFnCall::evaluate_inverted_index(VExprContext* context,
+ uint32_t segment_num_rows)
const {
DCHECK_GE(get_num_children(), 1);
if (get_child(0)->is_slot_ref()) {
auto* column_slot_ref = assert_cast<VSlotRef*>(get_child(0).get());
- if (auto iter =
colid_to_inverted_index_iter.find(column_slot_ref->column_id());
- iter != colid_to_inverted_index_iter.end()) {
- const auto& pair = iter->second;
- return
_function->eval_inverted_index(context->fn_context(_fn_context_index),
- pair.first, pair.second,
num_rows, bitmap);
- } else {
- return Status::NotSupported("column id {} not found in
colid_to_inverted_index_iter",
- column_slot_ref->column_id());
+ auto* iter =
+
context->get_inverted_index_iterators_by_column_name(column_slot_ref->expr_name());
+ //column does not have inverted index
+ if (iter == nullptr) {
+ return Status::OK();
+ }
+ auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
+ auto storage_name_type =
+
context->get_storage_name_and_type_by_column_name(column_slot_ref->expr_name());
+ vectorized::ColumnsWithTypeAndName arguments;
+ for (int right_children_size = get_num_children() - 1;
right_children_size > 0;
Review Comment:
`i` is a better loop variable name than `right_children_size`.
##########
be/src/vec/exprs/vmatch_predicate.cpp:
##########
@@ -130,9 +132,60 @@ void VMatchPredicate::close(VExprContext* context,
FunctionContext::FunctionStat
VExpr::close(context, scope);
}
+Status VMatchPredicate::evaluate_inverted_index(VExprContext* context,
+ uint32_t segment_num_rows)
const {
+ DCHECK_EQ(get_num_children(), 2);
+ if (get_child(0)->is_slot_ref()) {
+ auto* column_slot_ref = assert_cast<VSlotRef*>(get_child(0).get());
+ auto* iter =
+
context->get_inverted_index_iterators_by_column_name(column_slot_ref->expr_name());
+ //column does not have inverted index
+ if (iter == nullptr) {
+ return Status::OK();
+ }
+ auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
+ auto storage_name_type =
+
context->get_storage_name_and_type_by_column_name(column_slot_ref->expr_name());
+ vectorized::ColumnsWithTypeAndName arguments;
+
+ if (get_child(1)->is_literal()) {
Review Comment:
almost the same as FunctionCall except only one child
##########
be/src/vec/exprs/vexpr_context.h:
##########
@@ -191,5 +277,18 @@ class VExprContext {
// This flag only works on VSlotRef.
// Force to materialize even if the slot need_materialize is false, we
just ignore need_materialize flag
bool _force_materialize_slot = false;
+
+ // result for inverted index expr evaluated
+ // [expr address] -> [rowid_list]
+ std::unordered_map<const vectorized::VExpr*,
segment_v2::InvertedIndexResultBitmap>
+ _inverted_index_result_bitmap;
+ std::unordered_map<const vectorized::VExpr*, ColumnPtr>
_inverted_index_result_column;
+ std::unordered_map<std::string, segment_v2::InvertedIndexIterator*>
+ _inverted_index_iterators_by_col_name;
+ // storage type schema related to _schema, since column in segment may be
different with type in _schema
+ std::unordered_map<std::string, vectorized::IndexFieldNameAndTypePair>
+ _storage_name_and_type_by_col_name;
+ std::unordered_map<std::string, std::unordered_map<const
vectorized::VExpr*, bool>>
Review Comment:
add comment for each field
##########
be/src/vec/exprs/vexpr_context.cpp:
##########
@@ -121,12 +121,24 @@ int VExprContext::register_function_context(RuntimeState*
state, const TypeDescr
return _fn_contexts.size() - 1;
}
-Status VExprContext::eval_inverted_index(
- const std::unordered_map<ColumnId,
std::pair<vectorized::IndexFieldNameAndTypePair,
-
segment_v2::InvertedIndexIterator*>>&
- colid_to_inverted_index_iter,
- uint32_t num_rows, roaring::Roaring* bitmap) {
- return _root->eval_inverted_index(this, colid_to_inverted_index_iter,
num_rows, bitmap);
+Status VExprContext::evaluate_inverted_index(uint32_t segment_num_rows) {
+ Status st;
+ RETURN_IF_CATCH_EXCEPTION({ st = _root->evaluate_inverted_index(this,
segment_num_rows); });
+ return st;
+}
+
+bool VExprContext::all_expr_inverted_index_evaluated() {
+ return has_inverted_index_result_for_expr(_root.get());
+}
+
+Status VExprContext::evaluate_inverted_index(const VExprContextSPtrs&
conjuncts,
+ uint32_t segment_num_rows) {
+ for (const auto& conjunct : conjuncts) {
+ if (conjunct->evaluate_inverted_index(segment_num_rows) !=
Status::OK()) {
+ return Status::InternalError("evaluate inverted index failed");
Review Comment:
add conjunct name to error msg
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -544,47 +510,25 @@ Status
SegmentIterator::_get_row_ranges_by_column_conditions() {
return Status::OK();
}
- if (config::enable_index_apply_preds_except_leafnode_of_andnode) {
- size_t input_rows = _row_bitmap.cardinality();
- RETURN_IF_ERROR(_apply_index_except_leafnode_of_andnode());
- if (_can_filter_by_preds_except_leafnode_of_andnode()) {
- for (auto it = _remaining_conjunct_roots.begin();
- it != _remaining_conjunct_roots.end();) {
- _pred_except_leafnode_of_andnode_evaluate_result.clear();
- auto res = _execute_predicates_except_leafnode_of_andnode(*it);
- VLOG_DEBUG << "_execute_predicates_except_leafnode_of_andnode
expr: "
- << (*it)->debug_string() << " res: " << res;
- if (res.ok() &&
_pred_except_leafnode_of_andnode_evaluate_result.size() == 1) {
- _row_bitmap &=
_pred_except_leafnode_of_andnode_evaluate_result[0];
- // Delete expr after it obtains the final result.
- {
- std::erase_if(_common_expr_ctxs_push_down,
- [&it](const auto& iter) { return
iter->root() == *it; });
- VLOG_DEBUG << "_remaining_conjunct_roots erase expr: "
- << (*it)->debug_string();
- it = _remaining_conjunct_roots.erase(it);
- }
- } else {
- ++it;
- }
- }
- _col_preds_except_leafnode_of_andnode.clear();
- compound_func_exprs.clear();
- // 1. if all conditions in the compound hit the inverted index and
there are no other expr to handle.
- // 2. then there is no need to generate index_result_column.
- if (_enable_common_expr_pushdown &&
_remaining_conjunct_roots.empty()) {
- for (auto& iter : _rowid_result_for_index) {
- iter.second.first = false;
- }
- }
+ RETURN_IF_ERROR(_apply_bitmap_index());
+ RETURN_IF_ERROR(_apply_inverted_index());
+ RETURN_IF_ERROR(_apply_index_expr());
+ size_t input_rows = _row_bitmap.cardinality();
+ for (auto it = _common_expr_ctxs_push_down.begin(); it !=
_common_expr_ctxs_push_down.end();) {
+ if ((*it)->all_expr_inverted_index_evaluated()) {
+ auto result = (*it)->get_inverted_index_result_for_root();
+ _row_bitmap &= *result.get_data_bitmap();
Review Comment:
Check for an AND short circuit if `_row_bitmap` is empty.
##########
be/src/vec/exprs/vexpr_context.h:
##########
@@ -51,6 +51,96 @@ class VExprContext {
VExprSPtr root() { return _root; }
void set_root(const VExprSPtr& expr) { _root = expr; }
+ void set_inverted_index_iterators(
+ const std::unordered_map<std::string,
segment_v2::InvertedIndexIterator*>& iterators) {
+ _inverted_index_iterators_by_col_name = iterators;
+ }
+
+ void set_storage_name_and_type(
+ const std::unordered_map<std::string,
vectorized::IndexFieldNameAndTypePair>&
+ storage_name_and_type) {
+ _storage_name_and_type_by_col_name = storage_name_and_type;
+ }
+
+ segment_v2::InvertedIndexIterator*
get_inverted_index_iterators_by_column_name(
+ std::string column_name) {
+ return _inverted_index_iterators_by_col_name[column_name];
+ }
+
+ vectorized::IndexFieldNameAndTypePair
get_storage_name_and_type_by_column_name(
+ std::string column_name) {
+ return _storage_name_and_type_by_col_name[column_name];
+ }
+
+ bool has_inverted_index_result_for_expr(const vectorized::VExpr* expr)
const {
+ return _inverted_index_result_bitmap.find(expr) !=
_inverted_index_result_bitmap.end();
+ }
+
+ void set_inverted_index_result_for_expr(const vectorized::VExpr* expr,
+
segment_v2::InvertedIndexResultBitmap bitmap) {
+ _inverted_index_result_bitmap[expr] = std::move(bitmap);
+ }
+
+ segment_v2::InvertedIndexResultBitmap
get_or_set_inverted_index_result_for_expr(
+ const vectorized::VExpr* expr) {
+ auto iter = _inverted_index_result_bitmap.find(expr);
+ if (iter == _inverted_index_result_bitmap.end()) {
+ _inverted_index_result_bitmap[expr] =
segment_v2::InvertedIndexResultBitmap();
+ return _inverted_index_result_bitmap[expr];
+ }
+ return iter->second;
+ }
+
+ segment_v2::InvertedIndexResultBitmap get_inverted_index_result_for_expr(
+ const vectorized::VExpr* expr) {
+ auto iter = _inverted_index_result_bitmap.find(expr);
+ if (iter == _inverted_index_result_bitmap.end()) {
+ return {};
+ }
+ return iter->second;
+ }
+
+ void set_inverted_index_expr_status(
+ const std::unordered_map<std::string,
+ std::unordered_map<const
vectorized::VExpr*, bool>>& status) {
+ _expr_inverted_index_status = status;
+ }
+
+ segment_v2::InvertedIndexResultBitmap get_inverted_index_result_for_root()
{
+ auto iter = _inverted_index_result_bitmap.find(_root.get());
+ if (iter == _inverted_index_result_bitmap.end()) {
+ return {};
+ }
+ return iter->second;
+ }
+
+ std::unordered_map<std::string, std::unordered_map<const
vectorized::VExpr*, bool>>
+ get_expr_inverted_index_status() {
+ return _expr_inverted_index_status;
+ }
+
+ void set_true_for_inverted_index_status(const vectorized::VExpr* expr,
+ const std::string& column_name) {
+ if (_expr_inverted_index_status.contains(column_name)) {
+ if (_expr_inverted_index_status[column_name].contains(expr)) {
+ _expr_inverted_index_status[column_name][expr] = true;
+ }
+ }
+ }
+
+ std::unordered_map<const vectorized::VExpr*,
segment_v2::InvertedIndexResultBitmap>
+ get_inverted_index_result_bitmap() {
Review Comment:
The map is copied on return; returning a `const &` is better for performance.
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -2836,125 +2401,46 @@ Status
SegmentIterator::current_block_row_locations(std::vector<RowLocation>* bl
return Status::OK();
}
-void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
- const vectorized::VExprSPtr& expr) {
- if (expr == nullptr) {
- return;
- }
-
- if (expr->fn().name.function_name == "multi_match") {
- return;
- }
-
- auto& children = expr->children();
- for (int i = 0; i < children.size(); ++i) {
- _calculate_pred_in_remaining_conjunct_root(children[i]);
- }
-
- auto node_type = expr->node_type();
- if (node_type == TExprNodeType::SLOT_REF) {
- auto slot_expr =
std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr);
- if (_column_predicate_info->column_name.empty()) {
- _column_predicate_info->column_name = expr->expr_name();
- _column_predicate_info->column_id = slot_expr->column_id();
- } else {
- // If column name already exists, create a new ColumnPredicateInfo
- // if expr is columnA > columnB, then column name will exist, in
this situation, we need to add it to _column_pred_in_remaining_vconjunct
- auto new_column_pred_info =
std::make_shared<ColumnPredicateInfo>();
- new_column_pred_info->column_name = expr->expr_name();
- new_column_pred_info->column_id = slot_expr->column_id();
-
_column_pred_in_remaining_vconjunct[new_column_pred_info->column_name].push_back(
- *new_column_pred_info);
- }
- } else if (_is_literal_node(node_type)) {
- auto v_literal_expr = static_cast<const
doris::vectorized::VLiteral*>(expr.get());
- _column_predicate_info->query_values.insert(v_literal_expr->value());
- } else if (node_type == TExprNodeType::NULL_LITERAL) {
- if (!_column_predicate_info->column_name.empty()) {
- auto v_literal_expr = static_cast<const
doris::vectorized::VLiteral*>(expr.get());
-
_column_predicate_info->query_values.insert(v_literal_expr->value());
- }
- } else {
- if (node_type == TExprNodeType::MATCH_PRED) {
- _column_predicate_info->query_op = "match";
- } else if (node_type == TExprNodeType::IN_PRED) {
- if (expr->op() == TExprOpcode::type::FILTER_IN) {
- _column_predicate_info->query_op = "in";
- } else {
- _column_predicate_info->query_op = "not_in";
- }
- } else if (node_type != TExprNodeType::COMPOUND_PRED) {
- _column_predicate_info->query_op = expr->fn().name.function_name;
- }
-
- if (!_column_predicate_info->is_empty()) {
-
_column_pred_in_remaining_vconjunct[_column_predicate_info->column_name].push_back(
- *_column_predicate_info);
- _column_predicate_info.reset(new ColumnPredicateInfo());
- }
+Status SegmentIterator::_construct_compound_expr_context() {
+ for (const auto& expr_ctx : _opts.common_expr_ctxs_push_down) {
+ vectorized::VExprContextSPtr context;
+ RETURN_IF_ERROR(expr_ctx->clone(_opts.runtime_state, context));
+
context->set_inverted_index_iterators(_inverted_index_iterators_by_col_name);
+ context->set_storage_name_and_type(_storage_name_and_type_by_col_name);
+
context->set_inverted_index_expr_status(_common_expr_inverted_index_status);
+ _common_expr_ctxs_push_down.emplace_back(context);
}
+ return Status::OK();
}
-void SegmentIterator::_calculate_func_in_remaining_conjunct_root() {
- auto hash = [](const vectorized::VExprSPtr& expr) -> std::size_t {
- return std::hash<std::string>()(expr->expr_name());
- };
- auto equal = [](const vectorized::VExprSPtr& lhs, const
vectorized::VExprSPtr& rhs) -> bool {
- return lhs->equals(*rhs);
- };
-
- uint32_t next_id = 0;
- std::unordered_map<vectorized::VExprSPtr, uint32_t, decltype(hash),
decltype(equal)> unique_map(
- 0, hash, equal);
-
- auto gen_func_unique_id = [&unique_map, &next_id](const
vectorized::VExprSPtr& expr) {
- auto it = unique_map.find(expr);
- if (it != unique_map.end()) {
- return it->second;
- } else {
- unique_map[expr] = ++next_id;
- return next_id;
- }
- };
-
+void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
for (const auto& root_expr_ctx : _common_expr_ctxs_push_down) {
const auto& root_expr = root_expr_ctx->root();
if (root_expr == nullptr) {
continue;
}
- std::stack<std::pair<vectorized::VExprSPtr, bool>> stack;
- stack.emplace(root_expr, false);
+ std::stack<vectorized::VExprSPtr> stack;
+ stack.emplace(root_expr);
while (!stack.empty()) {
- const auto& [expr, has_compound_pred] = stack.top();
+ const auto& expr = stack.top();
stack.pop();
- bool current_has_compound_pred =
- has_compound_pred || (expr->node_type() ==
TExprNodeType::COMPOUND_PRED);
-
- if (expr->fn().name.function_name == "multi_match") {
- expr->set_index_unique_id(gen_func_unique_id(expr));
- if (current_has_compound_pred) {
- compound_func_exprs.emplace_back(expr);
- } else {
- no_compound_func_exprs.emplace_back(expr);
- }
-
- for (int32_t i = expr->get_num_children() - 1; i >= 0; i--) {
- auto child_expr = expr->get_child(i);
- if (child_expr->node_type() ==
TExprNodeType::type::SLOT_REF) {
- std::string result_sign =
BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
-
std::to_string(expr->index_unique_id());
-
_func_name_to_result_sign[child_expr->expr_name()].push_back(result_sign);
+ if (vectorized::VExpr::is_directly_action_on_a_slot(*expr)) {
+ for (const auto& child : expr->children()) {
+ if (child->is_slot_ref()) {
+ auto* column_slot_ref =
assert_cast<vectorized::VSlotRef*>(child.get());
+
_common_expr_inverted_index_status[column_slot_ref->expr_name()]
Review Comment:
why use expr_name()?
##########
be/src/vec/exprs/vectorized_fn_call.cpp:
##########
@@ -143,37 +139,61 @@ void VectorizedFnCall::close(VExprContext* context,
FunctionContext::FunctionSta
VExpr::close(context, scope);
}
-Status VectorizedFnCall::eval_inverted_index(
- VExprContext* context,
- const std::unordered_map<ColumnId,
std::pair<vectorized::IndexFieldNameAndTypePair,
-
segment_v2::InvertedIndexIterator*>>&
- colid_to_inverted_index_iter,
- uint32_t num_rows, roaring::Roaring* bitmap) const {
+Status VectorizedFnCall::evaluate_inverted_index(VExprContext* context,
+ uint32_t segment_num_rows)
const {
DCHECK_GE(get_num_children(), 1);
if (get_child(0)->is_slot_ref()) {
auto* column_slot_ref = assert_cast<VSlotRef*>(get_child(0).get());
- if (auto iter =
colid_to_inverted_index_iter.find(column_slot_ref->column_id());
- iter != colid_to_inverted_index_iter.end()) {
- const auto& pair = iter->second;
- return
_function->eval_inverted_index(context->fn_context(_fn_context_index),
- pair.first, pair.second,
num_rows, bitmap);
- } else {
- return Status::NotSupported("column id {} not found in
colid_to_inverted_index_iter",
- column_slot_ref->column_id());
+ auto* iter =
+
context->get_inverted_index_iterators_by_column_name(column_slot_ref->expr_name());
+ //column does not have inverted index
+ if (iter == nullptr) {
+ return Status::OK();
+ }
+ auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
+ auto storage_name_type =
+
context->get_storage_name_and_type_by_column_name(column_slot_ref->expr_name());
+ vectorized::ColumnsWithTypeAndName arguments;
+ for (int right_children_size = get_num_children() - 1;
right_children_size > 0;
+ --right_children_size) {
+ if (get_child(right_children_size)->is_literal()) {
+ auto* column_literal =
assert_cast<VLiteral*>(get_child(right_children_size).get());
+ arguments.emplace_back(column_literal->get_column_ptr(),
Review Comment:
The arguments are in the opposite order to the original children, which is unconventional.
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -544,47 +510,25 @@ Status
SegmentIterator::_get_row_ranges_by_column_conditions() {
return Status::OK();
}
- if (config::enable_index_apply_preds_except_leafnode_of_andnode) {
- size_t input_rows = _row_bitmap.cardinality();
- RETURN_IF_ERROR(_apply_index_except_leafnode_of_andnode());
- if (_can_filter_by_preds_except_leafnode_of_andnode()) {
- for (auto it = _remaining_conjunct_roots.begin();
- it != _remaining_conjunct_roots.end();) {
- _pred_except_leafnode_of_andnode_evaluate_result.clear();
- auto res = _execute_predicates_except_leafnode_of_andnode(*it);
- VLOG_DEBUG << "_execute_predicates_except_leafnode_of_andnode
expr: "
- << (*it)->debug_string() << " res: " << res;
- if (res.ok() &&
_pred_except_leafnode_of_andnode_evaluate_result.size() == 1) {
- _row_bitmap &=
_pred_except_leafnode_of_andnode_evaluate_result[0];
- // Delete expr after it obtains the final result.
- {
- std::erase_if(_common_expr_ctxs_push_down,
- [&it](const auto& iter) { return
iter->root() == *it; });
- VLOG_DEBUG << "_remaining_conjunct_roots erase expr: "
- << (*it)->debug_string();
- it = _remaining_conjunct_roots.erase(it);
- }
- } else {
- ++it;
- }
- }
- _col_preds_except_leafnode_of_andnode.clear();
- compound_func_exprs.clear();
- // 1. if all conditions in the compound hit the inverted index and
there are no other expr to handle.
- // 2. then there is no need to generate index_result_column.
- if (_enable_common_expr_pushdown &&
_remaining_conjunct_roots.empty()) {
- for (auto& iter : _rowid_result_for_index) {
- iter.second.first = false;
- }
- }
+ RETURN_IF_ERROR(_apply_bitmap_index());
+ RETURN_IF_ERROR(_apply_inverted_index());
+ RETURN_IF_ERROR(_apply_index_expr());
+ size_t input_rows = _row_bitmap.cardinality();
+ for (auto it = _common_expr_ctxs_push_down.begin(); it !=
_common_expr_ctxs_push_down.end();) {
+ if ((*it)->all_expr_inverted_index_evaluated()) {
+ auto result = (*it)->get_inverted_index_result_for_root();
+ _row_bitmap &= *result.get_data_bitmap();
+ it = _common_expr_ctxs_push_down.erase(it);
+ } else {
+ ++it;
}
- _opts.stats->rows_inverted_index_filtered += (input_rows -
_row_bitmap.cardinality());
}
- RETURN_IF_ERROR(_apply_bitmap_index());
- RETURN_IF_ERROR(_apply_inverted_index());
+ _opts.stats->rows_inverted_index_filtered += (input_rows -
_row_bitmap.cardinality());
for (auto cid : _schema->column_ids()) {
- bool result_true =
_check_all_predicates_passed_inverted_index_for_column(cid);
+ bool result_true =
_check_all_predicates_passed_inverted_index_for_column(cid) &&
+
_check_all_exprs_passed_inverted_index_for_column(cid);
Review Comment:
`default_return` is false for the two `_check` functions, so if a cid is not
in the status map, the check will return false. I don't think that is right.
##########
be/src/vec/exprs/vectorized_fn_call.cpp:
##########
@@ -189,17 +209,10 @@ Status
VectorizedFnCall::_do_execute(doris::vectorized::VExprContext* context,
size_t num_columns_without_result = block->columns();
// prepare a column to save result
block->insert({nullptr, _data_type, _expr_name});
- if (_can_fast_execute) {
- auto can_fast_execute = fast_execute(*block, args,
num_columns_without_result,
- block->rows(),
_function->get_name());
- if (can_fast_execute) {
- *result_column_id = num_columns_without_result;
- return Status::OK();
- }
- }
RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index),
*block, args,
num_columns_without_result,
block->rows(), false));
*result_column_id = num_columns_without_result;
+ auto result_column =
block->get_by_position(num_columns_without_result).column;
Review Comment:
useless code?
##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,76 +54,94 @@ class VCompoundPred : public VectorizedFnCall {
const std::string& expr_name() const override { return _expr_name; }
- // 1. when meet 'or' conjunct: a or b, if b can apply index, return all
rows, so b should not be extracted
- // 2. when meet 'and' conjunct, function with column b can not apply
inverted index
- // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not
for index, so b should not be extracted
- // but a and array_contains(b, 1), b can be applied inverted
index, which b can be extracted
- Status eval_inverted_index(
- VExprContext* context,
- const std::unordered_map<ColumnId,
std::pair<vectorized::IndexFieldNameAndTypePair,
-
segment_v2::InvertedIndexIterator*>>&
- colid_to_inverted_index_iter,
- uint32_t num_rows, roaring::Roaring* bitmap) const override {
- std::shared_ptr<roaring::Roaring> res =
std::make_shared<roaring::Roaring>();
- if (_op == TExprOpcode::COMPOUND_OR) {
- for (auto child : _children) {
- std::shared_ptr<roaring::Roaring> child_roaring =
- std::make_shared<roaring::Roaring>();
- Status st = child->eval_inverted_index(context,
colid_to_inverted_index_iter,
- num_rows,
child_roaring.get());
- if (!st.ok()) {
- bitmap->addRange(0, num_rows);
- return st;
+ Status evaluate_inverted_index(VExprContext* context,
+ uint32_t segment_num_rows) const override {
+ segment_v2::InvertedIndexResultBitmap res;
+ bool all_pass = true;
+
+ switch (_op) {
+ case TExprOpcode::COMPOUND_OR: {
+ for (const auto& child : _children) {
+ if (Status st = child->evaluate_inverted_index(context,
segment_num_rows);
+ !st.ok()) {
+ LOG(ERROR) << "expr:" << child->expr_name()
+ << " evaluate_inverted_index error:" <<
st.to_string();
+ all_pass = false;
+ continue;
}
- if (child_roaring->cardinality() == 0) {
- // means inverted index filter do not reduce any rows
- // the left expr no need to be extracted by inverted index,
- // and cur roaring is all rows which means this inverted
index is not useful,
- // do not need to calculate with res bitmap
- bitmap->addRange(0, num_rows);
- return Status::OK();
+ if (context->has_inverted_index_result_for_expr(child.get())) {
+ auto index_result =
context->get_inverted_index_result_for_expr(child.get());
+ if (res.is_empty()) {
+ res = std::move(index_result);
+ } else {
+ res |= index_result;
+ }
+ } else {
+ all_pass = false;
}
- *res |= *child_roaring;
}
- *bitmap = *res;
- } else if (_op == TExprOpcode::COMPOUND_AND) {
- for (int i = 0; i < _children.size(); ++i) {
- std::shared_ptr<roaring::Roaring> child_roaring =
- std::make_shared<roaring::Roaring>();
- Status st = _children[0]->eval_inverted_index(context,
colid_to_inverted_index_iter,
- num_rows,
child_roaring.get());
- if (!st.ok()) {
+ break;
+ }
+ case TExprOpcode::COMPOUND_AND: {
+ for (const auto& child : _children) {
+ if (Status st = child->evaluate_inverted_index(context,
segment_num_rows);
Review Comment:
Consider an AND short circuit before `evaluate_inverted_index`.
##########
be/src/vec/exprs/vin_predicate.cpp:
##########
@@ -101,10 +103,59 @@ void VInPredicate::close(VExprContext* context,
FunctionContext::FunctionStateSc
VExpr::close(context, scope);
}
+Status VInPredicate::evaluate_inverted_index(VExprContext* context,
+ uint32_t segment_num_rows) const {
+ DCHECK_GE(get_num_children(), 2);
+ if (get_child(0)->is_slot_ref()) {
Review Comment:
almost the same as FunctionCall
##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,76 +54,94 @@ class VCompoundPred : public VectorizedFnCall {
const std::string& expr_name() const override { return _expr_name; }
- // 1. when meet 'or' conjunct: a or b, if b can apply index, return all
rows, so b should not be extracted
- // 2. when meet 'and' conjunct, function with column b can not apply
inverted index
- // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not
for index, so b should not be extracted
- // but a and array_contains(b, 1), b can be applied inverted
index, which b can be extracted
- Status eval_inverted_index(
- VExprContext* context,
- const std::unordered_map<ColumnId,
std::pair<vectorized::IndexFieldNameAndTypePair,
-
segment_v2::InvertedIndexIterator*>>&
- colid_to_inverted_index_iter,
- uint32_t num_rows, roaring::Roaring* bitmap) const override {
- std::shared_ptr<roaring::Roaring> res =
std::make_shared<roaring::Roaring>();
- if (_op == TExprOpcode::COMPOUND_OR) {
- for (auto child : _children) {
- std::shared_ptr<roaring::Roaring> child_roaring =
- std::make_shared<roaring::Roaring>();
- Status st = child->eval_inverted_index(context,
colid_to_inverted_index_iter,
- num_rows,
child_roaring.get());
- if (!st.ok()) {
- bitmap->addRange(0, num_rows);
- return st;
+ Status evaluate_inverted_index(VExprContext* context,
+ uint32_t segment_num_rows) const override {
+ segment_v2::InvertedIndexResultBitmap res;
+ bool all_pass = true;
+
+ switch (_op) {
+ case TExprOpcode::COMPOUND_OR: {
+ for (const auto& child : _children) {
+ if (Status st = child->evaluate_inverted_index(context,
segment_num_rows);
+ !st.ok()) {
+ LOG(ERROR) << "expr:" << child->expr_name()
+ << " evaluate_inverted_index error:" <<
st.to_string();
+ all_pass = false;
+ continue;
}
- if (child_roaring->cardinality() == 0) {
- // means inverted index filter do not reduce any rows
- // the left expr no need to be extracted by inverted index,
- // and cur roaring is all rows which means this inverted
index is not useful,
- // do not need to calculate with res bitmap
- bitmap->addRange(0, num_rows);
- return Status::OK();
+ if (context->has_inverted_index_result_for_expr(child.get())) {
+ auto index_result =
context->get_inverted_index_result_for_expr(child.get());
+ if (res.is_empty()) {
+ res = std::move(index_result);
+ } else {
+ res |= index_result;
+ }
+ } else {
+ all_pass = false;
}
- *res |= *child_roaring;
}
- *bitmap = *res;
- } else if (_op == TExprOpcode::COMPOUND_AND) {
- for (int i = 0; i < _children.size(); ++i) {
- std::shared_ptr<roaring::Roaring> child_roaring =
- std::make_shared<roaring::Roaring>();
- Status st = _children[0]->eval_inverted_index(context,
colid_to_inverted_index_iter,
- num_rows,
child_roaring.get());
- if (!st.ok()) {
+ break;
+ }
+ case TExprOpcode::COMPOUND_AND: {
+ for (const auto& child : _children) {
+ if (Status st = child->evaluate_inverted_index(context,
segment_num_rows);
+ !st.ok()) {
+ LOG(ERROR) << "expr:" << child->expr_name()
+ << " evaluate_inverted_index error:" <<
st.to_string();
+ all_pass = false;
continue;
}
- if (i == 0) {
- *res = *child_roaring;
+ if (context->has_inverted_index_result_for_expr(child.get())) {
+ auto index_result =
context->get_inverted_index_result_for_expr(child.get());
+ if (res.is_empty()) {
+ res = std::move(index_result);
+ } else {
+ res &= index_result;
+ }
+
+ if (res.get_data_bitmap()->isEmpty()) {
+ break; // Early exit if result is empty
+ }
} else {
- *res &= *child_roaring;
- }
- if (res->isEmpty()) {
- // the left expr no need to be extracted by inverted
index, just return 0 rows
- // res bitmap will be zero
- return Status::OK();
+ all_pass = false;
}
}
- *bitmap = *res;
- } else if (_op == TExprOpcode::COMPOUND_NOT) {
- Status st = _children[0]->eval_inverted_index(context,
colid_to_inverted_index_iter,
- num_rows, res.get());
+ break;
+ }
+ case TExprOpcode::COMPOUND_NOT: {
+ const auto& child = _children[0];
+ Status st = child->evaluate_inverted_index(context,
segment_num_rows);
if (!st.ok()) {
+ LOG(ERROR) << "expr:" << child->expr_name()
+ << " evaluate_inverted_index error:" <<
st.to_string();
return st;
}
- std::shared_ptr<roaring::Roaring> all_rows =
std::make_shared<roaring::Roaring>();
- all_rows->addRange(0, num_rows);
- *bitmap = *all_rows - *res;
- } else {
+
+ if (context->has_inverted_index_result_for_expr(child.get())) {
+ auto index_result =
context->get_inverted_index_result_for_expr(child.get());
+ roaring::Roaring full_result;
+ full_result.addRange(0, segment_num_rows);
+ res = std::move(index_result.op_not(&full_result));
+ } else {
+ all_pass = false;
+ }
+ break;
+ }
+ default:
return Status::NotSupported(
- "Compound operator must be AND or OR or Not can execute
with inverted index.");
+ "Compound operator must be AND, OR, or NOT to execute with
inverted index.");
+ }
+
+ if (all_pass && !res.is_empty()) {
Review Comment:
if all_pass && res.is_empty(), set_inverted_index_result_for_expr should be
called.
##########
be/src/vec/exprs/vcompound_pred.h:
##########
@@ -53,76 +54,94 @@ class VCompoundPred : public VectorizedFnCall {
const std::string& expr_name() const override { return _expr_name; }
- // 1. when meet 'or' conjunct: a or b, if b can apply index, return all
rows, so b should not be extracted
- // 2. when meet 'and' conjunct, function with column b can not apply
inverted index
- // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not
for index, so b should not be extracted
- // but a and array_contains(b, 1), b can be applied inverted
index, which b can be extracted
- Status eval_inverted_index(
- VExprContext* context,
- const std::unordered_map<ColumnId,
std::pair<vectorized::IndexFieldNameAndTypePair,
-
segment_v2::InvertedIndexIterator*>>&
- colid_to_inverted_index_iter,
- uint32_t num_rows, roaring::Roaring* bitmap) const override {
- std::shared_ptr<roaring::Roaring> res =
std::make_shared<roaring::Roaring>();
- if (_op == TExprOpcode::COMPOUND_OR) {
- for (auto child : _children) {
- std::shared_ptr<roaring::Roaring> child_roaring =
- std::make_shared<roaring::Roaring>();
- Status st = child->eval_inverted_index(context,
colid_to_inverted_index_iter,
- num_rows,
child_roaring.get());
- if (!st.ok()) {
- bitmap->addRange(0, num_rows);
- return st;
+ Status evaluate_inverted_index(VExprContext* context,
+ uint32_t segment_num_rows) const override {
+ segment_v2::InvertedIndexResultBitmap res;
+ bool all_pass = true;
+
+ switch (_op) {
+ case TExprOpcode::COMPOUND_OR: {
+ for (const auto& child : _children) {
+ if (Status st = child->evaluate_inverted_index(context,
segment_num_rows);
Review Comment:
Consider short-circuiting the OR before calling evaluate_inverted_index.
##########
be/src/vec/functions/in.h:
##########
@@ -135,6 +135,60 @@ class FunctionIn : public IFunction {
return Status::OK();
}
+ Status evaluate_inverted_index(
+ const ColumnsWithTypeAndName& arguments,
+ const vectorized::IndexFieldNameAndTypePair& data_type_with_name,
+ segment_v2::InvertedIndexIterator* iter, uint32_t num_rows,
+ segment_v2::InvertedIndexResultBitmap& bitmap_result) const
override {
+ std::shared_ptr<roaring::Roaring> roaring =
std::make_shared<roaring::Roaring>();
+ if (iter == nullptr) {
+ return Status::OK();
+ }
+ if (iter->get_inverted_index_reader_type() ==
+ segment_v2::InvertedIndexReaderType::FULLTEXT) {
+ //NOT support in list when parser is FULLTEXT for expr inverted
index evaluate.
+ return Status::OK();
+ }
+ std::string column_name = data_type_with_name.first;
+ //NOTE: maybe we got NULL process problem here, need to figure it out.
+ for (const auto& arg : arguments) {
+ Field param_value;
+ arg.column->get(0, param_value);
+ auto param_type = arg.type->get_type_as_type_descriptor().type;
+ // predicate like column NOT IN (NULL, '') should not push down to
index.
+ if (negative && param_value.is_null()) {
+ return Status::OK();
+ }
+ std::unique_ptr<InvertedIndexQueryParamFactory> query_param =
nullptr;
+ RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(
+ param_type, &param_value, query_param));
+ InvertedIndexQueryType query_type =
InvertedIndexQueryType::EQUAL_QUERY;
+ std::shared_ptr<roaring::Roaring> index =
std::make_shared<roaring::Roaring>();
+ RETURN_IF_ERROR(iter->read_from_inverted_index(column_name,
query_param->get_value(),
+ query_type,
num_rows, index));
+ *roaring |= *index;
+ }
+
+ if (iter->has_null()) {
+ segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
+ RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
+ std::shared_ptr<roaring::Roaring> null_bitmap =
null_bitmap_cache_handle.get_bitmap();
+ segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
+ bitmap_result = result;
+ } else {
+ std::shared_ptr<roaring::Roaring> null_bitmap =
std::make_shared<roaring::Roaring>();
Review Comment:
This duplicates code in the if branch; the common part can be moved outside the if/else.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]