This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new b0da8430bc3 [opt](inverted index) Optimize the usage of the
multi_match function (#39472)
b0da8430bc3 is described below
commit b0da8430bc3c50a03c5c3dc671d543f5756ac6d8
Author: zzzxl <[email protected]>
AuthorDate: Sat Aug 17 16:53:52 2024 +0800
[opt](inverted index) Optimize the usage of the multi_match function
(#39472)
## Proposed changes
https://github.com/apache/doris/pull/39193
<!--Describe your changes.-->
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 117 ++++++++------------
be/src/olap/rowset/segment_v2/segment_iterator.h | 10 +-
be/src/vec/exprs/vectorized_fn_call.cpp | 5 +-
be/src/vec/exprs/vectorized_fn_call.h | 2 +-
be/src/vec/exprs/vexpr.cpp | 2 +-
be/src/vec/exprs/vexpr.h | 2 +-
be/src/vec/functions/function.h | 8 +-
be/src/vec/functions/function_multi_match.cpp | 121 +++++++--------------
be/src/vec/functions/function_multi_match.h | 17 +--
.../expressions/functions/scalar/MultiMatch.java | 22 +---
gensrc/script/doris_builtins_functions.py | 2 +-
.../test_index_multi_match.groovy | 16 +--
12 files changed, 123 insertions(+), 201 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 2da3c5ece93..e82a2f57d40 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -389,6 +389,14 @@ void SegmentIterator::_initialize_predicate_results() {
_column_predicate_inverted_index_status[cid][pred_sign] = false;
}
}
+
+ // Initialize from _func_name_to_result_sign
+ for (auto& iter : _func_name_to_result_sign) {
+ for (auto& pred_sign : iter.second) {
+ auto column_id = _opts.tablet_schema->field_index(iter.first);
+ _column_predicate_inverted_index_status[column_id][pred_sign] =
false;
+ }
+ }
}
Status SegmentIterator::init_iterators() {
@@ -570,7 +578,6 @@ Status
SegmentIterator::_get_row_ranges_by_column_conditions() {
RETURN_IF_ERROR(_apply_inverted_index());
for (auto cid : _schema->column_ids()) {
bool result_true =
_check_all_predicates_passed_inverted_index_for_column(cid);
-
if (result_true) {
_need_read_data_indices[cid] = false;
}
@@ -886,8 +893,7 @@ bool
SegmentIterator::_can_filter_by_preds_except_leafnode_of_andnode() {
return false;
}
}
- for (const auto& func_expr_pair : compound_func_exprs) {
- const auto& expr = func_expr_pair.first;
+ for (const auto& expr : compound_func_exprs) {
std::string pred_result_sign =
BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
std::to_string(expr->index_unique_id());
if (!_rowid_result_for_index.contains(pred_result_sign)) {
@@ -999,14 +1005,20 @@ Status
SegmentIterator::_apply_index_except_leafnode_of_andnode() {
}
}
- for (const auto& func_expr_pair : compound_func_exprs) {
- const auto& expr = func_expr_pair.first;
- const auto& expr_ctx = func_expr_pair.second;
+ for (const auto& expr : compound_func_exprs) {
+ roaring::Roaring bitmap = _row_bitmap;
auto result = std::make_shared<roaring::Roaring>();
- RETURN_IF_ERROR(execute_func_expr(expr, expr_ctx, result));
+ RETURN_IF_ERROR(execute_func_expr(expr, result));
+ bitmap &= *result;
std::string result_sign =
BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
std::to_string(expr->index_unique_id());
- _rowid_result_for_index.emplace(result_sign, std::make_pair(true,
std::move(*result)));
+ _rowid_result_for_index.emplace(result_sign, std::make_pair(true,
std::move(bitmap)));
+ for (const auto& child_expr : expr->children()) {
+ if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
+ auto column_id =
_opts.tablet_schema->field_index(child_expr->expr_name());
+
_column_predicate_inverted_index_status[column_id][result_sign] = true;
+ }
+ }
}
return Status::OK();
@@ -1300,11 +1312,9 @@ Status SegmentIterator::_apply_inverted_index() {
}
}
- for (const auto& func_expr_pair : no_compound_func_exprs) {
- const auto& expr = func_expr_pair.first;
- const auto& expr_ctx = func_expr_pair.second;
+ for (const auto& expr : no_compound_func_exprs) {
auto result = std::make_shared<roaring::Roaring>();
- RETURN_IF_ERROR(execute_func_expr(expr, expr_ctx, result));
+ RETURN_IF_ERROR(execute_func_expr(expr, result));
_row_bitmap &= *result;
for (auto it = _remaining_conjunct_roots.begin(); it !=
_remaining_conjunct_roots.end();) {
if (*it == expr) {
@@ -1315,6 +1325,14 @@ Status SegmentIterator::_apply_inverted_index() {
++it;
}
}
+ std::string result_sign =
+ BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
std::to_string(expr->index_unique_id());
+ for (const auto& child_expr : expr->children()) {
+ if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
+ auto column_id =
_opts.tablet_schema->field_index(child_expr->expr_name());
+
_column_predicate_inverted_index_status[column_id][result_sign] = true;
+ }
+ }
}
_col_predicates = std::move(remaining_predicates);
@@ -1408,17 +1426,6 @@ Status SegmentIterator::_init_inverted_index_iterators()
{
return Status::OK();
}
-Status SegmentIterator::_init_inverted_index_iterators(ColumnId cid) {
- std::lock_guard lock(_idx_init_lock);
- if (_inverted_index_iterators[cid] == nullptr) {
- return _segment->new_inverted_index_iterator(
- _opts.tablet_schema->column(cid),
-
_segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid)),
- _opts, &_inverted_index_iterators[cid]);
- }
- return Status::OK();
-}
-
Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include,
rowid_t upper_bound,
rowid_t* rowid) {
if (_segment->_tablet_schema->keys_type() == UNIQUE_KEYS &&
@@ -2753,53 +2760,16 @@ Status
SegmentIterator::current_block_row_locations(std::vector<RowLocation>* bl
return Status::OK();
}
-/**
- * solution 1: where cluase included nodes are all `and` leaf nodes,
- * predicate pushed down and remove from vconjunct.
- * for example: where A = 1 and B = 'test' and B like '%he%';
- * column A : `A = 1` pushed down, this column's predicates all pushed
down,
- * call _check_column_pred_all_push_down will return true.
- * column B : `B = 'test'` pushed down, but `B like '%he%'` remain in
vconjunct,
- * call _check_column_pred_all_push_down will return false.
- *
- * solution 2: where cluase included nodes are compound or other complex
conditions,
- * predicate pushed down but still remain in vconjunct.
- * for exmple: where (A = 1 and B = 'test') or B = 'hi' or (C like '%ye%' and
C > 'aa');
- * column A : `A = 1` pushed down, check it applyed by index,
- * call _check_column_pred_all_push_down will return true.
- * column B : `B = 'test'`, `B = 'hi'` all pushed down, check them all
applyed by index,
- * call _check_column_pred_all_push_down will return true.
- * column C : `C like '%ye%'` not pushed down, `C > 'aa'` pushed down,
only `C > 'aa'` applyed by index,
- * call _check_column_pred_all_push_down will return false.
-*/
-bool SegmentIterator::_check_column_pred_all_push_down(const std::string&
column_name,
- bool in_compound, bool
is_match) {
- if (_remaining_conjunct_roots.empty()) {
- return true;
- }
-
- if (in_compound || is_match) {
- auto preds_in_remaining_vconjuct =
_column_pred_in_remaining_vconjunct[column_name];
- for (auto pred_info : preds_in_remaining_vconjuct) {
- auto column_sign = _gen_predicate_result_sign(&pred_info);
- if (!_rowid_result_for_index.contains(column_sign)) {
- return false;
- }
- }
- } else {
- if (_column_pred_in_remaining_vconjunct[column_name].size() != 0) {
- return false;
- }
- }
- return true;
-}
-
void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
const vectorized::VExprSPtr& expr) {
if (expr == nullptr) {
return;
}
+ if (expr->fn().name.function_name == "multi_match") {
+ return;
+ }
+
auto& children = expr->children();
for (int i = 0; i < children.size(); ++i) {
_calculate_pred_in_remaining_conjunct_root(children[i]);
@@ -2887,13 +2857,21 @@ void
SegmentIterator::_calculate_func_in_remaining_conjunct_root() {
bool current_has_compound_pred =
has_compound_pred || (expr->node_type() ==
TExprNodeType::COMPOUND_PRED);
- if (expr->node_type() == TExprNodeType::FUNCTION_CALL &&
- expr->can_push_down_to_index()) {
+ if (expr->fn().name.function_name == "multi_match") {
expr->set_index_unique_id(gen_func_unique_id(expr));
if (current_has_compound_pred) {
- compound_func_exprs.emplace_back(expr, root_expr_ctx);
+ compound_func_exprs.emplace_back(expr);
} else {
- no_compound_func_exprs.emplace_back(expr, root_expr_ctx);
+ no_compound_func_exprs.emplace_back(expr);
+ }
+
+ for (int32_t i = expr->get_num_children() - 1; i >= 0; i--) {
+ auto child_expr = expr->get_child(i);
+ if (child_expr->node_type() ==
TExprNodeType::type::SLOT_REF) {
+ std::string result_sign =
BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
+
std::to_string(expr->index_unique_id());
+
_func_name_to_result_sign[child_expr->expr_name()].push_back(result_sign);
+ }
}
}
@@ -2986,7 +2964,6 @@ bool SegmentIterator::_can_opt_topn_reads() const {
}
Status SegmentIterator::execute_func_expr(const vectorized::VExprSPtr& expr,
- const vectorized::VExprContextSPtr&
expr_ctx,
std::shared_ptr<roaring::Roaring>&
result) {
const auto& expr0 = expr->get_child(0);
if (!expr0 || expr0->node_type() != TExprNodeType::SLOT_REF) {
@@ -3000,7 +2977,7 @@ Status SegmentIterator::execute_func_expr(const
vectorized::VExprSPtr& expr,
params._column_name =
_opts.tablet_schema->column(params._column_id).name();
params._segment_iterator = this;
- return expr->eval_inverted_index(expr_ctx.get(), params, result);
+ return expr->eval_inverted_index(params, result);
}
} // namespace segment_v2
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 2f293f01b0c..2a5bdf4fd13 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -156,7 +156,6 @@ public:
std::vector<std::unique_ptr<InvertedIndexIterator>>&
inverted_index_iterators() {
return _inverted_index_iterators;
}
- [[nodiscard]] Status _init_inverted_index_iterators(ColumnId cid);
private:
Status _next_batch_internal(vectorized::Block* block);
@@ -404,7 +403,6 @@ private:
bool _can_opt_topn_reads() const;
Status execute_func_expr(const vectorized::VExprSPtr& expr,
- const vectorized::VExprContextSPtr& expr_ctx,
std::shared_ptr<roaring::Roaring>& result);
void _initialize_predicate_results();
bool _check_all_predicates_passed_inverted_index_for_column(ColumnId cid);
@@ -475,9 +473,8 @@ private:
std::vector<ColumnPredicate*> _col_predicates;
std::vector<ColumnPredicate*> _col_preds_except_leafnode_of_andnode;
- using FuncExprPair = std::pair<vectorized::VExprSPtr,
vectorized::VExprContextSPtr>;
- std::vector<FuncExprPair> no_compound_func_exprs;
- std::vector<FuncExprPair> compound_func_exprs;
+ std::vector<vectorized::VExprSPtr> no_compound_func_exprs;
+ std::vector<vectorized::VExprSPtr> compound_func_exprs;
vectorized::VExprContextSPtrs _common_expr_ctxs_push_down;
bool _enable_common_expr_pushdown = false;
@@ -486,6 +483,7 @@ private:
std::unique_ptr<ColumnPredicateInfo> _column_predicate_info;
std::unordered_map<std::string, std::vector<ColumnPredicateInfo>>
_column_pred_in_remaining_vconjunct;
+ std::unordered_map<std::string, std::vector<std::string>>
_func_name_to_result_sign;
std::set<ColumnId> _not_apply_index_pred;
// row schema of the key to seek
@@ -525,8 +523,6 @@ private:
std::unordered_map<int, std::unordered_map<std::string, bool>>
_column_predicate_inverted_index_status;
-
- std::mutex _idx_init_lock;
};
} // namespace segment_v2
diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp
b/be/src/vec/exprs/vectorized_fn_call.cpp
index 5b5cdc2e8a0..2082a93d423 100644
--- a/be/src/vec/exprs/vectorized_fn_call.cpp
+++ b/be/src/vec/exprs/vectorized_fn_call.cpp
@@ -230,10 +230,9 @@ bool VectorizedFnCall::can_fast_execute() const {
return _function->can_push_down_to_index();
}
-Status VectorizedFnCall::eval_inverted_index(VExprContext* context,
- segment_v2::FuncExprParams&
params,
+Status VectorizedFnCall::eval_inverted_index(segment_v2::FuncExprParams&
params,
std::shared_ptr<roaring::Roaring>& result) {
- return
_function->eval_inverted_index(context->fn_context(_fn_context_index), params,
result);
+ return _function->eval_inverted_index(this, params, result);
}
bool VectorizedFnCall::equals(const VExpr& other) {
diff --git a/be/src/vec/exprs/vectorized_fn_call.h
b/be/src/vec/exprs/vectorized_fn_call.h
index 3ef7a7464f5..dea5e90f9b7 100644
--- a/be/src/vec/exprs/vectorized_fn_call.h
+++ b/be/src/vec/exprs/vectorized_fn_call.h
@@ -68,7 +68,7 @@ public:
bool can_push_down_to_index() const override;
bool can_fast_execute() const override;
- Status eval_inverted_index(VExprContext* context,
segment_v2::FuncExprParams& params,
+ Status eval_inverted_index(segment_v2::FuncExprParams& params,
std::shared_ptr<roaring::Roaring>& result)
override;
bool equals(const VExpr& other) override;
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index e6a2cf117a2..bd7f1c2ab97 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -626,7 +626,7 @@ bool VExpr::fast_execute(Block& block, const ColumnNumbers&
arguments, size_t re
std::string VExpr::gen_predicate_result_sign(Block& block, const
ColumnNumbers& arguments,
const std::string& function_name)
const {
std::string pred_result_sign;
- if (this->node_type() == TExprNodeType::FUNCTION_CALL) {
+ if (this->fn().name.function_name == "multi_match") {
pred_result_sign =
BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
std::to_string(this->index_unique_id());
} else {
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
index 708b6c8ea90..0f611cb0920 100644
--- a/be/src/vec/exprs/vexpr.h
+++ b/be/src/vec/exprs/vexpr.h
@@ -230,7 +230,7 @@ public:
virtual bool can_push_down_to_index() const { return false; }
virtual bool can_fast_execute() const { return false; }
- virtual Status eval_inverted_index(VExprContext* context,
segment_v2::FuncExprParams& params,
+ virtual Status eval_inverted_index(segment_v2::FuncExprParams& params,
std::shared_ptr<roaring::Roaring>&
result) {
return Status::NotSupported("Not supported
execute_with_inverted_index");
}
diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h
index d880836248f..110e064a59f 100644
--- a/be/src/vec/functions/function.h
+++ b/be/src/vec/functions/function.h
@@ -220,7 +220,8 @@ public:
}
virtual bool can_push_down_to_index() const { return false; }
- virtual Status eval_inverted_index(FunctionContext* context,
segment_v2::FuncExprParams& params,
+
+ virtual Status eval_inverted_index(VExpr* context,
segment_v2::FuncExprParams& params,
std::shared_ptr<roaring::Roaring>&
result) {
return Status::NotSupported("eval_inverted_index is not supported in
function: ",
get_name());
@@ -506,9 +507,10 @@ public:
}
bool can_push_down_to_index() const override { return
function->can_push_down_to_index(); }
- Status eval_inverted_index(FunctionContext* context,
segment_v2::FuncExprParams& params,
+
+ Status eval_inverted_index(VExpr* expr, segment_v2::FuncExprParams& params,
std::shared_ptr<roaring::Roaring>& result)
override {
- return function->eval_inverted_index(context, params, result);
+ return function->eval_inverted_index(expr, params, result);
}
private:
diff --git a/be/src/vec/functions/function_multi_match.cpp
b/be/src/vec/functions/function_multi_match.cpp
index d34526d0f29..fa36453105e 100644
--- a/be/src/vec/functions/function_multi_match.cpp
+++ b/be/src/vec/functions/function_multi_match.cpp
@@ -49,91 +49,59 @@ Status FunctionMultiMatch::execute_impl(FunctionContext*
/*context*/, Block& blo
return Status::RuntimeError("only inverted index queries are supported");
}
-Status FunctionMultiMatch::open(FunctionContext* context,
- FunctionContext::FunctionStateScope scope) {
- if (scope == FunctionContext::THREAD_LOCAL) {
- return Status::OK();
- }
-
- DCHECK(context->get_num_args() == 4);
- for (int i = 0; i < context->get_num_args(); ++i) {
- DCHECK(is_string_type(context->get_arg_type(i)->type));
+InvertedIndexQueryType get_query_type(const std::string& query_type) {
+ if (query_type == "phrase_prefix") {
+ return InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY;
}
+ return InvertedIndexQueryType::UNKNOWN_QUERY;
+}
- std::shared_ptr<MatchParam> state = std::make_shared<MatchParam>();
- context->set_function_state(scope, state);
- for (int i = 0; i < context->get_num_args(); ++i) {
- const auto& const_column_ptr = context->get_constant_col(i);
- if (const_column_ptr) {
- auto const_data = const_column_ptr->column_ptr->get_data_at(0);
- switch (i) {
- case 1: {
- std::string field_names_str = const_data.to_string();
- field_names_str.erase(
- std::remove_if(field_names_str.begin(),
field_names_str.end(),
- [](unsigned char c) { return
std::isspace(c); }),
- field_names_str.end());
- std::vector<std::string> field_names;
- boost::split(field_names, field_names_str,
boost::algorithm::is_any_of(","));
- for (const auto& field_name : field_names) {
- if (!field_name.empty()) {
- state->fields.insert(field_name);
- }
- }
- } break;
- case 2:
- state->type = const_data.to_string();
- break;
- case 3:
- state->query = const_data.to_string();
- break;
- default:
- break;
- }
+Status FunctionMultiMatch::eval_inverted_index(VExpr* expr,
segment_v2::FuncExprParams& params,
+
std::shared_ptr<roaring::Roaring>& result) {
+ // fields
+ std::vector<std::string> query_fileds;
+ size_t i = 0;
+ for (; i < expr->get_num_children(); i++) {
+ auto child_expr = expr->get_child(i);
+ if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
+ query_fileds.emplace_back(child_expr->expr_name());
+ } else {
+ break;
}
}
+ if (i != expr->get_num_children() - 2) {
+ return Status::RuntimeError("parameter type incorrect: slot = {}", i);
+ }
- return Status::OK();
-}
-
-Status FunctionMultiMatch::eval_inverted_index(FunctionContext* context,
- segment_v2::FuncExprParams&
params,
-
std::shared_ptr<roaring::Roaring>& result) {
- auto* match_param = reinterpret_cast<MatchParam*>(
- context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
- if (match_param == nullptr) {
- return Status::RuntimeError("function parameter parsing failed");
+ // type
+ std::string param1 =
std::static_pointer_cast<VLiteral>(expr->get_child(i))->value();
+ auto query_type = get_query_type(param1);
+ if (query_type == InvertedIndexQueryType::UNKNOWN_QUERY) {
+ return Status::RuntimeError("parameter query type incorrect:
query_type = {}", query_type);
}
- match_param->fields.insert(params._column_name);
- const auto& segment_iterator = params._segment_iterator;
- const auto& opts = segment_iterator->storage_read_options();
- const auto& tablet_schema = opts.tablet_schema;
+ // query
+ std::string query_str =
std::static_pointer_cast<VLiteral>(expr->get_child(i + 1))->value();
+ auto& segment_iterator = params._segment_iterator;
+ auto& segment = segment_iterator->segment();
+ auto& opts = segment_iterator->storage_read_options();
+ auto& tablet_schema = opts.tablet_schema;
+ auto& idx_iterators = segment_iterator->inverted_index_iterators();
+
+ // check
std::vector<ColumnId> columns_ids;
- for (const auto& column_name : match_param->fields) {
+ for (const auto& column_name : query_fileds) {
auto cid = tablet_schema->field_index(column_name);
if (cid < 0) {
- return Status::RuntimeError("column name is incorrect");
- }
- const auto& column = tablet_schema->column(cid);
- if (!is_string_type(column.type())) {
- return Status::RuntimeError("column type is incorrect");
+ return Status::RuntimeError("column name is incorrect: {}",
column_name);
}
- if (!tablet_schema->has_inverted_index(column)) {
- return Status::RuntimeError("column index is incorrect");
+ if (idx_iterators[cid] == nullptr) {
+ return Status::RuntimeError("column idx is incorrect: {}",
column_name);
}
columns_ids.emplace_back(cid);
}
- // query type
- InvertedIndexQueryType query_type;
- if (match_param->type == "phrase_prefix") {
- query_type = InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY;
- } else {
- return Status::RuntimeError("query type is incorrect");
- }
-
// cache key
roaring::Roaring cids_str;
cids_str.addMany(columns_ids.size(), columns_ids.data());
@@ -142,11 +110,11 @@ Status
FunctionMultiMatch::eval_inverted_index(FunctionContext* context,
cids_str.write(column_name_binary.data());
InvertedIndexQueryCache::CacheKey cache_key;
- io::Path index_path = segment_iterator->segment().file_reader()->path();
+ io::Path index_path = segment.file_reader()->path();
cache_key.index_path = index_path.parent_path() / index_path.stem();
cache_key.column_name = column_name_binary;
cache_key.query_type = query_type;
- cache_key.value = match_param->query;
+ cache_key.value = query_str;
// query cache
auto* cache = InvertedIndexQueryCache::instance();
@@ -157,18 +125,13 @@ Status
FunctionMultiMatch::eval_inverted_index(FunctionContext* context,
}
// search
- for (const auto& column_name : match_param->fields) {
+ for (const auto& column_name : query_fileds) {
auto cid = tablet_schema->field_index(column_name);
const auto& column = tablet_schema->column(column_name);
-
- auto& index_iterator =
segment_iterator->inverted_index_iterators()[cid];
- if (!index_iterator) {
-
RETURN_IF_ERROR(segment_iterator->_init_inverted_index_iterators(cid));
- }
- const auto& index_reader = index_iterator->reader();
+ const auto& index_reader = idx_iterators[cid]->reader();
auto single_result = std::make_shared<roaring::Roaring>();
- StringRef query_value(match_param->query.data());
+ StringRef query_value(query_str.data());
auto index_version =
tablet_schema->get_inverted_index_storage_format();
if (index_version == InvertedIndexStorageFormatPB::V1) {
RETURN_IF_ERROR(index_reader->query(opts.stats,
opts.runtime_state, column_name,
diff --git a/be/src/vec/functions/function_multi_match.h
b/be/src/vec/functions/function_multi_match.h
index b7d2bd3c30e..8e30b6e9f22 100644
--- a/be/src/vec/functions/function_multi_match.h
+++ b/be/src/vec/functions/function_multi_match.h
@@ -26,13 +26,6 @@
namespace doris::vectorized {
-class MatchParam {
-public:
- std::string query;
- std::set<std::string> fields;
- std::string type;
-};
-
class FunctionMultiMatch : public IFunction {
public:
static constexpr auto name = "multi_match";
@@ -42,9 +35,9 @@ public:
String get_name() const override { return name; }
- bool is_variadic() const override { return false; }
+ bool is_variadic() const override { return true; }
- size_t get_number_of_arguments() const override { return 4; }
+ size_t get_number_of_arguments() const override { return 0; }
bool use_default_implementation_for_nulls() const override { return false;
}
@@ -52,7 +45,9 @@ public:
return std::make_shared<DataTypeUInt8>();
}
- Status open(FunctionContext* context, FunctionContext::FunctionStateScope
scope) override;
+ Status open(FunctionContext* context, FunctionContext::FunctionStateScope
scope) override {
+ return Status::OK();
+ }
Status close(FunctionContext* context, FunctionContext::FunctionStateScope
scope) override {
return Status::OK();
@@ -63,7 +58,7 @@ public:
bool can_push_down_to_index() const override { return true; }
- Status eval_inverted_index(FunctionContext* context,
segment_v2::FuncExprParams& params,
+ Status eval_inverted_index(VExpr* expr, segment_v2::FuncExprParams& params,
std::shared_ptr<roaring::Roaring>& result)
override;
};
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MultiMatch.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MultiMatch.java
index 3df662808ed..1f7c739eefe 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MultiMatch.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MultiMatch.java
@@ -32,33 +32,23 @@ import com.google.common.collect.ImmutableList;
import java.util.List;
/**
- * ScalarFunction 'multi_match'. This class is generated by GenerateFunction.
+ * ScalarFunction 'multi_match'.
*/
public class MultiMatch extends ScalarFunction
implements BinaryExpression, ExplicitlyCastableSignature,
AlwaysNotNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
- FunctionSignature.ret(BooleanType.INSTANCE)
- .args(StringType.INSTANCE,
- StringType.INSTANCE,
- StringType.INSTANCE,
- StringType.INSTANCE)
+
FunctionSignature.ret(BooleanType.INSTANCE).varArgs(StringType.INSTANCE)
);
- /**
- * constructor with 4 arguments.
- */
- public MultiMatch(Expression arg0, Expression arg1, Expression arg2,
Expression arg3) {
- super("multi_match", arg0, arg1, arg2, arg3);
+ public MultiMatch(Expression ...varArgs) {
+ super("multi_match", varArgs);
}
- /**
- * withChildren.
- */
@Override
public MultiMatch withChildren(List<Expression> children) {
- Preconditions.checkArgument(children.size() == 4);
- return new MultiMatch(children.get(0), children.get(1),
children.get(2), children.get(3));
+ Preconditions.checkArgument(children.size() >= 3);
+ return new MultiMatch(children.toArray(new Expression[0]));
}
@Override
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index ea2a15f6fcf..018d71385e0 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2212,7 +2212,7 @@ visible_functions = {
# multi match functions
"MultiMatch": [
- [['multi_match'], 'BOOLEAN', ['STRING', 'STRING', 'STRING', 'STRING'],
'ALWAYS_NOT_NULLABLE']
+ [['multi_match'], 'BOOLEAN', ['STRING', '...'], 'ALWAYS_NOT_NULLABLE']
]
}
diff --git
a/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
b/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
index 90f9f7a751b..c926bc4d510 100644
--- a/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
+++ b/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
@@ -113,15 +113,15 @@ suite("test_index_multi_match", "p0"){
qt_sql """ select count() from ${indexTbName2} where (clientip
match_phrase_prefix '2' or request match_phrase_prefix '2' or status
match_phrase_prefix '2' or size match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName2} where (clientip
match_phrase_prefix 'a' or request match_phrase_prefix 'a' or status
match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """
- qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, '', 'phrase_prefix', '2'); """
- qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, 'request', 'phrase_prefix', '2'); """
- qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
- qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, request, 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, request, status, size, 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, request, status, size, 'phrase_prefix', 'a'); """
- qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, '', 'phrase_prefix', '2'); """
- qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, 'request', 'phrase_prefix', '2'); """
- qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
- qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, request, 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, request, status, size, 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, request, status, size, 'phrase_prefix', 'a'); """
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org