This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new b0da8430bc3 [opt](inverted index) Optimize the usage of the multi_match function (#39472)
b0da8430bc3 is described below

commit b0da8430bc3c50a03c5c3dc671d543f5756ac6d8
Author: zzzxl <[email protected]>
AuthorDate: Sat Aug 17 16:53:52 2024 +0800

    [opt](inverted index) Optimize the usage of the multi_match function (#39472)
    
    ## Proposed changes
    
    https://github.com/apache/doris/pull/39193
    
    <!--Describe your changes.-->
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 117 ++++++++------------
 be/src/olap/rowset/segment_v2/segment_iterator.h   |  10 +-
 be/src/vec/exprs/vectorized_fn_call.cpp            |   5 +-
 be/src/vec/exprs/vectorized_fn_call.h              |   2 +-
 be/src/vec/exprs/vexpr.cpp                         |   2 +-
 be/src/vec/exprs/vexpr.h                           |   2 +-
 be/src/vec/functions/function.h                    |   8 +-
 be/src/vec/functions/function_multi_match.cpp      | 121 +++++++--------------
 be/src/vec/functions/function_multi_match.h        |  17 +--
 .../expressions/functions/scalar/MultiMatch.java   |  22 +---
 gensrc/script/doris_builtins_functions.py          |   2 +-
 .../test_index_multi_match.groovy                  |  16 +--
 12 files changed, 123 insertions(+), 201 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 2da3c5ece93..e82a2f57d40 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -389,6 +389,14 @@ void SegmentIterator::_initialize_predicate_results() {
             _column_predicate_inverted_index_status[cid][pred_sign] = false;
         }
     }
+
+    // Initialize from _func_name_to_result_sign
+    for (auto& iter : _func_name_to_result_sign) {
+        for (auto& pred_sign : iter.second) {
+            auto column_id = _opts.tablet_schema->field_index(iter.first);
+            _column_predicate_inverted_index_status[column_id][pred_sign] = 
false;
+        }
+    }
 }
 
 Status SegmentIterator::init_iterators() {
@@ -570,7 +578,6 @@ Status 
SegmentIterator::_get_row_ranges_by_column_conditions() {
     RETURN_IF_ERROR(_apply_inverted_index());
     for (auto cid : _schema->column_ids()) {
         bool result_true = 
_check_all_predicates_passed_inverted_index_for_column(cid);
-
         if (result_true) {
             _need_read_data_indices[cid] = false;
         }
@@ -886,8 +893,7 @@ bool 
SegmentIterator::_can_filter_by_preds_except_leafnode_of_andnode() {
             return false;
         }
     }
-    for (const auto& func_expr_pair : compound_func_exprs) {
-        const auto& expr = func_expr_pair.first;
+    for (const auto& expr : compound_func_exprs) {
         std::string pred_result_sign =
                 BeConsts::BLOCK_TEMP_COLUMN_PREFIX + 
std::to_string(expr->index_unique_id());
         if (!_rowid_result_for_index.contains(pred_result_sign)) {
@@ -999,14 +1005,20 @@ Status 
SegmentIterator::_apply_index_except_leafnode_of_andnode() {
         }
     }
 
-    for (const auto& func_expr_pair : compound_func_exprs) {
-        const auto& expr = func_expr_pair.first;
-        const auto& expr_ctx = func_expr_pair.second;
+    for (const auto& expr : compound_func_exprs) {
+        roaring::Roaring bitmap = _row_bitmap;
         auto result = std::make_shared<roaring::Roaring>();
-        RETURN_IF_ERROR(execute_func_expr(expr, expr_ctx, result));
+        RETURN_IF_ERROR(execute_func_expr(expr, result));
+        bitmap &= *result;
         std::string result_sign =
                 BeConsts::BLOCK_TEMP_COLUMN_PREFIX + 
std::to_string(expr->index_unique_id());
-        _rowid_result_for_index.emplace(result_sign, std::make_pair(true, 
std::move(*result)));
+        _rowid_result_for_index.emplace(result_sign, std::make_pair(true, 
std::move(bitmap)));
+        for (const auto& child_expr : expr->children()) {
+            if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
+                auto column_id = 
_opts.tablet_schema->field_index(child_expr->expr_name());
+                
_column_predicate_inverted_index_status[column_id][result_sign] = true;
+            }
+        }
     }
 
     return Status::OK();
@@ -1300,11 +1312,9 @@ Status SegmentIterator::_apply_inverted_index() {
         }
     }
 
-    for (const auto& func_expr_pair : no_compound_func_exprs) {
-        const auto& expr = func_expr_pair.first;
-        const auto& expr_ctx = func_expr_pair.second;
+    for (const auto& expr : no_compound_func_exprs) {
         auto result = std::make_shared<roaring::Roaring>();
-        RETURN_IF_ERROR(execute_func_expr(expr, expr_ctx, result));
+        RETURN_IF_ERROR(execute_func_expr(expr, result));
         _row_bitmap &= *result;
         for (auto it = _remaining_conjunct_roots.begin(); it != 
_remaining_conjunct_roots.end();) {
             if (*it == expr) {
@@ -1315,6 +1325,14 @@ Status SegmentIterator::_apply_inverted_index() {
                 ++it;
             }
         }
+        std::string result_sign =
+                BeConsts::BLOCK_TEMP_COLUMN_PREFIX + 
std::to_string(expr->index_unique_id());
+        for (const auto& child_expr : expr->children()) {
+            if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
+                auto column_id = 
_opts.tablet_schema->field_index(child_expr->expr_name());
+                
_column_predicate_inverted_index_status[column_id][result_sign] = true;
+            }
+        }
     }
 
     _col_predicates = std::move(remaining_predicates);
@@ -1408,17 +1426,6 @@ Status SegmentIterator::_init_inverted_index_iterators() 
{
     return Status::OK();
 }
 
-Status SegmentIterator::_init_inverted_index_iterators(ColumnId cid) {
-    std::lock_guard lock(_idx_init_lock);
-    if (_inverted_index_iterators[cid] == nullptr) {
-        return _segment->new_inverted_index_iterator(
-                _opts.tablet_schema->column(cid),
-                
_segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid)),
-                _opts, &_inverted_index_iterators[cid]);
-    }
-    return Status::OK();
-}
-
 Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include, 
rowid_t upper_bound,
                                         rowid_t* rowid) {
     if (_segment->_tablet_schema->keys_type() == UNIQUE_KEYS &&
@@ -2753,53 +2760,16 @@ Status 
SegmentIterator::current_block_row_locations(std::vector<RowLocation>* bl
     return Status::OK();
 }
 
-/**
- * solution 1: where cluase included nodes are all `and` leaf nodes,
- * predicate pushed down and remove from vconjunct.
- *  for example: where A = 1 and B = 'test' and B like '%he%';
- *      column A : `A = 1` pushed down, this column's predicates all pushed 
down,
- *                  call _check_column_pred_all_push_down will return true.
- *      column B : `B = 'test'` pushed down, but `B like '%he%'` remain in 
vconjunct,
- *                  call _check_column_pred_all_push_down will return false.
- *
- * solution 2: where cluase included nodes are compound or other complex 
conditions,
- * predicate pushed down but still remain in vconjunct.
- *  for exmple: where (A = 1 and B = 'test') or B = 'hi' or (C like '%ye%' and 
C > 'aa');
- *      column A : `A = 1` pushed down, check it applyed by index,
- *                  call _check_column_pred_all_push_down will return true.
- *      column B : `B = 'test'`, `B = 'hi'` all pushed down, check them all 
applyed by index,
- *                  call _check_column_pred_all_push_down will return true.
- *      column C : `C like '%ye%'` not pushed down, `C > 'aa'` pushed down, 
only `C > 'aa'` applyed by index,
- *                  call _check_column_pred_all_push_down will return false.
-*/
-bool SegmentIterator::_check_column_pred_all_push_down(const std::string& 
column_name,
-                                                       bool in_compound, bool 
is_match) {
-    if (_remaining_conjunct_roots.empty()) {
-        return true;
-    }
-
-    if (in_compound || is_match) {
-        auto preds_in_remaining_vconjuct = 
_column_pred_in_remaining_vconjunct[column_name];
-        for (auto pred_info : preds_in_remaining_vconjuct) {
-            auto column_sign = _gen_predicate_result_sign(&pred_info);
-            if (!_rowid_result_for_index.contains(column_sign)) {
-                return false;
-            }
-        }
-    } else {
-        if (_column_pred_in_remaining_vconjunct[column_name].size() != 0) {
-            return false;
-        }
-    }
-    return true;
-}
-
 void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
         const vectorized::VExprSPtr& expr) {
     if (expr == nullptr) {
         return;
     }
 
+    if (expr->fn().name.function_name == "multi_match") {
+        return;
+    }
+
     auto& children = expr->children();
     for (int i = 0; i < children.size(); ++i) {
         _calculate_pred_in_remaining_conjunct_root(children[i]);
@@ -2887,13 +2857,21 @@ void 
SegmentIterator::_calculate_func_in_remaining_conjunct_root() {
             bool current_has_compound_pred =
                     has_compound_pred || (expr->node_type() == 
TExprNodeType::COMPOUND_PRED);
 
-            if (expr->node_type() == TExprNodeType::FUNCTION_CALL &&
-                expr->can_push_down_to_index()) {
+            if (expr->fn().name.function_name == "multi_match") {
                 expr->set_index_unique_id(gen_func_unique_id(expr));
                 if (current_has_compound_pred) {
-                    compound_func_exprs.emplace_back(expr, root_expr_ctx);
+                    compound_func_exprs.emplace_back(expr);
                 } else {
-                    no_compound_func_exprs.emplace_back(expr, root_expr_ctx);
+                    no_compound_func_exprs.emplace_back(expr);
+                }
+
+                for (int32_t i = expr->get_num_children() - 1; i >= 0; i--) {
+                    auto child_expr = expr->get_child(i);
+                    if (child_expr->node_type() == 
TExprNodeType::type::SLOT_REF) {
+                        std::string result_sign = 
BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
+                                                  
std::to_string(expr->index_unique_id());
+                        
_func_name_to_result_sign[child_expr->expr_name()].push_back(result_sign);
+                    }
                 }
             }
 
@@ -2986,7 +2964,6 @@ bool SegmentIterator::_can_opt_topn_reads() const {
 }
 
 Status SegmentIterator::execute_func_expr(const vectorized::VExprSPtr& expr,
-                                          const vectorized::VExprContextSPtr& 
expr_ctx,
                                           std::shared_ptr<roaring::Roaring>& 
result) {
     const auto& expr0 = expr->get_child(0);
     if (!expr0 || expr0->node_type() != TExprNodeType::SLOT_REF) {
@@ -3000,7 +2977,7 @@ Status SegmentIterator::execute_func_expr(const 
vectorized::VExprSPtr& expr,
     params._column_name = 
_opts.tablet_schema->column(params._column_id).name();
     params._segment_iterator = this;
 
-    return expr->eval_inverted_index(expr_ctx.get(), params, result);
+    return expr->eval_inverted_index(params, result);
 }
 
 } // namespace segment_v2
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 2f293f01b0c..2a5bdf4fd13 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -156,7 +156,6 @@ public:
     std::vector<std::unique_ptr<InvertedIndexIterator>>& 
inverted_index_iterators() {
         return _inverted_index_iterators;
     }
-    [[nodiscard]] Status _init_inverted_index_iterators(ColumnId cid);
 
 private:
     Status _next_batch_internal(vectorized::Block* block);
@@ -404,7 +403,6 @@ private:
     bool _can_opt_topn_reads() const;
 
     Status execute_func_expr(const vectorized::VExprSPtr& expr,
-                             const vectorized::VExprContextSPtr& expr_ctx,
                              std::shared_ptr<roaring::Roaring>& result);
     void _initialize_predicate_results();
     bool _check_all_predicates_passed_inverted_index_for_column(ColumnId cid);
@@ -475,9 +473,8 @@ private:
     std::vector<ColumnPredicate*> _col_predicates;
     std::vector<ColumnPredicate*> _col_preds_except_leafnode_of_andnode;
 
-    using FuncExprPair = std::pair<vectorized::VExprSPtr, 
vectorized::VExprContextSPtr>;
-    std::vector<FuncExprPair> no_compound_func_exprs;
-    std::vector<FuncExprPair> compound_func_exprs;
+    std::vector<vectorized::VExprSPtr> no_compound_func_exprs;
+    std::vector<vectorized::VExprSPtr> compound_func_exprs;
 
     vectorized::VExprContextSPtrs _common_expr_ctxs_push_down;
     bool _enable_common_expr_pushdown = false;
@@ -486,6 +483,7 @@ private:
     std::unique_ptr<ColumnPredicateInfo> _column_predicate_info;
     std::unordered_map<std::string, std::vector<ColumnPredicateInfo>>
             _column_pred_in_remaining_vconjunct;
+    std::unordered_map<std::string, std::vector<std::string>> 
_func_name_to_result_sign;
     std::set<ColumnId> _not_apply_index_pred;
 
     // row schema of the key to seek
@@ -525,8 +523,6 @@ private:
 
     std::unordered_map<int, std::unordered_map<std::string, bool>>
             _column_predicate_inverted_index_status;
-
-    std::mutex _idx_init_lock;
 };
 
 } // namespace segment_v2
diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp
index 5b5cdc2e8a0..2082a93d423 100644
--- a/be/src/vec/exprs/vectorized_fn_call.cpp
+++ b/be/src/vec/exprs/vectorized_fn_call.cpp
@@ -230,10 +230,9 @@ bool VectorizedFnCall::can_fast_execute() const {
     return _function->can_push_down_to_index();
 }
 
-Status VectorizedFnCall::eval_inverted_index(VExprContext* context,
-                                             segment_v2::FuncExprParams& 
params,
+Status VectorizedFnCall::eval_inverted_index(segment_v2::FuncExprParams& 
params,
                                              
std::shared_ptr<roaring::Roaring>& result) {
-    return 
_function->eval_inverted_index(context->fn_context(_fn_context_index), params, 
result);
+    return _function->eval_inverted_index(this, params, result);
 }
 
 bool VectorizedFnCall::equals(const VExpr& other) {
diff --git a/be/src/vec/exprs/vectorized_fn_call.h b/be/src/vec/exprs/vectorized_fn_call.h
index 3ef7a7464f5..dea5e90f9b7 100644
--- a/be/src/vec/exprs/vectorized_fn_call.h
+++ b/be/src/vec/exprs/vectorized_fn_call.h
@@ -68,7 +68,7 @@ public:
 
     bool can_push_down_to_index() const override;
     bool can_fast_execute() const override;
-    Status eval_inverted_index(VExprContext* context, 
segment_v2::FuncExprParams& params,
+    Status eval_inverted_index(segment_v2::FuncExprParams& params,
                                std::shared_ptr<roaring::Roaring>& result) 
override;
     bool equals(const VExpr& other) override;
 
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index e6a2cf117a2..bd7f1c2ab97 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -626,7 +626,7 @@ bool VExpr::fast_execute(Block& block, const ColumnNumbers& 
arguments, size_t re
 std::string VExpr::gen_predicate_result_sign(Block& block, const 
ColumnNumbers& arguments,
                                              const std::string& function_name) 
const {
     std::string pred_result_sign;
-    if (this->node_type() == TExprNodeType::FUNCTION_CALL) {
+    if (this->fn().name.function_name == "multi_match") {
         pred_result_sign =
                 BeConsts::BLOCK_TEMP_COLUMN_PREFIX + 
std::to_string(this->index_unique_id());
     } else {
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
index 708b6c8ea90..0f611cb0920 100644
--- a/be/src/vec/exprs/vexpr.h
+++ b/be/src/vec/exprs/vexpr.h
@@ -230,7 +230,7 @@ public:
 
     virtual bool can_push_down_to_index() const { return false; }
     virtual bool can_fast_execute() const { return false; }
-    virtual Status eval_inverted_index(VExprContext* context, 
segment_v2::FuncExprParams& params,
+    virtual Status eval_inverted_index(segment_v2::FuncExprParams& params,
                                        std::shared_ptr<roaring::Roaring>& 
result) {
         return Status::NotSupported("Not supported 
execute_with_inverted_index");
     }
diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h
index d880836248f..110e064a59f 100644
--- a/be/src/vec/functions/function.h
+++ b/be/src/vec/functions/function.h
@@ -220,7 +220,8 @@ public:
     }
 
     virtual bool can_push_down_to_index() const { return false; }
-    virtual Status eval_inverted_index(FunctionContext* context, 
segment_v2::FuncExprParams& params,
+
+    virtual Status eval_inverted_index(VExpr* context, 
segment_v2::FuncExprParams& params,
                                        std::shared_ptr<roaring::Roaring>& 
result) {
         return Status::NotSupported("eval_inverted_index is not supported in 
function: ",
                                     get_name());
@@ -506,9 +507,10 @@ public:
     }
 
     bool can_push_down_to_index() const override { return 
function->can_push_down_to_index(); }
-    Status eval_inverted_index(FunctionContext* context, 
segment_v2::FuncExprParams& params,
+
+    Status eval_inverted_index(VExpr* expr, segment_v2::FuncExprParams& params,
                                std::shared_ptr<roaring::Roaring>& result) 
override {
-        return function->eval_inverted_index(context, params, result);
+        return function->eval_inverted_index(expr, params, result);
     }
 
 private:
diff --git a/be/src/vec/functions/function_multi_match.cpp b/be/src/vec/functions/function_multi_match.cpp
index d34526d0f29..fa36453105e 100644
--- a/be/src/vec/functions/function_multi_match.cpp
+++ b/be/src/vec/functions/function_multi_match.cpp
@@ -49,91 +49,59 @@ Status FunctionMultiMatch::execute_impl(FunctionContext* 
/*context*/, Block& blo
     return Status::RuntimeError("only inverted index queries are supported");
 }
 
-Status FunctionMultiMatch::open(FunctionContext* context,
-                                FunctionContext::FunctionStateScope scope) {
-    if (scope == FunctionContext::THREAD_LOCAL) {
-        return Status::OK();
-    }
-
-    DCHECK(context->get_num_args() == 4);
-    for (int i = 0; i < context->get_num_args(); ++i) {
-        DCHECK(is_string_type(context->get_arg_type(i)->type));
+InvertedIndexQueryType get_query_type(const std::string& query_type) {
+    if (query_type == "phrase_prefix") {
+        return InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY;
     }
+    return InvertedIndexQueryType::UNKNOWN_QUERY;
+}
 
-    std::shared_ptr<MatchParam> state = std::make_shared<MatchParam>();
-    context->set_function_state(scope, state);
-    for (int i = 0; i < context->get_num_args(); ++i) {
-        const auto& const_column_ptr = context->get_constant_col(i);
-        if (const_column_ptr) {
-            auto const_data = const_column_ptr->column_ptr->get_data_at(0);
-            switch (i) {
-            case 1: {
-                std::string field_names_str = const_data.to_string();
-                field_names_str.erase(
-                        std::remove_if(field_names_str.begin(), 
field_names_str.end(),
-                                       [](unsigned char c) { return 
std::isspace(c); }),
-                        field_names_str.end());
-                std::vector<std::string> field_names;
-                boost::split(field_names, field_names_str, 
boost::algorithm::is_any_of(","));
-                for (const auto& field_name : field_names) {
-                    if (!field_name.empty()) {
-                        state->fields.insert(field_name);
-                    }
-                }
-            } break;
-            case 2:
-                state->type = const_data.to_string();
-                break;
-            case 3:
-                state->query = const_data.to_string();
-                break;
-            default:
-                break;
-            }
+Status FunctionMultiMatch::eval_inverted_index(VExpr* expr, 
segment_v2::FuncExprParams& params,
+                                               
std::shared_ptr<roaring::Roaring>& result) {
+    // fields
+    std::vector<std::string> query_fileds;
+    size_t i = 0;
+    for (; i < expr->get_num_children(); i++) {
+        auto child_expr = expr->get_child(i);
+        if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
+            query_fileds.emplace_back(child_expr->expr_name());
+        } else {
+            break;
         }
     }
+    if (i != expr->get_num_children() - 2) {
+        return Status::RuntimeError("parameter type incorrect: slot = {}", i);
+    }
 
-    return Status::OK();
-}
-
-Status FunctionMultiMatch::eval_inverted_index(FunctionContext* context,
-                                               segment_v2::FuncExprParams& 
params,
-                                               
std::shared_ptr<roaring::Roaring>& result) {
-    auto* match_param = reinterpret_cast<MatchParam*>(
-            context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
-    if (match_param == nullptr) {
-        return Status::RuntimeError("function parameter parsing failed");
+    // type
+    std::string param1 = 
std::static_pointer_cast<VLiteral>(expr->get_child(i))->value();
+    auto query_type = get_query_type(param1);
+    if (query_type == InvertedIndexQueryType::UNKNOWN_QUERY) {
+        return Status::RuntimeError("parameter query type incorrect: 
query_type = {}", query_type);
     }
-    match_param->fields.insert(params._column_name);
 
-    const auto& segment_iterator = params._segment_iterator;
-    const auto& opts = segment_iterator->storage_read_options();
-    const auto& tablet_schema = opts.tablet_schema;
+    // query
+    std::string query_str = 
std::static_pointer_cast<VLiteral>(expr->get_child(i + 1))->value();
 
+    auto& segment_iterator = params._segment_iterator;
+    auto& segment = segment_iterator->segment();
+    auto& opts = segment_iterator->storage_read_options();
+    auto& tablet_schema = opts.tablet_schema;
+    auto& idx_iterators = segment_iterator->inverted_index_iterators();
+
+    // check
     std::vector<ColumnId> columns_ids;
-    for (const auto& column_name : match_param->fields) {
+    for (const auto& column_name : query_fileds) {
         auto cid = tablet_schema->field_index(column_name);
         if (cid < 0) {
-            return Status::RuntimeError("column name is incorrect");
-        }
-        const auto& column = tablet_schema->column(cid);
-        if (!is_string_type(column.type())) {
-            return Status::RuntimeError("column type is incorrect");
+            return Status::RuntimeError("column name is incorrect: {}", 
column_name);
         }
-        if (!tablet_schema->has_inverted_index(column)) {
-            return Status::RuntimeError("column index is incorrect");
+        if (idx_iterators[cid] == nullptr) {
+            return Status::RuntimeError("column idx is incorrect: {}", 
column_name);
         }
         columns_ids.emplace_back(cid);
     }
 
-    // query type
-    InvertedIndexQueryType query_type;
-    if (match_param->type == "phrase_prefix") {
-        query_type = InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY;
-    } else {
-        return Status::RuntimeError("query type is incorrect");
-    }
-
     // cache key
     roaring::Roaring cids_str;
     cids_str.addMany(columns_ids.size(), columns_ids.data());
@@ -142,11 +110,11 @@ Status 
FunctionMultiMatch::eval_inverted_index(FunctionContext* context,
     cids_str.write(column_name_binary.data());
 
     InvertedIndexQueryCache::CacheKey cache_key;
-    io::Path index_path = segment_iterator->segment().file_reader()->path();
+    io::Path index_path = segment.file_reader()->path();
     cache_key.index_path = index_path.parent_path() / index_path.stem();
     cache_key.column_name = column_name_binary;
     cache_key.query_type = query_type;
-    cache_key.value = match_param->query;
+    cache_key.value = query_str;
 
     // query cache
     auto* cache = InvertedIndexQueryCache::instance();
@@ -157,18 +125,13 @@ Status 
FunctionMultiMatch::eval_inverted_index(FunctionContext* context,
     }
 
     // search
-    for (const auto& column_name : match_param->fields) {
+    for (const auto& column_name : query_fileds) {
         auto cid = tablet_schema->field_index(column_name);
         const auto& column = tablet_schema->column(column_name);
-
-        auto& index_iterator = 
segment_iterator->inverted_index_iterators()[cid];
-        if (!index_iterator) {
-            
RETURN_IF_ERROR(segment_iterator->_init_inverted_index_iterators(cid));
-        }
-        const auto& index_reader = index_iterator->reader();
+        const auto& index_reader = idx_iterators[cid]->reader();
 
         auto single_result = std::make_shared<roaring::Roaring>();
-        StringRef query_value(match_param->query.data());
+        StringRef query_value(query_str.data());
         auto index_version = 
tablet_schema->get_inverted_index_storage_format();
         if (index_version == InvertedIndexStorageFormatPB::V1) {
             RETURN_IF_ERROR(index_reader->query(opts.stats, 
opts.runtime_state, column_name,
diff --git a/be/src/vec/functions/function_multi_match.h b/be/src/vec/functions/function_multi_match.h
index b7d2bd3c30e..8e30b6e9f22 100644
--- a/be/src/vec/functions/function_multi_match.h
+++ b/be/src/vec/functions/function_multi_match.h
@@ -26,13 +26,6 @@
 
 namespace doris::vectorized {
 
-class MatchParam {
-public:
-    std::string query;
-    std::set<std::string> fields;
-    std::string type;
-};
-
 class FunctionMultiMatch : public IFunction {
 public:
     static constexpr auto name = "multi_match";
@@ -42,9 +35,9 @@ public:
 
     String get_name() const override { return name; }
 
-    bool is_variadic() const override { return false; }
+    bool is_variadic() const override { return true; }
 
-    size_t get_number_of_arguments() const override { return 4; }
+    size_t get_number_of_arguments() const override { return 0; }
 
     bool use_default_implementation_for_nulls() const override { return false; 
}
 
@@ -52,7 +45,9 @@ public:
         return std::make_shared<DataTypeUInt8>();
     }
 
-    Status open(FunctionContext* context, FunctionContext::FunctionStateScope 
scope) override;
+    Status open(FunctionContext* context, FunctionContext::FunctionStateScope 
scope) override {
+        return Status::OK();
+    }
 
     Status close(FunctionContext* context, FunctionContext::FunctionStateScope 
scope) override {
         return Status::OK();
@@ -63,7 +58,7 @@ public:
 
     bool can_push_down_to_index() const override { return true; }
 
-    Status eval_inverted_index(FunctionContext* context, 
segment_v2::FuncExprParams& params,
+    Status eval_inverted_index(VExpr* expr, segment_v2::FuncExprParams& params,
                                std::shared_ptr<roaring::Roaring>& result) 
override;
 };
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MultiMatch.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MultiMatch.java
index 3df662808ed..1f7c739eefe 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MultiMatch.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MultiMatch.java
@@ -32,33 +32,23 @@ import com.google.common.collect.ImmutableList;
 import java.util.List;
 
 /**
- * ScalarFunction 'multi_match'. This class is generated by GenerateFunction.
+ * ScalarFunction 'multi_match'.
  */
 public class MultiMatch extends ScalarFunction
         implements BinaryExpression, ExplicitlyCastableSignature, 
AlwaysNotNullable {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
-            FunctionSignature.ret(BooleanType.INSTANCE)
-                    .args(StringType.INSTANCE,
-                            StringType.INSTANCE,
-                            StringType.INSTANCE,
-                            StringType.INSTANCE)
+            
FunctionSignature.ret(BooleanType.INSTANCE).varArgs(StringType.INSTANCE)
     );
 
-    /**
-     * constructor with 4 arguments.
-     */
-    public MultiMatch(Expression arg0, Expression arg1, Expression arg2, 
Expression arg3) {
-        super("multi_match", arg0, arg1, arg2, arg3);
+    public MultiMatch(Expression ...varArgs) {
+        super("multi_match", varArgs);
     }
 
-    /**
-     * withChildren.
-     */
     @Override
     public MultiMatch withChildren(List<Expression> children) {
-        Preconditions.checkArgument(children.size() == 4);
-        return new MultiMatch(children.get(0), children.get(1), 
children.get(2), children.get(3));
+        Preconditions.checkArgument(children.size() >= 3);
+        return new MultiMatch(children.toArray(new Expression[0]));
     }
 
     @Override
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index ea2a15f6fcf..018d71385e0 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2212,7 +2212,7 @@ visible_functions = {
 
     # multi match functions
     "MultiMatch": [
-        [['multi_match'], 'BOOLEAN', ['STRING', 'STRING', 'STRING', 'STRING'], 
'ALWAYS_NOT_NULLABLE']
+        [['multi_match'], 'BOOLEAN', ['STRING', '...'], 'ALWAYS_NOT_NULLABLE']
     ]
 }
 
diff --git a/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy b/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
index 90f9f7a751b..c926bc4d510 100644
--- a/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
+++ b/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
@@ -113,15 +113,15 @@ suite("test_index_multi_match", "p0"){
         qt_sql """ select count() from ${indexTbName2} where (clientip 
match_phrase_prefix '2' or request match_phrase_prefix '2' or status 
match_phrase_prefix '2' or size match_phrase_prefix '2'); """
         qt_sql """ select count() from ${indexTbName2} where (clientip 
match_phrase_prefix 'a' or request match_phrase_prefix 'a' or status 
match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """
 
-        qt_sql """ select count() from ${indexTbName3} where 
multi_match(clientip, '', 'phrase_prefix', '2'); """
-        qt_sql """ select count() from ${indexTbName3} where 
multi_match(clientip, 'request', 'phrase_prefix', '2'); """
-        qt_sql """ select count() from ${indexTbName3} where 
multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
-        qt_sql """ select count() from ${indexTbName3} where 
multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """
+        qt_sql """ select count() from ${indexTbName3} where 
multi_match(clientip, 'phrase_prefix', '2'); """
+        qt_sql """ select count() from ${indexTbName3} where 
multi_match(clientip, request, 'phrase_prefix', '2'); """
+        qt_sql """ select count() from ${indexTbName3} where 
multi_match(clientip, request, status, size, 'phrase_prefix', '2'); """
+        qt_sql """ select count() from ${indexTbName3} where 
multi_match(clientip, request, status, size, 'phrase_prefix', 'a'); """
 
-        qt_sql """ select count() from ${indexTbName4} where 
multi_match(clientip, '', 'phrase_prefix', '2'); """
-        qt_sql """ select count() from ${indexTbName4} where 
multi_match(clientip, 'request', 'phrase_prefix', '2'); """
-        qt_sql """ select count() from ${indexTbName4} where 
multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
-        qt_sql """ select count() from ${indexTbName4} where 
multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """
+        qt_sql """ select count() from ${indexTbName4} where 
multi_match(clientip, 'phrase_prefix', '2'); """
+        qt_sql """ select count() from ${indexTbName4} where 
multi_match(clientip, request, 'phrase_prefix', '2'); """
+        qt_sql """ select count() from ${indexTbName4} where 
multi_match(clientip, request, status, size, 'phrase_prefix', '2'); """
+        qt_sql """ select count() from ${indexTbName4} where 
multi_match(clientip, request, status, size, 'phrase_prefix', 'a'); """
 
     } finally {
         //try_sql("DROP TABLE IF EXISTS ${testTable}")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to