This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit ae38f2828082cbbbb5f577cade04babb589a5026
Author: zzzxl <[email protected]>
AuthorDate: Mon Jan 29 10:42:12 2024 +0800

    [feature](invert index) does not create an inverted index to support the 
match_phrase_prefix feature. (#30414)
---
 be/src/vec/functions/match.cpp                     | 59 ++++++++++++++++++++++
 be/src/vec/functions/match.h                       |  5 +-
 .../test_index_match_phrase_prefix.out             | 15 ++++++
 .../test_index_match_phrase_prefix.groovy          | 47 +++++++++++++----
 4 files changed, 111 insertions(+), 15 deletions(-)

diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index 38145342a0b..35fdb7a42b3 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -311,6 +311,65 @@ Status FunctionMatchPhrase::execute_match(const 
std::string& column_name,
     return Status::OK();
 }
 
+// Evaluates MATCH_PHRASE_PREFIX row by row, without consulting an inverted index.
+//
+// The query string is tokenized once. Each of the `input_rows_count` rows is tokenized with
+// analyse_data_token() and scanned for a run of consecutive tokens in which every query token
+// except the last matches exactly and the last query token is a prefix of the data token.
+// result[i] is set to true for a matching row; non-matching rows are left untouched.
+//
+// Always returns Status::OK(). If the query produces no tokens (including the case where
+// `inverted_index_ctx` is null, so the query cannot be tokenized), a warning is logged and no
+// row is marked.
+Status FunctionMatchPhrasePrefix::execute_match(
+        const std::string& column_name, const std::string& match_query_str, size_t input_rows_count,
+        const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
+        const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container& result) const {
+    doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN;
+    std::vector<std::string> query_tokens;
+    // Only touch the context after the null check: the analyzer lives inside it, so tokenizing
+    // the query with a null context would dereference a null pointer.
+    if (inverted_index_ctx) {
+        parser_type = inverted_index_ctx->parser_type;
+
+        auto reader = doris::segment_v2::InvertedIndexReader::create_reader(inverted_index_ctx,
+                                                                            match_query_str);
+        doris::segment_v2::InvertedIndexReader::get_analyse_result(
+                query_tokens, reader.get(), inverted_index_ctx->analyzer, column_name,
+                doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY);
+    }
+
+    if (query_tokens.empty()) {
+        LOG(WARNING) << fmt::format(
+                "token parser result is empty for query, "
+                "please check your query: '{}' and index parser: '{}'",
+                match_query_str, inverted_index_parser_type_to_string(parser_type));
+        return Status::OK();
+    }
+
+    // Index of the final query token, the only one compared as a prefix.
+    const size_t last_query_idx = query_tokens.size() - 1;
+
+    int32_t current_src_array_offset = 0;
+    for (size_t i = 0; i < input_rows_count; i++) {
+        std::vector<std::string> data_tokens =
+                analyse_data_token(column_name, inverted_index_ctx, string_col, i, array_offsets,
+                                   current_src_array_offset);
+
+        // A row with fewer tokens than the query can never contain the phrase. Skipping it here
+        // also keeps the unsigned subtraction below from wrapping around to a huge window count,
+        // which would index past the end of data_tokens.
+        if (data_tokens.size() < query_tokens.size()) {
+            continue;
+        }
+        const size_t window_count = data_tokens.size() - query_tokens.size() + 1;
+
+        for (size_t j = 0; j < window_count; j++) {
+            // Cheap pre-filter on the first token; a single-token query is prefix-only, so every
+            // position has to be examined.
+            if (data_tokens[j] == query_tokens[0] || query_tokens.size() == 1) {
+                bool match = true;
+                for (size_t k = 0; k < query_tokens.size(); k++) {
+                    const std::string& data_token = data_tokens[j + k];
+                    const std::string& query_token = query_tokens[k];
+                    if (k == last_query_idx) {
+                        // Last query token: the data token only has to start with it.
+                        if (data_token.compare(0, query_token.size(), query_token) != 0) {
+                            match = false;
+                            break;
+                        }
+                    } else {
+                        if (data_token != query_token) {
+                            match = false;
+                            break;
+                        }
+                    }
+                }
+                if (match) {
+                    result[i] = true;
+                    break;
+                }
+            }
+        }
+    }
+
+    return Status::OK();
+}
+
+
 void register_function_match(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionMatchAny>();
     factory.register_function<FunctionMatchAll>();
diff --git a/be/src/vec/functions/match.h b/be/src/vec/functions/match.h
index 5ca981e7021..ebd6a48ba23 100644
--- a/be/src/vec/functions/match.h
+++ b/be/src/vec/functions/match.h
@@ -139,10 +139,7 @@ public:
                          size_t input_rows_count, const ColumnString* 
string_col,
                          InvertedIndexCtx* inverted_index_ctx,
                          const ColumnArray::Offsets64* array_offsets,
-                         ColumnUInt8::Container& result) const override {
-        return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
-                "FunctionMatchPhrasePrefix not support execute_match");
-    }
+                         ColumnUInt8::Container& result) const override;
 };
 
 class FunctionMatchRegexp : public FunctionMatchBase {
diff --git 
a/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out 
b/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out
index 140fd5ee937..11af32e55e6 100644
--- a/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out
+++ b/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix.out
@@ -5,6 +5,12 @@
 -- !sql --
 863
 
+-- !sql --
+863
+
+-- !sql --
+235
+
 -- !sql --
 235
 
@@ -17,6 +23,12 @@
 -- !sql --
 166
 
+-- !sql --
+166
+
+-- !sql --
+56
+
 -- !sql --
 56
 
@@ -29,3 +41,6 @@
 -- !sql --
 7
 
+-- !sql --
+7
+
diff --git 
a/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy
 
b/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy
index 68f9624035f..1f83d48a977 100644
--- 
a/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy
+++ 
b/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix.groovy
@@ -18,8 +18,10 @@
 
 suite("test_index_match_phrase_prefix", "p0"){
     def indexTbName1 = "test_index_match_phrase_prefix"
+    def indexTbName2 = "test_index_match_phrase_prefix2"
 
     sql "DROP TABLE IF EXISTS ${indexTbName1}"
+    sql "DROP TABLE IF EXISTS ${indexTbName2}"
 
     sql """
       CREATE TABLE ${indexTbName1} (
@@ -38,6 +40,22 @@ suite("test_index_match_phrase_prefix", "p0"){
       );
     """
 
+    sql """
+      CREATE TABLE ${indexTbName2} (
+      `@timestamp` int(11) NULL COMMENT "",
+      `clientip` varchar(20) NULL COMMENT "",
+      `request` text NULL COMMENT "",
+      `status` int(11) NULL COMMENT "",
+      `size` int(11) NULL COMMENT ""
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`@timestamp`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+      "replication_allocation" = "tag.location.default: 1"
+      );
+    """
+
     def load_httplogs_data = {table_name, label, read_flag, format_flag, 
file_name, ignore_failure=false,
                         expected_succ_rows = -1, load_to_single_tablet = 
'true' ->
         
@@ -76,24 +94,31 @@ suite("test_index_match_phrase_prefix", "p0"){
     }
 
     try {
-        load_httplogs_data.call(indexTbName1, 
'test_index_match_phrase_prefix', 'true', 'json', 'documents-1000.json')
+        load_httplogs_data.call(indexTbName1, indexTbName1, 'true', 'json', 
'documents-1000.json')
+        load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 
'documents-1000.json')
 
         sql "sync"
 
-        qt_sql """ select count() from test_index_match_phrase_prefix where 
request match_phrase_prefix 'ima'; """
-        qt_sql """ select count() from test_index_match_phrase_prefix where 
request like '%ima%'; """
+        qt_sql """ select count() from ${indexTbName1} where request 
match_phrase_prefix 'ima'; """
+        qt_sql """ select count() from ${indexTbName2} where request 
match_phrase_prefix 'ima'; """
+        qt_sql """ select count() from ${indexTbName1} where request like 
'%ima%'; """
+
+        qt_sql """ select count() from ${indexTbName1} where request 
match_phrase_prefix 'images/h'; """
+        qt_sql """ select count() from ${indexTbName2} where request 
match_phrase_prefix 'images/h'; """
+        qt_sql """ select count() from ${indexTbName1} where request like 
'%images/h%'; """
 
-        qt_sql """ select count() from test_index_match_phrase_prefix where 
request match_phrase_prefix 'images/h'; """
-        qt_sql """ select count() from test_index_match_phrase_prefix where 
request like '%images/h%'; """
+        qt_sql """ select count() from ${indexTbName1} where request 
match_phrase_prefix 'images/hm'; """
+        qt_sql """ select count() from ${indexTbName2} where request 
match_phrase_prefix 'images/hm'; """
+        qt_sql """ select count() from ${indexTbName1} where request like 
'%images/hm%'; """
 
-        qt_sql """ select count() from test_index_match_phrase_prefix where 
request match_phrase_prefix 'images/hm'; """
-        qt_sql """ select count() from test_index_match_phrase_prefix where 
request like '%images/hm%'; """
+        qt_sql """ select count() from ${indexTbName1} where request 
match_phrase_prefix '/french/images/n'; """
+        qt_sql """ select count() from ${indexTbName2} where request 
match_phrase_prefix '/french/images/n'; """
+        qt_sql """ select count() from ${indexTbName1} where request like 
'%/french/images/n%'; """
 
-        qt_sql """ select count() from test_index_match_phrase_prefix where 
request match_phrase_prefix '/french/images/n'; """
-        qt_sql """ select count() from test_index_match_phrase_prefix where 
request like '%/french/images/n%'; """
+        qt_sql """ select count() from ${indexTbName1} where request 
match_phrase_prefix '/french/tickets/images/ti'; """
+        qt_sql """ select count() from ${indexTbName2} where request 
match_phrase_prefix '/french/tickets/images/ti'; """
+        qt_sql """ select count() from ${indexTbName1} where request like 
'%/french/tickets/images/ti%'; """
 
-        qt_sql """ select count() from test_index_match_phrase_prefix where 
request match_phrase_prefix '/french/tickets/images/ti'; """
-        qt_sql """ select count() from test_index_match_phrase_prefix where 
request like '%/french/tickets/images/ti%'; """
     } finally {
         //try_sql("DROP TABLE IF EXISTS ${testTable}")
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to