This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 8b895c0d360 branch-4.0: [fix](inverted index) fix match without 
inverted index #57468 (#57492)
8b895c0d360 is described below

commit 8b895c0d360075b57c07c91cffd602f1dd7e5a8b
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Oct 31 09:25:00 2025 +0800

    branch-4.0: [fix](inverted index) fix match without inverted index #57468 
(#57492)
    
    Cherry-picked from #57468
    
    Co-authored-by: Sun Chenyang <[email protected]>
---
 be/src/vec/functions/match.cpp                     |  6 ++-
 .../analyzer/test_custom_analyzer.out              | 45 +++++++++++++++++
 .../analyzer/test_custom_analyzer.groovy           | 59 ++++++++++++++++++++++
 3 files changed, 109 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index 33c8e34eb3b..8009374e4af 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -179,6 +179,7 @@ std::vector<TermInfo> 
FunctionMatchBase::analyse_query_str_token(
     if (inverted_index_ctx == nullptr) {
         return query_tokens;
     }
+    // parser type is none and custom analyzer is empty, which means no analyzer is set
     if (inverted_index_ctx->parser_type == 
InvertedIndexParserType::PARSER_NONE &&
         inverted_index_ctx->custom_analyzer.empty()) {
         query_tokens.emplace_back(match_query_str);
@@ -201,7 +202,9 @@ inline std::vector<TermInfo> 
FunctionMatchBase::analyse_data_token(
         for (auto next_src_array_offset = 
(*array_offsets)[current_block_row_idx];
              current_src_array_offset < next_src_array_offset; 
++current_src_array_offset) {
             const auto& str_ref = 
string_col->get_data_at(current_src_array_offset);
-            if (inverted_index_ctx->parser_type == 
InvertedIndexParserType::PARSER_NONE) {
+            // parser type is none and custom analyzer is empty, which means no analyzer is set
+            if (inverted_index_ctx->parser_type == 
InvertedIndexParserType::PARSER_NONE &&
+                inverted_index_ctx->custom_analyzer.empty()) {
                 data_tokens.emplace_back(str_ref.to_string());
                 continue;
             }
@@ -215,6 +218,7 @@ inline std::vector<TermInfo> 
FunctionMatchBase::analyse_data_token(
         }
     } else {
         const auto& str_ref = string_col->get_data_at(current_block_row_idx);
+        // parser type is none and custom analyzer is empty, which means no analyzer is set
         if (inverted_index_ctx->parser_type == 
InvertedIndexParserType::PARSER_NONE &&
             inverted_index_ctx->custom_analyzer.empty()) {
             data_tokens.emplace_back(str_ref.to_string());
diff --git 
a/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out 
b/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
index 687807afbfd..828b46ca65b 100644
--- a/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
+++ b/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
@@ -47,3 +47,48 @@
 -- !sql --
 1      GET /french/images/nav_venue_off.gif HTTP/1.0
 
+-- !sql --
+[{\n        "token": "foo"\n    }, {\n        "token": "bar"\n    }]
+
+-- !sql --
+[{\n        "token": "foo"\n    }]
+
+-- !sql --
+[{\n        "token": "bar"\n    }]
+
+-- !sql --
+1      FOO BAR
+
+-- !sql --
+1      FOO BAR
+
+-- !sql --
+1      FOO BAR
+
+-- !sql --
+1      FOO BAR
+
+-- !sql --
+1      FOO BAR
+
+-- !sql --
+1      FOO BAR
+
+-- !sql --
+1      {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1      {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1      {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1      {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1      {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1      {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
diff --git 
a/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy 
b/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
index 14ee4c6819c..f558f075fe9 100644
--- 
a/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
+++ 
b/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
@@ -179,4 +179,63 @@ suite("test_custom_analyzer", "p0") {
         qt_sql """ select * from test_custom_analyzer_3 where ch match 
'nav_venue_off.gif'; """
     } catch (SQLException e) {
     }
+
+    sql "DROP TABLE IF EXISTS ${indexTbName1}"
+    sql """
+        CREATE TABLE ${indexTbName1} (
+            `a` bigint NOT NULL,
+            `ch` text NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`a`)
+        DISTRIBUTED BY RANDOM BUCKETS 1
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    sql """ insert into ${indexTbName1} values(1, "FOO BAR"); """
+    qt_sql """ select tokenize("FOO BAR", '"analyzer"="lowercase_delimited"'); 
"""
+    qt_sql """ select tokenize("FOO", '"analyzer"="lowercase_delimited"'); """
+    qt_sql """ select tokenize("BAR", '"analyzer"="lowercase_delimited"'); """
+
+    sql """ alter table ${indexTbName1} add index idx_ch_default(`ch`)  using 
inverted; """
+    wait_for_last_build_index_finish("${indexTbName1}", 60000)
+    sql """ alter table ${indexTbName1} add index idx_ch(`ch`) using inverted 
properties("support_phrase" = "true", "analyzer" = "lowercase_delimited"); """
+    wait_for_last_build_index_finish("${indexTbName1}", 60000)
+
+    qt_sql """ select * from ${indexTbName1} where ch match_all 'FOO'; """
+    qt_sql """ select * from ${indexTbName1} where ch match_all 'BAR'; """
+    qt_sql """ select * from ${indexTbName1} where ch match_all 'FOO BAR'; """
+
+    qt_sql """ select * from ${indexTbName1} where ch match_phrase_prefix 
'FOO'; """
+    qt_sql """ select * from ${indexTbName1} where ch match_phrase_prefix 
'BAR'; """
+    qt_sql """ select * from ${indexTbName1} where ch match_phrase_prefix 'FOO 
BAR'; """
+
+    def variantTableName = "test_custom_analyzer_2"
+    sql "DROP TABLE IF EXISTS ${variantTableName}"
+    sql """
+        CREATE TABLE ${variantTableName} (
+            `a` bigint NOT NULL,
+            `var` variant<'string_*' : string,
+                properties("variant_max_subcolumns_count" = "1", 
"variant_enable_typed_paths_to_sparse" = "true")
+            > NULL,
+            INDEX idx_string (var) USING INVERTED PROPERTIES("field_pattern" = 
"string_*"),
+            INDEX idx_string_prefix (var) USING INVERTED 
PROPERTIES("field_pattern" = "string_*", "support_phrase" = "true", "analyzer" 
= "lowercase_delimited")
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`a`)
+        DISTRIBUTED BY RANDOM BUCKETS 1
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    sql """ insert into ${variantTableName} values(1, '{"string_1" : "FOO 
BAR", "string_2" : "FOO BAR", "string_3" : "FOO BAR"}'), (2, '{"string_3" : 
"FOO BAR"}'); """
+
+    qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as 
varchar) match_all 'FOO'; """
+    qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as 
varchar) match_all 'BAR'; """
+    qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as 
varchar) match_all 'FOO BAR'; """
+
+    qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as 
varchar) match_phrase_prefix 'FOO'; """
+    qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as 
varchar) match_phrase_prefix 'BAR'; """
+    qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as 
varchar) match_phrase_prefix 'FOO BAR'; """
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to