This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 8b895c0d360 branch-4.0: [fix](inverted index) fix match without
inverted index #57468 (#57492)
8b895c0d360 is described below
commit 8b895c0d360075b57c07c91cffd602f1dd7e5a8b
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Oct 31 09:25:00 2025 +0800
branch-4.0: [fix](inverted index) fix match without inverted index #57468
(#57492)
Cherry-picked from #57468
Co-authored-by: Sun Chenyang <[email protected]>
---
be/src/vec/functions/match.cpp | 6 ++-
.../analyzer/test_custom_analyzer.out | 45 +++++++++++++++++
.../analyzer/test_custom_analyzer.groovy | 59 ++++++++++++++++++++++
3 files changed, 109 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index 33c8e34eb3b..8009374e4af 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -179,6 +179,7 @@ std::vector<TermInfo>
FunctionMatchBase::analyse_query_str_token(
if (inverted_index_ctx == nullptr) {
return query_tokens;
}
+ // parse is none and custom analyzer is empty mean no analyzer is set
if (inverted_index_ctx->parser_type ==
InvertedIndexParserType::PARSER_NONE &&
inverted_index_ctx->custom_analyzer.empty()) {
query_tokens.emplace_back(match_query_str);
@@ -201,7 +202,9 @@ inline std::vector<TermInfo>
FunctionMatchBase::analyse_data_token(
for (auto next_src_array_offset =
(*array_offsets)[current_block_row_idx];
current_src_array_offset < next_src_array_offset;
++current_src_array_offset) {
const auto& str_ref =
string_col->get_data_at(current_src_array_offset);
- if (inverted_index_ctx->parser_type == InvertedIndexParserType::PARSER_NONE) {
+ // parse is none and custom analyzer is empty mean no analyzer is set
+ if (inverted_index_ctx->parser_type == InvertedIndexParserType::PARSER_NONE &&
+ inverted_index_ctx->custom_analyzer.empty()) {
data_tokens.emplace_back(str_ref.to_string());
continue;
}
@@ -215,6 +218,7 @@ inline std::vector<TermInfo>
FunctionMatchBase::analyse_data_token(
}
} else {
const auto& str_ref = string_col->get_data_at(current_block_row_idx);
+ // parse is none and custom analyzer is empty mean no analyzer is set
if (inverted_index_ctx->parser_type ==
InvertedIndexParserType::PARSER_NONE &&
inverted_index_ctx->custom_analyzer.empty()) {
data_tokens.emplace_back(str_ref.to_string());
diff --git
a/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
b/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
index 687807afbfd..828b46ca65b 100644
--- a/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
+++ b/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
@@ -47,3 +47,48 @@
-- !sql --
1 GET /french/images/nav_venue_off.gif HTTP/1.0
+-- !sql --
+[{\n "token": "foo"\n }, {\n "token": "bar"\n }]
+
+-- !sql --
+[{\n "token": "foo"\n }]
+
+-- !sql --
+[{\n "token": "bar"\n }]
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
diff --git
a/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
b/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
index 14ee4c6819c..f558f075fe9 100644
---
a/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
+++
b/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
@@ -179,4 +179,63 @@ suite("test_custom_analyzer", "p0") {
qt_sql """ select * from test_custom_analyzer_3 where ch match
'nav_venue_off.gif'; """
} catch (SQLException e) {
}
+
+ sql "DROP TABLE IF EXISTS ${indexTbName1}"
+ sql """
+ CREATE TABLE ${indexTbName1} (
+ `a` bigint NOT NULL,
+ `ch` text NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`a`)
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """ insert into ${indexTbName1} values(1, "FOO BAR"); """
+ qt_sql """ select tokenize("FOO BAR", '"analyzer"="lowercase_delimited"');
"""
+ qt_sql """ select tokenize("FOO", '"analyzer"="lowercase_delimited"'); """
+ qt_sql """ select tokenize("BAR", '"analyzer"="lowercase_delimited"'); """
+
+ sql """ alter table ${indexTbName1} add index idx_ch_default(`ch`) using
inverted; """
+ wait_for_last_build_index_finish("${indexTbName1}", 60000)
+ sql """ alter table ${indexTbName1} add index idx_ch(`ch`) using inverted
properties("support_phrase" = "true", "analyzer" = "lowercase_delimited"); """
+ wait_for_last_build_index_finish("${indexTbName1}", 60000)
+
+ qt_sql """ select * from ${indexTbName1} where ch match_all 'FOO'; """
+ qt_sql """ select * from ${indexTbName1} where ch match_all 'BAR'; """
+ qt_sql """ select * from ${indexTbName1} where ch match_all 'FOO BAR'; """
+
+ qt_sql """ select * from ${indexTbName1} where ch match_phrase_prefix
'FOO'; """
+ qt_sql """ select * from ${indexTbName1} where ch match_phrase_prefix
'BAR'; """
+ qt_sql """ select * from ${indexTbName1} where ch match_phrase_prefix 'FOO BAR'; """
+
+ def variantTableName = "test_custom_analyzer_2"
+ sql "DROP TABLE IF EXISTS ${variantTableName}"
+ sql """
+ CREATE TABLE ${variantTableName} (
+ `a` bigint NOT NULL,
+ `var` variant<'string_*' : string,
+ properties("variant_max_subcolumns_count" = "1",
"variant_enable_typed_paths_to_sparse" = "true")
+ > NULL,
+ INDEX idx_string (var) USING INVERTED PROPERTIES("field_pattern" =
"string_*"),
+ INDEX idx_string_prefix (var) USING INVERTED
PROPERTIES("field_pattern" = "string_*", "support_phrase" = "true", "analyzer"
= "lowercase_delimited")
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`a`)
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """ insert into ${variantTableName} values(1, '{"string_1" : "FOO BAR", "string_2" : "FOO BAR", "string_3" : "FOO BAR"}'), (2, '{"string_3" : "FOO BAR"}'); """
+
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_all 'FOO'; """
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_all 'BAR'; """
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_all 'FOO BAR'; """
+
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_phrase_prefix 'FOO'; """
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_phrase_prefix 'BAR'; """
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_phrase_prefix 'FOO BAR'; """
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]