This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new d47151ca741 branch-3.1: [fix](inverted index) fix match without
inverted index #57468 (#57673)
d47151ca741 is described below
commit d47151ca741811df64a0305079c4af50e479d83b
Author: Sun Chenyang <[email protected]>
AuthorDate: Wed Nov 5 14:12:17 2025 +0800
branch-3.1: [fix](inverted index) fix match without inverted index #57468
(#57673)
pick from master #57468
---
be/src/vec/functions/match.cpp | 6 +-
.../analyzer/test_custom_analyzer.out | 45 +++++++++++++
.../analyzer/test_custom_analyzer.groovy | 77 ++++++++++++++++++++++
3 files changed, 127 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index ef94d534933..b46caa9b0a9 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -176,6 +176,7 @@ std::vector<TermInfo>
FunctionMatchBase::analyse_query_str_token(
if (inverted_index_ctx == nullptr) {
return query_tokens;
}
+ // parser type is none and custom analyzer is empty, which means no analyzer is set
if (inverted_index_ctx->parser_type ==
InvertedIndexParserType::PARSER_NONE &&
inverted_index_ctx->custom_analyzer.empty()) {
query_tokens.emplace_back(match_query_str);
@@ -198,7 +199,9 @@ inline std::vector<TermInfo>
FunctionMatchBase::analyse_data_token(
for (auto next_src_array_offset =
(*array_offsets)[current_block_row_idx];
current_src_array_offset < next_src_array_offset;
++current_src_array_offset) {
const auto& str_ref =
string_col->get_data_at(current_src_array_offset);
- if (inverted_index_ctx->parser_type ==
InvertedIndexParserType::PARSER_NONE) {
+ // parser type is none and custom analyzer is empty, which means no analyzer is set
+ if (inverted_index_ctx->parser_type ==
InvertedIndexParserType::PARSER_NONE &&
+ inverted_index_ctx->custom_analyzer.empty()) {
data_tokens.emplace_back(str_ref.to_string());
continue;
}
@@ -212,6 +215,7 @@ inline std::vector<TermInfo>
FunctionMatchBase::analyse_data_token(
}
} else {
const auto& str_ref = string_col->get_data_at(current_block_row_idx);
+ // parser type is none and custom analyzer is empty, which means no analyzer is set
if (inverted_index_ctx->parser_type ==
InvertedIndexParserType::PARSER_NONE &&
inverted_index_ctx->custom_analyzer.empty()) {
data_tokens.emplace_back(str_ref.to_string());
diff --git
a/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
b/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
index 687807afbfd..828b46ca65b 100644
--- a/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
+++ b/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
@@ -47,3 +47,48 @@
-- !sql --
1 GET /french/images/nav_venue_off.gif HTTP/1.0
+-- !sql --
+[{\n "token": "foo"\n }, {\n "token": "bar"\n }]
+
+-- !sql --
+[{\n "token": "foo"\n }]
+
+-- !sql --
+[{\n "token": "bar"\n }]
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 FOO BAR
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
+-- !sql --
+1 {"string_1":"FOO BAR","string_2":"FOO BAR","string_3":"FOO BAR"}
+
diff --git
a/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
b/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
index 14ee4c6819c..e730a1f4a1c 100644
---
a/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
+++
b/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
@@ -179,4 +179,81 @@ suite("test_custom_analyzer", "p0") {
qt_sql """ select * from test_custom_analyzer_3 where ch match
'nav_venue_off.gif'; """
} catch (SQLException e) {
}
+
+ def delta_time = 1000
+ def alter_res = "null"
+ def useTime = 0
+ def wait_for_latest_op_on_table_finish = { tableName, OpTimeout ->
+ for(int t = delta_time; t <= OpTimeout; t += delta_time){
+ alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName =
"${tableName}" ORDER BY CreateTime DESC LIMIT 1;"""
+ alter_res = alter_res.toString()
+ if(alter_res.contains("FINISHED")) {
+ sleep(3000) // wait change table state to normal
+ logger.info(tableName + " latest alter job finished, detail: "
+ alter_res)
+ break
+ }
+ useTime = t
+ sleep(delta_time)
+ }
+ assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish
timeout")
+ }
+
+ sql "DROP TABLE IF EXISTS ${indexTbName1}"
+ sql """
+ CREATE TABLE ${indexTbName1} (
+ `a` bigint NOT NULL,
+ `ch` text NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`a`)
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """ insert into ${indexTbName1} values(1, "FOO BAR"); """
+ qt_sql """ select tokenize("FOO BAR", '"analyzer"="lowercase_delimited"');
"""
+ qt_sql """ select tokenize("FOO", '"analyzer"="lowercase_delimited"'); """
+ qt_sql """ select tokenize("BAR", '"analyzer"="lowercase_delimited"'); """
+
+ sql """ alter table ${indexTbName1} add index idx_ch_default(`ch`) using
inverted; """
+ wait_for_latest_op_on_table_finish("${indexTbName1}", 60000)
+ sql """ alter table ${indexTbName1} add index idx_ch(`ch`) using inverted
properties("support_phrase" = "true", "analyzer" = "lowercase_delimited"); """
+ wait_for_latest_op_on_table_finish("${indexTbName1}", 60000)
+
+ qt_sql """ select * from ${indexTbName1} where ch match_all 'FOO'; """
+ qt_sql """ select * from ${indexTbName1} where ch match_all 'BAR'; """
+ qt_sql """ select * from ${indexTbName1} where ch match_all 'FOO BAR'; """
+
+ qt_sql """ select * from ${indexTbName1} where ch match_phrase_prefix
'FOO'; """
+ qt_sql """ select * from ${indexTbName1} where ch match_phrase_prefix
'BAR'; """
+ qt_sql """ select * from ${indexTbName1} where ch match_phrase_prefix 'FOO
BAR'; """
+
+ def variantTableName = "test_custom_analyzer_2"
+ sql "DROP TABLE IF EXISTS ${variantTableName}"
+ sql """
+ CREATE TABLE ${variantTableName} (
+ `a` bigint NOT NULL,
+ `var` variant<'string_*' : string,
+ properties("variant_max_subcolumns_count" = "1",
"variant_enable_typed_paths_to_sparse" = "true")
+ > NULL,
+ INDEX idx_string (var) USING INVERTED PROPERTIES("field_pattern" =
"string_*"),
+ INDEX idx_string_prefix (var) USING INVERTED
PROPERTIES("field_pattern" = "string_*", "support_phrase" = "true", "analyzer"
= "lowercase_delimited")
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`a`)
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """ insert into ${variantTableName} values(1, '{"string_1" : "FOO
BAR", "string_2" : "FOO BAR", "string_3" : "FOO BAR"}'), (2, '{"string_3" :
"FOO BAR"}'); """
+
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_all 'FOO'; """
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_all 'BAR'; """
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_all 'FOO BAR'; """
+
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_phrase_prefix 'FOO'; """
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_phrase_prefix 'BAR'; """
+ qt_sql """ select * from ${variantTableName} where cast(var['string_1'] as
varchar) match_phrase_prefix 'FOO BAR'; """
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]