This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 34f7c66a1ca [fix](inverted index) multi match distinguishes the
inverted index v1 and v2 (#39149)
34f7c66a1ca is described below
commit 34f7c66a1ca10f3570e5c0715875cc1390609fb9
Author: zzzxl <[email protected]>
AuthorDate: Fri Aug 9 19:49:30 2024 +0800
[fix](inverted index) multi match distinguishes the inverted index v1 and
v2 (#39149)
## Proposed changes
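1. Inverted index storage formats V1 and V2 identify columns differently (V1 by column name, V2 by column unique id), so multi_match must query with the identifier that matches the table's format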
---
be/src/vec/functions/function_multi_match.cpp | 12 ++-
.../inverted_index_p0/test_index_multi_match.out | 24 ++++++
.../test_index_multi_match.groovy | 95 ++++++++++++----------
3 files changed, 83 insertions(+), 48 deletions(-)
diff --git a/be/src/vec/functions/function_multi_match.cpp
b/be/src/vec/functions/function_multi_match.cpp
index ba7fa887f19..4fc8103a2d3 100644
--- a/be/src/vec/functions/function_multi_match.cpp
+++ b/be/src/vec/functions/function_multi_match.cpp
@@ -169,9 +169,15 @@ Status
FunctionMultiMatch::eval_inverted_index(FunctionContext* context,
auto single_result = std::make_shared<roaring::Roaring>();
StringRef query_value(match_param->query.data());
- RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state,
-
std::to_string(column.unique_id()), &query_value,
- query_type, single_result));
+ auto index_version =
tablet_schema->get_inverted_index_storage_format();
+ if (index_version == InvertedIndexStorageFormatPB::V1) {
+ RETURN_IF_ERROR(index_reader->query(opts.stats,
opts.runtime_state, column_name,
+ &query_value, query_type,
single_result));
+ } else if (index_version == InvertedIndexStorageFormatPB::V2) {
+ RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state,
+
std::to_string(column.unique_id()), &query_value,
+ query_type, single_result));
+ }
(*result) |= (*single_result);
}
diff --git a/regression-test/data/inverted_index_p0/test_index_multi_match.out
b/regression-test/data/inverted_index_p0/test_index_multi_match.out
index 0a2ed2730b4..77e3c86623e 100644
--- a/regression-test/data/inverted_index_p0/test_index_multi_match.out
+++ b/regression-test/data/inverted_index_p0/test_index_multi_match.out
@@ -23,3 +23,27 @@
-- !sql --
44
+-- !sql --
+178
+
+-- !sql --
+180
+
+-- !sql --
+859
+
+-- !sql --
+44
+
+-- !sql --
+178
+
+-- !sql --
+180
+
+-- !sql --
+859
+
+-- !sql --
+44
+
diff --git
a/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
b/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
index f08dd984a67..90f9f7a751b 100644
--- a/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
+++ b/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy
@@ -19,51 +19,37 @@
suite("test_index_multi_match", "p0"){
def indexTbName1 = "test_index_multi_match_1"
def indexTbName2 = "test_index_multi_match_2"
+ def indexTbName3 = "test_index_multi_match_3"
+ def indexTbName4 = "test_index_multi_match_4"
sql "DROP TABLE IF EXISTS ${indexTbName1}"
sql "DROP TABLE IF EXISTS ${indexTbName2}"
+ sql "DROP TABLE IF EXISTS ${indexTbName3}"
+ sql "DROP TABLE IF EXISTS ${indexTbName4}"
- sql """
- CREATE TABLE ${indexTbName1} (
- `@timestamp` int(11) NULL COMMENT "",
- `clientip` text NULL COMMENT "",
- `request` text NULL COMMENT "",
- `status` text NULL COMMENT "",
- `size` text NULL COMMENT "",
- INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
- INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
- INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
- INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT ''
- ) ENGINE=OLAP
- DUPLICATE KEY(`@timestamp`)
- COMMENT "OLAP"
- DISTRIBUTED BY RANDOM BUCKETS 1
- PROPERTIES (
- "replication_allocation" = "tag.location.default: 1",
- "disable_auto_compaction" = "true"
- );
- """
-
- sql """
- CREATE TABLE ${indexTbName2} (
- `@timestamp` int(11) NULL COMMENT "",
- `clientip` text NULL COMMENT "",
- `request` text NULL COMMENT "",
- `status` text NULL COMMENT "",
- `size` text NULL COMMENT "",
- INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
- INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
- INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
- INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT ''
- ) ENGINE=OLAP
- DUPLICATE KEY(`@timestamp`)
- COMMENT "OLAP"
- DISTRIBUTED BY RANDOM BUCKETS 1
- PROPERTIES (
- "replication_allocation" = "tag.location.default: 1",
- "disable_auto_compaction" = "true"
- );
- """
+ def create_table = {table_name, idx_version ->
+ sql """
+ CREATE TABLE ${table_name} (
+ `@timestamp` int(11) NULL COMMENT "",
+ `clientip` text NULL COMMENT "",
+ `request` text NULL COMMENT "",
+ `status` text NULL COMMENT "",
+ `size` text NULL COMMENT "",
+ INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
+ INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
+ INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
+ INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`@timestamp`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "inverted_index_storage_format" = "${idx_version}",
+ "disable_auto_compaction" = "true"
+ );
+ """
+ }
def load_httplogs_data = {table_name, label, read_flag, format_flag,
file_name, ignore_failure=false,
expected_succ_rows = -1, load_to_single_tablet =
'true' ->
@@ -103,20 +89,39 @@ suite("test_index_multi_match", "p0"){
}
try {
+ create_table(indexTbName1, 'V1')
+ create_table(indexTbName2, 'V2')
+ create_table(indexTbName3, 'V1')
+ create_table(indexTbName4, 'V2')
+
load_httplogs_data.call(indexTbName1, 'test_index_multi_match_1',
'true', 'json', 'documents-1000.json')
load_httplogs_data.call(indexTbName2, 'test_index_multi_match_2',
'true', 'json', 'documents-1000.json')
+ load_httplogs_data.call(indexTbName3, 'test_index_multi_match_3',
'true', 'json', 'documents-1000.json')
+ load_httplogs_data.call(indexTbName4, 'test_index_multi_match_4',
'true', 'json', 'documents-1000.json')
sql "sync"
+ sql """ set enable_common_expr_pushdown = true """
+
qt_sql """ select count() from ${indexTbName1} where (clientip
match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName1} where (clientip
match_phrase_prefix '2' or request match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName1} where (clientip
match_phrase_prefix '2' or request match_phrase_prefix '2' or status
match_phrase_prefix '2' or size match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName1} where (clientip
match_phrase_prefix 'a' or request match_phrase_prefix 'a' or status
match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """
- qt_sql """ select count() from ${indexTbName2} where
multi_match(clientip, '', 'phrase_prefix', '2'); """
- qt_sql """ select count() from ${indexTbName2} where
multi_match(clientip, 'request', 'phrase_prefix', '2'); """
- qt_sql """ select count() from ${indexTbName2} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
- qt_sql """ select count() from ${indexTbName2} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """
+ qt_sql """ select count() from ${indexTbName2} where (clientip
match_phrase_prefix '2'); """
+ qt_sql """ select count() from ${indexTbName2} where (clientip
match_phrase_prefix '2' or request match_phrase_prefix '2'); """
+ qt_sql """ select count() from ${indexTbName2} where (clientip
match_phrase_prefix '2' or request match_phrase_prefix '2' or status
match_phrase_prefix '2' or size match_phrase_prefix '2'); """
+ qt_sql """ select count() from ${indexTbName2} where (clientip
match_phrase_prefix 'a' or request match_phrase_prefix 'a' or status
match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """
+
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, '', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, 'request', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName3} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """
+
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, '', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, 'request', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
+ qt_sql """ select count() from ${indexTbName4} where
multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]