This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new f2adb18b90b [Fix](Variant) variant fallthrough with inverted index
(#40069) (#40327)
f2adb18b90b is described below
commit f2adb18b90b2766ac6938323c58960cc9f436ad4
Author: lihangyu <[email protected]>
AuthorDate: Tue Sep 3 17:58:20 2024 +0800
[Fix](Variant) variant fallthrough with inverted index (#40069) (#40327)
#40069
---
be/src/olap/rowset/segment_v2/segment.cpp | 6 ++++--
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 7 ++++++-
regression-test/data/variant_github_events_p2/load.out | 10 ++++++++++
regression-test/suites/variant_github_events_p2/load.groovy | 9 +++++++--
regression-test/suites/variant_p0/with_index/load.groovy | 1 +
regression-test/suites/variant_p0/with_index/var_index.groovy | 2 ++
6 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index ac54c9252c9..64f58e546c2 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -203,7 +203,8 @@ Status Segment::new_iterator(SchemaSPtr schema, const
StorageReadOptions& read_o
ColumnReader* reader = nullptr;
if (col.is_extracted_column()) {
auto relative_path = col.path_info_ptr()->copy_pop_front();
- const auto* node =
_sub_column_tree[col.unique_id()].find_exact(relative_path);
+ int32_t unique_id = col.unique_id() > 0 ? col.unique_id() :
col.parent_unique_id();
+ const auto* node =
_sub_column_tree[unique_id].find_exact(relative_path);
reader = node != nullptr ? node->data.reader.get() : nullptr;
} else {
reader = _column_readers.contains(col.unique_id())
@@ -775,8 +776,9 @@ ColumnReader* Segment::_get_column_reader(const
TabletColumn& col) {
// init column iterator by path info
if (col.has_path_info() || col.is_variant_type()) {
auto relative_path = col.path_info_ptr()->copy_pop_front();
+ int32_t unique_id = col.unique_id() > 0 ? col.unique_id() :
col.parent_unique_id();
const auto* node = col.has_path_info()
- ?
_sub_column_tree[col.unique_id()].find_exact(relative_path)
+ ?
_sub_column_tree[unique_id].find_exact(relative_path)
: nullptr;
if (node != nullptr) {
return node->data.reader.get();
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 958647a6bed..db69be01882 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1488,10 +1488,15 @@ Status
SegmentIterator::_init_inverted_index_iterators() {
}
for (auto cid : _schema->column_ids()) {
if (_inverted_index_iterators[cid] == nullptr) {
+ // Skip the type-validity check, since we need to get the inverted index for
the related variant type when reading the segment.
+ // If the type were checked, we could not get the inverted index for a variant
type, and the result would be nullptr. Calling
+ // get_inverted_index with a variant suffix should return the
corresponding inverted index meta.
+ bool check_inverted_index_by_type = false;
// Use segment’s own index_meta, for compatibility with future
indexing needs to default to lowercase.
RETURN_IF_ERROR(_segment->new_inverted_index_iterator(
_opts.tablet_schema->column(cid),
-
_segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid)),
+
_segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid),
+
check_inverted_index_by_type),
_opts, &_inverted_index_iterators[cid]));
}
}
diff --git a/regression-test/data/variant_github_events_p2/load.out
b/regression-test/data/variant_github_events_p2/load.out
index 8d5e3327e3c..4bee99a71a9 100644
--- a/regression-test/data/variant_github_events_p2/load.out
+++ b/regression-test/data/variant_github_events_p2/load.out
@@ -11,3 +11,13 @@
5451
{"actor":{"avatar_url":"https://avatars.githubusercontent.com/u/3437916?","gravatar_id":"","id":3437916,"login":"misol","url":"https://api.github.com/users/misol"},"created_at":"2015-01-01T02:48:28Z","id":"2489433218","org":{"avatar_url":"https://avatars.githubusercontent.com/u/1429259?","gravatar_id":"","id":1429259,"login":"xpressengine","url":"https://api.github.com/orgs/xpressengine"},"payload":{"action":"created","comment":{"body":"Html5
도 같이 지원하는 업로더였으면 좋겠어요! 구글링 해보면 꽤 나와요 :)" [...]
5995
{"actor":{"avatar_url":"https://avatars.githubusercontent.com/u/3437916?","gravatar_id":"","id":3437916,"login":"misol","url":"https://api.github.com/users/misol"},"created_at":"2015-01-01T01:47:44Z","id":"2489414108","org":{"avatar_url":"https://avatars.githubusercontent.com/u/1429259?","gravatar_id":"","id":1429259,"login":"xpressengine","url":"https://api.github.com/orgs/xpressengine"},"payload":{"action":"opened","number":1120,"pull_request":{"_links":{"comments":{"href":"https:
[...]
+-- !sql --
+\N
+\N
+\N
+\N
+4748
+
+-- !sql --
+135
+
diff --git a/regression-test/suites/variant_github_events_p2/load.groovy
b/regression-test/suites/variant_github_events_p2/load.groovy
index 4e81dc2237c..e1742231afc 100644
--- a/regression-test/suites/variant_github_events_p2/load.groovy
+++ b/regression-test/suites/variant_github_events_p2/load.groovy
@@ -169,7 +169,7 @@ suite("regression_test_variant_github_events_p2",
"nonConcurrent,p2"){
// // build inverted index at middle of loading the data
// ADD INDEX
- sql """ ALTER TABLE github_events ADD INDEX idx_var (`v`) USING INVERTED
PROPERTIES("parser" = "chinese", "parser_mode" = "fine_grained",
"support_phrase" = "true") """
+ sql """ ALTER TABLE github_events ADD INDEX idx_var (`v`) USING INVERTED
PROPERTIES("parser" = "english", "support_phrase" = "true") """
wait_for_latest_op_on_table_finish("github_events", timeout)
// 2022
@@ -214,7 +214,8 @@ suite("regression_test_variant_github_events_p2",
"nonConcurrent,p2"){
} while (running)
}
-
+ sql """set enable_match_without_inverted_index = false"""
+ // filter by bloom filter
qt_sql """select cast(v["payload"]["pull_request"]["additions"] as int)
from github_events where cast(v["repo"]["name"] as string) =
'xpressengine/xe-core' order by 1;"""
qt_sql """select * from github_events where cast(v["repo"]["name"] as
string) = 'xpressengine/xe-core' order by 1 limit 10"""
sql """select * from github_events order by k limit 10"""
@@ -230,4 +231,8 @@ suite("regression_test_variant_github_events_p2",
"nonConcurrent,p2"){
sql """insert into github_events2 select * from github_events order by k"""
sql """select v['payload']['commits'] from github_events order by k ;"""
sql """select v['payload']['commits'] from github_events2 order by k ;"""
+
+ // query with inverted index
+ qt_sql """select cast(v["payload"]["pull_request"]["additions"] as int)
from github_events where v["repo"]["name"] match 'xpressengine' order by 1;"""
+ qt_sql """select count() from github_events where v["repo"]["name"] match
'apache' order by 1;"""
}
\ No newline at end of file
diff --git a/regression-test/suites/variant_p0/with_index/load.groovy
b/regression-test/suites/variant_p0/with_index/load.groovy
index a5abbae2ab9..ba46e7a9eee 100644
--- a/regression-test/suites/variant_p0/with_index/load.groovy
+++ b/regression-test/suites/variant_p0/with_index/load.groovy
@@ -61,6 +61,7 @@ suite("regression_test_variant_with_index", "nonConcurrent"){
properties("replication_num" = "1", "disable_auto_compaction" =
"true");
"""
sql """insert into var_with_index values(1, '{"a" : 0, "b": 3}', 'hello
world'), (2, '{"a" : 123}', 'world'),(3, '{"a" : 123}', 'hello world')"""
+ sql """set enable_match_without_inverted_index = false"""
qt_sql_inv_1 """select v["a"] from var_with_index where inv match 'hello'
order by k"""
qt_sql_inv_2 """select v["a"] from var_with_index where inv match 'hello'
and cast(v['a'] as int) > 0 order by k"""
qt_sql_inv_3 """select * from var_with_index where inv match 'hello' and
cast(v["a"] as int) > 0 order by k"""
diff --git a/regression-test/suites/variant_p0/with_index/var_index.groovy
b/regression-test/suites/variant_p0/with_index/var_index.groovy
index 5f061f3208b..a21d2429545 100644
--- a/regression-test/suites/variant_p0/with_index/var_index.groovy
+++ b/regression-test/suites/variant_p0/with_index/var_index.groovy
@@ -33,7 +33,9 @@ suite("regression_test_variant_var_index", "p0"){
sql """insert into var_index values(2, '{"a" : 18811, "b" : "hello world",
"c" : 1181111}')"""
sql """insert into var_index values(3, '{"a" : 18811, "b" : "hello
wworld", "c" : 11111}')"""
sql """insert into var_index values(4, '{"a" : 1234, "b" : "hello xxx
world", "c" : 8181111}')"""
+ sql """set enable_match_without_inverted_index = false"""
qt_sql """select * from var_index where cast(v["a"] as smallint) > 123 and
cast(v["b"] as string) match 'hello' and cast(v["c"] as int) > 1024 order by
k"""
+ sql """set enable_match_without_inverted_index = true"""
sql """insert into var_index values(5, '{"a" : 123456789, "b" : 123456,
"c" : 8181111}')"""
qt_sql """select * from var_index where cast(v["a"] as int) > 123 and
cast(v["b"] as string) match 'hello' and cast(v["c"] as int) > 11111 order by
k"""
// insert double/float/array/json
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]