This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new f2adb18b90b [Fix](Variant) variant fallthrough with inverted index 
(#40069) (#40327)
f2adb18b90b is described below

commit f2adb18b90b2766ac6938323c58960cc9f436ad4
Author: lihangyu <[email protected]>
AuthorDate: Tue Sep 3 17:58:20 2024 +0800

    [Fix](Variant) variant fallthrough with inverted index (#40069) (#40327)
    
    #40069
---
 be/src/olap/rowset/segment_v2/segment.cpp                     |  6 ++++--
 be/src/olap/rowset/segment_v2/segment_iterator.cpp            |  7 ++++++-
 regression-test/data/variant_github_events_p2/load.out        | 10 ++++++++++
 regression-test/suites/variant_github_events_p2/load.groovy   |  9 +++++++--
 regression-test/suites/variant_p0/with_index/load.groovy      |  1 +
 regression-test/suites/variant_p0/with_index/var_index.groovy |  2 ++
 6 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index ac54c9252c9..64f58e546c2 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -203,7 +203,8 @@ Status Segment::new_iterator(SchemaSPtr schema, const 
StorageReadOptions& read_o
         ColumnReader* reader = nullptr;
         if (col.is_extracted_column()) {
             auto relative_path = col.path_info_ptr()->copy_pop_front();
-            const auto* node = 
_sub_column_tree[col.unique_id()].find_exact(relative_path);
+            int32_t unique_id = col.unique_id() > 0 ? col.unique_id() : 
col.parent_unique_id();
+            const auto* node = 
_sub_column_tree[unique_id].find_exact(relative_path);
             reader = node != nullptr ? node->data.reader.get() : nullptr;
         } else {
             reader = _column_readers.contains(col.unique_id())
@@ -775,8 +776,9 @@ ColumnReader* Segment::_get_column_reader(const 
TabletColumn& col) {
     // init column iterator by path info
     if (col.has_path_info() || col.is_variant_type()) {
         auto relative_path = col.path_info_ptr()->copy_pop_front();
+        int32_t unique_id = col.unique_id() > 0 ? col.unique_id() : 
col.parent_unique_id();
         const auto* node = col.has_path_info()
-                                   ? 
_sub_column_tree[col.unique_id()].find_exact(relative_path)
+                                   ? 
_sub_column_tree[unique_id].find_exact(relative_path)
                                    : nullptr;
         if (node != nullptr) {
             return node->data.reader.get();
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 958647a6bed..db69be01882 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1488,10 +1488,15 @@ Status 
SegmentIterator::_init_inverted_index_iterators() {
     }
     for (auto cid : _schema->column_ids()) {
         if (_inverted_index_iterators[cid] == nullptr) {
+            // Do not validate the column type here: when reading the segment 
we need the inverted index for the related variant type.
+            // If the type were validated, no inverted index would be found for 
the variant type and the lookup would return nullptr.
+            // Calling get_inverted_index with a variant suffix should return 
the corresponding inverted index meta.
+            bool check_inverted_index_by_type = false;
             // Use segment’s own index_meta, for compatibility with future 
indexing needs to default to lowercase.
             RETURN_IF_ERROR(_segment->new_inverted_index_iterator(
                     _opts.tablet_schema->column(cid),
-                    
_segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid)),
+                    
_segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid),
+                                                                 
check_inverted_index_by_type),
                     _opts, &_inverted_index_iterators[cid]));
         }
     }
diff --git a/regression-test/data/variant_github_events_p2/load.out 
b/regression-test/data/variant_github_events_p2/load.out
index 8d5e3327e3c..4bee99a71a9 100644
--- a/regression-test/data/variant_github_events_p2/load.out
+++ b/regression-test/data/variant_github_events_p2/load.out
@@ -11,3 +11,13 @@
 5451   
{"actor":{"avatar_url":"https://avatars.githubusercontent.com/u/3437916?","gravatar_id":"","id":3437916,"login":"misol","url":"https://api.github.com/users/misol"},"created_at":"2015-01-01T02:48:28Z","id":"2489433218","org":{"avatar_url":"https://avatars.githubusercontent.com/u/1429259?","gravatar_id":"","id":1429259,"login":"xpressengine","url":"https://api.github.com/orgs/xpressengine"},"payload":{"action":"created","comment":{"body":"Html5
 도 같이 지원하는 업로더였으면 좋겠어요! 구글링 해보면 꽤 나와요 :)" [...]
 5995   
{"actor":{"avatar_url":"https://avatars.githubusercontent.com/u/3437916?","gravatar_id":"","id":3437916,"login":"misol","url":"https://api.github.com/users/misol"},"created_at":"2015-01-01T01:47:44Z","id":"2489414108","org":{"avatar_url":"https://avatars.githubusercontent.com/u/1429259?","gravatar_id":"","id":1429259,"login":"xpressengine","url":"https://api.github.com/orgs/xpressengine"},"payload":{"action":"opened","number":1120,"pull_request":{"_links":{"comments":{"href":"https:
 [...]
 
+-- !sql --
+\N
+\N
+\N
+\N
+4748
+
+-- !sql --
+135
+
diff --git a/regression-test/suites/variant_github_events_p2/load.groovy 
b/regression-test/suites/variant_github_events_p2/load.groovy
index 4e81dc2237c..e1742231afc 100644
--- a/regression-test/suites/variant_github_events_p2/load.groovy
+++ b/regression-test/suites/variant_github_events_p2/load.groovy
@@ -169,7 +169,7 @@ suite("regression_test_variant_github_events_p2", 
"nonConcurrent,p2"){
 
     // // build inverted index at middle of loading the data
     // ADD INDEX
-    sql """ ALTER TABLE github_events ADD INDEX idx_var (`v`) USING INVERTED 
PROPERTIES("parser" = "chinese", "parser_mode" = "fine_grained", 
"support_phrase" = "true") """
+    sql """ ALTER TABLE github_events ADD INDEX idx_var (`v`) USING INVERTED 
PROPERTIES("parser" = "english", "support_phrase" = "true") """
     wait_for_latest_op_on_table_finish("github_events", timeout)
 
     // 2022
@@ -214,7 +214,8 @@ suite("regression_test_variant_github_events_p2", 
"nonConcurrent,p2"){
         } while (running)
     }
 
-    
+    sql """set enable_match_without_inverted_index = false""" 
+    // filter by bloom filter
     qt_sql """select cast(v["payload"]["pull_request"]["additions"] as int)  
from github_events where cast(v["repo"]["name"] as string) = 
'xpressengine/xe-core' order by 1;"""
     qt_sql """select * from github_events where  cast(v["repo"]["name"] as 
string) = 'xpressengine/xe-core' order by 1 limit 10"""
     sql """select * from github_events order by k limit 10"""
@@ -230,4 +231,8 @@ suite("regression_test_variant_github_events_p2", 
"nonConcurrent,p2"){
     sql """insert into github_events2 select * from github_events order by k"""
     sql """select v['payload']['commits'] from github_events order by k ;"""
     sql """select v['payload']['commits'] from github_events2 order by k ;"""
+
+    // query with inverted index
+    qt_sql """select cast(v["payload"]["pull_request"]["additions"] as int)  
from github_events where v["repo"]["name"] match 'xpressengine' order by 1;"""
+    qt_sql """select count()  from github_events where v["repo"]["name"] match 
'apache' order by 1;"""
 }
\ No newline at end of file
diff --git a/regression-test/suites/variant_p0/with_index/load.groovy 
b/regression-test/suites/variant_p0/with_index/load.groovy
index a5abbae2ab9..ba46e7a9eee 100644
--- a/regression-test/suites/variant_p0/with_index/load.groovy
+++ b/regression-test/suites/variant_p0/with_index/load.groovy
@@ -61,6 +61,7 @@ suite("regression_test_variant_with_index", "nonConcurrent"){
         properties("replication_num" = "1", "disable_auto_compaction" = 
"true");
     """
     sql """insert into var_with_index values(1, '{"a" : 0, "b": 3}', 'hello 
world'), (2, '{"a" : 123}', 'world'),(3, '{"a" : 123}', 'hello world')"""
+    sql """set enable_match_without_inverted_index = false""" 
     qt_sql_inv_1 """select v["a"] from var_with_index where inv match 'hello' 
order by k"""
     qt_sql_inv_2 """select v["a"] from var_with_index where inv match 'hello' 
and cast(v['a'] as int) > 0 order by k"""
     qt_sql_inv_3 """select * from var_with_index where inv match 'hello' and 
cast(v["a"] as int) > 0 order by k"""
diff --git a/regression-test/suites/variant_p0/with_index/var_index.groovy 
b/regression-test/suites/variant_p0/with_index/var_index.groovy
index 5f061f3208b..a21d2429545 100644
--- a/regression-test/suites/variant_p0/with_index/var_index.groovy
+++ b/regression-test/suites/variant_p0/with_index/var_index.groovy
@@ -33,7 +33,9 @@ suite("regression_test_variant_var_index", "p0"){
     sql """insert into var_index values(2, '{"a" : 18811, "b" : "hello world", 
"c" : 1181111}')"""
     sql """insert into var_index values(3, '{"a" : 18811, "b" : "hello 
wworld", "c" : 11111}')"""
     sql """insert into var_index values(4, '{"a" : 1234, "b" : "hello xxx 
world", "c" : 8181111}')"""
+    sql """set enable_match_without_inverted_index = false""" 
     qt_sql """select * from var_index where cast(v["a"] as smallint) > 123 and 
cast(v["b"] as string) match 'hello' and cast(v["c"] as int) > 1024 order by 
k"""
+    sql """set enable_match_without_inverted_index = true""" 
     sql """insert into var_index values(5, '{"a" : 123456789, "b" : 123456, 
"c" : 8181111}')"""
     qt_sql """select * from var_index where cast(v["a"] as int) > 123 and 
cast(v["b"] as string) match 'hello' and cast(v["c"] as int) > 11111 order by 
k"""
     // insert double/float/array/json


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to