This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 373aa35c603 [fix](inverted index) in_list support inverted index #37921 #37842 (#38738)
373aa35c603 is described below

commit 373aa35c603e9bec5fb88cc3df5c1c2fbdc28ac9
Author: zzzxl <[email protected]>
AuthorDate: Sat Aug 3 06:25:09 2024 +0800

    [fix](inverted index) in_list support inverted index #37921 #37842 (#38738)
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  19 ++-
 be/src/vec/exec/scan/vscan_node.cpp                |  20 +--
 .../data/inverted_index_p0/test_index_rqg_bug4.out |  18 +++
 .../inverted_index_p0/test_index_rqg_bug4.groovy   | 162 +++++++++++++++++++++
 4 files changed, 209 insertions(+), 10 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index bfca52cec00..2dcf3b26aeb 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -737,9 +737,16 @@ Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode(
     } else if (_is_literal_node(node_type)) {
         auto v_literal_expr = std::dynamic_pointer_cast<doris::vectorized::VLiteral>(expr);
         _column_predicate_info->query_values.insert(v_literal_expr->value());
-    } else if (node_type == TExprNodeType::BINARY_PRED || node_type == TExprNodeType::MATCH_PRED) {
+    } else if (node_type == TExprNodeType::BINARY_PRED || node_type == TExprNodeType::MATCH_PRED ||
+               node_type == TExprNodeType::IN_PRED) {
         if (node_type == TExprNodeType::MATCH_PRED) {
             _column_predicate_info->query_op = "match";
+        } else if (node_type == TExprNodeType::IN_PRED) {
+            if (expr->op() == TExprOpcode::type::FILTER_IN) {
+                _column_predicate_info->query_op = "in";
+            } else {
+                _column_predicate_info->query_op = "not_in";
+            }
         } else {
             _column_predicate_info->query_op = expr->fn().name.function_name;
         }
@@ -872,6 +879,10 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
                           pred_type == PredicateType::LT || pred_type == PredicateType::LE ||
                           pred_type == PredicateType::GT || pred_type == PredicateType::GE ||
                           pred_type == PredicateType::MATCH;
+        if (_opts.runtime_state->query_options().enable_inverted_index_compound_inlist) {
+            is_support |= (pred_type == PredicateType::IN_LIST ||
+                           pred_type == PredicateType::NOT_IN_LIST);
+        }
         if (!is_support) {
             _need_read_data_indices[column_id] = true;
             continue;
@@ -2527,6 +2538,12 @@ void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
     } else {
         if (node_type == TExprNodeType::MATCH_PRED) {
             _column_predicate_info->query_op = "match";
+        } else if (node_type == TExprNodeType::IN_PRED) {
+            if (expr->op() == TExprOpcode::type::FILTER_IN) {
+                _column_predicate_info->query_op = "in";
+            } else {
+                _column_predicate_info->query_op = "not_in";
+            }
         } else if (node_type != TExprNodeType::COMPOUND_PRED) {
             _column_predicate_info->query_op = expr->fn().name.function_name;
         }
diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp
index 3b2711a4541..fe6195d2e3c 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -1133,21 +1133,23 @@ Status VScanNode::_normalize_in_and_not_in_compound_predicate(vectorized::VExpr*
         std::string fn_name =
                 expr->op() == TExprOpcode::type::FILTER_IN ? "in_list" : "not_in_list";
 
+        for (const auto& child_expr : expr->children()) {
+            if (child_expr->node_type() == TExprNodeType::NULL_LITERAL) {
+                *pdt = PushDownType::UNACCEPTABLE;
+                return Status::OK();
+            }
+        }
+
         HybridSetBase::IteratorBase* iter = nullptr;
         auto hybrid_set = expr->get_set_func();
 
         if (hybrid_set != nullptr) {
-            if (hybrid_set->size() <= _max_pushdown_conditions_per_column) {
-                iter = hybrid_set->begin();
-            } else {
-                _filter_predicates.in_filters.emplace_back(slot->col_name(), expr->get_set_func());
-                *pdt = PushDownType::ACCEPTABLE;
-                return Status::OK();
-            }
+            *pdt = PushDownType::UNACCEPTABLE;
+            return Status::OK();
         } else {
-            VInPredicate* pred = static_cast<VInPredicate*>(expr);
+            auto* pred = static_cast<vectorized::VInPredicate*>(expr);
 
-            InState* state = reinterpret_cast<InState*>(
+            auto* state = reinterpret_cast<vectorized::InState*>(
                     expr_ctx->fn_context(pred->fn_context_index())
                             ->get_function_state(FunctionContext::FRAGMENT_LOCAL));
 
diff --git a/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
new file mode 100644
index 00000000000..784795cbbc1
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
@@ -0,0 +1,18 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+\N
+a
+b
+f
+h
+i
+j
+k
+l
+o
+p
+q
+v
+y
+z
+
diff --git a/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
new file mode 100644
index 00000000000..9353616f95f
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("test_index_rqg_bug4", "test_index_rqg_bug"){
+    def table1 = "test_index_rqg_bug4_table1"
+    def table2 = "test_index_rqg_bug4_table2"
+
+    sql "drop table if exists ${table1}"
+    sql "drop table if exists ${table2}"
+
+    sql """
+      create table ${table1} (
+      col_date_undef_signed_not_null_index_inverted date  not null  ,
+      col_bigint_undef_signed_not_null_index_inverted bigint  not null  ,
+      col_bigint_undef_signed_not_null bigint  not null  ,
+      col_int_undef_signed int  null  ,
+      col_int_undef_signed_index_inverted int  null  ,
+      col_int_undef_signed_not_null int  not null  ,
+      col_int_undef_signed_not_null_index_inverted int  not null  ,
+      col_bigint_undef_signed bigint  null  ,
+      col_bigint_undef_signed_index_inverted bigint  null  ,
+      col_date_undef_signed date  null  ,
+      col_date_undef_signed_index_inverted date  null  ,
+      col_date_undef_signed_not_null date  not null  ,
+      col_varchar_10__undef_signed varchar(10)  null  ,
+      col_varchar_10__undef_signed_index_inverted varchar(10)  null  ,
+      col_varchar_10__undef_signed_not_null varchar(10)  not null  ,
+      col_varchar_10__undef_signed_not_null_index_inverted varchar(10)  not null  ,
+      col_varchar_1024__undef_signed varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_index_inverted varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_not_null varchar(1024)  not null  ,
+      col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)  not null  ,
+      pk int,
+      INDEX col_int_undef_signed_index_inverted_idx (`col_int_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_int_undef_signed_not_null_index_inverted_idx (`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_index_inverted_idx (`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_not_null_index_inverted_idx (`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_index_inverted_idx (`col_date_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_not_null_index_inverted_idx (`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_index_inverted_idx (`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx (`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_index_inverted_idx (`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx (`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+      ) engine=olap
+      UNIQUE KEY(col_date_undef_signed_not_null_index_inverted, col_bigint_undef_signed_not_null_index_inverted, col_bigint_undef_signed_not_null)
+      PARTITION BY             RANGE(col_date_undef_signed_not_null_index_inverted) (
+                      FROM ('2023-12-09') TO ('2024-03-09') INTERVAL 1 DAY,
+                      FROM ('2025-02-16') TO ('2025-03-09') INTERVAL 1 DAY,
+                      FROM ('2025-06-18') TO ('2025-06-20') INTERVAL 1 DAY,
+                      FROM ('2026-01-01') TO ('2026-03-09') INTERVAL 1 DAY,
+                      FROM ('2027-01-01') TO ('2027-02-09') INTERVAL 1 DAY
+                  )
+      distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+      properties("enable_unique_key_merge_on_write" = "true", 
"replication_num" = "1");
+    """
+
+    sql """
+      create table ${table2} (
+      col_date_undef_signed_not_null date  not null  ,
+      col_bigint_undef_signed_not_null_index_inverted bigint  not null  ,
+      col_bigint_undef_signed_not_null bigint  not null  ,
+      col_int_undef_signed int  null  ,
+      col_int_undef_signed_index_inverted int  null  ,
+      col_int_undef_signed_not_null int  not null  ,
+      col_int_undef_signed_not_null_index_inverted int  not null  ,
+      col_bigint_undef_signed bigint  null  ,
+      col_bigint_undef_signed_index_inverted bigint  null  ,
+      col_date_undef_signed date  null  ,
+      col_date_undef_signed_index_inverted date  null  ,
+      col_date_undef_signed_not_null_index_inverted date  not null  ,
+      col_varchar_10__undef_signed varchar(10)  null  ,
+      col_varchar_10__undef_signed_index_inverted varchar(10)  null  ,
+      col_varchar_10__undef_signed_not_null varchar(10)  not null  ,
+      col_varchar_10__undef_signed_not_null_index_inverted varchar(10)  not null  ,
+      col_varchar_1024__undef_signed varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_index_inverted varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_not_null varchar(1024)  not null  ,
+      col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)  not null  ,
+      pk int,
+      INDEX col_int_undef_signed_index_inverted_idx (`col_int_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_int_undef_signed_not_null_index_inverted_idx (`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_index_inverted_idx (`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_not_null_index_inverted_idx (`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_index_inverted_idx (`col_date_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_not_null_index_inverted_idx (`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_index_inverted_idx (`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx (`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_index_inverted_idx (`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx (`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+      ) engine=olap
+      UNIQUE KEY(col_date_undef_signed_not_null, col_bigint_undef_signed_not_null_index_inverted, col_bigint_undef_signed_not_null)
+      PARTITION BY             RANGE(col_date_undef_signed_not_null) (
+                      PARTITION p0 VALUES LESS THAN ('2023-12-11'),
+                      PARTITION p1 VALUES LESS THAN ('2023-12-15'),
+                      PARTITION p2 VALUES LESS THAN ('2023-12-16'),
+                      PARTITION p3 VALUES LESS THAN ('2023-12-25'),
+                      PARTITION p4 VALUES LESS THAN ('2024-01-18'),
+                      PARTITION p5 VALUES LESS THAN ('2026-02-18'),
+                      PARTITION p6 VALUES LESS THAN ('5024-02-18'),
+                      PARTITION p100 VALUES LESS THAN ('9999-12-31')
+                  )
+      distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+      properties("enable_unique_key_merge_on_write" = "true", 
"replication_num" = "1");
+    """
+
+    sql """
+      insert into ${table1}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde [...]
+    """
+
+    sql """
+      insert into ${table2}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde [...]
+    """
+
+    try {
+        sql "sync"
+
+        qt_sql """
+          select
+            table1.col_varchar_1024__undef_signed_not_null as field1
+          from
+            ${table1} as table1
+            right join ${table2} as table2 on (
+              table2.col_date_undef_signed_index_inverted = table1.col_date_undef_signed_index_inverted
+            )
+          where
+            not (
+              (
+                table2.`col_date_undef_signed_not_null_index_inverted` in (
+                  '2027-01-16',
+                  '2023-12-17',
+                  '2024-02-18',
+                  null,
+                  '2000-10-18',
+                  '2023-12-14',
+                  '2023-12-18'
+                )
+              )
+              and table2.`col_date_undef_signed_not_null_index_inverted` < '2025-06-18'
+            )
+          group by
+            field1
+          order by
+            field1
+          limit
+            10000;
+        """
+    } finally {
+    }
+}


