This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 70daa1f85d0 [opt](inverted index) Controls whether the in_list can execute fast_execute. (#40141)
70daa1f85d0 is described below

commit 70daa1f85d0b2ec080b34c02f9de010d2f0f49f0
Author: zzzxl <[email protected]>
AuthorDate: Fri Aug 30 10:32:43 2024 +0800

    [opt](inverted index) Controls whether the in_list can execute fast_execute. (#40141)
    
    https://github.com/apache/doris/pull/40022
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp          | 13 ++++++++++---
 be/src/vec/exprs/vexpr.cpp                                  |  3 +++
 be/src/vec/exprs/vexpr.h                                    |  1 +
 be/src/vec/exprs/vin_predicate.cpp                          |  2 ++
 .../src/main/java/org/apache/doris/qe/SessionVariable.java  | 10 ++++++++++
 gensrc/thrift/PaloInternalService.thrift                    |  1 +
 .../test_index_inlist_fault_injection.out                   |  6 ++++++
 .../test_index_inlist_fault_injection.groovy                |  6 ++++++
 8 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 86476139a44..b9f9615f008 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -917,10 +917,17 @@ bool 
SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
         return false;
     }
 
-    if ((pred->type() == PredicateType::IN_LIST || pred->type() == 
PredicateType::NOT_IN_LIST) &&
-        pred->predicate_params()->marked_by_runtime_filter) {
+    if (pred->type() == PredicateType::IN_LIST || pred->type() == 
PredicateType::NOT_IN_LIST) {
+        auto predicate_param = pred->predicate_params();
         // in_list or not_in_list predicate produced by runtime filter
-        return false;
+        if (predicate_param->marked_by_runtime_filter) {
+            return false;
+        }
+        // the in_list or not_in_list value count cannot be greater than 
threshold
+        int32_t threshold = 
_opts.runtime_state->query_options().in_list_value_count_threshold;
+        if (pred_in_compound && predicate_param->values.size() > threshold) {
+            return false;
+        }
     }
 
     // UNTOKENIZED strings exceed ignore_above, they are written as null, 
causing range query errors
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index b66c8aa80a7..5cb0607411d 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -652,6 +652,9 @@ std::string VExpr::gen_predicate_result_sign(Block& block, 
const ColumnNumbers&
         pred_result_sign +=
                 BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" + 
function_name + "_";
         if (function_name == "in" || function_name == "not_in") {
+            if (arguments.size() - 1 > _in_list_value_count_threshold) {
+                return pred_result_sign;
+            }
             // Generating 'result_sign' from 'inlist' requires sorting the 
values.
             std::set<std::string> values;
             for (size_t i = 1; i < arguments.size(); i++) {
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
index 777d485156a..88b18c67870 100644
--- a/be/src/vec/exprs/vexpr.h
+++ b/be/src/vec/exprs/vexpr.h
@@ -307,6 +307,7 @@ protected:
     uint32_t _index_unique_id = 0;
     bool _can_fast_execute = false;
     bool _enable_inverted_index_query = true;
+    uint32_t _in_list_value_count_threshold = 10;
 };
 
 } // namespace vectorized
diff --git a/be/src/vec/exprs/vin_predicate.cpp 
b/be/src/vec/exprs/vin_predicate.cpp
index 4affec791a4..4d518f9f923 100644
--- a/be/src/vec/exprs/vin_predicate.cpp
+++ b/be/src/vec/exprs/vin_predicate.cpp
@@ -28,6 +28,7 @@
 #include <vector>
 
 #include "common/status.h"
+#include "runtime/runtime_state.h"
 #include "vec/core/block.h"
 #include "vec/core/column_numbers.h"
 #include "vec/core/column_with_type_and_name.h"
@@ -79,6 +80,7 @@ Status VInPredicate::prepare(RuntimeState* state, const 
RowDescriptor& desc,
     VExpr::register_function_context(state, context);
     _prepare_finished = true;
     _can_fast_execute = can_fast_execute();
+    _in_list_value_count_threshold = 
state->query_options().in_list_value_count_threshold;
     return Status::OK();
 }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 4822a41fc4b..b7d977fd386 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -621,6 +621,8 @@ public class SessionVariable implements Serializable, 
Writable {
     public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX = 
"enable_match_without_inverted_index";
     public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX = 
"enable_fallback_on_missing_inverted_index";
 
+    public static final String IN_LIST_VALUE_COUNT_THRESHOLD = 
"in_list_value_count_threshold";
+
     /**
      * If set false, user couldn't submit analyze SQL and FE won't allocate 
any related resources.
      */
@@ -2022,6 +2024,13 @@ public class SessionVariable implements Serializable, 
Writable {
     })
     public boolean enableFallbackOnMissingInvertedIndex = true;
 
+    @VariableMgr.VarAttr(name = IN_LIST_VALUE_COUNT_THRESHOLD, description = {
+        "in条件value数量大于这个threshold后将不会走fast_execute",
+        "When the number of values in the IN condition exceeds this threshold,"
+                + " fast_execute will not be used."
+    })
+    public int inListValueCountThreshold = 10;
+
     public void setEnableEsParallelScroll(boolean enableESParallelScroll) {
         this.enableESParallelScroll = enableESParallelScroll;
     }
@@ -3525,6 +3534,7 @@ public class SessionVariable implements Serializable, 
Writable {
         
tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex);
 
         tResult.setKeepCarriageReturn(keepCarriageReturn);
+        tResult.setInListValueCountThreshold(inListValueCountThreshold);
         return tResult;
     }
 
diff --git a/gensrc/thrift/PaloInternalService.thrift 
b/gensrc/thrift/PaloInternalService.thrift
index 3ffa27788ac..b26e271b911 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -322,6 +322,7 @@ struct TQueryOptions {
 
   126: optional i32 runtime_bloom_filter_max_size = 16777216;
 
+  127: optional i32 in_list_value_count_threshold = 10;
   128: optional bool enable_verbose_profile = false;
   129: optional i32 rpc_verbose_profile_max_instance_count = 0;
 
diff --git 
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out 
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
index 528b4008084..8409a168a00 100644
--- 
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
+++ 
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
@@ -65,3 +65,9 @@
 -- !sql --
 2
 
+-- !sql --
+852
+
+-- !sql --
+852
+
diff --git 
a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
 
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
index 8d22c001ed0..a9b3d513521 100644
--- 
a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
+++ 
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
@@ -117,6 +117,12 @@ suite("test_index_inlist_fault_injection", 
"nonConcurrent") {
         qt_sql """ select count() from ${indexTbName} where (clientip = 
'2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """
 
         sql """ set enable_common_expr_pushdown = true; """
+
+        sql """ set in_list_value_count_threshold = 0; """
+        qt_sql """ select count() from ${indexTbName} where (clientip in 
('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """
+        sql """ set in_list_value_count_threshold = 10; """
+        qt_sql """ select count() from ${indexTbName} where (clientip in 
('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """
+
       } finally {
       }
     } finally {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to