This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 70daa1f85d0 [opt](inverted index) Controls whether the in_list can
execute fast_execute. (#40141)
70daa1f85d0 is described below
commit 70daa1f85d0b2ec080b34c02f9de010d2f0f49f0
Author: zzzxl <[email protected]>
AuthorDate: Fri Aug 30 10:32:43 2024 +0800
[opt](inverted index) Controls whether the in_list can execute
fast_execute. (#40141)
https://github.com/apache/doris/pull/40022
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 13 ++++++++++---
be/src/vec/exprs/vexpr.cpp | 3 +++
be/src/vec/exprs/vexpr.h | 1 +
be/src/vec/exprs/vin_predicate.cpp | 2 ++
.../src/main/java/org/apache/doris/qe/SessionVariable.java | 10 ++++++++++
gensrc/thrift/PaloInternalService.thrift | 1 +
.../test_index_inlist_fault_injection.out | 6 ++++++
.../test_index_inlist_fault_injection.groovy | 6 ++++++
8 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 86476139a44..b9f9615f008 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -917,10 +917,17 @@ bool
SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
return false;
}
- if ((pred->type() == PredicateType::IN_LIST || pred->type() ==
PredicateType::NOT_IN_LIST) &&
- pred->predicate_params()->marked_by_runtime_filter) {
+ if (pred->type() == PredicateType::IN_LIST || pred->type() ==
PredicateType::NOT_IN_LIST) {
+ auto predicate_param = pred->predicate_params();
// in_list or not_in_list predicate produced by runtime filter
- return false;
+ if (predicate_param->marked_by_runtime_filter) {
+ return false;
+ }
+ // the in_list or not_in_list value count cannot be greater than
threshold
+ int32_t threshold =
_opts.runtime_state->query_options().in_list_value_count_threshold;
+ if (pred_in_compound && predicate_param->values.size() > threshold) {
+ return false;
+ }
}
// UNTOKENIZED strings exceed ignore_above, they are written as null,
causing range query errors
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index b66c8aa80a7..5cb0607411d 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -652,6 +652,9 @@ std::string VExpr::gen_predicate_result_sign(Block& block,
const ColumnNumbers&
pred_result_sign +=
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" +
function_name + "_";
if (function_name == "in" || function_name == "not_in") {
+ if (arguments.size() - 1 > _in_list_value_count_threshold) {
+ return pred_result_sign;
+ }
// Generating 'result_sign' from 'inlist' requires sorting the
values.
std::set<std::string> values;
for (size_t i = 1; i < arguments.size(); i++) {
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
index 777d485156a..88b18c67870 100644
--- a/be/src/vec/exprs/vexpr.h
+++ b/be/src/vec/exprs/vexpr.h
@@ -307,6 +307,7 @@ protected:
uint32_t _index_unique_id = 0;
bool _can_fast_execute = false;
bool _enable_inverted_index_query = true;
+ uint32_t _in_list_value_count_threshold = 10;
};
} // namespace vectorized
diff --git a/be/src/vec/exprs/vin_predicate.cpp
b/be/src/vec/exprs/vin_predicate.cpp
index 4affec791a4..4d518f9f923 100644
--- a/be/src/vec/exprs/vin_predicate.cpp
+++ b/be/src/vec/exprs/vin_predicate.cpp
@@ -28,6 +28,7 @@
#include <vector>
#include "common/status.h"
+#include "runtime/runtime_state.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
#include "vec/core/column_with_type_and_name.h"
@@ -79,6 +80,7 @@ Status VInPredicate::prepare(RuntimeState* state, const
RowDescriptor& desc,
VExpr::register_function_context(state, context);
_prepare_finished = true;
_can_fast_execute = can_fast_execute();
+ _in_list_value_count_threshold =
state->query_options().in_list_value_count_threshold;
return Status::OK();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 4822a41fc4b..b7d977fd386 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -621,6 +621,8 @@ public class SessionVariable implements Serializable,
Writable {
public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX =
"enable_match_without_inverted_index";
public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX =
"enable_fallback_on_missing_inverted_index";
+ public static final String IN_LIST_VALUE_COUNT_THRESHOLD =
"in_list_value_count_threshold";
+
/**
* If set false, user couldn't submit analyze SQL and FE won't allocate
any related resources.
*/
@@ -2022,6 +2024,13 @@ public class SessionVariable implements Serializable,
Writable {
})
public boolean enableFallbackOnMissingInvertedIndex = true;
+ @VariableMgr.VarAttr(name = IN_LIST_VALUE_COUNT_THRESHOLD, description = {
+ "in条件value数量大于这个threshold后将不会走fast_execute",
+ "When the number of values in the IN condition exceeds this threshold,"
+ + " fast_execute will not be used."
+ })
+ public int inListValueCountThreshold = 10;
+
public void setEnableEsParallelScroll(boolean enableESParallelScroll) {
this.enableESParallelScroll = enableESParallelScroll;
}
@@ -3525,6 +3534,7 @@ public class SessionVariable implements Serializable,
Writable {
tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex);
tResult.setKeepCarriageReturn(keepCarriageReturn);
+ tResult.setInListValueCountThreshold(inListValueCountThreshold);
return tResult;
}
diff --git a/gensrc/thrift/PaloInternalService.thrift
b/gensrc/thrift/PaloInternalService.thrift
index 3ffa27788ac..b26e271b911 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -322,6 +322,7 @@ struct TQueryOptions {
126: optional i32 runtime_bloom_filter_max_size = 16777216;
+ 127: optional i32 in_list_value_count_threshold = 10;
128: optional bool enable_verbose_profile = false;
129: optional i32 rpc_verbose_profile_max_instance_count = 0;
diff --git
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
index 528b4008084..8409a168a00 100644
---
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
+++
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
@@ -65,3 +65,9 @@
-- !sql --
2
+-- !sql --
+852
+
+-- !sql --
+852
+
diff --git
a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
index 8d22c001ed0..a9b3d513521 100644
---
a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
+++
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
@@ -117,6 +117,12 @@ suite("test_index_inlist_fault_injection",
"nonConcurrent") {
qt_sql """ select count() from ${indexTbName} where (clientip =
'2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """
sql """ set enable_common_expr_pushdown = true; """
+
+ sql """ set in_list_value_count_threshold = 0; """
+ qt_sql """ select count() from ${indexTbName} where (clientip in
('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """
+ sql """ set in_list_value_count_threshold = 10; """
+ qt_sql """ select count() from ${indexTbName} where (clientip in
('40.135.0.0', '232.0.0.0', '26.1.0.0', '247.37.0.0') or status = 200); """
+
} finally {
}
} finally {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]