This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 4a277affdcc [fix](scan) In-predicate should not be pushed down for
non-key column(#35913) (#35968)
4a277affdcc is described below
commit 4a277affdcc3277009577f0109b84d31faa55570
Author: Jerry Hu <[email protected]>
AuthorDate: Tue Jun 11 11:13:34 2024 +0800
[fix](scan) In-predicate should not be pushed down for non-key
column(#35913) (#35968)
pick #35913
---
be/src/pipeline/exec/scan_operator.cpp | 13 +++-
be/src/vec/exec/scan/vscan_node.cpp | 13 +++-
.../correctness_p0/test_in_predicate_push_down.out | 5 ++
.../test_in_predicate_push_down.groovy | 88 ++++++++++++++++++++++
4 files changed, 117 insertions(+), 2 deletions(-)
diff --git a/be/src/pipeline/exec/scan_operator.cpp
b/be/src/pipeline/exec/scan_operator.cpp
index 41e029bce34..268089be1e5 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -642,10 +642,13 @@ Status
ScanLocalState<Derived>::_normalize_in_and_eq_predicate(
if (hybrid_set->size() <=
_parent->cast<typename
Derived::Parent>()._max_pushdown_conditions_per_column) {
iter = hybrid_set->begin();
- } else {
+ } else if (_is_key_column(slot->col_name()) ||
_storage_no_merge()) {
_filter_predicates.in_filters.emplace_back(slot->col_name(),
expr->get_set_func());
*pdt = vectorized::VScanNode::PushDownType::ACCEPTABLE;
return Status::OK();
+ } else {
+ *pdt = vectorized::VScanNode::PushDownType::UNACCEPTABLE;
+ return Status::OK();
}
} else {
// normal in predicate
@@ -786,6 +789,14 @@ Status
ScanLocalState<Derived>::_normalize_not_in_and_not_eq_predicate(
vectorized::VScanNode::PushDownType::UNACCEPTABLE;
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
if (TExprNodeType::IN_PRED == expr->node_type()) {
+ /// A `VDirectInPredicate` must not be pushed down here.
+ /// Reaching this point means the `VDirectInPredicate` was too big to be converted
into a `ColumnValueRange`.
+ /// For a non-key column when `_storage_no_merge()` is false, such a
predicate must not be pushed down.
+ if (expr->get_set_func() != nullptr) {
+ *pdt = vectorized::VScanNode::PushDownType::UNACCEPTABLE;
+ return Status::OK();
+ }
+
vectorized::VInPredicate* pred =
static_cast<vectorized::VInPredicate*>(expr);
if ((temp_pdt = _should_push_down_in_predicate(pred, expr_ctx, true))
==
vectorized::VScanNode::PushDownType::UNACCEPTABLE) {
diff --git a/be/src/vec/exec/scan/vscan_node.cpp
b/be/src/vec/exec/scan/vscan_node.cpp
index 22f0094b03a..ed18150e66a 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -753,10 +753,13 @@ Status VScanNode::_normalize_in_and_eq_predicate(VExpr*
expr, VExprContext* expr
// runtime filter produce VDirectInPredicate
if (hybrid_set->size() <= _max_pushdown_conditions_per_column) {
iter = hybrid_set->begin();
- } else {
+ } else if (_is_key_column(slot->col_name()) ||
_storage_no_merge()) {
_filter_predicates.in_filters.emplace_back(slot->col_name(),
expr->get_set_func());
*pdt = PushDownType::ACCEPTABLE;
return Status::OK();
+ } else {
+ *pdt = PushDownType::UNACCEPTABLE;
+ return Status::OK();
}
} else {
// normal in predicate
@@ -847,6 +850,14 @@ Status
VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprConte
PushDownType temp_pdt = PushDownType::UNACCEPTABLE;
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
if (TExprNodeType::IN_PRED == expr->node_type()) {
+ /// A `VDirectInPredicate` must not be pushed down here.
+ /// Reaching this point means the `VDirectInPredicate` was too big to be converted
into a `ColumnValueRange`.
+ /// For a non-key column when `_storage_no_merge()` is false, such a
predicate must not be pushed down.
+ if (expr->get_set_func() != nullptr) {
+ *pdt = PushDownType::UNACCEPTABLE;
+ return Status::OK();
+ }
+
auto* pred = static_cast<VInPredicate*>(expr);
if ((temp_pdt = _should_push_down_in_predicate(pred, expr_ctx, true))
==
PushDownType::UNACCEPTABLE) {
diff --git
a/regression-test/data/correctness_p0/test_in_predicate_push_down.out
b/regression-test/data/correctness_p0/test_in_predicate_push_down.out
new file mode 100644
index 00000000000..5fb13260f3c
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_in_predicate_push_down.out
@@ -0,0 +1,5 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+3 3 3 3
+4 4 4 4
+
diff --git
a/regression-test/suites/correctness_p0/test_in_predicate_push_down.groovy
b/regression-test/suites/correctness_p0/test_in_predicate_push_down.groovy
new file mode 100644
index 00000000000..c76fb9202e1
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_in_predicate_push_down.groovy
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_in_predicate_push_down") {
+ sql """
+ drop table if exists tbl_test_in_predicate_push_down_t1;
+ """
+
+ sql """
+ drop table if exists tbl_test_in_predicate_push_down_t2;
+ """
+
+ sql """
+ CREATE TABLE tbl_test_in_predicate_push_down_t1 (id int, value int)
+ UNIQUE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "enable_unique_key_merge_on_write" = "false",
+ "disable_auto_compaction" = "true"
+ );
+ """
+
+ sql """
+ CREATE TABLE tbl_test_in_predicate_push_down_t2 (id int, value int)
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ insert into tbl_test_in_predicate_push_down_t1 values(1, 1);
+ """
+ sql """
+ insert into tbl_test_in_predicate_push_down_t1 values(1, null);
+ """
+ sql """
+ insert into tbl_test_in_predicate_push_down_t1 values(2, 2);
+ """
+ sql """
+ insert into tbl_test_in_predicate_push_down_t1 values(2, null);
+ """
+ sql """
+ insert into tbl_test_in_predicate_push_down_t1 values
+ (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10),
+ (11, 11), (12, 12);
+ """
+
+ sql """
+ insert into tbl_test_in_predicate_push_down_t2 values
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4);
+ """
+
+ sql " analyze table tbl_test_in_predicate_push_down_t1 with full with
sync; "
+ sql " analyze table tbl_test_in_predicate_push_down_t2 with full with
sync; "
+
+ sql " set max_pushdown_conditions_per_column = 2; "
+ sql " set runtime_filter_type = 'IN'; "
+ sql " set runtime_filter_max_in_num = 1024; "
+ sql " set runtime_filter_wait_time_ms = 10000; "
+
+ qt_select """
+ select
+ *
+ from tbl_test_in_predicate_push_down_t1 t1,
tbl_test_in_predicate_push_down_t2 t2
+ where t1.value = t2.value
+ order by 1, 2, 3, 4;
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]