This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 1d68ca85cb2 branch-4.0: [Bug](join) fix left_semi_direct_return_opt
get wrong result with local shuffle #60952 (#60984)
1d68ca85cb2 is described below
commit 1d68ca85cb2d568e8a71285c650c159366f07f7e
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Mar 3 21:19:53 2026 +0800
branch-4.0: [Bug](join) fix left_semi_direct_return_opt get wrong result
with local shuffle #60952 (#60984)
Cherry-picked from #60952
Co-authored-by: Pxl <[email protected]>
---
be/src/runtime_filter/runtime_filter_producer.cpp | 13 +---
be/src/runtime_filter/runtime_filter_producer.h | 18 ++++-
.../runtime_filter_producer_helper.cpp | 2 +-
be/src/runtime_filter/runtime_filter_wrapper.cpp | 3 +-
.../join/test_cte_exists/test_cte_exists.out | 44 ++++++++++++
.../join/test_cte_exists/test_cte_exists.groovy | 78 ++++++++++++++++++++++
6 files changed, 142 insertions(+), 16 deletions(-)
diff --git a/be/src/runtime_filter/runtime_filter_producer.cpp b/be/src/runtime_filter/runtime_filter_producer.cpp
index 6d8e1c1b08a..da5fbca15a9 100644
--- a/be/src/runtime_filter/runtime_filter_producer.cpp
+++ b/be/src/runtime_filter/runtime_filter_producer.cpp
@@ -50,12 +50,7 @@ Status RuntimeFilterProducer::publish(RuntimeState* state, bool build_hash_table
_check_state({State::READY_TO_PUBLISH});
auto do_merge = [&]() {
- // two case we need do local merge:
- // 1. has remote target
- // 2. has local target and has global consumer (means target scan has local shuffle)
- if (!_has_remote_target && state->global_runtime_filter_mgr()
- ->get_consume_filters(_wrapper->filter_id())
- .empty()) {
+ if (!_need_do_merge(state)) {
// when global consumer not exist, send_to_local_targets will do nothing, so merge rf is useless
return Status::OK();
}
@@ -161,11 +156,7 @@ Status RuntimeFilterProducer::send_size(RuntimeState* state, uint64_t local_filt
DCHECK(_dependency != nullptr);
set_state(State::WAITING_FOR_SYNCED_SIZE);
- // two case we need do local merge:
- // 1. has remote target
- // 2. has local target and has global consumer (means target scan has local shuffle)
- if (_has_remote_target ||
- !state->global_runtime_filter_mgr()->get_consume_filters(_wrapper->filter_id()).empty()) {
+ if (_need_do_merge(state)) {
LocalMergeContext* merger_context = nullptr;
RETURN_IF_ERROR(state->global_runtime_filter_mgr()->get_local_merge_producer_filters(
_wrapper->filter_id(), &merger_context));
diff --git a/be/src/runtime_filter/runtime_filter_producer.h b/be/src/runtime_filter/runtime_filter_producer.h
index e686a97ee3a..a21697c7005 100644
--- a/be/src/runtime_filter/runtime_filter_producer.h
+++ b/be/src/runtime_filter/runtime_filter_producer.h
@@ -131,11 +131,14 @@ public:
_wrapper = wrapper;
}
- std::shared_ptr<RuntimeFilterWrapper> detect_in_filter() {
- if (_has_remote_target) {
+ std::shared_ptr<RuntimeFilterWrapper> detect_local_in_filter(RuntimeState* state) {
+ std::unique_lock<std::recursive_mutex> l(_rmtx);
+ // need merge mean this filter not pure local
+ // the data not directly colocated with scan node
+ // so that can not enable local in filter optimization
+ if (_need_do_merge(state)) {
return nullptr;
}
- std::unique_lock<std::recursive_mutex> l(_rmtx);
if (_wrapper->is_ready_in_filter()) {
return _wrapper;
}
@@ -157,6 +160,15 @@ private:
}
}
+ bool _need_do_merge(RuntimeState* state) {
+ // two cases where we need to do a local merge:
+ // 1. has remote target
+ // 2. has local target and has global consumer (means target scan has local shuffle)
+ return (_has_remote_target || !state->global_runtime_filter_mgr()
+ ->get_consume_filters(_wrapper->filter_id())
+ .empty());
+ }
+
Status _init_with_desc(const TRuntimeFilterDesc* desc, const TQueryOptions* options) override {
RETURN_IF_ERROR(RuntimeFilter::_init_with_desc(desc, options));
_need_sync_filter_size = _wrapper->build_bf_by_runtime_size() && !_is_broadcast_join;
diff --git a/be/src/runtime_filter/runtime_filter_producer_helper.cpp b/be/src/runtime_filter/runtime_filter_producer_helper.cpp
index 9611d38f7ac..58d7d6c2500 100644
--- a/be/src/runtime_filter/runtime_filter_producer_helper.cpp
+++ b/be/src/runtime_filter/runtime_filter_producer_helper.cpp
@@ -169,7 +169,7 @@ std::shared_ptr<RuntimeFilterWrapper> RuntimeFilterProducerHelper::detect_local_
// If any runtime filter is local in filter, return true.
// Local in filter is used to LEFT_SEMI_DIRECT_RETURN_OPT
for (const auto& filter : _producers) {
- if (auto wrapper = filter->detect_in_filter(); wrapper != nullptr) {
+ if (auto wrapper = filter->detect_local_in_filter(state); wrapper != nullptr) {
return wrapper;
}
}
diff --git a/be/src/runtime_filter/runtime_filter_wrapper.cpp b/be/src/runtime_filter/runtime_filter_wrapper.cpp
index 448899aa013..33ecd7d9ebc 100644
--- a/be/src/runtime_filter/runtime_filter_wrapper.cpp
+++ b/be/src/runtime_filter/runtime_filter_wrapper.cpp
@@ -171,7 +171,8 @@ Status RuntimeFilterWrapper::merge(const RuntimeFilterWrapper* other) {
_hybrid_set->insert(other->_hybrid_set.get());
if (_max_in_num >= 0 && _hybrid_set->size() > _max_in_num) {
_hybrid_set->clear();
- set_state(State::DISABLED, fmt::format("reach max in num: {}", _max_in_num));
+ set_state(State::DISABLED,
+ fmt::format("merge reached max IN num threshold: {}", _max_in_num));
}
break;
}
diff --git a/regression-test/data/query_p0/join/test_cte_exists/test_cte_exists.out b/regression-test/data/query_p0/join/test_cte_exists/test_cte_exists.out
new file mode 100644
index 00000000000..6162a7faf43
--- /dev/null
+++ b/regression-test/data/query_p0/join/test_cte_exists/test_cte_exists.out
@@ -0,0 +1,44 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_with_cte_exists --
+0 -26516 42 28.17350 66.01650 \N on \N
my \N 2002-03-23 2015-05-15 2003-04-22T00:00
2007-04-10T00:00 13 8 \N 25 78.1141000000
51.1176000000
+1 1 -100 41.17160 \N ⭐ now \N \N
r 2013-08-21 2009-05-26 2005-10-11T00:00
2003-01-17T14:47:30 2 9 6 13 2.1381000000
32.0589000000
+2 15 \N 30.00870 42.09590 “ 2016-12-25
w will \N 2018-04-23 2011-02-17 2007-02-23T10:25:54
2008-06-14T00:00 \N 13 53 \N 14.0125000000
98.0868000000
+3 108 -18369 53.16640 68.19440 back 2005-07-03
15:52:40 \N 2007-05-06 o 2014-04-11 2002-06-22
2004-12-27T10:53:04 2016-02-12T00:00 2 8 \N 72
49.1006000000 84.1622000000
+5 \N \N 94.10040 89.14280 \N was \N
÷ f 2011-12-10 2008-01-13 2019-01-22T10:32:16
2014-11-11T22:37:16 5 6 84 34 61.0046000000
58.1874000000
+7 10 \N 69.09860 75.12140 \N w were
2000-01-25 06:33:02 a 2003-11-01 2008-07-11
2006-04-24T21:56:50 2013-11-25T00:00 1 13 46 14
3.1337000000 37.1948000000
+8 \N 2109123390 20.09540 60.03210 then b
tell 2004-04-05 13:37:57 m 2004-12-13 2007-03-25
2018-10-14T00:00 2018-07-02T00:00 6 14 32 40
\N 31.1016000000
+9 77 -321144118 48.11470 32.10830 k \N
\N j q 2011-05-22 2008-03-10 2017-03-27T00:00
2003-11-13T00:00 14 4 39 66 14.1892000000
27.0975000000
+10 1339657358 6 67.07760 63.01650 I'll or
b just \N 2010-06-11 2006-03-09 2018-09-01T00:00
2017-01-27T00:00 2 3 \N 73 18.1075000000
18.1771000000
+12 840810997 \N 20.15110 31.11050 2005-06-04
09:31:03 \N 2015-04-03 21:51:11 2015-06-25 \N 2018-05-24
2011-10-24 2007-04-23T00:00 2016-05-26T03:48:15 \N 13
\N 19 55.1909000000 87.1879000000
+13 \N 213501779 70.10390 28.06790 DEL about
mean 2013-08-24 u 2002-09-06 2001-07-20
2008-06-04T04:41:48 2013-09-09T00:00 8 14 3 95
68.1937000000 15.1266000000
+14 -14681 46 35.17140 83.07530 2008-03-25 17:28:53
you're k 2016-02-28 f 2012-04-21 2004-01-24
2010-05-20T00:00 2018-05-10T18:31:16 4 1 9 25
18.1370000000 \N
+15 87 -39 65.06230 66.10880 2014-10-27 07:04:00
¥ n \N e 2001-10-14 2003-05-15
2013-06-26T00:00 2016-11-12T10:50:57 1 9 10 91
\N 60.1894000000
+16 7876 34 19.02430 98.11340 ♪ up
2010-09-17 15:10:51 2011-05-14 h 2006-02-12 2009-01-13
2000-04-26T00:04:07 2010-10-12T00:00 6 9 83 76
\N 6.0435000000
+17 -94 -426161667 48.01440 18.02980 k
2015-05-16 19:40:14 2013-11-12 \N e 2009-12-08
2010-04-03 2005-01-17T00:00 2014-05-07T07:32:31 \N 13
\N \N 20.1666000000 \N
+18 \N -28299 62.18820 37.07860 some 2006-12-18
2015-09-12 \N \N 2013-05-25 2006-01-07
2006-11-12T01:13:11 2001-12-27T00:00 12 7 90 41
82.0662000000 2.0787000000
+19 96 -5149 10.17780 2.04820 oh é a f
z 2004-06-26 2014-10-23 2004-06-15T00:00
2015-12-06T00:00 1 12 91 86 \N 41.1384000000
+20 \N -367544364 15.16010 69.02020 2019-03-16
x and 2012-11-25 \N 2003-12-16 2001-12-12
2019-05-08T00:00 2013-09-23T00:00 14 5 44 \N
\N \N
+21 83504484 102 67.07690 17.09480 2016-03-08
09:20:32 look u 2005-01-24 13:11:51 t 2017-11-10
2015-03-04 2013-02-17T02:49:09 2009-01-15T00:00 6 3
34 88 93.1511000000 90.1330000000
+22 -30359 18454 62.06520 59.06150 o 2006-06-18
19:24:41 2001-04-19 i c 2011-09-21 2019-05-20
2007-05-01T00:00 2000-01-18T00:00 7 11 17 39
60.1966000000 27.0348000000
+23 47 55 92.14180 11.06560 \N m
2019-06-21 09:51:53 2003-12-20 22:29:10 \N 2013-10-03
2000-07-14 2005-08-08T00:50:51 2001-09-22T00:00 11 5
23 54 67.0501000000 72.1124000000
+24 174 \N 15.05920 53.17740 a 2014-01-02
09:56:05 look when d 2010-12-22 2004-01-27
2017-01-03T00:00 2014-07-26T17:04:22 1 12 34 16
\N 31.1348000000
+27 -4501 -37 72.16810 7.08690 2017-01-09 \N as
2001-05-15 11:36:04 z 2002-07-20 2003-02-09
2002-04-02T15:01:52 2012-08-22T00:00 3 1 15 85
36.0699000000 71.0232000000
+28 \N 80 84.01840 \N 2008-09-08 09:41:36
2018-10-08 2017-01-08 y n 2007-12-15 2010-08-27
2005-04-25T00:00 2007-07-22T00:00 12 8 36 53
63.1981000000 25.1409000000
+30 1678810266 1507463100 62.03340 23.08120 t
2003-07-06 you're it r 2018-02-15 2004-09-28
2003-04-21T15:21:25 2019-03-25T00:00 5 12 9 85
32.0118000000 57.1966000000
+31 \N \N 35.17790 \N 2004-04-21 2013-09-04
2018-06-02 2004-09-24 01:32:23 p 2015-08-01 2017-11-08
2013-10-21T00:00 2009-02-13T22:10:15 5 12 \N 78
72.1830000000 34.1290000000
+32 \N -8223 27.12450 68.00550 b oh how
\N g 2019-01-02 2014-12-05 2016-11-17T00:00
2016-03-13T00:00 7 8 47 20 4.0035000000
33.1087000000
+33 -27712 109 20.17710 5.15390 2019-09-06 yeah
2009-12-14 07:08:30 here k 2004-08-01 2007-06-07
2000-11-16T00:00 2010-03-12T00:00 13 \N 0 \N
\N 81.1432000000
+35 -83 -11149 47.14330 \N who i \N back
x 2006-02-19 2015-10-28 2007-08-20T00:00
2012-04-26T00:00 13 10 \N 62 78.0910000000
36.0115000000
+36 16516 -112 13.10140 \N \N + 2013-11-27
05:39:59 “ d 2004-02-19 2004-01-18 2001-09-27T00:00
2013-03-27T10:45:14 3 11 \N \N 21.1940000000
10.1906000000
+37 2231 -140663446 82.14340 79.19350 \N
2004-10-01 05:53:25 \N \N \N 2002-02-23 2002-04-03
2019-05-24T00:00 2012-03-02T00:00 \N 3 51 15
75.0719000000 81.1907000000
+38 19 \N 87.07490 61.07680 something s
2011-10-24 2010-03-19 10:30:14 s 2016-04-08 2010-04-04
2009-10-05T13:25:07 2013-01-02T00:00 2 5 8 98
\N 36.1245000000
+39 -44 18 41.01640 69.03730 2014-11-04 19:39:52
2019-05-17 02:09:47 2001-10-25 a u 2015-04-07
2003-02-28 2018-09-08T10:56:03 2007-05-21T08:08:42 2 3
66 63 30.1838000000 53.1648000000
+41 36 60 82.17390 100.03910 \N 2017-12-08
06:14:04 2000-05-22 from p 2011-12-17 2000-11-14
2019-04-04T00:00 2019-03-21T00:00 11 11 \N \N
56.0672000000 54.1659000000
+42 \N 75 15.11650 30.06680 him about
2008-04-13 2008-01-12 23:59:57 x 2011-04-07 2016-06-26
2000-01-23T00:00 2019-08-19T00:00 13 14 80 48
60.1511000000 50.1581000000
+43 1486635733 \N 23.02720 9.17860 get could \N
2008-11-27 14:13:54 d 2005-09-27 2016-12-05
2017-11-23T15:48:28 2015-07-17T07:44:43 7 14 8 41
19.1376000000 27.0721000000
+44 21183 91 49.13480 16.10830 2008-07-05 09:22:06
2006-02-24 it's 2000-01-14 09:38:35 y 2007-10-11
2010-09-19 2014-10-23T00:00 2001-06-23T07:32:12 14 15
\N 11 \N 20.1074000000
+45 -19092 -10 75.10910 37.07120 2017-12-16 09:51:22
his é 2012-11-28 05:50:31 f 2003-01-27 2018-02-12
2018-03-12T00:00 2005-02-08T00:00 5 4 98 14
96.1744000000 91.0384000000
+47 -1585723780 \N 68.05130 59.16600 could \N
2011-01-12 m b 2012-11-15 2003-10-03
2016-05-08T00:00 2002-05-23T00:00 \N 5 61 99
30.0624000000 \N
+48 26182 \N 85.09230 13.19010 🍕 all \N
2007-11-02 14:22:31 y 2014-02-24 2006-05-10
2003-04-02T00:00 2005-06-12T00:00 13 6 49 25
36.0866000000 97.1200000000
+49 -814629551 -320575127 92.19940 \N and
2010-09-03 21:32:52 x well z 2009-08-21 2017-12-05
2005-08-24T00:00 2012-04-06T19:44:44 5 14 93 94
20.0309000000 94.0261000000
+
diff --git a/regression-test/suites/query_p0/join/test_cte_exists/test_cte_exists.groovy b/regression-test/suites/query_p0/join/test_cte_exists/test_cte_exists.groovy
new file mode 100644
index 00000000000..ff3f6538888
--- /dev/null
+++ b/regression-test/suites/query_p0/join/test_cte_exists/test_cte_exists.groovy
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_cte_exists") {
+ def tableName = "table_20_50_undef_partitions2_keys3_properties4_distributed_by5"
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+
+ sql "set enable_left_semi_direct_return_opt = true; "
+
+ sql "set parallel_pipeline_task_num=16;"
+
+ sql "set runtime_filter_max_in_num=10;"
+
+ sql "set runtime_filter_type = 'IN';"
+
+ sql "set enable_local_shuffle = true;"
+
+ sql """
+ create table ${tableName} (
+ pk int,
+ col_int_undef_signed int ,
+ col_int_undef_signed__index_inverted int ,
+ col_decimal_20_5__undef_signed decimal(20,5) ,
+ col_decimal_20_5__undef_signed__index_inverted decimal(20,5) ,
+ col_varchar_100__undef_signed varchar(100) ,
+ col_varchar_100__undef_signed__index_inverted varchar(100) ,
+ col_char_50__undef_signed char(50) ,
+ col_char_50__undef_signed__index_inverted char(50) ,
+ col_string_undef_signed string ,
+ col_date_undef_signed date ,
+ col_date_undef_signed__index_inverted date ,
+ col_datetime_undef_signed datetime ,
+ col_datetime_undef_signed__index_inverted datetime ,
+ col_tinyint_undef_signed tinyint ,
+ col_tinyint_undef_signed__index_inverted tinyint ,
+ col_decimal_10_0__undef_signed decimal(10,0) ,
+ col_decimal_10_0__undef_signed__index_inverted decimal(10,0) ,
+ col_decimal_38_10__undef_signed decimal(38,10) ,
+ col_decimal_38_10__undef_signed__index_inverted decimal(38,10) ,
+ INDEX col_int_undef_signed__index_inverted_idx (`col_int_undef_signed__index_inverted`) USING INVERTED,
+ INDEX col_decimal_20_5__undef_signed__index_inverted_idx (`col_decimal_20_5__undef_signed__index_inverted`) USING INVERTED,
+ INDEX col_varchar_100__undef_signed__index_inverted_idx (`col_varchar_100__undef_signed__index_inverted`) USING INVERTED,
+ INDEX col_char_50__undef_signed__index_inverted_idx (`col_char_50__undef_signed__index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed__index_inverted_idx (`col_date_undef_signed__index_inverted`) USING INVERTED,
+ INDEX col_datetime_undef_signed__index_inverted_idx (`col_datetime_undef_signed__index_inverted`) USING INVERTED,
+ INDEX col_tinyint_undef_signed__index_inverted_idx (`col_tinyint_undef_signed__index_inverted`) USING INVERTED,
+ INDEX col_decimal_10_0__undef_signed__index_inverted_idx (`col_decimal_10_0__undef_signed__index_inverted`) USING INVERTED,
+ INDEX col_decimal_38_10__undef_signed__index_inverted_idx (`col_decimal_38_10__undef_signed__index_inverted`) USING INVERTED
+ ) engine=olap
+ DUPLICATE KEY(pk, col_int_undef_signed, col_int_undef_signed__index_inverted)
+ distributed by hash(pk) buckets 10
+ properties("store_row_column" = "false", "replication_num" = "1");
+ """
+
+ sql """
+ insert into
${tableName}(pk,col_int_undef_signed,col_int_undef_signed__index_inverted,col_decimal_20_5__undef_signed,col_decimal_20_5__undef_signed__index_inverted,col_varchar_100__undef_signed,col_varchar_100__undef_signed__index_inverted,col_char_50__undef_signed,col_char_50__undef_signed__index_inverted,col_string_undef_signed,col_date_undef_signed,col_date_undef_signed__index_inverted,col_datetime_undef_signed,col_datetime_undef_signed__index_inverted,col_tinyint_undef_signed
[...]
+ """
+
+ qt_select_with_cte_exists """
+ with cte1 as (select pk from ${tableName} where col_decimal_20_5__undef_signed != 8)
+ select * from ${tableName} o where exists(select 1 from cte1 au where au.pk = o.pk) order by pk;
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]