This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 1d68ca85cb2 branch-4.0: [Bug](join) fix left_semi_direct_return_opt 
get wrong result with local shuffle #60952 (#60984)
1d68ca85cb2 is described below

commit 1d68ca85cb2d568e8a71285c650c159366f07f7e
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Mar 3 21:19:53 2026 +0800

    branch-4.0: [Bug](join) fix left_semi_direct_return_opt get wrong result 
with local shuffle #60952 (#60984)
    
    Cherry-picked from #60952
    
    Co-authored-by: Pxl <[email protected]>
---
 be/src/runtime_filter/runtime_filter_producer.cpp  | 13 +---
 be/src/runtime_filter/runtime_filter_producer.h    | 18 ++++-
 .../runtime_filter_producer_helper.cpp             |  2 +-
 be/src/runtime_filter/runtime_filter_wrapper.cpp   |  3 +-
 .../join/test_cte_exists/test_cte_exists.out       | 44 ++++++++++++
 .../join/test_cte_exists/test_cte_exists.groovy    | 78 ++++++++++++++++++++++
 6 files changed, 142 insertions(+), 16 deletions(-)

diff --git a/be/src/runtime_filter/runtime_filter_producer.cpp 
b/be/src/runtime_filter/runtime_filter_producer.cpp
index 6d8e1c1b08a..da5fbca15a9 100644
--- a/be/src/runtime_filter/runtime_filter_producer.cpp
+++ b/be/src/runtime_filter/runtime_filter_producer.cpp
@@ -50,12 +50,7 @@ Status RuntimeFilterProducer::publish(RuntimeState* state, 
bool build_hash_table
     _check_state({State::READY_TO_PUBLISH});
 
     auto do_merge = [&]() {
-        // two case we need do local merge:
-        // 1. has remote target
-        // 2. has local target and has global consumer (means target scan has 
local shuffle)
-        if (!_has_remote_target && state->global_runtime_filter_mgr()
-                                           
->get_consume_filters(_wrapper->filter_id())
-                                           .empty()) {
+        if (!_need_do_merge(state)) {
             // when global consumer not exist, send_to_local_targets will do 
nothing, so merge rf is useless
             return Status::OK();
         }
@@ -161,11 +156,7 @@ Status RuntimeFilterProducer::send_size(RuntimeState* 
state, uint64_t local_filt
     DCHECK(_dependency != nullptr);
     set_state(State::WAITING_FOR_SYNCED_SIZE);
 
-    // two case we need do local merge:
-    // 1. has remote target
-    // 2. has local target and has global consumer (means target scan has 
local shuffle)
-    if (_has_remote_target ||
-        
!state->global_runtime_filter_mgr()->get_consume_filters(_wrapper->filter_id()).empty())
 {
+    if (_need_do_merge(state)) {
         LocalMergeContext* merger_context = nullptr;
         
RETURN_IF_ERROR(state->global_runtime_filter_mgr()->get_local_merge_producer_filters(
                 _wrapper->filter_id(), &merger_context));
diff --git a/be/src/runtime_filter/runtime_filter_producer.h 
b/be/src/runtime_filter/runtime_filter_producer.h
index e686a97ee3a..a21697c7005 100644
--- a/be/src/runtime_filter/runtime_filter_producer.h
+++ b/be/src/runtime_filter/runtime_filter_producer.h
@@ -131,11 +131,14 @@ public:
         _wrapper = wrapper;
     }
 
-    std::shared_ptr<RuntimeFilterWrapper> detect_in_filter() {
-        if (_has_remote_target) {
+    std::shared_ptr<RuntimeFilterWrapper> detect_local_in_filter(RuntimeState* 
state) {
+        std::unique_lock<std::recursive_mutex> l(_rmtx);
+        // If a merge is needed, this filter is not purely local:
+        // the data is not directly colocated with the scan node,
+        // so the local IN-filter optimization cannot be enabled.
+        if (_need_do_merge(state)) {
             return nullptr;
         }
-        std::unique_lock<std::recursive_mutex> l(_rmtx);
         if (_wrapper->is_ready_in_filter()) {
             return _wrapper;
         }
@@ -157,6 +160,15 @@ private:
         }
     }
 
+    bool _need_do_merge(RuntimeState* state) {
+        // two cases where we need to do a local merge:
+        // 1. has remote target
+        // 2. has local target and has global consumer (means target scan has 
local shuffle)
+        return (_has_remote_target || !state->global_runtime_filter_mgr()
+                                               
->get_consume_filters(_wrapper->filter_id())
+                                               .empty());
+    }
+
     Status _init_with_desc(const TRuntimeFilterDesc* desc, const 
TQueryOptions* options) override {
         RETURN_IF_ERROR(RuntimeFilter::_init_with_desc(desc, options));
         _need_sync_filter_size = _wrapper->build_bf_by_runtime_size() && 
!_is_broadcast_join;
diff --git a/be/src/runtime_filter/runtime_filter_producer_helper.cpp 
b/be/src/runtime_filter/runtime_filter_producer_helper.cpp
index 9611d38f7ac..58d7d6c2500 100644
--- a/be/src/runtime_filter/runtime_filter_producer_helper.cpp
+++ b/be/src/runtime_filter/runtime_filter_producer_helper.cpp
@@ -169,7 +169,7 @@ std::shared_ptr<RuntimeFilterWrapper> 
RuntimeFilterProducerHelper::detect_local_
     // If any runtime filter is local in filter, return true.
     // Local in filter is used to LEFT_SEMI_DIRECT_RETURN_OPT
     for (const auto& filter : _producers) {
-        if (auto wrapper = filter->detect_in_filter(); wrapper != nullptr) {
+        if (auto wrapper = filter->detect_local_in_filter(state); wrapper != 
nullptr) {
             return wrapper;
         }
     }
diff --git a/be/src/runtime_filter/runtime_filter_wrapper.cpp 
b/be/src/runtime_filter/runtime_filter_wrapper.cpp
index 448899aa013..33ecd7d9ebc 100644
--- a/be/src/runtime_filter/runtime_filter_wrapper.cpp
+++ b/be/src/runtime_filter/runtime_filter_wrapper.cpp
@@ -171,7 +171,8 @@ Status RuntimeFilterWrapper::merge(const 
RuntimeFilterWrapper* other) {
         _hybrid_set->insert(other->_hybrid_set.get());
         if (_max_in_num >= 0 && _hybrid_set->size() > _max_in_num) {
             _hybrid_set->clear();
-            set_state(State::DISABLED, fmt::format("reach max in num: {}", 
_max_in_num));
+            set_state(State::DISABLED,
+                      fmt::format("merge reached max IN num threshold: {}", 
_max_in_num));
         }
         break;
     }
diff --git 
a/regression-test/data/query_p0/join/test_cte_exists/test_cte_exists.out 
b/regression-test/data/query_p0/join/test_cte_exists/test_cte_exists.out
new file mode 100644
index 00000000000..6162a7faf43
--- /dev/null
+++ b/regression-test/data/query_p0/join/test_cte_exists/test_cte_exists.out
@@ -0,0 +1,44 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select_with_cte_exists --
+0      -26516  42      28.17350        66.01650        \N      on      \N      
my      \N      2002-03-23      2015-05-15      2003-04-22T00:00        
2007-04-10T00:00        13      8       \N      25      78.1141000000   
51.1176000000
+1      1       -100    41.17160        \N      ⭐       now     \N      \N      
r       2013-08-21      2009-05-26      2005-10-11T00:00        
2003-01-17T14:47:30     2       9       6       13      2.1381000000    
32.0589000000
+2      15      \N      30.00870        42.09590        “       2016-12-25      
w       will    \N      2018-04-23      2011-02-17      2007-02-23T10:25:54     
2008-06-14T00:00        \N      13      53      \N      14.0125000000   
98.0868000000
+3      108     -18369  53.16640        68.19440        back    2005-07-03 
15:52:40     \N      2007-05-06      o       2014-04-11      2002-06-22      
2004-12-27T10:53:04     2016-02-12T00:00        2       8       \N      72      
49.1006000000   84.1622000000
+5      \N      \N      94.10040        89.14280        \N      was     \N      
÷       f       2011-12-10      2008-01-13      2019-01-22T10:32:16     
2014-11-11T22:37:16     5       6       84      34      61.0046000000   
58.1874000000
+7      10      \N      69.09860        75.12140        \N      w       were    
2000-01-25 06:33:02     a       2003-11-01      2008-07-11      
2006-04-24T21:56:50     2013-11-25T00:00        1       13      46      14      
3.1337000000    37.1948000000
+8      \N      2109123390      20.09540        60.03210        then    b       
tell    2004-04-05 13:37:57     m       2004-12-13      2007-03-25      
2018-10-14T00:00        2018-07-02T00:00        6       14      32      40      
\N      31.1016000000
+9      77      -321144118      48.11470        32.10830        k       \N      
\N      j       q       2011-05-22      2008-03-10      2017-03-27T00:00        
2003-11-13T00:00        14      4       39      66      14.1892000000   
27.0975000000
+10     1339657358      6       67.07760        63.01650        I'll    or      
b       just    \N      2010-06-11      2006-03-09      2018-09-01T00:00        
2017-01-27T00:00        2       3       \N      73      18.1075000000   
18.1771000000
+12     840810997       \N      20.15110        31.11050        2005-06-04 
09:31:03     \N      2015-04-03 21:51:11     2015-06-25      \N      2018-05-24 
     2011-10-24      2007-04-23T00:00        2016-05-26T03:48:15     \N      13 
     \N      19      55.1909000000   87.1879000000
+13     \N      213501779       70.10390        28.06790        DEL     about   
mean    2013-08-24      u       2002-09-06      2001-07-20      
2008-06-04T04:41:48     2013-09-09T00:00        8       14      3       95      
68.1937000000   15.1266000000
+14     -14681  46      35.17140        83.07530        2008-03-25 17:28:53     
you're  k       2016-02-28      f       2012-04-21      2004-01-24      
2010-05-20T00:00        2018-05-10T18:31:16     4       1       9       25      
18.1370000000   \N
+15     87      -39     65.06230        66.10880        2014-10-27 07:04:00     
¥       n       \N      e       2001-10-14      2003-05-15      
2013-06-26T00:00        2016-11-12T10:50:57     1       9       10      91      
\N      60.1894000000
+16     7876    34      19.02430        98.11340        ♪       up      
2010-09-17 15:10:51     2011-05-14      h       2006-02-12      2009-01-13      
2000-04-26T00:04:07     2010-10-12T00:00        6       9       83      76      
\N      6.0435000000
+17     -94     -426161667      48.01440        18.02980        k       
2015-05-16 19:40:14     2013-11-12      \N      e       2009-12-08      
2010-04-03      2005-01-17T00:00        2014-05-07T07:32:31     \N      13      
\N      \N      20.1666000000   \N
+18     \N      -28299  62.18820        37.07860        some    2006-12-18      
2015-09-12      \N      \N      2013-05-25      2006-01-07      
2006-11-12T01:13:11     2001-12-27T00:00        12      7       90      41      
82.0662000000   2.0787000000
+19     96      -5149   10.17780        2.04820 oh      é       a       f       
z       2004-06-26      2014-10-23      2004-06-15T00:00        
2015-12-06T00:00        1       12      91      86      \N      41.1384000000
+20     \N      -367544364      15.16010        69.02020        2019-03-16      
x       and     2012-11-25      \N      2003-12-16      2001-12-12      
2019-05-08T00:00        2013-09-23T00:00        14      5       44      \N      
\N      \N
+21     83504484        102     67.07690        17.09480        2016-03-08 
09:20:32     look    u       2005-01-24 13:11:51     t       2017-11-10      
2015-03-04      2013-02-17T02:49:09     2009-01-15T00:00        6       3       
34      88      93.1511000000   90.1330000000
+22     -30359  18454   62.06520        59.06150        o       2006-06-18 
19:24:41     2001-04-19      i       c       2011-09-21      2019-05-20      
2007-05-01T00:00        2000-01-18T00:00        7       11      17      39      
60.1966000000   27.0348000000
+23     47      55      92.14180        11.06560        \N      m       
2019-06-21 09:51:53     2003-12-20 22:29:10     \N      2013-10-03      
2000-07-14      2005-08-08T00:50:51     2001-09-22T00:00        11      5       
23      54      67.0501000000   72.1124000000
+24     174     \N      15.05920        53.17740        a       2014-01-02 
09:56:05     look    when    d       2010-12-22      2004-01-27      
2017-01-03T00:00        2014-07-26T17:04:22     1       12      34      16      
\N      31.1348000000
+27     -4501   -37     72.16810        7.08690 2017-01-09      \N      as      
2001-05-15 11:36:04     z       2002-07-20      2003-02-09      
2002-04-02T15:01:52     2012-08-22T00:00        3       1       15      85      
36.0699000000   71.0232000000
+28     \N      80      84.01840        \N      2008-09-08 09:41:36     
2018-10-08      2017-01-08      y       n       2007-12-15      2010-08-27      
2005-04-25T00:00        2007-07-22T00:00        12      8       36      53      
63.1981000000   25.1409000000
+30     1678810266      1507463100      62.03340        23.08120        t       
2003-07-06      you're  it      r       2018-02-15      2004-09-28      
2003-04-21T15:21:25     2019-03-25T00:00        5       12      9       85      
32.0118000000   57.1966000000
+31     \N      \N      35.17790        \N      2004-04-21      2013-09-04      
2018-06-02      2004-09-24 01:32:23     p       2015-08-01      2017-11-08      
2013-10-21T00:00        2009-02-13T22:10:15     5       12      \N      78      
72.1830000000   34.1290000000
+32     \N      -8223   27.12450        68.00550        b       oh      how     
\N      g       2019-01-02      2014-12-05      2016-11-17T00:00        
2016-03-13T00:00        7       8       47      20      4.0035000000    
33.1087000000
+33     -27712  109     20.17710        5.15390 2019-09-06      yeah    
2009-12-14 07:08:30     here    k       2004-08-01      2007-06-07      
2000-11-16T00:00        2010-03-12T00:00        13      \N      0       \N      
\N      81.1432000000
+35     -83     -11149  47.14330        \N      who     i       \N      back    
x       2006-02-19      2015-10-28      2007-08-20T00:00        
2012-04-26T00:00        13      10      \N      62      78.0910000000   
36.0115000000
+36     16516   -112    13.10140        \N      \N      +       2013-11-27 
05:39:59     “       d       2004-02-19      2004-01-18      2001-09-27T00:00   
     2013-03-27T10:45:14     3       11      \N      \N      21.1940000000   
10.1906000000
+37     2231    -140663446      82.14340        79.19350        \N      
2004-10-01 05:53:25     \N      \N      \N      2002-02-23      2002-04-03      
2019-05-24T00:00        2012-03-02T00:00        \N      3       51      15      
75.0719000000   81.1907000000
+38     19      \N      87.07490        61.07680        something       s       
2011-10-24      2010-03-19 10:30:14     s       2016-04-08      2010-04-04      
2009-10-05T13:25:07     2013-01-02T00:00        2       5       8       98      
\N      36.1245000000
+39     -44     18      41.01640        69.03730        2014-11-04 19:39:52     
2019-05-17 02:09:47     2001-10-25      a       u       2015-04-07      
2003-02-28      2018-09-08T10:56:03     2007-05-21T08:08:42     2       3       
66      63      30.1838000000   53.1648000000
+41     36      60      82.17390        100.03910       \N      2017-12-08 
06:14:04     2000-05-22      from    p       2011-12-17      2000-11-14      
2019-04-04T00:00        2019-03-21T00:00        11      11      \N      \N      
56.0672000000   54.1659000000
+42     \N      75      15.11650        30.06680        him     about   
2008-04-13      2008-01-12 23:59:57     x       2011-04-07      2016-06-26      
2000-01-23T00:00        2019-08-19T00:00        13      14      80      48      
60.1511000000   50.1581000000
+43     1486635733      \N      23.02720        9.17860 get     could   \N      
2008-11-27 14:13:54     d       2005-09-27      2016-12-05      
2017-11-23T15:48:28     2015-07-17T07:44:43     7       14      8       41      
19.1376000000   27.0721000000
+44     21183   91      49.13480        16.10830        2008-07-05 09:22:06     
2006-02-24      it's    2000-01-14 09:38:35     y       2007-10-11      
2010-09-19      2014-10-23T00:00        2001-06-23T07:32:12     14      15      
\N      11      \N      20.1074000000
+45     -19092  -10     75.10910        37.07120        2017-12-16 09:51:22     
his     é       2012-11-28 05:50:31     f       2003-01-27      2018-02-12      
2018-03-12T00:00        2005-02-08T00:00        5       4       98      14      
96.1744000000   91.0384000000
+47     -1585723780     \N      68.05130        59.16600        could   \N      
2011-01-12      m       b       2012-11-15      2003-10-03      
2016-05-08T00:00        2002-05-23T00:00        \N      5       61      99      
30.0624000000   \N
+48     26182   \N      85.09230        13.19010        🍕       all     \N      
2007-11-02 14:22:31     y       2014-02-24      2006-05-10      
2003-04-02T00:00        2005-06-12T00:00        13      6       49      25      
36.0866000000   97.1200000000
+49     -814629551      -320575127      92.19940        \N      and     
2010-09-03 21:32:52     x       well    z       2009-08-21      2017-12-05      
2005-08-24T00:00        2012-04-06T19:44:44     5       14      93      94      
20.0309000000   94.0261000000
+
diff --git 
a/regression-test/suites/query_p0/join/test_cte_exists/test_cte_exists.groovy 
b/regression-test/suites/query_p0/join/test_cte_exists/test_cte_exists.groovy
new file mode 100644
index 00000000000..ff3f6538888
--- /dev/null
+++ 
b/regression-test/suites/query_p0/join/test_cte_exists/test_cte_exists.groovy
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_cte_exists") {
+    def tableName = 
"table_20_50_undef_partitions2_keys3_properties4_distributed_by5"
+    
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    
+    sql "set enable_left_semi_direct_return_opt = true; "
+
+    sql "set parallel_pipeline_task_num=16;"
+
+    sql "set runtime_filter_max_in_num=10;"
+
+    sql "set runtime_filter_type = 'IN';"
+
+    sql "set enable_local_shuffle = true;"
+    
+    sql """
+        create table ${tableName} (
+            pk int,
+            col_int_undef_signed int    ,
+            col_int_undef_signed__index_inverted int    ,
+            col_decimal_20_5__undef_signed decimal(20,5)    ,
+            col_decimal_20_5__undef_signed__index_inverted decimal(20,5)    ,
+            col_varchar_100__undef_signed varchar(100)    ,
+            col_varchar_100__undef_signed__index_inverted varchar(100)    ,
+            col_char_50__undef_signed char(50)    ,
+            col_char_50__undef_signed__index_inverted char(50)    ,
+            col_string_undef_signed string    ,
+            col_date_undef_signed date    ,
+            col_date_undef_signed__index_inverted date    ,
+            col_datetime_undef_signed datetime    ,
+            col_datetime_undef_signed__index_inverted datetime    ,
+            col_tinyint_undef_signed tinyint    ,
+            col_tinyint_undef_signed__index_inverted tinyint    ,
+            col_decimal_10_0__undef_signed decimal(10,0)    ,
+            col_decimal_10_0__undef_signed__index_inverted decimal(10,0)    ,
+            col_decimal_38_10__undef_signed decimal(38,10)    ,
+            col_decimal_38_10__undef_signed__index_inverted decimal(38,10)    ,
+            INDEX col_int_undef_signed__index_inverted_idx 
(`col_int_undef_signed__index_inverted`) USING INVERTED,
+            INDEX col_decimal_20_5__undef_signed__index_inverted_idx 
(`col_decimal_20_5__undef_signed__index_inverted`) USING INVERTED,
+            INDEX col_varchar_100__undef_signed__index_inverted_idx 
(`col_varchar_100__undef_signed__index_inverted`) USING INVERTED,
+            INDEX col_char_50__undef_signed__index_inverted_idx 
(`col_char_50__undef_signed__index_inverted`) USING INVERTED,
+            INDEX col_date_undef_signed__index_inverted_idx 
(`col_date_undef_signed__index_inverted`) USING INVERTED,
+            INDEX col_datetime_undef_signed__index_inverted_idx 
(`col_datetime_undef_signed__index_inverted`) USING INVERTED,
+            INDEX col_tinyint_undef_signed__index_inverted_idx 
(`col_tinyint_undef_signed__index_inverted`) USING INVERTED,
+            INDEX col_decimal_10_0__undef_signed__index_inverted_idx 
(`col_decimal_10_0__undef_signed__index_inverted`) USING INVERTED,
+            INDEX col_decimal_38_10__undef_signed__index_inverted_idx 
(`col_decimal_38_10__undef_signed__index_inverted`) USING INVERTED
+        ) engine=olap
+        DUPLICATE KEY(pk, col_int_undef_signed, 
col_int_undef_signed__index_inverted)
+        distributed by hash(pk) buckets 10
+        properties("store_row_column" = "false", "replication_num" = "1");
+    """
+
+    sql """
+        insert into 
${tableName}(pk,col_int_undef_signed,col_int_undef_signed__index_inverted,col_decimal_20_5__undef_signed,col_decimal_20_5__undef_signed__index_inverted,col_varchar_100__undef_signed,col_varchar_100__undef_signed__index_inverted,col_char_50__undef_signed,col_char_50__undef_signed__index_inverted,col_string_undef_signed,col_date_undef_signed,col_date_undef_signed__index_inverted,col_datetime_undef_signed,col_datetime_undef_signed__index_inverted,col_tinyint_undef_signed
 [...]
+    """
+
+    qt_select_with_cte_exists """
+        with cte1 as (select pk from ${tableName} where 
col_decimal_20_5__undef_signed != 8) 
+        select * from ${tableName} o where exists(select 1 from cte1 au where 
au.pk = o.pk) order by pk;
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to