This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d81ed7b0309 [opt](nereids) refine left semi/anti shortcut cost (#37060)
d81ed7b0309 is described below

commit d81ed7b03092fc59ba0406c03bf1305f906122df
Author: xzj7019 <[email protected]>
AuthorDate: Wed Jul 3 20:38:40 2024 +0800

    [opt](nereids) refine left semi/anti shortcut cost (#37060)
    
    The current cost of left semi/anti joins doesn't take the BE's short-cut
    optimization into account. This PR refines that part of the cost
    computation and distinguishes the costs of left and right semi/anti joins.
    
    ---------
    
    Co-authored-by: zhongjian.xzj <[email protected]>
---
 .../org/apache/doris/nereids/cost/CostModelV1.java | 18 ++++++---
 .../java/org/apache/doris/qe/SessionVariable.java  | 12 ++++++
 .../shape/query16.out                              | 38 +++++++++----------
 .../shape/query78.out                              | 30 +++++++--------
 .../shape/query94.out                              | 38 +++++++++----------
 .../noStatsRfPrune/query14.out                     |  8 ++--
 .../noStatsRfPrune/query58.out                     | 18 ++++-----
 .../noStatsRfPrune/query95.out                     | 28 +++++++-------
 .../no_stats_shape/query14.out                     | 10 ++---
 .../no_stats_shape/query58.out                     | 24 ++++++------
 .../no_stats_shape/query95.out                     | 28 +++++++-------
 .../rf_prune/query14.out                           | 39 ++++++++++----------
 .../rf_prune/query16.out                           | 32 ++++++++--------
 .../rf_prune/query78.out                           | 30 +++++++--------
 .../rf_prune/query94.out                           | 38 +++++++++----------
 .../nereids_tpcds_shape_sf100_p0/shape/query14.out | 43 +++++++++++-----------
 .../nereids_tpcds_shape_sf100_p0/shape/query16.out | 32 ++++++++--------
 .../nereids_tpcds_shape_sf100_p0/shape/query78.out | 30 +++++++--------
 .../nereids_tpcds_shape_sf100_p0/shape/query94.out | 38 +++++++++----------
 19 files changed, 276 insertions(+), 258 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
index 418f63dd68e..68ddbac13aa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
@@ -386,7 +386,13 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
                     leftRowCount + rightRowCount
             );
         }
-
+        double probeShortcutFactor = 1.0;
+        if (physicalHashJoin.getJoinType().isLeftSemiOrAntiJoin()
+                && physicalHashJoin.getOtherJoinConjuncts().isEmpty()
+                && physicalHashJoin.getMarkJoinConjuncts().isEmpty()) {
+            // left semi/anti has short-cut opt, add probe side factor for 
distinguishing from the right ones
+            probeShortcutFactor = 
context.getSessionVariable().getLeftSemiOrAntiProbeFactor();
+        }
         if (context.isBroadcastJoin()) {
             // compared with shuffle join, bc join will be taken a penalty for 
both build and probe side;
             // currently we use the following factor as the penalty factor:
@@ -408,14 +414,16 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
                 }
             }
             return CostV1.of(context.getSessionVariable(),
-                    leftRowCount + rightRowCount * buildSideFactor + 
outputRowCount * probeSideFactor,
+                    leftRowCount * probeShortcutFactor
+                            + rightRowCount * buildSideFactor
+                            + outputRowCount * probeSideFactor,
                     rightRowCount,
                     0
             );
         }
-        return CostV1.of(context.getSessionVariable(), leftRowCount + 
rightRowCount + outputRowCount,
-                rightRowCount,
-                0
+        return CostV1.of(context.getSessionVariable(),
+                leftRowCount * probeShortcutFactor + rightRowCount + 
outputRowCount,
+                        rightRowCount, 0
         );
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 29d3fbf985b..238acefd730 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -319,6 +319,7 @@ public class SessionVariable implements Serializable, 
Writable {
 
     public static final String FORBID_UNKNOWN_COLUMN_STATS = 
"forbid_unknown_col_stats";
     public static final String BROADCAST_RIGHT_TABLE_SCALE_FACTOR = 
"broadcast_right_table_scale_factor";
+    public static final String LEFT_SEMI_OR_ANTI_PROBE_FACTOR = 
"left_semi_or_anti_probe_factor";
     public static final String BROADCAST_ROW_COUNT_LIMIT = 
"broadcast_row_count_limit";
 
     // percentage of EXEC_MEM_LIMIT
@@ -1285,6 +1286,9 @@ public class SessionVariable implements Serializable, 
Writable {
     @VariableMgr.VarAttr(name = BROADCAST_RIGHT_TABLE_SCALE_FACTOR)
     private double broadcastRightTableScaleFactor = 0.0;
 
+    @VariableMgr.VarAttr(name = LEFT_SEMI_OR_ANTI_PROBE_FACTOR)
+    private double leftSemiOrAntiProbeFactor = 0.1;
+
     @VariableMgr.VarAttr(name = BROADCAST_ROW_COUNT_LIMIT, needForward = true)
     private double broadcastRowCountLimit = 30000000;
 
@@ -2733,6 +2737,14 @@ public class SessionVariable implements Serializable, 
Writable {
         this.broadcastRightTableScaleFactor = broadcastRightTableScaleFactor;
     }
 
+    public double getLeftSemiOrAntiProbeFactor() {
+        return leftSemiOrAntiProbeFactor;
+    }
+
+    public void setLeftSemiOrAntiProbeFactor(double leftSemiOrAntiProbeFactor) 
{
+        this.leftSemiOrAntiProbeFactor = leftSemiOrAntiProbeFactor;
+    }
+
     public double getBroadcastRowCountLimit() {
         return broadcastRowCountLimit;
     }
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out 
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out
index db5bf9b39a0..20329136007 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out
@@ -8,27 +8,27 @@ PhysicalResultSink
 ----------hashAgg[GLOBAL]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( 
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 
cs_order_number->[cs_order_number]
+----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( 
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF3 
cs_order_number->[cs_order_number]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
-------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() 
build RFs:RF3 cs_order_number->[cr_order_number]
---------------------PhysicalProject
-----------------------PhysicalOlapScan[catalog_returns] apply RFs: RF3
---------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) 
otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk]
-------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
+--------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3
+------------------PhysicalProject
+--------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) 
otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk]
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
+------------------------------hashJoin[LEFT_ANTI_JOIN shuffle] 
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
 --------------------------------PhysicalProject
 ----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: 
RF0 RF1 RF2
 --------------------------------PhysicalProject
-----------------------------------filter((customer_address.ca_state = 'PA'))
-------------------------------------PhysicalOlapScan[customer_address]
-----------------------------PhysicalProject
-------------------------------filter((date_dim.d_date <= '2002-05-31') and 
(date_dim.d_date >= '2002-04-01'))
---------------------------------PhysicalOlapScan[date_dim]
-------------------------PhysicalProject
---------------------------filter((call_center.cc_county = 'Williamson County'))
-----------------------------PhysicalOlapScan[call_center]
+----------------------------------PhysicalOlapScan[catalog_returns]
+------------------------------PhysicalProject
+--------------------------------filter((customer_address.ca_state = 'PA'))
+----------------------------------PhysicalOlapScan[customer_address]
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_date <= '2002-05-31') and 
(date_dim.d_date >= '2002-04-01'))
+------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalProject
+------------------------filter((call_center.cc_county = 'Williamson County'))
+--------------------------PhysicalOlapScan[call_center]
 
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query78.out 
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query78.out
index f7bf1048960..3b2d980a6ac 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query78.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query78.out
@@ -14,44 +14,44 @@ PhysicalResultSink
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((store_returns.sr_ticket_number = store_sales.ss_ticket_number) 
and (store_sales.ss_item_sk = store_returns.sr_item_sk)) otherCondition=()
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
 ------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
+--------------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((store_returns.sr_ticket_number = store_sales.ss_ticket_number) 
and (store_sales.ss_item_sk = store_returns.sr_item_sk)) otherCondition=()
 ----------------------------------PhysicalProject
 ------------------------------------PhysicalOlapScan[store_sales] apply RFs: 
RF2
 ----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_year = 1998))
---------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------PhysicalOlapScan[store_returns]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_returns]
+--------------------------------filter((date_dim.d_year = 1998))
+----------------------------------PhysicalOlapScan[date_dim]
 ------------------PhysicalProject
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((web_returns.wr_order_number = web_sales.ws_order_number) and 
(web_sales.ws_item_sk = web_returns.wr_item_sk)) otherCondition=()
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
 ------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
+--------------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((web_returns.wr_order_number = web_sales.ws_order_number) and 
(web_sales.ws_item_sk = web_returns.wr_item_sk)) otherCondition=()
 ----------------------------------PhysicalProject
 ------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
 ----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_year = 1998))
---------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------PhysicalOlapScan[web_returns]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_returns]
+--------------------------------filter((date_dim.d_year = 1998))
+----------------------------------PhysicalOlapScan[date_dim]
 --------------PhysicalProject
 ----------------hashAgg[GLOBAL]
 ------------------PhysicalDistribute[DistributionSpecHash]
 --------------------hashAgg[LOCAL]
 ----------------------PhysicalProject
-------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((catalog_returns.cr_order_number = 
catalog_sales.cs_order_number) and (catalog_sales.cs_item_sk = 
catalog_returns.cr_item_sk)) otherCondition=()
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
+----------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((catalog_returns.cr_order_number = 
catalog_sales.cs_order_number) and (catalog_sales.cs_item_sk = 
catalog_returns.cr_item_sk)) otherCondition=()
 ------------------------------PhysicalProject
 --------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
 ------------------------------PhysicalProject
---------------------------------filter((date_dim.d_year = 1998))
-----------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalOlapScan[catalog_returns]
 --------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_returns]
+----------------------------filter((date_dim.d_year = 1998))
+------------------------------PhysicalOlapScan[date_dim]
 
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out 
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out
index 0f35f2dc29e..5dc756bf1d8 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out
@@ -8,27 +8,27 @@ PhysicalResultSink
 ----------hashAgg[GLOBAL]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( 
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4 
ws_order_number->[ws_order_number]
+----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( 
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF3 
ws_order_number->[ws_order_number]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[web_sales] apply RFs: RF4
-------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() 
build RFs:RF3 ws_order_number->[wr_order_number]
---------------------PhysicalProject
-----------------------PhysicalOlapScan[web_returns] apply RFs: RF3
---------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() 
build RFs:RF2 web_site_sk->[ws_web_site_sk]
-------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
+--------------------PhysicalOlapScan[web_sales] apply RFs: RF3
+------------------PhysicalProject
+--------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() 
build RFs:RF2 web_site_sk->[ws_web_site_sk]
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
+------------------------------hashJoin[LEFT_ANTI_JOIN shuffle] 
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=()
 --------------------------------PhysicalProject
 ----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 
RF1 RF2
 --------------------------------PhysicalProject
-----------------------------------filter((customer_address.ca_state = 'OK'))
-------------------------------------PhysicalOlapScan[customer_address]
-----------------------------PhysicalProject
-------------------------------filter((date_dim.d_date <= '2002-06-30') and 
(date_dim.d_date >= '2002-05-01'))
---------------------------------PhysicalOlapScan[date_dim]
-------------------------PhysicalProject
---------------------------filter((web_site.web_company_name = 'pri'))
-----------------------------PhysicalOlapScan[web_site]
+----------------------------------PhysicalOlapScan[web_returns]
+------------------------------PhysicalProject
+--------------------------------filter((customer_address.ca_state = 'OK'))
+----------------------------------PhysicalOlapScan[customer_address]
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_date <= '2002-06-30') and 
(date_dim.d_date >= '2002-05-01'))
+------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalProject
+------------------------filter((web_site.web_company_name = 'pri'))
+--------------------------PhysicalOlapScan[web_site]
 
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query14.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query14.out
index 16f3bec6f15..52fd962d6ab 100644
--- 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query14.out
+++ 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query14.out
@@ -136,13 +136,13 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------------------hashAgg[LOCAL]
 ------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF18 d_date_sk->[ws_sold_date_sk]
+--------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF17 d_date_sk->[ws_sold_date_sk]
 ----------------------------------------PhysicalProject
 ------------------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
---------------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=()
-----------------------------------------------PhysicalCteConsumer ( 
cteId=CTEId#0 )
+--------------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=()
 ----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[web_sales] 
apply RFs: RF18
+------------------------------------------------PhysicalOlapScan[web_sales] 
apply RFs: RF17
+----------------------------------------------PhysicalCteConsumer ( 
cteId=CTEId#0 )
 --------------------------------------------PhysicalProject
 ----------------------------------------------PhysicalOlapScan[item]
 ----------------------------------------PhysicalProject
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query58.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query58.out
index a154c3b953b..29925634b2a 100644
--- 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query58.out
+++ 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query58.out
@@ -21,7 +21,9 @@ PhysicalResultSink
 --------------------------------PhysicalProject
 ----------------------------------PhysicalOlapScan[item] apply RFs: RF12 RF13
 ----------------------------PhysicalProject
-------------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=()
+------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build 
RFs:RF9 d_date->[d_date]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF9
 --------------------------------PhysicalProject
 ----------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() 
build RFs:RF8 d_week_seq->[d_week_seq]
 ------------------------------------PhysicalProject
@@ -31,8 +33,6 @@ PhysicalResultSink
 ----------------------------------------PhysicalProject
 ------------------------------------------filter((date_dim.d_date = 
'2001-03-24'))
 --------------------------------------------PhysicalOlapScan[date_dim]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[date_dim]
 ----------------PhysicalProject
 ------------------hashAgg[GLOBAL]
 --------------------PhysicalDistribute[DistributionSpecHash]
@@ -46,7 +46,9 @@ PhysicalResultSink
 --------------------------------PhysicalProject
 ----------------------------------PhysicalOlapScan[item] apply RFs: RF13
 ----------------------------PhysicalProject
-------------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=()
+------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build 
RFs:RF5 d_date->[d_date]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF5
 --------------------------------PhysicalProject
 ----------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() 
build RFs:RF4 d_week_seq->[d_week_seq]
 ------------------------------------PhysicalProject
@@ -56,8 +58,6 @@ PhysicalResultSink
 ----------------------------------------PhysicalProject
 ------------------------------------------filter((date_dim.d_date = 
'2001-03-24'))
 --------------------------------------------PhysicalOlapScan[date_dim]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[date_dim]
 ------------PhysicalProject
 --------------hashAgg[GLOBAL]
 ----------------PhysicalDistribute[DistributionSpecHash]
@@ -71,7 +71,9 @@ PhysicalResultSink
 ----------------------------PhysicalProject
 ------------------------------PhysicalOlapScan[item]
 ------------------------PhysicalProject
---------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=()
+--------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build 
RFs:RF1 d_date->[d_date]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1
 ----------------------------PhysicalProject
 ------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() 
build RFs:RF0 d_week_seq->[d_week_seq]
 --------------------------------PhysicalProject
@@ -81,6 +83,4 @@ PhysicalResultSink
 ------------------------------------PhysicalProject
 --------------------------------------filter((date_dim.d_date = '2001-03-24'))
 ----------------------------------------PhysicalOlapScan[date_dim]
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[date_dim]
 
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out
index fce09b1b604..313d625fa34 100644
--- 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out
+++ 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out
@@ -5,9 +5,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ----PhysicalProject
 ------hashJoin[INNER_JOIN shuffle] hashCondition=((ws1.ws_order_number = 
ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = 
ws_warehouse_sk))) build RFs:RF0 ws_order_number->[ws_order_number]
 --------PhysicalProject
-----------PhysicalOlapScan[web_sales] apply RFs: RF0 RF7
+----------PhysicalOlapScan[web_sales] apply RFs: RF0 RF6
 --------PhysicalProject
-----------PhysicalOlapScan[web_sales] apply RFs: RF7
+----------PhysicalOlapScan[web_sales] apply RFs: RF6
 --PhysicalResultSink
 ----PhysicalTopN[GATHER_SORT]
 ------hashAgg[DISTINCT_GLOBAL]
@@ -16,21 +16,21 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ------------hashAgg[GLOBAL]
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
-------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() 
build RFs:RF6 web_site_sk->[ws_web_site_sk]
+------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() 
build RFs:RF5 web_site_sk->[ws_web_site_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF5 ca_address_sk->[ws_ship_addr_sk]
+----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF4 ca_address_sk->[ws_ship_addr_sk]
 ------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF4 d_date_sk->[ws_ship_date_sk]
-----------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() 
build RFs:RF3 ws_order_number->[ws_order_number]
-------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: 
RF3
-------------------------------hashJoin[RIGHT_SEMI_JOIN bucketShuffle] 
hashCondition=((ws1.ws_order_number = web_returns.wr_order_number)) 
otherCondition=() build RFs:RF2 ws_order_number->[wr_order_number];RF7 
ws_order_number->[ws_order_number,ws_order_number]
+--------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF3 d_date_sk->[ws_ship_date_sk]
+----------------------------hashJoin[LEFT_SEMI_JOIN colocated] 
hashCondition=((ws1.ws_order_number = web_returns.wr_order_number)) 
otherCondition=()
+------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=()
 --------------------------------PhysicalProject
-----------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) 
otherCondition=()
-------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
-------------------------------------PhysicalProject
---------------------------------------PhysicalOlapScan[web_returns] apply RFs: 
RF2
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 
RF5 RF6
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF3 
RF4 RF5
+--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+------------------------------PhysicalProject
+--------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) 
otherCondition=() build RFs:RF6 wr_order_number->[ws_order_number]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------------------------PhysicalProject
+------------------------------------PhysicalOlapScan[web_returns]
 ----------------------------PhysicalProject
 ------------------------------filter((date_dim.d_date <= '1999-04-02') and 
(date_dim.d_date >= '1999-02-01'))
 --------------------------------PhysicalOlapScan[date_dim]
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query14.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query14.out
index a7e5eb01cf5..1e37ac8f702 100644
--- 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query14.out
+++ 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query14.out
@@ -136,13 +136,13 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------------------hashAgg[LOCAL]
 ------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF18 d_date_sk->[ws_sold_date_sk]
+--------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF17 d_date_sk->[ws_sold_date_sk]
 ----------------------------------------PhysicalProject
-------------------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF17 i_item_sk->[ss_item_sk,ws_item_sk]
---------------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF16 ws_item_sk->[ss_item_sk]
-----------------------------------------------PhysicalCteConsumer ( 
cteId=CTEId#0 ) apply RFs: RF16 RF17
+------------------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF16 i_item_sk->[ss_item_sk,ws_item_sk]
+--------------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=()
 ----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[web_sales] 
apply RFs: RF17 RF18
+------------------------------------------------PhysicalOlapScan[web_sales] 
apply RFs: RF16 RF17
+----------------------------------------------PhysicalCteConsumer ( 
cteId=CTEId#0 ) apply RFs: RF16
 --------------------------------------------PhysicalProject
 ----------------------------------------------PhysicalOlapScan[item]
 ----------------------------------------PhysicalProject
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query58.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query58.out
index cd737bad3f6..fcb11cadc05 100644
--- 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query58.out
+++ 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query58.out
@@ -21,18 +21,18 @@ PhysicalResultSink
 --------------------------------PhysicalProject
 ----------------------------------PhysicalOlapScan[item] apply RFs: RF12 RF13
 ----------------------------PhysicalProject
-------------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build 
RFs:RF9 d_date->[d_date]
+------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build 
RFs:RF9 d_date->[d_date]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF9
 --------------------------------PhysicalProject
 ----------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() 
build RFs:RF8 d_week_seq->[d_week_seq]
 ------------------------------------PhysicalProject
---------------------------------------PhysicalOlapScan[date_dim] apply RFs: 
RF8 RF9
+--------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF8
 ------------------------------------PhysicalAssertNumRows
 
--------------------------------------PhysicalDistribute[DistributionSpecGather]
 ----------------------------------------PhysicalProject
 ------------------------------------------filter((date_dim.d_date = 
'2001-03-24'))
 --------------------------------------------PhysicalOlapScan[date_dim]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[date_dim]
 ----------------PhysicalProject
 ------------------hashAgg[GLOBAL]
 --------------------PhysicalDistribute[DistributionSpecHash]
@@ -46,18 +46,18 @@ PhysicalResultSink
 --------------------------------PhysicalProject
 ----------------------------------PhysicalOlapScan[item] apply RFs: RF13
 ----------------------------PhysicalProject
-------------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build 
RFs:RF5 d_date->[d_date]
+------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build 
RFs:RF5 d_date->[d_date]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF5
 --------------------------------PhysicalProject
 ----------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() 
build RFs:RF4 d_week_seq->[d_week_seq]
 ------------------------------------PhysicalProject
---------------------------------------PhysicalOlapScan[date_dim] apply RFs: 
RF4 RF5
+--------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF4
 ------------------------------------PhysicalAssertNumRows
 
--------------------------------------PhysicalDistribute[DistributionSpecGather]
 ----------------------------------------PhysicalProject
 ------------------------------------------filter((date_dim.d_date = 
'2001-03-24'))
 --------------------------------------------PhysicalOlapScan[date_dim]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[date_dim]
 ------------PhysicalProject
 --------------hashAgg[GLOBAL]
 ----------------PhysicalDistribute[DistributionSpecHash]
@@ -71,16 +71,16 @@ PhysicalResultSink
 ----------------------------PhysicalProject
 ------------------------------PhysicalOlapScan[item]
 ------------------------PhysicalProject
---------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build 
RFs:RF1 d_date->[d_date]
+--------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build 
RFs:RF1 d_date->[d_date]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1
 ----------------------------PhysicalProject
 ------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() 
build RFs:RF0 d_week_seq->[d_week_seq]
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF0 RF1
+----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF0
 --------------------------------PhysicalAssertNumRows
 ----------------------------------PhysicalDistribute[DistributionSpecGather]
 ------------------------------------PhysicalProject
 --------------------------------------filter((date_dim.d_date = '2001-03-24'))
 ----------------------------------------PhysicalOlapScan[date_dim]
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[date_dim]
 
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out
index aaaba243279..8486403dba9 100644
--- 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out
+++ 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out
@@ -5,9 +5,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ----PhysicalProject
 ------hashJoin[INNER_JOIN shuffle] hashCondition=((ws1.ws_order_number = 
ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = 
ws_warehouse_sk))) build RFs:RF0 ws_order_number->[ws_order_number];RF1 
ws_order_number->[ws_order_number]
 --------PhysicalProject
-----------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF14 RF15
+----------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF12 RF13
 --------PhysicalProject
-----------PhysicalOlapScan[web_sales] apply RFs: RF14 RF15
+----------PhysicalOlapScan[web_sales] apply RFs: RF12 RF13
 --PhysicalResultSink
 ----PhysicalTopN[GATHER_SORT]
 ------hashAgg[DISTINCT_GLOBAL]
@@ -16,21 +16,21 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ------------hashAgg[GLOBAL]
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
-------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() 
build RFs:RF12 web_site_sk->[ws_web_site_sk];RF13 web_site_sk->[ws_web_site_sk]
+------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() 
build RFs:RF10 web_site_sk->[ws_web_site_sk];RF11 web_site_sk->[ws_web_site_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF10 ca_address_sk->[ws_ship_addr_sk];RF11 
ca_address_sk->[ws_ship_addr_sk]
+----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF8 ca_address_sk->[ws_ship_addr_sk];RF9 
ca_address_sk->[ws_ship_addr_sk]
 ------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF8 d_date_sk->[ws_ship_date_sk];RF9 d_date_sk->[ws_ship_date_sk]
-----------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() 
build RFs:RF6 ws_order_number->[ws_order_number];RF7 
ws_order_number->[ws_order_number]
-------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: 
RF6 RF7
-------------------------------hashJoin[RIGHT_SEMI_JOIN bucketShuffle] 
hashCondition=((ws1.ws_order_number = web_returns.wr_order_number)) 
otherCondition=() build RFs:RF4 ws_order_number->[wr_order_number];RF5 
ws_order_number->[wr_order_number];RF14 
ws_order_number->[ws_order_number,ws_order_number];RF15 
ws_order_number->[ws_order_number,ws_order_number]
+--------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF6 d_date_sk->[ws_ship_date_sk];RF7 d_date_sk->[ws_ship_date_sk]
+----------------------------hashJoin[LEFT_SEMI_JOIN colocated] 
hashCondition=((ws1.ws_order_number = web_returns.wr_order_number)) 
otherCondition=() build RFs:RF4 
wr_order_number->[ws_order_number,ws_order_number];RF5 
wr_order_number->[ws_order_number,ws_order_number]
+------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=()
 --------------------------------PhysicalProject
-----------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) 
otherCondition=() build RFs:RF2 wr_order_number->[ws_order_number];RF3 
wr_order_number->[ws_order_number]
-------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
apply RFs: RF2 RF3
-------------------------------------PhysicalProject
---------------------------------------PhysicalOlapScan[web_returns] apply RFs: 
RF4 RF5
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF8 
RF9 RF10 RF11 RF12 RF13
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 
RF5 RF6 RF7 RF8 RF9 RF10 RF11
+--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply 
RFs: RF4 RF5
+------------------------------PhysicalProject
+--------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) 
otherCondition=() build RFs:RF12 
wr_order_number->[ws_order_number,ws_order_number];RF13 
wr_order_number->[ws_order_number,ws_order_number]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------------------------PhysicalProject
+------------------------------------PhysicalOlapScan[web_returns]
 ----------------------------PhysicalProject
 ------------------------------filter((date_dim.d_date <= '1999-04-02') and 
(date_dim.d_date >= '1999-02-01'))
 --------------------------------PhysicalOlapScan[date_dim]
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query14.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query14.out
index 781c5f673c5..be165273766 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query14.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query14.out
@@ -96,19 +96,18 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------------------hashAgg[LOCAL]
 ------------------------------------PhysicalProject
---------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF14 ss_item_sk->[ss_item_sk]
-----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
apply RFs: RF14
+--------------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
+----------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk)) 
otherCondition=()
+------------------------------------------PhysicalProject
+--------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF12 d_date_sk->[ss_sold_date_sk]
+----------------------------------------------PhysicalProject
+------------------------------------------------PhysicalOlapScan[store_sales] 
apply RFs: RF12
+----------------------------------------------PhysicalProject
+------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
+--------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
 ----------------------------------------PhysicalProject
-------------------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
---------------------------------------------PhysicalProject
-----------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF12 d_date_sk->[ss_sold_date_sk]
-------------------------------------------------PhysicalProject
---------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF12
-------------------------------------------------PhysicalProject
---------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
-----------------------------------------------------PhysicalOlapScan[date_dim]
---------------------------------------------PhysicalProject
-----------------------------------------------PhysicalOlapScan[item]
+------------------------------------------PhysicalOlapScan[item]
 ----------------------------PhysicalProject
 ------------------------------PhysicalAssertNumRows
 --------------------------------PhysicalDistribute[DistributionSpecGather]
@@ -120,14 +119,14 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------------------hashAgg[LOCAL]
 ------------------------------------PhysicalProject
---------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF17 cs_item_sk->[ss_item_sk]
-----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
apply RFs: RF17
+--------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF16 cs_item_sk->[ss_item_sk]
+----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
apply RFs: RF16
 ----------------------------------------PhysicalProject
 ------------------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
 --------------------------------------------PhysicalProject
-----------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF15 d_date_sk->[cs_sold_date_sk]
+----------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF14 d_date_sk->[cs_sold_date_sk]
 ------------------------------------------------PhysicalProject
---------------------------------------------------PhysicalOlapScan[catalog_sales]
 apply RFs: RF15
+--------------------------------------------------PhysicalOlapScan[catalog_sales]
 apply RFs: RF14
 ------------------------------------------------PhysicalProject
 --------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
 ----------------------------------------------------PhysicalOlapScan[date_dim]
@@ -144,14 +143,14 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------------------hashAgg[LOCAL]
 ------------------------------------PhysicalProject
---------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF20 ws_item_sk->[ss_item_sk]
-----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
apply RFs: RF20
+--------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF19 ws_item_sk->[ss_item_sk]
+----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
apply RFs: RF19
 ----------------------------------------PhysicalProject
 ------------------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
 --------------------------------------------PhysicalProject
-----------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF18 d_date_sk->[ws_sold_date_sk]
+----------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF17 d_date_sk->[ws_sold_date_sk]
 ------------------------------------------------PhysicalProject
---------------------------------------------------PhysicalOlapScan[web_sales] 
apply RFs: RF18
+--------------------------------------------------PhysicalOlapScan[web_sales] 
apply RFs: RF17
 ------------------------------------------------PhysicalProject
 --------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
 ----------------------------------------------------PhysicalOlapScan[date_dim]
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out
index 6e5b1b55f3d..ba90af331bb 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out
@@ -8,26 +8,26 @@ PhysicalResultSink
 ----------hashAgg[GLOBAL]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( 
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 
cs_order_number->[cs_order_number]
+----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( 
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF3 
cs_order_number->[cs_order_number]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
+--------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) 
otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk]
-----------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() 
build RFs:RF2 cs_order_number->[cr_order_number]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2
-------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
+--------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) 
otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk]
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
+------------------------------hashJoin[LEFT_ANTI_JOIN shuffle] 
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: 
RF0 RF1 RF3
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: 
RF0 RF1 RF2
 --------------------------------PhysicalProject
-----------------------------------filter((customer_address.ca_state = 'WV'))
-------------------------------------PhysicalOlapScan[customer_address]
-----------------------------PhysicalProject
-------------------------------filter((date_dim.d_date <= '2002-05-31') and 
(date_dim.d_date >= '2002-04-01'))
---------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalOlapScan[catalog_returns]
+------------------------------PhysicalProject
+--------------------------------filter((customer_address.ca_state = 'WV'))
+----------------------------------PhysicalOlapScan[customer_address]
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_date <= '2002-05-31') and 
(date_dim.d_date >= '2002-04-01'))
+------------------------------PhysicalOlapScan[date_dim]
 ----------------------PhysicalProject
 ------------------------filter(cc_county IN ('Barrow County', 'Daviess 
County', 'Luce County', 'Richland County', 'Ziebach County'))
 --------------------------PhysicalOlapScan[call_center]
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query78.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query78.out
index 911302fb3f0..30527883a49 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query78.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query78.out
@@ -14,44 +14,44 @@ PhysicalResultSink
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((store_returns.sr_ticket_number = store_sales.ss_ticket_number) 
and (store_sales.ss_item_sk = store_returns.sr_item_sk)) otherCondition=()
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
 ------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
+--------------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((store_returns.sr_ticket_number = store_sales.ss_ticket_number) 
and (store_sales.ss_item_sk = store_returns.sr_item_sk)) otherCondition=()
 ----------------------------------PhysicalProject
 ------------------------------------PhysicalOlapScan[store_sales] apply RFs: 
RF2
 ----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_year = 2000))
---------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------PhysicalOlapScan[store_returns]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_returns]
+--------------------------------filter((date_dim.d_year = 2000))
+----------------------------------PhysicalOlapScan[date_dim]
 ------------------PhysicalProject
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((web_returns.wr_order_number = web_sales.ws_order_number) and 
(web_sales.ws_item_sk = web_returns.wr_item_sk)) otherCondition=()
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
 ------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
+--------------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((web_returns.wr_order_number = web_sales.ws_order_number) and 
(web_sales.ws_item_sk = web_returns.wr_item_sk)) otherCondition=()
 ----------------------------------PhysicalProject
 ------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
 ----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_year = 2000))
---------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------PhysicalOlapScan[web_returns]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_returns]
+--------------------------------filter((date_dim.d_year = 2000))
+----------------------------------PhysicalOlapScan[date_dim]
 --------------PhysicalProject
 ----------------hashAgg[GLOBAL]
 ------------------PhysicalDistribute[DistributionSpecHash]
 --------------------hashAgg[LOCAL]
 ----------------------PhysicalProject
-------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((catalog_returns.cr_order_number = 
catalog_sales.cs_order_number) and (catalog_sales.cs_item_sk = 
catalog_returns.cr_item_sk)) otherCondition=()
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
+----------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((catalog_returns.cr_order_number = 
catalog_sales.cs_order_number) and (catalog_sales.cs_item_sk = 
catalog_returns.cr_item_sk)) otherCondition=()
 ------------------------------PhysicalProject
 --------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
 ------------------------------PhysicalProject
---------------------------------filter((date_dim.d_year = 2000))
-----------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalOlapScan[catalog_returns]
 --------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_returns]
+----------------------------filter((date_dim.d_year = 2000))
+------------------------------PhysicalOlapScan[date_dim]
 
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out
index 27eda9b8240..e13bdc06291 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out
@@ -8,27 +8,27 @@ PhysicalResultSink
 ----------hashAgg[GLOBAL]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( 
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4 
ws_order_number->[ws_order_number]
+----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( 
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF3 
ws_order_number->[ws_order_number]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[web_sales] apply RFs: RF4
-------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() 
build RFs:RF3 ws_order_number->[wr_order_number]
---------------------PhysicalProject
-----------------------PhysicalOlapScan[web_returns] apply RFs: RF3
---------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() 
build RFs:RF2 web_site_sk->[ws_web_site_sk]
-------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
+--------------------PhysicalOlapScan[web_sales] apply RFs: RF3
+------------------PhysicalProject
+--------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() 
build RFs:RF2 web_site_sk->[ws_web_site_sk]
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
+------------------------------hashJoin[LEFT_ANTI_JOIN shuffle] 
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=()
 --------------------------------PhysicalProject
 ----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 
RF1 RF2
 --------------------------------PhysicalProject
-----------------------------------filter((customer_address.ca_state = 'OK'))
-------------------------------------PhysicalOlapScan[customer_address]
-----------------------------PhysicalProject
-------------------------------filter((date_dim.d_date <= '2000-04-01') and 
(date_dim.d_date >= '2000-02-01'))
---------------------------------PhysicalOlapScan[date_dim]
-------------------------PhysicalProject
---------------------------filter((web_site.web_company_name = 'pri'))
-----------------------------PhysicalOlapScan[web_site]
+----------------------------------PhysicalOlapScan[web_returns]
+------------------------------PhysicalProject
+--------------------------------filter((customer_address.ca_state = 'OK'))
+----------------------------------PhysicalOlapScan[customer_address]
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_date <= '2000-04-01') and 
(date_dim.d_date >= '2000-02-01'))
+------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalProject
+------------------------filter((web_site.web_company_name = 'pri'))
+--------------------------PhysicalOlapScan[web_site]
 
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query14.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query14.out
index 706cc1ffe4d..4ad3fc850f6 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query14.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query14.out
@@ -96,19 +96,18 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------------------hashAgg[LOCAL]
 ------------------------------------PhysicalProject
---------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF14 ss_item_sk->[ss_item_sk]
-----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
apply RFs: RF14
+--------------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF13 i_item_sk->[ss_item_sk,ss_item_sk]
+----------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk)) 
otherCondition=()
+------------------------------------------PhysicalProject
+--------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF12 d_date_sk->[ss_sold_date_sk]
+----------------------------------------------PhysicalProject
+------------------------------------------------PhysicalOlapScan[store_sales] 
apply RFs: RF12 RF13
+----------------------------------------------PhysicalProject
+------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
+--------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 
) apply RFs: RF13
 ----------------------------------------PhysicalProject
-------------------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF13 i_item_sk->[ss_item_sk]
---------------------------------------------PhysicalProject
-----------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF12 d_date_sk->[ss_sold_date_sk]
-------------------------------------------------PhysicalProject
---------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF12 RF13
-------------------------------------------------PhysicalProject
---------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
-----------------------------------------------------PhysicalOlapScan[date_dim]
---------------------------------------------PhysicalProject
-----------------------------------------------PhysicalOlapScan[item]
+------------------------------------------PhysicalOlapScan[item]
 ----------------------------PhysicalProject
 ------------------------------PhysicalAssertNumRows
 --------------------------------PhysicalDistribute[DistributionSpecGather]
@@ -120,14 +119,14 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------------------hashAgg[LOCAL]
 ------------------------------------PhysicalProject
---------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF17 cs_item_sk->[ss_item_sk]
-----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
apply RFs: RF17
+--------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF16 cs_item_sk->[ss_item_sk]
+----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
apply RFs: RF16
 ----------------------------------------PhysicalProject
-------------------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF16 i_item_sk->[cs_item_sk]
+------------------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF15 i_item_sk->[cs_item_sk]
 --------------------------------------------PhysicalProject
-----------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF15 d_date_sk->[cs_sold_date_sk]
+----------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF14 d_date_sk->[cs_sold_date_sk]
 ------------------------------------------------PhysicalProject
---------------------------------------------------PhysicalOlapScan[catalog_sales]
 apply RFs: RF15 RF16
+--------------------------------------------------PhysicalOlapScan[catalog_sales]
 apply RFs: RF14 RF15
 ------------------------------------------------PhysicalProject
 --------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
 ----------------------------------------------------PhysicalOlapScan[date_dim]
@@ -144,14 +143,14 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------------------hashAgg[LOCAL]
 ------------------------------------PhysicalProject
---------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF20 ws_item_sk->[ss_item_sk]
-----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
apply RFs: RF20
+--------------------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF19 ws_item_sk->[ss_item_sk]
+----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
apply RFs: RF19
 ----------------------------------------PhysicalProject
-------------------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF19 i_item_sk->[ws_item_sk]
+------------------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF18 i_item_sk->[ws_item_sk]
 --------------------------------------------PhysicalProject
-----------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF18 d_date_sk->[ws_sold_date_sk]
+----------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF17 d_date_sk->[ws_sold_date_sk]
 ------------------------------------------------PhysicalProject
---------------------------------------------------PhysicalOlapScan[web_sales] 
apply RFs: RF18 RF19
+--------------------------------------------------PhysicalOlapScan[web_sales] 
apply RFs: RF17 RF18
 ------------------------------------------------PhysicalProject
 --------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
 ----------------------------------------------------PhysicalOlapScan[date_dim]
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out
index 6e5b1b55f3d..ba90af331bb 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out
@@ -8,26 +8,26 @@ PhysicalResultSink
 ----------hashAgg[GLOBAL]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( 
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 
cs_order_number->[cs_order_number]
+----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( 
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF3 
cs_order_number->[cs_order_number]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
+--------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) 
otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk]
-----------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() 
build RFs:RF2 cs_order_number->[cr_order_number]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2
-------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
+--------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) 
otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk]
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
+------------------------------hashJoin[LEFT_ANTI_JOIN shuffle] 
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: 
RF0 RF1 RF3
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: 
RF0 RF1 RF2
 --------------------------------PhysicalProject
-----------------------------------filter((customer_address.ca_state = 'WV'))
-------------------------------------PhysicalOlapScan[customer_address]
-----------------------------PhysicalProject
-------------------------------filter((date_dim.d_date <= '2002-05-31') and 
(date_dim.d_date >= '2002-04-01'))
---------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalOlapScan[catalog_returns]
+------------------------------PhysicalProject
+--------------------------------filter((customer_address.ca_state = 'WV'))
+----------------------------------PhysicalOlapScan[customer_address]
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_date <= '2002-05-31') and 
(date_dim.d_date >= '2002-04-01'))
+------------------------------PhysicalOlapScan[date_dim]
 ----------------------PhysicalProject
 ------------------------filter(cc_county IN ('Barrow County', 'Daviess 
County', 'Luce County', 'Richland County', 'Ziebach County'))
 --------------------------PhysicalOlapScan[call_center]
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query78.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query78.out
index 911302fb3f0..30527883a49 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query78.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query78.out
@@ -14,44 +14,44 @@ PhysicalResultSink
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((store_returns.sr_ticket_number = store_sales.ss_ticket_number) 
and (store_sales.ss_item_sk = store_returns.sr_item_sk)) otherCondition=()
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
 ------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
+--------------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((store_returns.sr_ticket_number = store_sales.ss_ticket_number) 
and (store_sales.ss_item_sk = store_returns.sr_item_sk)) otherCondition=()
 ----------------------------------PhysicalProject
 ------------------------------------PhysicalOlapScan[store_sales] apply RFs: 
RF2
 ----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_year = 2000))
---------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------PhysicalOlapScan[store_returns]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_returns]
+--------------------------------filter((date_dim.d_year = 2000))
+----------------------------------PhysicalOlapScan[date_dim]
 ------------------PhysicalProject
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((web_returns.wr_order_number = web_sales.ws_order_number) and 
(web_sales.ws_item_sk = web_returns.wr_item_sk)) otherCondition=()
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
 ------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
+--------------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((web_returns.wr_order_number = web_sales.ws_order_number) and 
(web_sales.ws_item_sk = web_returns.wr_item_sk)) otherCondition=()
 ----------------------------------PhysicalProject
 ------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
 ----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_year = 2000))
---------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------PhysicalOlapScan[web_returns]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_returns]
+--------------------------------filter((date_dim.d_year = 2000))
+----------------------------------PhysicalOlapScan[date_dim]
 --------------PhysicalProject
 ----------------hashAgg[GLOBAL]
 ------------------PhysicalDistribute[DistributionSpecHash]
 --------------------hashAgg[LOCAL]
 ----------------------PhysicalProject
-------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((catalog_returns.cr_order_number = 
catalog_sales.cs_order_number) and (catalog_sales.cs_item_sk = 
catalog_returns.cr_item_sk)) otherCondition=()
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
+----------------------------hashJoin[LEFT_ANTI_JOIN colocated] 
hashCondition=((catalog_returns.cr_order_number = 
catalog_sales.cs_order_number) and (catalog_sales.cs_item_sk = 
catalog_returns.cr_item_sk)) otherCondition=()
 ------------------------------PhysicalProject
 --------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
 ------------------------------PhysicalProject
---------------------------------filter((date_dim.d_year = 2000))
-----------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalOlapScan[catalog_returns]
 --------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_returns]
+----------------------------filter((date_dim.d_year = 2000))
+------------------------------PhysicalOlapScan[date_dim]
 
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out
index 27eda9b8240..e13bdc06291 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out
@@ -8,27 +8,27 @@ PhysicalResultSink
 ----------hashAgg[GLOBAL]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( 
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4 
ws_order_number->[ws_order_number]
+----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( 
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF3 
ws_order_number->[ws_order_number]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[web_sales] apply RFs: RF4
-------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() 
build RFs:RF3 ws_order_number->[wr_order_number]
---------------------PhysicalProject
-----------------------PhysicalOlapScan[web_returns] apply RFs: RF3
---------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() 
build RFs:RF2 web_site_sk->[ws_web_site_sk]
-------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
+--------------------PhysicalOlapScan[web_sales] apply RFs: RF3
+------------------PhysicalProject
+--------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() 
build RFs:RF2 web_site_sk->[ws_web_site_sk]
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() 
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
+------------------------------hashJoin[LEFT_ANTI_JOIN shuffle] 
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=()
 --------------------------------PhysicalProject
 ----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 
RF1 RF2
 --------------------------------PhysicalProject
-----------------------------------filter((customer_address.ca_state = 'OK'))
-------------------------------------PhysicalOlapScan[customer_address]
-----------------------------PhysicalProject
-------------------------------filter((date_dim.d_date <= '2000-04-01') and 
(date_dim.d_date >= '2000-02-01'))
---------------------------------PhysicalOlapScan[date_dim]
-------------------------PhysicalProject
---------------------------filter((web_site.web_company_name = 'pri'))
-----------------------------PhysicalOlapScan[web_site]
+----------------------------------PhysicalOlapScan[web_returns]
+------------------------------PhysicalProject
+--------------------------------filter((customer_address.ca_state = 'OK'))
+----------------------------------PhysicalOlapScan[customer_address]
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_date <= '2000-04-01') and 
(date_dim.d_date >= '2000-02-01'))
+------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalProject
+------------------------filter((web_site.web_company_name = 'pri'))
+--------------------------PhysicalOlapScan[web_site]
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to