This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit e26a53d8a6f75bcbaf0f8d4ed62533bac0566494
Author: starocean999 <[email protected]>
AuthorDate: Thu Apr 11 15:02:58 2024 +0800

    [fix](nereids) SemiJoinSemiJoinTransposeProject rule didn't handle mark join correctly (#33401)
---
 .../join/SemiJoinSemiJoinTransposeProject.java     | 13 +++++++-----
 .../join/SemiJoinSemiJoinTransposeProjectTest.java | 24 ++++++++++++++++++++++
 .../rf_prune/query10.out                           | 12 +++++------
 .../nereids_tpcds_shape_sf100_p0/shape/query10.out | 12 +++++------
 4 files changed, 44 insertions(+), 17 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java
index 2a70ae157b5..8be8a0fbdf5 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java
@@ -23,7 +23,6 @@ import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.rules.exploration.CBOUtils;
 import org.apache.doris.nereids.rules.exploration.OneExplorationRuleFactory;
 import org.apache.doris.nereids.trees.expressions.ExprId;
-import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference;
 import org.apache.doris.nereids.trees.expressions.NamedExpression;
 import org.apache.doris.nereids.trees.plans.GroupPlan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
@@ -44,7 +43,7 @@ public class SemiJoinSemiJoinTransposeProject extends 
OneExplorationRuleFactory
     /*
      *        topSemi                   newTopSemi
      *        /     \                   /       \
-     *    aProject   C              aProject     B
+     *    abProject   C              acProject     B
      *      |            ──►          |
      * bottomSemi                newBottomSemi
      *    /   \                     /   \
@@ -59,13 +58,17 @@ public class SemiJoinSemiJoinTransposeProject extends 
OneExplorationRuleFactory
                 .when(join -> join.left().isAllSlots())
                 .then(topSemi -> {
                     LogicalJoin<GroupPlan, GroupPlan> bottomSemi = 
topSemi.left().child();
-                    LogicalProject abProject = topSemi.left();
+                    LogicalProject<LogicalJoin<GroupPlan, GroupPlan>> 
abProject = topSemi.left();
                     GroupPlan a = bottomSemi.left();
                     GroupPlan b = bottomSemi.right();
                     GroupPlan c = topSemi.right();
                     Set<ExprId> aOutputExprIdSet = a.getOutputExprIdSet();
-                    Set<NamedExpression> acProjects = (Set<NamedExpression>) 
abProject.getProjects()
-                            .stream().filter(slot -> !(slot instanceof 
MarkJoinSlotReference))
+                    // if the bottom semi join is a mark join, we need to remove the mark join slot created by
+                    // the bottom semi join from the project list before swapping the bottom semi join to the top
+                    Set<NamedExpression> acProjects = 
abProject.getProjects().stream()
+                            .filter(slot -> !(abProject.child().isMarkJoin()
+                                    && 
abProject.child().getMarkJoinSlotReference().get()
+                                            .getExprId() == slot.getExprId()))
                             .collect(Collectors.toSet());
 
                     bottomSemi.getConditionSlot()
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java
index ba82d58f2a5..e68d9fa93c2 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java
@@ -69,4 +69,28 @@ public class SemiJoinSemiJoinTransposeProjectTest implements 
MemoPatternMatchSup
                         )
                 );
     }
+
+    @Test
+    public void testSemiProjectSemiCommuteMarkJoin() {
+        LogicalPlan topJoin = new LogicalPlanBuilder(scan1)
+                .markJoin(scan2, JoinType.LEFT_SEMI_JOIN, Pair.of(0, 0))
+                .project(ImmutableList.of(0, 2))
+                .markJoin(scan3, JoinType.LEFT_SEMI_JOIN, Pair.of(0, 1))
+                .build();
+        PlanChecker.from(MemoTestUtils.createConnectContext(), topJoin)
+                
.applyExploration(SemiJoinSemiJoinTransposeProject.INSTANCE.build())
+                .matchesExploration(
+                        logicalProject(
+                                logicalJoin(
+                                        logicalProject(
+                                                logicalJoin(
+                                                        
logicalOlapScan().when(scan -> scan.getTable().getName().equals("t1")),
+                                                        
logicalOlapScan().when(scan -> scan.getTable().getName().equals("t3"))
+                                                ).when(join -> 
join.getJoinType() == JoinType.LEFT_SEMI_JOIN)
+                                        ).when(project -> 
project.getProjects().size() == 2),
+                                        logicalOlapScan().when(scan -> 
scan.getTable().getName().equals("t2"))
+                                ).when(join -> join.getJoinType() == 
JoinType.LEFT_SEMI_JOIN)
+                        )
+                );
+    }
 }
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out
index a08212e8cc9..15b70cd548a 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out
@@ -10,23 +10,23 @@ PhysicalResultSink
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
 ------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE)))
---------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk 
= web_sales.ws_bill_customer_sk)) otherCondition=()
+--------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk 
= store_sales.ss_customer_sk)) otherCondition=() build RFs:RF5 
c_customer_sk->[ss_customer_sk]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk]
+--------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk]
 ----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[web_sales] apply RFs: RF5
+------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5
 ----------------------------PhysicalDistribute[DistributionSpecReplicated]
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_moy <= 4) and 
(date_dim.d_moy >= 1) and (date_dim.d_year = 2001))
 ----------------------------------PhysicalOlapScan[date_dim]
 ----------------------PhysicalProject
-------------------------hashJoin[RIGHT_SEMI_JOIN] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) 
otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk]
+------------------------hashJoin[RIGHT_SEMI_JOIN] 
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) 
otherCondition=()
 --------------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
+------------------------------hashJoin[INNER_JOIN] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk]
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF3 
RF4
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF3
 --------------------------------PhysicalDistribute[DistributionSpecReplicated]
 ----------------------------------PhysicalProject
 ------------------------------------filter((date_dim.d_moy <= 4) and 
(date_dim.d_moy >= 1) and (date_dim.d_year = 2001))
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out
index a08212e8cc9..15b70cd548a 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out
@@ -10,23 +10,23 @@ PhysicalResultSink
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
 ------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE)))
---------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk 
= web_sales.ws_bill_customer_sk)) otherCondition=()
+--------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk 
= store_sales.ss_customer_sk)) otherCondition=() build RFs:RF5 
c_customer_sk->[ss_customer_sk]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk]
+--------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk]
 ----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[web_sales] apply RFs: RF5
+------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5
 ----------------------------PhysicalDistribute[DistributionSpecReplicated]
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_moy <= 4) and 
(date_dim.d_moy >= 1) and (date_dim.d_year = 2001))
 ----------------------------------PhysicalOlapScan[date_dim]
 ----------------------PhysicalProject
-------------------------hashJoin[RIGHT_SEMI_JOIN] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) 
otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk]
+------------------------hashJoin[RIGHT_SEMI_JOIN] 
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) 
otherCondition=()
 --------------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
+------------------------------hashJoin[INNER_JOIN] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk]
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF3 
RF4
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF3
 --------------------------------PhysicalDistribute[DistributionSpecReplicated]
 ----------------------------------PhysicalProject
 ------------------------------------filter((date_dim.d_moy <= 4) and 
(date_dim.d_moy >= 1) and (date_dim.d_year = 2001))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to