This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit e26a53d8a6f75bcbaf0f8d4ed62533bac0566494 Author: starocean999 <[email protected]> AuthorDate: Thu Apr 11 15:02:58 2024 +0800 [fix](nereids) SemiJoinSemiJoinTransposeProject rule didn't handle mark join correctly (#33401) --- .../join/SemiJoinSemiJoinTransposeProject.java | 13 +++++++----- .../join/SemiJoinSemiJoinTransposeProjectTest.java | 24 ++++++++++++++++++++++ .../rf_prune/query10.out | 12 +++++------ .../nereids_tpcds_shape_sf100_p0/shape/query10.out | 12 +++++------ 4 files changed, 44 insertions(+), 17 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java index 2a70ae157b5..8be8a0fbdf5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java @@ -23,7 +23,6 @@ import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.rules.exploration.CBOUtils; import org.apache.doris.nereids.rules.exploration.OneExplorationRuleFactory; import org.apache.doris.nereids.trees.expressions.ExprId; -import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.plans.GroupPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; @@ -44,7 +43,7 @@ public class SemiJoinSemiJoinTransposeProject extends OneExplorationRuleFactory /* * topSemi newTopSemi * / \ / \ - * aProject C aProject B + * abProject C acProject B * | ──► | * bottomSemi newBottomSemi * / \ / \ @@ -59,13 +58,17 @@ public class SemiJoinSemiJoinTransposeProject extends OneExplorationRuleFactory .when(join -> join.left().isAllSlots()) .then(topSemi -> { LogicalJoin<GroupPlan, GroupPlan> bottomSemi = topSemi.left().child(); - LogicalProject abProject = topSemi.left(); + LogicalProject<LogicalJoin<GroupPlan, GroupPlan>> abProject = topSemi.left(); GroupPlan a = bottomSemi.left(); GroupPlan b = bottomSemi.right(); GroupPlan c = topSemi.right(); Set<ExprId> aOutputExprIdSet = a.getOutputExprIdSet(); - Set<NamedExpression> acProjects = (Set<NamedExpression>) abProject.getProjects() - .stream().filter(slot -> !(slot instanceof MarkJoinSlotReference)) + // if bottom semi join is mark join, we need remove the mark join slot creating by bottom semi join + // from the project list before swapping the bottom semi to top semi + Set<NamedExpression> acProjects = abProject.getProjects().stream() + .filter(slot -> !(abProject.child().isMarkJoin() + && abProject.child().getMarkJoinSlotReference().get() + .getExprId() == slot.getExprId())) .collect(Collectors.toSet()); bottomSemi.getConditionSlot() diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java index ba82d58f2a5..e68d9fa93c2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java @@ -69,4 +69,28 @@ public class SemiJoinSemiJoinTransposeProjectTest implements MemoPatternMatchSup ) ); } + + @Test + public void testSemiProjectSemiCommuteMarkJoin() { + LogicalPlan topJoin = new LogicalPlanBuilder(scan1) + .markJoin(scan2, JoinType.LEFT_SEMI_JOIN, Pair.of(0, 0)) + .project(ImmutableList.of(0, 2)) + .markJoin(scan3, JoinType.LEFT_SEMI_JOIN, Pair.of(0, 1)) + .build(); + PlanChecker.from(MemoTestUtils.createConnectContext(), topJoin) + .applyExploration(SemiJoinSemiJoinTransposeProject.INSTANCE.build()) + .matchesExploration( + logicalProject( + logicalJoin( + logicalProject( + logicalJoin( + logicalOlapScan().when(scan -> scan.getTable().getName().equals("t1")), + logicalOlapScan().when(scan -> scan.getTable().getName().equals("t3")) + ).when(join -> join.getJoinType() == JoinType.LEFT_SEMI_JOIN) + ).when(project -> project.getProjects().size() == 2), + logicalOlapScan().when(scan -> scan.getTable().getName().equals("t2")) + ).when(join -> join.getJoinType() == JoinType.LEFT_SEMI_JOIN) + ) + ); + } } diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out index a08212e8cc9..15b70cd548a 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out @@ -10,23 +10,23 @@ PhysicalResultSink --------------hashAgg[LOCAL] ----------------PhysicalProject ------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) ---------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() +--------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] ----------------------PhysicalDistribute[DistributionSpecHash] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk] +--------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] apply RFs: RF5 +------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 ----------------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------------PhysicalProject --------------------------------filter((date_dim.d_moy <= 4) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2001)) ----------------------------------PhysicalOlapScan[date_dim] ----------------------PhysicalProject -------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk] +------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() --------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] +------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF3 RF4 +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF3 --------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------PhysicalProject ------------------------------------filter((date_dim.d_moy <= 4) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2001)) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out index a08212e8cc9..15b70cd548a 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out @@ -10,23 +10,23 @@ PhysicalResultSink --------------hashAgg[LOCAL] ----------------PhysicalProject ------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) ---------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() +--------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] ----------------------PhysicalDistribute[DistributionSpecHash] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk] +--------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] apply RFs: RF5 +------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 ----------------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------------PhysicalProject --------------------------------filter((date_dim.d_moy <= 4) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2001)) ----------------------------------PhysicalOlapScan[date_dim] ----------------------PhysicalProject -------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk] +------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() --------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] +------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF3 RF4 +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF3 --------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------PhysicalProject ------------------------------------filter((date_dim.d_moy <= 4) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2001)) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
