This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 1be38e798da push topn-filter to both sides of inner join (#33112)
1be38e798da is described below

commit 1be38e798dab8dcaeadafe721b0ea128a2ff4de1
Author: minghong <[email protected]>
AuthorDate: Mon Apr 1 22:46:28 2024 +0800

    push topn-filter to both sides of inner join (#33112)
---
 .../doris/nereids/processor/post/TopNScanOpt.java  | 50 +++++++++++++++-------
 .../suites/nereids_tpch_p0/tpch/topn-filter.groovy | 31 ++++++++++++--
 2 files changed, 61 insertions(+), 20 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java
index 63754afe4df..a8129639f5e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java
@@ -103,26 +103,44 @@ public class TopNScanOpt extends PlanPostProcessor {
     }
 
     private OlapScan findScanNodeBySlotReference(Plan root, SlotReference 
slot, boolean nullsFirst) {
-        // topn-filter cannot be pushed through right/full outer join if the 
first orderKey is nulls first
-        if (nullsFirst && root instanceof Join) {
-            Join join = (Join) root;
-            if (join.getJoinType().isRightOuterJoin() || 
join.getJoinType().isFullOuterJoin()) {
+        if (root instanceof PhysicalWindow) {
+            return null;
+        }
+
+        if (root instanceof OlapScan) {
+            if (root.getOutputSet().contains(slot)) {
+                return (OlapScan) root;
+            } else {
                 return null;
             }
         }
+
         OlapScan target = null;
-        if (root instanceof OlapScan && root.getOutputSet().contains(slot)) {
-            return (OlapScan) root;
-        } else {
-            if (! root.children().isEmpty()) {
-                // for join and intersect, push topn-filter to their left 
child.
-                // TODO for union, topn-filter can be pushed down to all of 
its children.
-                Plan child = root.child(0);
-                if (!(child instanceof PhysicalWindow) && 
child.getOutputSet().contains(slot)) {
-                    target = findScanNodeBySlotReference(child, slot, 
nullsFirst);
-                    if (target != null) {
-                        return target;
-                    }
+        if (root instanceof Join) {
+            Join join = (Join) root;
+            if (nullsFirst && join.getJoinType().isOuterJoin()) {
+                // in fact, topn-filter can be pushed down to the left child 
of leftOuterJoin
+                // and to the right child of rightOuterJoin.
+                // but we have rule to push topn down to the left/right side. 
and topn-filter
+                // will be generated according to the inferred topn node.
+                return null;
+            }
+            // try to push to both left and right child
+            if (root.child(0).getOutputSet().contains(slot)) {
+                target = findScanNodeBySlotReference(root.child(0), slot, 
nullsFirst);
+            } else {
+                target = findScanNodeBySlotReference(root.child(1), slot, 
nullsFirst);
+            }
+            return target;
+        }
+
+        if (!root.children().isEmpty()) {
+            // TODO for set operator, topn-filter can be pushed down to all of 
its children.
+            Plan child = root.child(0);
+            if (child.getOutputSet().contains(slot)) {
+                target = findScanNodeBySlotReference(child, slot, nullsFirst);
+                if (target != null) {
+                    return target;
                 }
             }
         }
diff --git a/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy 
b/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy
index 3a29274a2fe..abcd33c7746 100644
--- a/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy
+++ b/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy
@@ -79,15 +79,15 @@ suite("topn-filter") {
 
     qt_check_result2 "${multi_topn_desc}"
 
-    // do not use topn-filter
+    // push down topn-filter to both join children
     explain {
         sql """
                 select o_orderkey, c_custkey
                 from orders 
-                join[broadcast] customer on o_custkey = c_custkey 
+                join customer on o_custkey = c_custkey 
                 order by c_custkey limit 2; 
             """
-        notContains "TOPN OPT:"
+        contains "TOPN OPT:"
     }
 
     // push topn filter down through AGG
@@ -123,10 +123,33 @@ suite("topn-filter") {
     }
 
     explain {
-        sql "select * from nation right outer join region on r_regionkey = 
n_regionkey order by n_regionkey nulls last limit 1; "
+        sql "select * from nation full outer join region on r_regionkey = 
n_regionkey order by n_regionkey nulls first limit 1; "
+        notContains "TOPN OPT:"
+    }
+
+    // this topn-filter is not generated from the top TOPN.
+    // topn
+    //  +--left outer join
+    //         +-- nation
+    //         +-- region
+    // after topn push down:
+    // topn1
+    //  +--left outer join
+    //         +-- topn2
+    //                +-- nation
+    //         +-- region
+    // this topn-filter is generated by topn2, not topn1
+    explain {
+        sql "select * from nation left outer join region on r_regionkey = 
n_regionkey order by n_regionkey nulls first limit 1; "
         contains "TOPN OPT:"
     }
 
+    // TODO: support latter, push topn to right outer join
+    // explain {
+    //     sql "select * from nation right outer join region on r_regionkey = 
n_regionkey order by n_regionkey nulls last limit 1; "
+    //     contains "TOPN OPT:"
+    // }
+
     explain {
         sql "select * from nation right outer join region on r_regionkey = 
n_regionkey order by n_regionkey nulls last, n_name nulls first limit 1; "
         contains "TOPN OPT:"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to