This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 1be38e798da push topn-filter to both sides of inner join (#33112)
1be38e798da is described below
commit 1be38e798dab8dcaeadafe721b0ea128a2ff4de1
Author: minghong <[email protected]>
AuthorDate: Mon Apr 1 22:46:28 2024 +0800
push topn-filter to both sides of inner join (#33112)
---
.../doris/nereids/processor/post/TopNScanOpt.java | 50 +++++++++++++++-------
.../suites/nereids_tpch_p0/tpch/topn-filter.groovy | 31 ++++++++++++--
2 files changed, 61 insertions(+), 20 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java
index 63754afe4df..a8129639f5e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java
@@ -103,26 +103,44 @@ public class TopNScanOpt extends PlanPostProcessor {
}
private OlapScan findScanNodeBySlotReference(Plan root, SlotReference
slot, boolean nullsFirst) {
- // topn-filter cannot be pushed through right/full outer join if the
first orderKey is nulls first
- if (nullsFirst && root instanceof Join) {
- Join join = (Join) root;
- if (join.getJoinType().isRightOuterJoin() ||
join.getJoinType().isFullOuterJoin()) {
+ if (root instanceof PhysicalWindow) {
+ return null;
+ }
+
+ if (root instanceof OlapScan) {
+ if (root.getOutputSet().contains(slot)) {
+ return (OlapScan) root;
+ } else {
return null;
}
}
+
OlapScan target = null;
- if (root instanceof OlapScan && root.getOutputSet().contains(slot)) {
- return (OlapScan) root;
- } else {
- if (! root.children().isEmpty()) {
- // for join and intersect, push topn-filter to their left
child.
- // TODO for union, topn-filter can be pushed down to all of
its children.
- Plan child = root.child(0);
- if (!(child instanceof PhysicalWindow) &&
child.getOutputSet().contains(slot)) {
- target = findScanNodeBySlotReference(child, slot,
nullsFirst);
- if (target != null) {
- return target;
- }
+ if (root instanceof Join) {
+ Join join = (Join) root;
+ if (nullsFirst && join.getJoinType().isOuterJoin()) {
+ // in fact, topn-filter can be pushed down to the left child
of leftOuterJoin
+ // and to the right child of rightOuterJoin.
+ // but we have rule to push topn down to the left/right side.
and topn-filter
+ // will be generated according to the inferred topn node.
+ return null;
+ }
+ // try to push to both left and right child
+ if (root.child(0).getOutputSet().contains(slot)) {
+ target = findScanNodeBySlotReference(root.child(0), slot,
nullsFirst);
+ } else {
+ target = findScanNodeBySlotReference(root.child(1), slot,
nullsFirst);
+ }
+ return target;
+ }
+
+ if (!root.children().isEmpty()) {
+ // TODO for set operator, topn-filter can be pushed down to all of
its children.
+ Plan child = root.child(0);
+ if (child.getOutputSet().contains(slot)) {
+ target = findScanNodeBySlotReference(child, slot, nullsFirst);
+ if (target != null) {
+ return target;
}
}
}
diff --git a/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy
b/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy
index 3a29274a2fe..abcd33c7746 100644
--- a/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy
+++ b/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy
@@ -79,15 +79,15 @@ suite("topn-filter") {
qt_check_result2 "${multi_topn_desc}"
- // do not use topn-filter
+ // push down topn-filter to both join children
explain {
sql """
select o_orderkey, c_custkey
from orders
- join[broadcast] customer on o_custkey = c_custkey
+ join customer on o_custkey = c_custkey
order by c_custkey limit 2;
"""
- notContains "TOPN OPT:"
+ contains "TOPN OPT:"
}
// push topn filter down through AGG
@@ -123,10 +123,33 @@ suite("topn-filter") {
}
explain {
- sql "select * from nation right outer join region on r_regionkey =
n_regionkey order by n_regionkey nulls last limit 1; "
+ sql "select * from nation full outer join region on r_regionkey =
n_regionkey order by n_regionkey nulls first limit 1; "
+ notContains "TOPN OPT:"
+ }
+
+ // this topn-filter is not generated from the top TOPN.
+ // topn
+ // +--left outer join
+ // +-- nation
+ // +-- region
+ // after topn push down:
+ // topn1
+ // +--left outer join
+ // +-- topn2
+ // +-- nation
+ // +-- region
+ // this topn-filter is generated by topn2, not topn1
+ explain {
+ sql "select * from nation left outer join region on r_regionkey =
n_regionkey order by n_regionkey nulls first limit 1; "
contains "TOPN OPT:"
}
+ // TODO: support latter, push topn to right outer join
+ // explain {
+ // sql "select * from nation right outer join region on r_regionkey =
n_regionkey order by n_regionkey nulls last limit 1; "
+ // contains "TOPN OPT:"
+ // }
+
explain {
sql "select * from nation right outer join region on r_regionkey =
n_regionkey order by n_regionkey nulls last, n_name nulls first limit 1; "
contains "TOPN OPT:"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]