This is an automated email from the ASF dual-hosted git repository.
rubenql pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new 80cc425 [CALCITE-2624] Add a rule to copy a sort below a join
operator (Khawla Mouhoubi)
80cc425 is described below
commit 80cc4253a3a5b0677382725d7471ed1163cc204e
Author: Khawlamhb <[email protected]>
AuthorDate: Mon Aug 5 14:57:43 2019 +0200
[CALCITE-2624] Add a rule to copy a sort below a join operator (Khawla
Mouhoubi)
---
.../apache/calcite/rel/rules/SortJoinCopyRule.java | 165 +++++++++++++++++++++
.../org/apache/calcite/test/RelOptRulesTest.java | 77 ++++++++++
.../org/apache/calcite/test/RelOptRulesTest.xml | 153 +++++++++++++++++++
3 files changed, 395 insertions(+)
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/SortJoinCopyRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/SortJoinCopyRule.java
new file mode 100644
index 0000000..d6eab35
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SortJoinCopyRule.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.logical.LogicalSort;
+import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.tools.RelBuilderFactory;
+
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Planner rule that copies a {@link org.apache.calcite.rel.core.Sort} past a
+ * {@link org.apache.calcite.rel.core.Join} without its limit and offset.
+ * The original {@link org.apache.calcite.rel.core.Sort} is preserved but
+ * can be potentially removed by {@link
org.apache.calcite.rel.rules.SortRemoveRule} if redundant.
+ *
+ * Some examples where {@link org.apache.calcite.rel.rules.SortJoinCopyRule}
can be useful are:
+ * allowing a {@link org.apache.calcite.rel.core.Sort} to be incorporated in
an index scan,
+ * facilitating the use of operators requiring sorted inputs,
+ * and allowing the sort to be performed on a possibly smaller result.
+ */
+public class SortJoinCopyRule extends RelOptRule {
+
+ public static final SortJoinCopyRule INSTANCE =
+ new SortJoinCopyRule(LogicalSort.class,
+ Join.class, RelFactories.LOGICAL_BUILDER);
+
+ //~ Constructors -----------------------------------------------------------
+
+ /** Creates a SortJoinCopyRule. */
+ protected SortJoinCopyRule(Class<? extends Sort> sortClass,
+ Class<? extends Join> joinClass, RelBuilderFactory relBuilderFactory) {
+ super(
+ operand(sortClass,
+ operand(joinClass, any())),
+ relBuilderFactory, null);
+ }
+
+ //~ Methods -----------------------------------------------------------------
+
+ @Override public void onMatch(RelOptRuleCall call) {
+ final Sort sort = call.rel(0);
+ final Join join = call.rel(1);
+ final RelMetadataQuery metadataQuery = call.getMetadataQuery();
+
+ final RelNode newLeftInput;
+ final RelNode newRightInput;
+
+ final List<RelFieldCollation> leftFieldCollation = new ArrayList<>();
+ final List<RelFieldCollation> rightFieldCollation = new ArrayList<>();
+
+ // Decompose sort collations into left and right collations
+ for (RelFieldCollation relFieldCollation :
sort.getCollation().getFieldCollations()) {
+ if (relFieldCollation.getFieldIndex() >=
join.getLeft().getRowType().getFieldCount()) {
+ rightFieldCollation.add(relFieldCollation);
+ } else {
+ leftFieldCollation.add(relFieldCollation);
+ }
+ }
+
+ // Add sort to new left node only if sort collations
+ // contained fields from left table
+ if (leftFieldCollation.isEmpty()) {
+ newLeftInput = join.getLeft();
+ } else {
+ final RelCollation leftCollation =
RelCollationTraitDef.INSTANCE.canonize(
+ RelCollations.of(leftFieldCollation));
+ // If left table already sorted don't add a sort
+ if (RelMdUtil.checkInputForCollationAndLimit(
+ metadataQuery,
+ join.getLeft(),
+ leftCollation,
+ null,
+ null)) {
+ newLeftInput = join.getLeft();
+ } else {
+ newLeftInput = sort.copy(
+ sort.getTraitSet().replaceIf(
+ RelCollationTraitDef.INSTANCE,
+ () -> leftCollation),
+ join.getLeft(),
+ leftCollation,
+ null,
+ null);
+ }
+ }
+ // Add sort to new right node only if sort collations
+ // contained fields from right table
+ if (rightFieldCollation.isEmpty()) {
+ newRightInput = join.getRight();
+ } else {
+ final RelCollation rightCollation =
RelCollationTraitDef.INSTANCE.canonize(
+ RelCollations.shift(
+ RelCollations.of(rightFieldCollation),
+ -join.getLeft().getRowType().getFieldCount()));
+ // If right table already sorted don't add a sort
+ if (RelMdUtil.checkInputForCollationAndLimit(
+ metadataQuery,
+ join.getRight(),
+ rightCollation,
+ null,
+ null)) {
+ newRightInput = join.getRight();
+ } else {
+ newRightInput = sort.copy(
+ sort.getTraitSet().replaceIf(
+ RelCollationTraitDef.INSTANCE,
+ () -> rightCollation),
+ join.getRight(),
+ rightCollation,
+ null,
+ null);
+ }
+ }
+ // If no change was made no need to apply the rule
+ if (newLeftInput == join.getLeft() && newRightInput == join.getRight()) {
+ return;
+ }
+
+ final RelNode joinCopy = join.copy(
+ join.getTraitSet(),
+ join.getCondition(),
+ newLeftInput,
+ newRightInput,
+ join.getJoinType(),
+ join.isSemiJoinDone());
+ final RelNode sortCopy = sort.copy(
+ sort.getTraitSet(),
+ joinCopy,
+ sort.getCollation(),
+ sort.offset,
+ sort.fetch);
+
+ call.transformTo(sortCopy);
+ }
+}
+
+// End SortJoinCopyRule.java
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index 60d0475..6915522 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -117,6 +117,7 @@ import
org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule;
import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule;
import org.apache.calcite.rel.rules.SemiJoinRemoveRule;
import org.apache.calcite.rel.rules.SemiJoinRule;
+import org.apache.calcite.rel.rules.SortJoinCopyRule;
import org.apache.calcite.rel.rules.SortJoinTransposeRule;
import org.apache.calcite.rel.rules.SortProjectTransposeRule;
import org.apache.calcite.rel.rules.SortRemoveConstantKeysRule;
@@ -6066,6 +6067,82 @@ public class RelOptRulesTest extends RelOptTestBase {
call.transformTo(myProject);
}
}
+
+ @Test public void testSortJoinCopyInnerJoinOrderBy() {
+ final HepProgram preProgram = new HepProgramBuilder()
+ .addRuleInstance(SortProjectTransposeRule.INSTANCE)
+ .build();
+ final HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(SortJoinCopyRule.INSTANCE)
+ .build();
+ final String sql = "select * from sales.emp join sales.dept on\n"
+ + "sales.emp.deptno = sales.dept.deptno order by sal";
+ final HepPlanner hepPlanner = new HepPlanner(program);
+ checkPlanning(tester, preProgram, hepPlanner, sql);
+ }
+
+ @Test public void testSortJoinCopyInnerJoinOrderByLimit() {
+ final HepProgram preProgram = new HepProgramBuilder()
+ .addRuleInstance(SortProjectTransposeRule.INSTANCE)
+ .build();
+ final HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(SortJoinCopyRule.INSTANCE)
+ .build();
+ final String sql = "select * from sales.emp e join (\n"
+ + " select * from sales.dept d) d on e.deptno = d.deptno\n"
+ + "order by sal limit 10";
+ checkPlanning(tester, preProgram, new HepPlanner(program), sql);
+ }
+
+ @Test public void testSortJoinCopyInnerJoinOrderByTwoFields() {
+ final HepProgram preProgram = new HepProgramBuilder()
+ .addRuleInstance(SortProjectTransposeRule.INSTANCE)
+ .build();
+ final HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(SortJoinCopyRule.INSTANCE)
+ .build();
+ final String sql = "select * from sales.emp e join sales.dept d on\n"
+ + " e.deptno = d.deptno order by e.sal,d.name";
+ checkPlanning(tester, preProgram, new HepPlanner(program), sql);
+ }
+
+ @Test public void testSortJoinCopySemiJoinOrderBy() {
+ final HepProgram preProgram = new HepProgramBuilder()
+ .addRuleInstance(SemiJoinRule.PROJECT)
+ .build();
+ final HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(SortJoinCopyRule.INSTANCE)
+ .build();
+ final String sql = "select * from sales.dept d where d.deptno in\n"
+ + " (select e.deptno from sales.emp e) order by d.deptno";
+ checkPlanning(tester, preProgram, new HepPlanner(program), sql);
+ }
+
+ @Test public void testSortJoinCopySemiJoinOrderByLimitOffset() {
+ final HepProgram preProgram = new HepProgramBuilder()
+ .addRuleInstance(SemiJoinRule.PROJECT)
+ .build();
+ final HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(SortJoinCopyRule.INSTANCE)
+ .build();
+ final String sql = "select * from sales.dept d where d.deptno in\n"
+ + " (select e.deptno from sales.emp e) order by d.deptno limit 10
offset 2";
+ // Do not copy the limit and offset
+ checkPlanning(tester, preProgram, new HepPlanner(program), sql);
+ }
+
+ @Test public void testSortJoinCopySemiJoinOrderByOffset() {
+ final HepProgram preProgram = new HepProgramBuilder()
+ .addRuleInstance(SemiJoinRule.PROJECT)
+ .build();
+ final HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(SortJoinCopyRule.INSTANCE)
+ .build();
+ final String sql = "select * from sales.dept d where d.deptno in"
+ + " (select e.deptno from sales.emp e) order by d.deptno offset 2";
+ // Do not copy the offset
+ checkPlanning(tester, preProgram, new HepPlanner(program), sql);
+ }
}
// End RelOptRulesTest.java
diff --git
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 4142664..2220bcf 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -9918,6 +9918,159 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2],
MGR=[$3], HIREDATE=[$4], SAL=[$
]]>
</Resource>
</TestCase>
+ <TestCase name="testSortJoinCopyInnerJoinOrderBy">
+ <Resource name="sql">
+ <![CDATA[select * from sales.emp join sales.dept on
+sales.emp.deptno = sales.dept.deptno order by sal]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+ LogicalSort(sort0=[$5], dir0=[ASC])
+ LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+ LogicalSort(sort0=[$5], dir0=[ASC])
+ LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+ LogicalSort(sort0=[$5], dir0=[ASC])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSortJoinCopyInnerJoinOrderByLimit">
+ <Resource name="sql">
+ <![CDATA[select * from sales.emp e join (
+ select * from sales.dept d) d on e.deptno = d.deptno
+order by sal limit 10]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+ LogicalSort(sort0=[$5], dir0=[ASC], fetch=[10])
+ LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+ LogicalSort(sort0=[$5], dir0=[ASC], fetch=[10])
+ LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+ LogicalSort(sort0=[$5], dir0=[ASC])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSortJoinCopyInnerJoinOrderByTwoFields">
+ <Resource name="sql">
+ <![CDATA[select * from sales.emp e join sales.dept d on
+ e.deptno = d.deptno order by e.sal,d.name]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+ LogicalSort(sort0=[$5], sort1=[$10], dir0=[ASC], dir1=[ASC])
+ LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+ LogicalSort(sort0=[$5], sort1=[$10], dir0=[ASC], dir1=[ASC])
+ LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+ LogicalSort(sort0=[$5], dir0=[ASC])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalSort(sort0=[$1], dir0=[ASC])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSortJoinCopySemiJoinOrderBy">
+ <Resource name="sql">
+ <![CDATA[select * from sales.dept d where d.deptno in
+ (select e.deptno from sales.emp e) order by d.deptno]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalSort(sort0=[$0], dir0=[ASC])
+ LogicalJoin(condition=[=($0, $2)], joinType=[semi])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalSort(sort0=[$0], dir0=[ASC])
+ LogicalJoin(condition=[=($0, $2)], joinType=[semi])
+ LogicalSort(sort0=[$0], dir0=[ASC])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSortJoinCopySemiJoinOrderByLimitOffset">
+ <Resource name="sql">
+ <![CDATA[select * from sales.dept d where d.deptno in
+ (select e.deptno from sales.emp e) order by d.deptno limit 10 offset 2]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalSort(sort0=[$0], dir0=[ASC], offset=[2], fetch=[10])
+ LogicalJoin(condition=[=($0, $2)], joinType=[semi])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalSort(sort0=[$0], dir0=[ASC], offset=[2], fetch=[10])
+ LogicalJoin(condition=[=($0, $2)], joinType=[semi])
+ LogicalSort(sort0=[$0], dir0=[ASC])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSortJoinCopySemiJoinOrderByOffset">
+ <Resource name="sql">
+ <![CDATA[select * from sales.dept d where d.deptno in (select
e.deptno from sales.emp e) order by d.deptno offset 2]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalSort(sort0=[$0], dir0=[ASC], offset=[2])
+ LogicalJoin(condition=[=($0, $2)], joinType=[semi])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalSort(sort0=[$0], dir0=[ASC], offset=[2])
+ LogicalJoin(condition=[=($0, $2)], joinType=[semi])
+ LogicalSort(sort0=[$0], dir0=[ASC])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
<TestCase name="testSortRemovalAllKeysConstant">
<Resource name="sql">
<![CDATA[select count(*) as c