This is an automated email from the ASF dual-hosted git repository.
zhenchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new b457588783 [CALCITE-7104] Remove duplicate sort keys
b457588783 is described below
commit b4575887831b6895a697073b5a4a4e371c2f694a
Author: Zhen Chen <[email protected]>
AuthorDate: Sat Aug 23 15:48:16 2025 +0800
[CALCITE-7104] Remove duplicate sort keys
---
.../org/apache/calcite/rel/rules/CoreRules.java | 4 +
.../rel/rules/SortRemoveDuplicateKeysRule.java | 111 +++++++++++++++++++++
.../org/apache/calcite/test/RelOptRulesTest.java | 23 +++++
.../org/apache/calcite/test/RelOptRulesTest.xml | 50 ++++++++++
4 files changed, 188 insertions(+)
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
index e6a80b1e42..436a77a170 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
@@ -936,4 +936,8 @@ private CoreRules() {}
/** Rule that converts true filtered aggregates into CASE-style filtered
aggregates. */
public static final AggregateFilterToCaseRule AGGREGATE_FILTER_TO_CASE =
AggregateFilterToCaseRule.Config.DEFAULT.toRule();
+
+ /** Rule that removes duplicate {@link Sort} keys. */
+ public static final SortRemoveDuplicateKeysRule SORT_REMOVE_DUPLICATE_KEYS =
+ SortRemoveDuplicateKeysRule.Config.DEFAULT.toRule();
}
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/SortRemoveDuplicateKeysRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/SortRemoveDuplicateKeysRule.java
new file mode 100644
index 0000000000..525bece4c8
--- /dev/null
+++
b/core/src/main/java/org/apache/calcite/rel/rules/SortRemoveDuplicateKeysRule.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.rules;
+
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.tools.RelBuilder;
+
+import org.immutables.value.Value;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Planner rule that removes duplicate sort keys.
+ *
+ * <p>A key is dropped when {@link RelMetadataQuery#determines} returns true
+ * for it and an earlier key that is kept. Offset and fetch are preserved.
+ *
+ * <p>The original SQL:
+ * <pre>{@code
+ * SELECT d1 FROM (
+ *   SELECT deptno AS d1, deptno AS d2 FROM dept
+ * ) AS tmp ORDER BY d1, d2
+ * }</pre>
+ *
+ * <p>The original logical plan:
+ * <pre>
+ * LogicalProject(D1=[$0])
+ *   LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
+ *     LogicalProject(D1=[$0], D2=[$0])
+ *       LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ * </pre>
+ *
+ * <p>After optimization:
+ * <pre>
+ * LogicalProject(D1=[$0])
+ *   LogicalSort(sort0=[$0], dir0=[ASC])
+ *     LogicalProject(D1=[$0], D2=[$0])
+ *       LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ * </pre>
+ */
+@Value.Enclosing
+public class SortRemoveDuplicateKeysRule
+    extends RelRule<SortRemoveDuplicateKeysRule.Config>
+    implements TransformationRule {
+
+  /** Creates a SortRemoveDuplicateKeysRule. */
+  protected SortRemoveDuplicateKeysRule(Config config) {
+    super(config);
+  }
+
+  @Override public void onMatch(RelOptRuleCall call) {
+    final Sort sort = call.rel(0);
+    final RelMetadataQuery mq = call.getMetadataQuery();
+    final List<RelFieldCollation> collations =
+        sort.getCollation().getFieldCollations();
+    final List<RelFieldCollation> newCollations = new ArrayList<>();
+
+    // NOTE(review): this asks whether the candidate key determines a kept key.
+    // Dropping the candidate presumably needs the kept key to determine it;
+    // confirm the argument order against RelMetadataQuery#determines.
+    for (RelFieldCollation field : collations) {
+      final int key = field.getFieldIndex();
+      if (newCollations.stream().noneMatch(kept ->
+          Boolean.TRUE.equals(mq.determines(sort, key, kept.getFieldIndex())))) {
+        newCollations.add(field);
+      }
+    }
+
+    if (collations.size() == newCollations.size()) {
+      return;
+    }
+
+    // Carry over offset/fetch; sort(RelCollation) alone would drop the limit.
+    final RelBuilder relBuilder = call.builder().push(sort.getInput());
+    relBuilder.sortLimit(sort.offset, sort.fetch,
+        relBuilder.fields(RelCollations.of(newCollations)));
+    call.transformTo(relBuilder.build());
+  }
+
+  /** Rule configuration. */
+  @Value.Immutable
+  public interface Config extends RelRule.Config {
+    Config DEFAULT = ImmutableSortRemoveDuplicateKeysRule.Config.of()
+        .withOperandSupplier(b0 ->
+            b0.operand(Sort.class).anyInputs());
+
+    @Override default SortRemoveDuplicateKeysRule toRule() {
+      return new SortRemoveDuplicateKeysRule(this);
+    }
+  }
+}
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index c8fdd3111c..c8971b62e0 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -11155,4 +11155,27 @@ private void
checkLoptOptimizeJoinRule(LoptOptimizeJoinRule rule) {
.withRule(CoreRules.AGGREGATE_FILTER_TO_CASE)
.check();
}
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-7104">[CALCITE-7104]
+   * Remove duplicate sort keys</a>. */
+  @Test void testSortRemoveDuplicateKeys() {
+    // d2 aliases the same column as d1, and d1 is then repeated with DESC.
+    sql("select d1\n"
+        + " from (select deptno as d1, deptno as d2 from dept) as tmp\n"
+        + " order by d1, d2, d1 desc\n")
+        .withRule(CoreRules.SORT_REMOVE_DUPLICATE_KEYS)
+        .check();
+  }
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-7104">[CALCITE-7104]
+   * Remove duplicate sort keys</a>. */
+  @Test void testSortRemoveDuplicateKeysJoin() {
+    // Duplicate sort keys on top of a projection over a join.
+    sql("select * from (select deptno as d1, deptno as d2 from emp) as t1\n"
+        + " join emp t2 on t1.d1 = t2.deptno order by t1.d1, t1.d2, t1.d1 DESC NULLS FIRST")
+        .withRule(CoreRules.SORT_REMOVE_DUPLICATE_KEYS)
+        .check();
+  }
}
diff --git
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 7f06509681..abfd5f064b 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -17910,6 +17910,56 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2],
MGR=[$3], HIREDATE=[$4], SAL=[$
LogicalSort(sort0=[$7], sort1=[$0], dir0=[ASC], dir1=[ASC])
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], EXPR$9=[null:NULL])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSortRemoveDuplicateKeys">
+ <Resource name="sql">
+ <![CDATA[select d1
+ from (select deptno as d1, deptno as d2 from dept) as tmp
+ order by d1, d2, d1 desc
+]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(D1=[$0])
+ LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
+ LogicalProject(D1=[$0], D2=[$0])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(D1=[$0])
+ LogicalSort(sort0=[$0], dir0=[ASC])
+ LogicalProject(D1=[$0], D2=[$0])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSortRemoveDuplicateKeysJoin">
+ <Resource name="sql">
+ <![CDATA[select * from (select deptno as d1, deptno as d2 from emp) as t1
+ join emp t2 on t1.d1 = t2.deptno order by t1.d1, t1.d2, t1.d1 DESC NULLS
FIRST]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
+ LogicalProject(D1=[$0], D2=[$1], EMPNO=[$2], ENAME=[$3], JOB=[$4], MGR=[$5],
HIREDATE=[$6], SAL=[$7], COMM=[$8], DEPTNO=[$9], SLACKER=[$10])
+ LogicalJoin(condition=[=($0, $9)], joinType=[inner])
+ LogicalProject(D1=[$7], D2=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalSort(sort0=[$0], dir0=[ASC])
+ LogicalProject(D1=[$0], D2=[$1], EMPNO=[$2], ENAME=[$3], JOB=[$4], MGR=[$5],
HIREDATE=[$6], SAL=[$7], COMM=[$8], DEPTNO=[$9], SLACKER=[$10])
+ LogicalJoin(condition=[=($0, $9)], joinType=[inner])
+ LogicalProject(D1=[$7], D2=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>