This is an automated email from the ASF dual-hosted git repository.
mbudiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new f47928a629 [CALCITE-6880] Implement IntersectToSemiJoinRule
f47928a629 is described below
commit f47928a629a914c80a02f09f82570081c7238977
Author: Zhen Chen <[email protected]>
AuthorDate: Tue Apr 8 08:46:07 2025 +0800
[CALCITE-6880] Implement IntersectToSemiJoinRule
---
.../org/apache/calcite/rel/rules/CoreRules.java | 4 +
.../calcite/rel/rules/IntersectToSemiJoinRule.java | 108 +++++++++++++++++
.../java/org/apache/calcite/test/JdbcTest.java | 20 +++
.../org/apache/calcite/test/RelOptRulesTest.java | 42 +++++++
.../org/apache/calcite/test/RelOptRulesTest.xml | 134 +++++++++++++++++++++
5 files changed, 308 insertions(+)
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
index 4e2f9a90a1..31b47aedf9 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
@@ -365,6 +365,10 @@ private CoreRules() {}
public static final IntersectToExistsRule INTERSECT_TO_EXISTS =
IntersectToExistsRule.Config.DEFAULT.toRule();
+ /** Rule to translates a {@link Intersect} to {@link Join#isSemiJoin
semi-join}. */
+ public static final IntersectToSemiJoinRule INTERSECT_TO_SEMI_JOIN =
+ IntersectToSemiJoinRule.Config.DEFAULT.toRule();
+
/** Rule that translates a distinct
* {@link Minus} into a group of operators
* composed of {@link Union}, {@link Aggregate}, etc. */
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/IntersectToSemiJoinRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/IntersectToSemiJoinRule.java
new file mode 100644
index 0000000000..3da2dfa97d
--- /dev/null
+++
b/core/src/main/java/org/apache/calcite/rel/rules/IntersectToSemiJoinRule.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.rules;
+
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Intersect;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.logical.LogicalIntersect;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.tools.RelBuilder;
+
+import org.immutables.value.Value;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Planner rule that translates a {@link org.apache.calcite.rel.core.Intersect}
+ * to a series of {@link org.apache.calcite.rel.core.Join} that type is
+ * {@link org.apache.calcite.rel.core.JoinRelType#SEMI}.
+ */
[email protected]
+public class IntersectToSemiJoinRule
+ extends RelRule<IntersectToSemiJoinRule.Config>
+ implements TransformationRule {
+
+ /** Creates an IntersectToSemiJoinRule. */
+ protected IntersectToSemiJoinRule(Config config) {
+ super(config);
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ @Override public void onMatch(RelOptRuleCall call) {
+ final Intersect intersect = call.rel(0);
+ if (intersect.all) {
+ return; // nothing we can do
+ }
+
+ final RelBuilder builder = call.builder();
+ final RexBuilder rexBuilder = builder.getRexBuilder();
+
+ List<RelNode> inputs = intersect.getInputs();
+ if (inputs.size() != 2) {
+ return;
+ }
+
+ RelNode left = inputs.get(0);
+ RelNode right = inputs.get(1);
+
+ List<RexNode> conditions = new ArrayList<>();
+ int fieldCount = left.getRowType().getFieldCount();
+
+ for (int i = 0; i < fieldCount; i++) {
+ RelDataType leftFieldType =
left.getRowType().getFieldList().get(i).getType();
+ RelDataType rightFieldType =
right.getRowType().getFieldList().get(i).getType();
+
+ conditions.add(
+ builder.isNotDistinctFrom(
+ rexBuilder.makeInputRef(leftFieldType, i),
+ rexBuilder.makeInputRef(rightFieldType, i + fieldCount)));
+ }
+ RexNode condition = RexUtil.composeConjunction(rexBuilder, conditions);
+
+ builder.push(left)
+ .push(right)
+ .join(JoinRelType.SEMI, condition)
+ .distinct();
+
+ call.transformTo(builder.build());
+ }
+
+ /** Rule configuration. */
+ @Value.Immutable
+ public interface Config extends RelRule.Config {
+ Config DEFAULT = ImmutableIntersectToSemiJoinRule.Config.of()
+ .withOperandFor(LogicalIntersect.class);
+
+ @Override default IntersectToSemiJoinRule toRule() {
+ return new IntersectToSemiJoinRule(this);
+ }
+
+ /** Defines an operand tree for the given classes. */
+ default Config withOperandFor(Class<? extends Intersect> intersectClass) {
+ return withOperandSupplier(b -> b.operand(intersectClass).anyInputs())
+ .as(Config.class);
+ }
+ }
+}
diff --git a/core/src/test/java/org/apache/calcite/test/JdbcTest.java
b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
index ac1948f2e6..82ab47d371 100644
--- a/core/src/test/java/org/apache/calcite/test/JdbcTest.java
+++ b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
@@ -3968,6 +3968,26 @@ public void checkOrderBy(final boolean desc,
"commission=null");
}
+ /** Test case for <a
href="https://issues.apache.org/jira/browse/CALCITE-6880">[CALCITE-6880]
+ * Implement IntersectToSemiJoinRule</a>. */
+ @Test void testIntersectToSemiJoin() {
+ final String sql = ""
+ + "select \"commission\" from \"hr\".\"emps\"\n"
+ + "intersect\n"
+ + "select \"commission\" from \"hr\".\"emps\" where \"empid\">=150";
+ CalciteAssert.hr()
+ .query(sql)
+ .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>)
+ planner -> {
+ planner.removeRule(CoreRules.INTERSECT_TO_DISTINCT);
+ planner.removeRule(EnumerableRules.ENUMERABLE_INTERSECT_RULE);
+ planner.addRule(CoreRules.INTERSECT_TO_SEMI_JOIN);
+ })
+ .explainContains("")
+ .returnsUnordered("commission=500",
+ "commission=null");
+ }
+
@Test void testExcept() {
final String sql = ""
+ "select \"empid\", \"name\" from \"hr\".\"emps\" where
\"deptno\"=10\n"
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index cddd656470..887f93c9a2 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -3570,6 +3570,48 @@ private void
checkPushJoinThroughUnionOnRightDoesNotMatchSemiOrAntiJoin(JoinRelT
.check();
}
+ /** Test case for <a
href="https://issues.apache.org/jira/browse/CALCITE-6880">
+ * [CALCITE-6880] Implement IntersectToSemiJoinRule</a>. */
+ @Test void testIntersectToSemiJoin() {
+ final String sql = "select ename from emp where deptno = 10\n"
+ + "intersect\n"
+ + "select ename from emp where deptno = 20\n";
+ sql(sql).withRule(CoreRules.INTERSECT_TO_SEMI_JOIN)
+ .check();
+ }
+
+ /** Test case for <a
href="https://issues.apache.org/jira/browse/CALCITE-6880">
+ * [CALCITE-6880] Implement IntersectToSemiJoinRule</a>. */
+ @Test void testIntersectToSemiJoinMultiCol() {
+ final String sql = "select deptno, ename from emp where deptno = 10\n"
+ + "intersect\n"
+ + "select deptno, ename from emp where deptno = 20\n";
+ sql(sql).withRule(CoreRules.INTERSECT_TO_SEMI_JOIN)
+ .check();
+ }
+
+ /** Test case for <a
href="https://issues.apache.org/jira/browse/CALCITE-6880">
+ * [CALCITE-6880] Implement IntersectToSemiJoinRule</a>. */
+ @Test void testIntersectToSemiJoinAll() {
+ final String sql = "select ename from emp where deptno = 10\n"
+ + "intersect\n"
+ + "select ename from emp where deptno = 20\n"
+ + "intersect all\n"
+ + "select ename from emp where deptno = 30\n";
+ sql(sql).withRule(CoreRules.INTERSECT_TO_SEMI_JOIN)
+ .check();
+ }
+
+ /** Test case for <a
href="https://issues.apache.org/jira/browse/CALCITE-6880">
+ * [CALCITE-6880] Implement IntersectToSemiJoinRule</a>. */
+ @Test void testIntersectToSemiJoinCoercion() {
+ final String sql = "select ename from emp where deptno = 10\n"
+ + "intersect\n"
+ + "select job from emp where deptno = 20\n";
+ sql(sql).withRule(CoreRules.INTERSECT_TO_SEMI_JOIN)
+ .check();
+ }
+
/** Tests {@link CoreRules#MINUS_MERGE}, which merges 2
* {@link Minus} operators into a single {@code Minus} with 3
* inputs. */
diff --git
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index ae99853c8d..66552188c6 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -6293,6 +6293,140 @@ LogicalIntersect(all=[true])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(NAME=[CAST($1):VARCHAR(20) NOT NULL])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testIntersectToSemiJoin">
+ <Resource name="sql">
+ <![CDATA[select ename from emp where deptno = 10
+intersect
+select ename from emp where deptno = 20
+]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalIntersect(all=[false])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 10)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 20)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalAggregate(group=[{0}])
+ LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], joinType=[semi])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 10)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 20)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testIntersectToSemiJoinAll">
+ <Resource name="sql">
+ <![CDATA[select ename from emp where deptno = 10
+intersect
+select ename from emp where deptno = 20
+intersect all
+select ename from emp where deptno = 30
+]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalIntersect(all=[true])
+ LogicalIntersect(all=[false])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 10)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 20)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 30)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalIntersect(all=[true])
+ LogicalAggregate(group=[{0}])
+ LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], joinType=[semi])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 10)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 20)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 30)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testIntersectToSemiJoinCoercion">
+ <Resource name="sql">
+ <![CDATA[select ename from emp where deptno = 10
+intersect
+select job from emp where deptno = 20
+]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalIntersect(all=[false])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 10)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(JOB=[CAST($2):VARCHAR(20) NOT NULL])
+ LogicalFilter(condition=[=($7, 20)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalAggregate(group=[{0}])
+ LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], joinType=[semi])
+ LogicalProject(ENAME=[$1])
+ LogicalFilter(condition=[=($7, 10)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(JOB=[CAST($2):VARCHAR(20) NOT NULL])
+ LogicalFilter(condition=[=($7, 20)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testIntersectToSemiJoinMultiCol">
+ <Resource name="sql">
+ <![CDATA[select deptno, ename from emp where deptno = 10
+intersect
+select deptno, ename from emp where deptno = 20
+]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalIntersect(all=[false])
+ LogicalProject(DEPTNO=[$7], ENAME=[$1])
+ LogicalFilter(condition=[=($7, 10)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(DEPTNO=[$7], ENAME=[$1])
+ LogicalFilter(condition=[=($7, 20)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalAggregate(group=[{0, 1}])
+ LogicalJoin(condition=[AND(IS NOT DISTINCT FROM($0, $2), IS NOT DISTINCT
FROM($1, $3))], joinType=[semi])
+ LogicalProject(DEPTNO=[$7], ENAME=[$1])
+ LogicalFilter(condition=[=($7, 10)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(DEPTNO=[$7], ENAME=[$1])
+ LogicalFilter(condition=[=($7, 20)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>