This is an automated email from the ASF dual-hosted git repository.

mbudiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new f47928a629 [CALCITE-6880] Implement IntersectToSemiJoinRule
f47928a629 is described below

commit f47928a629a914c80a02f09f82570081c7238977
Author: Zhen Chen <[email protected]>
AuthorDate: Tue Apr 8 08:46:07 2025 +0800

    [CALCITE-6880] Implement IntersectToSemiJoinRule
---
 .../org/apache/calcite/rel/rules/CoreRules.java    |   4 +
 .../calcite/rel/rules/IntersectToSemiJoinRule.java | 108 +++++++++++++++++
 .../java/org/apache/calcite/test/JdbcTest.java     |  20 +++
 .../org/apache/calcite/test/RelOptRulesTest.java   |  42 +++++++
 .../org/apache/calcite/test/RelOptRulesTest.xml    | 134 +++++++++++++++++++++
 5 files changed, 308 insertions(+)

diff --git a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java 
b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
index 4e2f9a90a1..31b47aedf9 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
@@ -365,6 +365,10 @@ private CoreRules() {}
   public static final IntersectToExistsRule INTERSECT_TO_EXISTS =
       IntersectToExistsRule.Config.DEFAULT.toRule();
 
+  /** Rule to translates a {@link Intersect} to {@link Join#isSemiJoin 
semi-join}. */
+  public static final IntersectToSemiJoinRule INTERSECT_TO_SEMI_JOIN =
+          IntersectToSemiJoinRule.Config.DEFAULT.toRule();
+
   /** Rule that translates a distinct
    * {@link Minus} into a group of operators
    * composed of {@link Union}, {@link Aggregate}, etc. */
diff --git 
a/core/src/main/java/org/apache/calcite/rel/rules/IntersectToSemiJoinRule.java 
b/core/src/main/java/org/apache/calcite/rel/rules/IntersectToSemiJoinRule.java
new file mode 100644
index 0000000000..3da2dfa97d
--- /dev/null
+++ 
b/core/src/main/java/org/apache/calcite/rel/rules/IntersectToSemiJoinRule.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.rules;
+
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Intersect;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.logical.LogicalIntersect;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.tools.RelBuilder;
+
+import org.immutables.value.Value;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Planner rule that translates a {@link org.apache.calcite.rel.core.Intersect}
+ * to a series of {@link org.apache.calcite.rel.core.Join} that type is
+ * {@link org.apache.calcite.rel.core.JoinRelType#SEMI}.
+ */
[email protected]
+public class IntersectToSemiJoinRule
+    extends RelRule<IntersectToSemiJoinRule.Config>
+    implements TransformationRule {
+
+  /** Creates an IntersectToSemiJoinRule. */
+  protected IntersectToSemiJoinRule(Config config) {
+    super(config);
+  }
+
+  //~ Methods ----------------------------------------------------------------
+
+  @Override public void onMatch(RelOptRuleCall call) {
+    final Intersect intersect = call.rel(0);
+    if (intersect.all) {
+      return; // nothing we can do
+    }
+
+    final RelBuilder builder = call.builder();
+    final RexBuilder rexBuilder = builder.getRexBuilder();
+
+    List<RelNode> inputs = intersect.getInputs();
+    if (inputs.size() != 2) {
+      return;
+    }
+
+    RelNode left = inputs.get(0);
+    RelNode right = inputs.get(1);
+
+    List<RexNode> conditions = new ArrayList<>();
+    int fieldCount = left.getRowType().getFieldCount();
+
+    for (int i = 0; i < fieldCount; i++) {
+      RelDataType leftFieldType = 
left.getRowType().getFieldList().get(i).getType();
+      RelDataType rightFieldType = 
right.getRowType().getFieldList().get(i).getType();
+
+      conditions.add(
+          builder.isNotDistinctFrom(
+              rexBuilder.makeInputRef(leftFieldType, i),
+              rexBuilder.makeInputRef(rightFieldType, i + fieldCount)));
+    }
+    RexNode condition = RexUtil.composeConjunction(rexBuilder, conditions);
+
+    builder.push(left)
+        .push(right)
+        .join(JoinRelType.SEMI, condition)
+        .distinct();
+
+    call.transformTo(builder.build());
+  }
+
+  /** Rule configuration. */
+  @Value.Immutable
+  public interface Config extends RelRule.Config {
+    Config DEFAULT = ImmutableIntersectToSemiJoinRule.Config.of()
+        .withOperandFor(LogicalIntersect.class);
+
+    @Override default IntersectToSemiJoinRule toRule() {
+      return new IntersectToSemiJoinRule(this);
+    }
+
+    /** Defines an operand tree for the given classes. */
+    default Config withOperandFor(Class<? extends Intersect> intersectClass) {
+      return withOperandSupplier(b -> b.operand(intersectClass).anyInputs())
+          .as(Config.class);
+    }
+  }
+}
diff --git a/core/src/test/java/org/apache/calcite/test/JdbcTest.java 
b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
index ac1948f2e6..82ab47d371 100644
--- a/core/src/test/java/org/apache/calcite/test/JdbcTest.java
+++ b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
@@ -3968,6 +3968,26 @@ public void checkOrderBy(final boolean desc,
             "commission=null");
   }
 
+  /** Test case for <a 
href="https://issues.apache.org/jira/browse/CALCITE-6880";>[CALCITE-6880]
+   * Implement IntersectToSemiJoinRule</a>. */
+  @Test void testIntersectToSemiJoin() {
+    final String sql = ""
+        + "select \"commission\" from \"hr\".\"emps\"\n"
+        + "intersect\n"
+        + "select \"commission\" from \"hr\".\"emps\" where \"empid\">=150";
+    CalciteAssert.hr()
+        .query(sql)
+        .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>)
+            planner -> {
+              planner.removeRule(CoreRules.INTERSECT_TO_DISTINCT);
+              planner.removeRule(EnumerableRules.ENUMERABLE_INTERSECT_RULE);
+              planner.addRule(CoreRules.INTERSECT_TO_SEMI_JOIN);
+            })
+        .explainContains("")
+        .returnsUnordered("commission=500",
+            "commission=null");
+  }
+
   @Test void testExcept() {
     final String sql = ""
         + "select \"empid\", \"name\" from \"hr\".\"emps\" where 
\"deptno\"=10\n"
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java 
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index cddd656470..887f93c9a2 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -3570,6 +3570,48 @@ private void 
checkPushJoinThroughUnionOnRightDoesNotMatchSemiOrAntiJoin(JoinRelT
         .check();
   }
 
+  /** Test case for <a 
href="https://issues.apache.org/jira/browse/CALCITE-6880";>
+   * [CALCITE-6880] Implement IntersectToSemiJoinRule</a>. */
+  @Test void testIntersectToSemiJoin() {
+    final String sql = "select ename from emp where deptno = 10\n"
+        + "intersect\n"
+        + "select ename from emp where deptno = 20\n";
+    sql(sql).withRule(CoreRules.INTERSECT_TO_SEMI_JOIN)
+        .check();
+  }
+
+  /** Test case for <a 
href="https://issues.apache.org/jira/browse/CALCITE-6880";>
+   * [CALCITE-6880] Implement IntersectToSemiJoinRule</a>. */
+  @Test void testIntersectToSemiJoinMultiCol() {
+    final String sql = "select deptno, ename from emp where deptno = 10\n"
+        + "intersect\n"
+        + "select deptno, ename from emp where deptno = 20\n";
+    sql(sql).withRule(CoreRules.INTERSECT_TO_SEMI_JOIN)
+        .check();
+  }
+
+  /** Test case for <a 
href="https://issues.apache.org/jira/browse/CALCITE-6880";>
+   * [CALCITE-6880] Implement IntersectToSemiJoinRule</a>. */
+  @Test void testIntersectToSemiJoinAll() {
+    final String sql = "select ename from emp where deptno = 10\n"
+        + "intersect\n"
+        + "select ename from emp where deptno = 20\n"
+        + "intersect all\n"
+        + "select ename from emp where deptno = 30\n";
+    sql(sql).withRule(CoreRules.INTERSECT_TO_SEMI_JOIN)
+        .check();
+  }
+
+  /** Test case for <a 
href="https://issues.apache.org/jira/browse/CALCITE-6880";>
+   * [CALCITE-6880] Implement IntersectToSemiJoinRule</a>. */
+  @Test void testIntersectToSemiJoinCoercion() {
+    final String sql = "select ename from emp where deptno = 10\n"
+        + "intersect\n"
+        + "select job from emp where deptno = 20\n";
+    sql(sql).withRule(CoreRules.INTERSECT_TO_SEMI_JOIN)
+        .check();
+  }
+
   /** Tests {@link CoreRules#MINUS_MERGE}, which merges 2
    * {@link Minus} operators into a single {@code Minus} with 3
    * inputs. */
diff --git 
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml 
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index ae99853c8d..66552188c6 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -6293,6 +6293,140 @@ LogicalIntersect(all=[true])
     LogicalTableScan(table=[[CATALOG, SALES, EMP]])
   LogicalProject(NAME=[CAST($1):VARCHAR(20) NOT NULL])
     LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testIntersectToSemiJoin">
+    <Resource name="sql">
+      <![CDATA[select ename from emp where deptno = 10
+intersect
+select ename from emp where deptno = 20
+]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalIntersect(all=[false])
+  LogicalProject(ENAME=[$1])
+    LogicalFilter(condition=[=($7, 10)])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+  LogicalProject(ENAME=[$1])
+    LogicalFilter(condition=[=($7, 20)])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalAggregate(group=[{0}])
+  LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], joinType=[semi])
+    LogicalProject(ENAME=[$1])
+      LogicalFilter(condition=[=($7, 10)])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalProject(ENAME=[$1])
+      LogicalFilter(condition=[=($7, 20)])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testIntersectToSemiJoinAll">
+    <Resource name="sql">
+      <![CDATA[select ename from emp where deptno = 10
+intersect
+select ename from emp where deptno = 20
+intersect all
+select ename from emp where deptno = 30
+]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalIntersect(all=[true])
+  LogicalIntersect(all=[false])
+    LogicalProject(ENAME=[$1])
+      LogicalFilter(condition=[=($7, 10)])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalProject(ENAME=[$1])
+      LogicalFilter(condition=[=($7, 20)])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+  LogicalProject(ENAME=[$1])
+    LogicalFilter(condition=[=($7, 30)])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalIntersect(all=[true])
+  LogicalAggregate(group=[{0}])
+    LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], joinType=[semi])
+      LogicalProject(ENAME=[$1])
+        LogicalFilter(condition=[=($7, 10)])
+          LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalProject(ENAME=[$1])
+        LogicalFilter(condition=[=($7, 20)])
+          LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+  LogicalProject(ENAME=[$1])
+    LogicalFilter(condition=[=($7, 30)])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testIntersectToSemiJoinCoercion">
+    <Resource name="sql">
+      <![CDATA[select ename from emp where deptno = 10
+intersect
+select job from emp where deptno = 20
+]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalIntersect(all=[false])
+  LogicalProject(ENAME=[$1])
+    LogicalFilter(condition=[=($7, 10)])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+  LogicalProject(JOB=[CAST($2):VARCHAR(20) NOT NULL])
+    LogicalFilter(condition=[=($7, 20)])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalAggregate(group=[{0}])
+  LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], joinType=[semi])
+    LogicalProject(ENAME=[$1])
+      LogicalFilter(condition=[=($7, 10)])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalProject(JOB=[CAST($2):VARCHAR(20) NOT NULL])
+      LogicalFilter(condition=[=($7, 20)])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testIntersectToSemiJoinMultiCol">
+    <Resource name="sql">
+      <![CDATA[select deptno, ename from emp where deptno = 10
+intersect
+select deptno, ename from emp where deptno = 20
+]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalIntersect(all=[false])
+  LogicalProject(DEPTNO=[$7], ENAME=[$1])
+    LogicalFilter(condition=[=($7, 10)])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+  LogicalProject(DEPTNO=[$7], ENAME=[$1])
+    LogicalFilter(condition=[=($7, 20)])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalAggregate(group=[{0, 1}])
+  LogicalJoin(condition=[AND(IS NOT DISTINCT FROM($0, $2), IS NOT DISTINCT 
FROM($1, $3))], joinType=[semi])
+    LogicalProject(DEPTNO=[$7], ENAME=[$1])
+      LogicalFilter(condition=[=($7, 10)])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalProject(DEPTNO=[$7], ENAME=[$1])
+      LogicalFilter(condition=[=($7, 20)])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
 ]]>
     </Resource>
   </TestCase>

Reply via email to