This is an automated email from the ASF dual-hosted git repository. jhyde pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/calcite.git
commit 22fd34f892a2232d393bd5906f8bb1f21f1dff50 Author: rubenada <[email protected]> AuthorDate: Fri Mar 15 16:56:54 2019 +0100 [CALCITE-2920] In RelBuilder, add antiJoin method (Ruben Quesada Lopez) There is no AntiJoin relational operator (for now), so generate a Correlate instead. Close apache/calcite#1110 --- .../java/org/apache/calcite/runtime/FlatLists.java | 9 ++- .../java/org/apache/calcite/sql/SemiJoinType.java | 29 ++++++-- .../java/org/apache/calcite/tools/RelBuilder.java | 79 +++++++++++++++++++++- .../org/apache/calcite/test/CalciteAssert.java | 24 ++++++- .../java/org/apache/calcite/test/LatticeTest.java | 31 ++++----- .../org/apache/calcite/test/RelBuilderTest.java | 22 ++++++ .../test/enumerable/EnumerableCorrelateTest.java | 65 ++++++++++++++++++ site/_docs/algebra.md | 1 + 8 files changed, 231 insertions(+), 29 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/runtime/FlatLists.java b/core/src/main/java/org/apache/calcite/runtime/FlatLists.java index a2418f0..a6ea656 100644 --- a/core/src/main/java/org/apache/calcite/runtime/FlatLists.java +++ b/core/src/main/java/org/apache/calcite/runtime/FlatLists.java @@ -263,7 +263,14 @@ public class FlatLists { /** Returns a map that consists of a given map plus an (key, value), * guaranteed to be an {@link ImmutableMap}. */ public static <K, V> ImmutableMap<K, V> append(Map<K, V> map, K k, V v) { - return ImmutableMap.<K, V>builder().putAll(map).put(k, v).build(); + final ImmutableMap.Builder<K, V> builder = ImmutableMap.builder(); + builder.put(k, v); + map.forEach((k2, v2) -> { + if (!k.equals(k2)) { + builder.put(k2, v2); + } + }); + return builder.build(); } /** Base class for flat lists. diff --git a/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java b/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java index 473e812..2569d76 100644 --- a/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java +++ b/core/src/main/java/org/apache/calcite/sql/SemiJoinType.java @@ -28,25 +28,40 @@ import java.util.Locale; */ public enum SemiJoinType { /** - * Inner join + * Inner join. */ INNER, /** - * Left-outer join + * Left-outer join. */ LEFT, /** - * Semi-join - * <p>Similar to from A ... where a in (select b from B ...)</p> + * Semi-join. + * + * <p>For example, {@code EMP semi-join DEPT} finds all {@code EMP} records + * that have a corresponding {@code DEPT} record: + * + * <blockquote><pre> + * SELECT * FROM EMP + * WHERE EXISTS (SELECT 1 FROM DEPT + * WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre> + * </blockquote> */ SEMI, /** - * Anti-join - * <p>Similar to from A ... where a NOT in (select b from B ...)</p> - * <p>Note: if B.b is nullable and B has nulls, no rows must be returned</p> + * Anti-join. + * + * <p>For example, {@code EMP anti-join DEPT} finds all {@code EMP} records + * that do not have a corresponding {@code DEPT} record: + * + * <blockquote><pre> + * SELECT * FROM EMP + * WHERE NOT EXISTS (SELECT 1 FROM DEPT + * WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre> + * </blockquote> */ ANTI; diff --git a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java index 0d2972b..68492c9 100644 --- a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java +++ b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java @@ -1788,7 +1788,23 @@ public class RelBuilder { return join(joinType, conditions); } - /** Creates a {@link SemiJoin}. */ + /** Creates a {@link SemiJoin}. + * + * <p>A semi-join is a form of join that combines two relational expressions + * according to some condition, and outputs only rows from the left input for + * which at least one row from the right input matches. It only outputs + * columns from the left input, and ignores duplicates on the right. + * + * <p>For example, {@code EMP semi-join DEPT} finds all {@code EMP} records + * that do not have a corresponding {@code DEPT} record, similar to the + * following SQL: + * + * <blockquote><pre> + * SELECT * FROM EMP + * WHERE EXISTS (SELECT 1 FROM DEPT + * WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre> + * </blockquote> + */ public RelBuilder semiJoin(Iterable<? extends RexNode> conditions) { final Frame right = stack.pop(); final RelNode semiJoin = @@ -1797,11 +1813,70 @@ public class RelBuilder { return this; } - /** Creates a {@link SemiJoin}. */ + /** Creates a {@link SemiJoin}. + * + * @see #semiJoin(Iterable) */ public RelBuilder semiJoin(RexNode... conditions) { return semiJoin(ImmutableList.copyOf(conditions)); } + /** Creates an anti-join. + * + * <p>An anti-join is a form of join that combines two relational expressions + * according to some condition, but outputs only rows from the left input + * for which no rows from the right input match. + * + * <p>For example, {@code EMP anti-join DEPT} finds all {@code EMP} records + * that do not have a corresponding {@code DEPT} record, similar to the + * following SQL: + * + * <blockquote><pre> + * SELECT * FROM EMP + * WHERE NOT EXISTS (SELECT 1 FROM DEPT + * WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre> + * </blockquote> + */ + public RelBuilder antiJoin(Iterable<? extends RexNode> conditions) { + // There is currently no "AntiJoin" relational expression, so we + // simulate it using a LogicalCorrelate with SemiJoinType.ANTI. + final RexBuilder rexBuilder = getRexBuilder(); + final RelNode right = build(); + final RelNode left = peek(); + final int leftFieldCount = left.getRowType().getFieldCount(); + final CorrelationId correlationId = cluster.createCorrel(); + final RexNode corrVar = + rexBuilder.makeCorrel(left.getRowType(), correlationId); + final ImmutableBitSet.Builder requiredColumns = ImmutableBitSet.builder(); + + // Replace all references of left input with FieldAccess(corrVar, field) + final RexNode condition = and(conditions).accept(new RexShuttle() { + @Override public RexNode visitInputRef(RexInputRef input) { + final int field = input.getIndex(); + if (field >= leftFieldCount) { + return rexBuilder.makeInputRef(input.getType(), input.getIndex() + - leftFieldCount); + } + requiredColumns.set(field); + return rexBuilder.makeFieldAccess(corrVar, field); + } + }); + + final RelNode right2 = push(right).filter(condition).build(); + + final RelNode antiJoin = + correlateFactory.createCorrelate(left, right2, correlationId, + requiredColumns.build(), SemiJoinType.ANTI); + replaceTop(antiJoin); + return this; + } + + /** Creates an anti-join. + * + * @see #antiJoin(Iterable) */ + public RelBuilder antiJoin(RexNode... conditions) { + return antiJoin(ImmutableList.copyOf(conditions)); + } + /** Assigns a table alias to the top entry on the stack. */ public RelBuilder as(final String alias) { final Frame pair = stack.pop(); diff --git a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java index 26e29f2..943b2fd 100644 --- a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java +++ b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java @@ -22,6 +22,7 @@ import org.apache.calcite.adapter.java.ReflectiveSchema; import org.apache.calcite.adapter.jdbc.JdbcSchema; import org.apache.calcite.avatica.ConnectionProperty; import org.apache.calcite.avatica.util.DateTimeUtils; +import org.apache.calcite.config.CalciteConnectionConfigImpl; import org.apache.calcite.config.CalciteConnectionProperty; import org.apache.calcite.config.CalciteSystemProperty; import org.apache.calcite.config.Lex; @@ -31,6 +32,7 @@ import org.apache.calcite.jdbc.CalcitePrepare; import org.apache.calcite.jdbc.CalciteSchema; import org.apache.calcite.materialize.Lattice; import org.apache.calcite.model.ModelHandler; +import org.apache.calcite.plan.Contexts; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.runtime.CalciteException; @@ -47,6 +49,7 @@ import org.apache.calcite.schema.impl.ViewTableMacro; import org.apache.calcite.sql.validate.SqlConformanceEnum; import org.apache.calcite.sql.validate.SqlValidatorException; import org.apache.calcite.tools.FrameworkConfig; +import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.Closer; import org.apache.calcite.util.Holder; @@ -1368,7 +1371,7 @@ public class CalciteAssert { try (Connection c = createConnection()) { f.accept(c); } catch (SQLException e) { - TestUtil.rethrow(e); + throw TestUtil.rethrow(e); } return this; } @@ -1570,7 +1573,7 @@ public class CalciteAssert { if (plan != null) { return; } - addHook(Hook.JAVA_PLAN, (Consumer<String>) a0 -> plan = a0); + addHook(Hook.JAVA_PLAN, this::setPlan); withConnection(connection -> { assertQuery(connection, sql, limit, materializationsEnabled, hooks, null, checkUpdate, null); @@ -1578,6 +1581,10 @@ public class CalciteAssert { }); } + private void setPlan(String plan) { + this.plan = plan; + } + /** Runs the query and applies a checker to the generated third-party * queries. The checker should throw to fail the test if it does not see * what it wants. This method can be used to check whether a particular @@ -1655,10 +1662,21 @@ public class CalciteAssert { return withHook(Hook.STRING_TO_QUERY, (Consumer<Pair<FrameworkConfig, Holder<CalcitePrepare.Query>>>) pair -> { - final RelBuilder b = RelBuilder.create(pair.left); + final FrameworkConfig config = forceDecorrelate(pair.left); + final RelBuilder b = RelBuilder.create(config); pair.right.set(CalcitePrepare.Query.of(relFn.apply(b))); }); } + + /** Creates a {@link FrameworkConfig} that does not decorrelate. */ + private FrameworkConfig forceDecorrelate(FrameworkConfig config) { + return Frameworks.newConfigBuilder(config) + .context( + Contexts.of(new CalciteConnectionConfigImpl(new Properties()) + .set(CalciteConnectionProperty.FORCE_DECORRELATE, + Boolean.toString(false)))) + .build(); + } } /** Fluent interface for building a metadata query to be tested. */ diff --git a/core/src/test/java/org/apache/calcite/test/LatticeTest.java b/core/src/test/java/org/apache/calcite/test/LatticeTest.java index ca7cacd..b4cbcba 100644 --- a/core/src/test/java/org/apache/calcite/test/LatticeTest.java +++ b/core/src/test/java/org/apache/calcite/test/LatticeTest.java @@ -21,11 +21,11 @@ import org.apache.calcite.materialize.Lattice; import org.apache.calcite.materialize.Lattices; import org.apache.calcite.materialize.MaterializationService; import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.rules.AbstractMaterializedViewRule; import org.apache.calcite.runtime.Hook; import org.apache.calcite.schema.SchemaPlus; -import org.apache.calcite.test.CalciteAssert.AssertQuery; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.TestUtil; @@ -502,35 +502,34 @@ public class LatticeTest { private void checkTileAlgorithm(String statisticProvider, String expectedExplain) { + final RelOptRule[] rules = { + AbstractMaterializedViewRule.INSTANCE_PROJECT_FILTER, + AbstractMaterializedViewRule.INSTANCE_FILTER, + AbstractMaterializedViewRule.INSTANCE_PROJECT_JOIN, + AbstractMaterializedViewRule.INSTANCE_JOIN, + AbstractMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE, + AbstractMaterializedViewRule.INSTANCE_AGGREGATE + }; MaterializationService.setThreadLocal(); MaterializationService.instance().clear(); - AssertQuery that = foodmartLatticeModel(statisticProvider) + foodmartLatticeModel(statisticProvider) .query("select distinct t.\"the_year\", t.\"quarter\"\n" + "from \"foodmart\".\"sales_fact_1997\" as s\n" + "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n") - .enableMaterializations(true); + .enableMaterializations(true) // Disable materialization rules from this test. For some reason, there is // a weird interaction between these rules and the lattice rewriting that // produces non-deterministic rewriting (even when only lattices are present). // For more context, see // <a href="https://issues.apache.org/jira/browse/CALCITE-2953">[CALCITE-2953]</a>. - that.withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner -> { - ImmutableList - .of( - AbstractMaterializedViewRule.INSTANCE_PROJECT_FILTER, - AbstractMaterializedViewRule.INSTANCE_FILTER, - AbstractMaterializedViewRule.INSTANCE_PROJECT_JOIN, - AbstractMaterializedViewRule.INSTANCE_JOIN, - AbstractMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE, - AbstractMaterializedViewRule.INSTANCE_AGGREGATE) - .forEach(planner::removeRule); - }); + .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner -> + Arrays.asList(rules).forEach(planner::removeRule)) // disable for MySQL; times out running star-join query // disable for H2; it thinks our generated SQL has invalid syntax - that.enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.MYSQL - && CalciteAssert.DB != CalciteAssert.DatabaseInstance.H2) + .enable(CalciteAssert.DB != CalciteAssert.DatabaseInstance.MYSQL + && CalciteAssert.DB != CalciteAssert.DatabaseInstance.H2) .explainContains(expectedExplain) .returnsUnordered("the_year=1997; quarter=Q1", "the_year=1997; quarter=Q2", diff --git a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java index 75feb03..dc0240f 100644 --- a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java @@ -1501,6 +1501,28 @@ public class RelBuilderTest { assertThat(root, hasTree(expected)); } + @Test public void testAntiJoin() { + // Equivalent SQL: + // SELECT * FROM dept d + // WHERE NOT EXISTS (SELECT 1 FROM emp e WHERE e.deptno = d.deptno) + final RelBuilder builder = RelBuilder.create(config().build()); + RelNode root = builder + .scan("DEPT") + .scan("EMP") + .antiJoin( + builder.equals( + builder.field(2, 0, "DEPTNO"), + builder.field(2, 1, "DEPTNO"))) + .build(); + // AntiJoin implemented as LogicalCorrelate with joinType=anti + final String expected = "" + + "LogicalCorrelate(correlation=[$cor0], joinType=[anti], requiredColumns=[{0}])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n" + + " LogicalFilter(condition=[=($cor0.DEPTNO, $7)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + assertThat(root, hasTree(expected)); + } + @Test public void testAlias() { // Equivalent SQL: // SELECT * diff --git a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java index 9e3793a..451d95d 100644 --- a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java +++ b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java @@ -133,6 +133,71 @@ public class EnumerableCorrelateTest { "empid=200"); } + /** Test case for + * <a href="https://issues.apache.org/jira/browse/CALCITE-2920">[CALCITE-2920] + * RelBuilder: new method to create an anti-join</a>. */ + @Test public void antiJoinCorrelate() { + tester(false, new JdbcTest.HrSchema()) + .query("?") + .withRel( + // Retrieve departments without employees. Equivalent SQL: + // SELECT d.deptno, d.name FROM depts d + // WHERE NOT EXISTS (SELECT 1 FROM emps e WHERE e.deptno = d.deptno) + builder -> builder + .scan("s", "depts").as("d") + .scan("s", "emps").as("e") + .antiJoin( + builder.equals( + builder.field(2, "d", "deptno"), + builder.field(2, "e", "deptno"))) + .project( + builder.field("deptno"), + builder.field("name")) + .build()) + .returnsUnordered( + "deptno=30; name=Marketing", + "deptno=40; name=HR"); + } + + /** Test case for + * <a href="https://issues.apache.org/jira/browse/CALCITE-2920">[CALCITE-2920] + * RelBuilder: new method to create an antijoin</a> */ + @Test public void antiJoinCorrelateWithNullValues() { + final Integer salesDeptNo = 10; + tester(false, new JdbcTest.HrSchema()) + .query("?") + .withRel( + // Retrieve employees from any department other than Sales (deptno 10) whose + // commission is different from any Sales employee commission. Since there + // is a Sales employee with null commission, the goal is to validate that antiJoin + // behaves as a NOT EXISTS (and returns results), and not as a NOT IN (which would + // not return any result due to its null handling). Equivalent SQL: + // SELECT empOther.empid, empOther.name FROM emps empOther + // WHERE empOther.deptno <> 10 AND NOT EXISTS + // (SELECT 1 FROM emps empSales + // WHERE empSales.deptno = 10 AND empSales.commission = empOther.commission) + builder -> builder + .scan("s", "emps").as("empOther") + .filter( + builder.notEquals( + builder.field("empOther", "deptno"), + builder.literal(salesDeptNo))) + .scan("s", "emps").as("empSales") + .filter( + builder.equals( + builder.field("empSales", "deptno"), + builder.literal(salesDeptNo))) + .antiJoin( + builder.equals( + builder.field(2, "empOther", "commission"), + builder.field(2, "empSales", "commission"))) + .project( + builder.field("empid"), + builder.field("name")) + .build()) + .returnsUnordered("empid=200; name=Eric"); + } + private CalciteAssert.AssertThat tester(boolean forceDecorrelate, Object schema) { return CalciteAssert.that() diff --git a/site/_docs/algebra.md b/site/_docs/algebra.md index a68bb89..8802f81 100644 --- a/site/_docs/algebra.md +++ b/site/_docs/algebra.md @@ -275,6 +275,7 @@ return the `RelBuilder`. | `sortExchange(distribution, collation)` | Creates a [SortExchange]({{ site.apiRoot }}/org/apache/calcite/rel/core/SortExchange.html). | `join(joinType, expr...)`<br/>`join(joinType, exprList)`<br/>`join(joinType, fieldName...)` | Creates a [Join]({{ site.apiRoot }}/org/apache/calcite/rel/core/Join.html) of the two most recent relational expressions.<br/><br/>The first form joins on a boolean expression (multiple conditions are combined using AND).<br/><br/>The last form joins on named fields; each side must have a field of each name. | `semiJoin(expr)` | Creates a [SemiJoin]({{ site.apiRoot }}/org/apache/calcite/rel/core/SemiJoin.html) of the two most recent relational expressions. +| `antiJoin(expr)` | Creates an AntiJoin of the two most recent relational expressions. | `union(all [, n])` | Creates a [Union]({{ site.apiRoot }}/org/apache/calcite/rel/core/Union.html) of the `n` (default two) most recent relational expressions. | `intersect(all [, n])` | Creates an [Intersect]({{ site.apiRoot }}/org/apache/calcite/rel/core/Intersect.html) of the `n` (default two) most recent relational expressions. | `minus(all)` | Creates a [Minus]({{ site.apiRoot }}/org/apache/calcite/rel/core/Minus.html) of the two most recent relational expressions.
