This is an automated email from the ASF dual-hosted git repository.
mmior pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new be2b979 [CALCITE-2968] New AntiJoin relational expression
be2b979 is described below
commit be2b97905548a5b24067c8636567b364529332cc
Author: rubenada <[email protected]>
AuthorDate: Fri May 31 18:16:42 2019 +0200
[CALCITE-2968] New AntiJoin relational expression
Close apache/calcite#1246
---
.../adapter/enumerable/EnumerableCorrelate.java | 2 +-
.../adapter/enumerable/EnumerableHashJoin.java | 38 +++---
.../enumerable/EnumerableNestedLoopJoin.java | 3 +-
.../org/apache/calcite/adapter/jdbc/JdbcRules.java | 9 +-
.../org/apache/calcite/rel/core/JoinRelType.java | 22 +++-
.../org/apache/calcite/rel/metadata/RelMdSize.java | 4 +-
.../rel/rules/ProjectJoinTransposeRule.java | 4 +-
.../apache/calcite/rel/rules/PushProjector.java | 9 +-
.../apache/calcite/sql2rel/RelFieldTrimmer.java | 16 ++-
.../java/org/apache/calcite/tools/RelBuilder.java | 31 +----
.../org/apache/calcite/util/BuiltInMethod.java | 6 +-
.../apache/calcite/runtime/EnumerablesTest.java | 15 +--
.../org/apache/calcite/test/RelBuilderTest.java | 6 +-
.../test/enumerable/EnumerableCorrelateTest.java | 57 ++++++++-
.../test/enumerable/EnumerableJoinTest.java | 140 +++++++++++++++++++++
.../apache/calcite/linq4j/EnumerableDefaults.java | 64 +++++++---
.../java/org/apache/calcite/linq4j/JoinType.java | 89 +++++++++++++
.../calcite/linq4j/test/JoinPreserveOrderTest.java | 29 +++--
site/_docs/algebra.md | 2 +-
19 files changed, 441 insertions(+), 105 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableCorrelate.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableCorrelate.java
index e3fb59e..9d314bc 100644
---
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableCorrelate.java
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableCorrelate.java
@@ -130,7 +130,7 @@ public class EnumerableCorrelate extends Correlate
builder.append(
Expressions.call(leftExpression, BuiltInMethod.CORRELATE_JOIN.method,
- Expressions.constant(joinType.toLinq4j()),
+ Expressions.constant(joinType.toLinq4jCorrelateJoinType()),
Expressions.lambda(corrBlock.toBlock(), corrArg),
selector));
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableHashJoin.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableHashJoin.java
index bedbbeb..ec0264a 100644
---
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableHashJoin.java
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableHashJoin.java
@@ -40,6 +40,7 @@ import org.apache.calcite.util.Util;
import com.google.common.collect.ImmutableList;
+import java.lang.reflect.Method;
import java.util.Set;
/** Implementation of {@link org.apache.calcite.rel.core.Join} in
@@ -110,18 +111,20 @@ public class EnumerableHashJoin extends EquiJoin
implements EnumerableRel {
RelMetadataQuery mq) {
double rowCount = mq.getRowCount(this);
- if (!isSemiJoin()) {
- // Joins can be flipped, and for many algorithms, both versions are
viable
- // and have the same cost. To make the results stable between versions of
- // the planner, make one of the versions slightly more expensive.
- switch (joinType) {
- case RIGHT:
+ // Joins can be flipped, and for many algorithms, both versions are viable
+ // and have the same cost. To make the results stable between versions of
+ // the planner, make one of the versions slightly more expensive.
+ switch (joinType) {
+ case SEMI:
+ case ANTI:
+ // SEMI and ANTI join cannot be flipped
+ break;
+ case RIGHT:
+ rowCount = RelMdUtil.addEpsilon(rowCount);
+ break;
+ default:
+ if (RelNodes.COMPARATOR.compare(left, right) > 0) {
rowCount = RelMdUtil.addEpsilon(rowCount);
- break;
- default:
- if (RelNodes.COMPARATOR.compare(left, right) > 0) {
- rowCount = RelMdUtil.addEpsilon(rowCount);
- }
}
}
@@ -147,14 +150,21 @@ public class EnumerableHashJoin extends EquiJoin
implements EnumerableRel {
}
@Override public Result implement(EnumerableRelImplementor implementor,
Prefer pref) {
- if (isSemiJoin()) {
+ switch (joinType) {
+ case SEMI:
+ case ANTI:
assert joinInfo.isEqui();
return implementHashSemiJoin(implementor, pref);
+ default:
+ return implementHashJoin(implementor, pref);
}
- return implementHashJoin(implementor, pref);
}
private Result implementHashSemiJoin(EnumerableRelImplementor implementor,
Prefer pref) {
+ assert joinType == JoinRelType.SEMI || joinType == JoinRelType.ANTI;
+ final Method method = joinType == JoinRelType.SEMI
+ ? BuiltInMethod.SEMI_JOIN.method
+ : BuiltInMethod.ANTI_JOIN.method;
BlockBuilder builder = new BlockBuilder();
final Result leftResult =
implementor.visitChild(this, 0, (EnumerableRel) left, pref);
@@ -171,7 +181,7 @@ public class EnumerableHashJoin extends EquiJoin implements
EnumerableRel {
physType,
builder.append(
Expressions.call(
- BuiltInMethod.SEMI_JOIN.method,
+ method,
Expressions.list(
leftExpression,
rightExpression,
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableNestedLoopJoin.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableNestedLoopJoin.java
index c737cac..ab91906 100644
---
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableNestedLoopJoin.java
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableNestedLoopJoin.java
@@ -151,8 +151,7 @@ public class EnumerableNestedLoopJoin extends Join
implements EnumerableRel {
physType,
ImmutableList.of(leftResult.physType,
rightResult.physType)),
- Expressions.constant(joinType.generatesNullsOnLeft()),
- Expressions.constant(joinType.generatesNullsOnRight())))
+ Expressions.constant(joinType.toLinq4jJoinType())))
.toBlock());
}
diff --git a/core/src/main/java/org/apache/calcite/adapter/jdbc/JdbcRules.java
b/core/src/main/java/org/apache/calcite/adapter/jdbc/JdbcRules.java
index abfbeec..b46835f 100644
--- a/core/src/main/java/org/apache/calcite/adapter/jdbc/JdbcRules.java
+++ b/core/src/main/java/org/apache/calcite/adapter/jdbc/JdbcRules.java
@@ -275,12 +275,15 @@ public class JdbcRules {
@Override public RelNode convert(RelNode rel) {
final Join join = (Join) rel;
- if (join.isSemiJoin()) {
- // It's not possible to convert semi-joins. They have fewer columns
+ switch (join.getJoinType()) {
+ case SEMI:
+ case ANTI:
+ // It's not possible to convert semi-joins or anti-joins. They have
fewer columns
// than regular joins.
return null;
+ default:
+ return convert(join, true);
}
- return convert(join, true);
}
/**
diff --git a/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java
b/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java
index 8a34501..17f3b8d 100644
--- a/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java
+++ b/core/src/main/java/org/apache/calcite/rel/core/JoinRelType.java
@@ -17,6 +17,7 @@
package org.apache.calcite.rel.core;
import org.apache.calcite.linq4j.CorrelateJoinType;
+import org.apache.calcite.linq4j.JoinType;
import java.util.Locale;
@@ -59,7 +60,7 @@ public enum JoinRelType {
SEMI,
/**
- * Anti-join.
+ * Anti-join (also known as Anti-semi-join).
*
* <p>For example, {@code EMP anti-join DEPT} finds all {@code EMP} records
* that do not have a corresponding {@code DEPT} record:
@@ -152,7 +153,7 @@ public enum JoinRelType {
}
/** Transform this JoinRelType to CorrelateJoinType. **/
- public CorrelateJoinType toLinq4j() {
+ public CorrelateJoinType toLinq4jCorrelateJoinType() {
switch (this) {
case INNER:
return CorrelateJoinType.INNER;
@@ -167,19 +168,28 @@ public enum JoinRelType {
"Unable to convert " + this + " to CorrelateJoinType");
}
- public boolean projectsRight() {
+ /** Transform this JoinRelType to Linq4j JoinType. **/
+ public JoinType toLinq4jJoinType() {
switch (this) {
case INNER:
+ return JoinType.INNER;
case LEFT:
+ return JoinType.LEFT;
case RIGHT:
+ return JoinType.RIGHT;
case FULL:
- return true;
+ return JoinType.FULL;
case SEMI:
+ return JoinType.SEMI;
case ANTI:
- return false;
+ return JoinType.ANTI;
}
throw new IllegalStateException(
- "Unable to convert " + this + " to JoinRelType");
+ "Unable to convert " + this + " to Linq4j JoinType");
+ }
+
+ public boolean projectsRight() {
+ return this != SEMI && this != ANTI;
}
}
diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdSize.java
b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdSize.java
index 4233373..16637a2 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdSize.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdSize.java
@@ -187,12 +187,12 @@ public class RelMdSize implements
MetadataHandler<BuiltInMetadata.Size> {
}
private List<Double> averageJoinColumnSizes(Join rel, RelMetadataQuery mq) {
- boolean semijoin = rel.isSemiJoin();
+ boolean semiOrAntijoin = !rel.getJoinType().projectsRight();
final RelNode left = rel.getLeft();
final RelNode right = rel.getRight();
final List<Double> lefts = mq.getAverageColumnSizes(left);
final List<Double> rights =
- semijoin ? null : mq.getAverageColumnSizes(right);
+ semiOrAntijoin ? null : mq.getAverageColumnSizes(right);
if (lefts == null && rights == null) {
return null;
}
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/ProjectJoinTransposeRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/ProjectJoinTransposeRule.java
index 33ed935..7c98114 100644
---
a/core/src/main/java/org/apache/calcite/rel/rules/ProjectJoinTransposeRule.java
+++
b/core/src/main/java/org/apache/calcite/rel/rules/ProjectJoinTransposeRule.java
@@ -76,8 +76,8 @@ public class ProjectJoinTransposeRule extends RelOptRule {
Project origProj = call.rel(0);
final Join join = call.rel(1);
- if (join.isSemiJoin()) {
- return; // TODO: support SemiJoin
+ if (!join.getJoinType().projectsRight()) {
+ return; // TODO: support SemiJoin / AntiJoin
}
// Normalize the join condition so we don't end up misidentified expanded
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/PushProjector.java
b/core/src/main/java/org/apache/calcite/rel/rules/PushProjector.java
index 5836684..52fd5fc 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/PushProjector.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/PushProjector.java
@@ -227,7 +227,14 @@ public class PushProjector {
List<RelDataTypeField> rightFields =
joinRel.getRight().getRowType().getFieldList();
nFields = leftFields.size();
- nFieldsRight = joinRel.isSemiJoin() ? 0 : rightFields.size();
+ switch (joinRel.getJoinType()) {
+ case SEMI:
+ case ANTI:
+ nFieldsRight = 0;
+ break;
+ default:
+ nFieldsRight = rightFields.size();
+ }
nSysFields = joinRel.getSystemFieldList().size();
childBitmap =
ImmutableBitSet.range(nSysFields, nFields + nSysFields);
diff --git a/core/src/main/java/org/apache/calcite/sql2rel/RelFieldTrimmer.java
b/core/src/main/java/org/apache/calcite/sql2rel/RelFieldTrimmer.java
index 9029765..b92d5a1 100644
--- a/core/src/main/java/org/apache/calcite/sql2rel/RelFieldTrimmer.java
+++ b/core/src/main/java/org/apache/calcite/sql2rel/RelFieldTrimmer.java
@@ -28,6 +28,7 @@ import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.rel.core.SetOp;
@@ -665,9 +666,15 @@ public class RelFieldTrimmer implements ReflectiveVisitor {
relBuilder.push(newInputs.get(0));
relBuilder.push(newInputs.get(1));
- if (join.isSemiJoin()) {
- relBuilder.semiJoin(newConditionExpr);
- // For SemiJoins only map fields from the left-side
+ switch (join.getJoinType()) {
+ case SEMI:
+ case ANTI:
+ // For SemiJoins and AntiJoins only map fields from the left-side
+ if (join.getJoinType() == JoinRelType.SEMI) {
+ relBuilder.semiJoin(newConditionExpr);
+ } else {
+ relBuilder.antiJoin(newConditionExpr);
+ }
Mapping inputMapping = inputMappings.get(0);
mapping = Mappings.create(MappingType.INVERSE_SURJECTION,
join.getRowType().getFieldCount(),
@@ -680,7 +687,8 @@ public class RelFieldTrimmer implements ReflectiveVisitor {
for (IntPair pair : inputMapping) {
mapping.set(pair.source + offset, pair.target + newOffset);
}
- } else {
+ break;
+ default:
relBuilder.join(join.getJoinType(), newConditionExpr);
}
diff --git a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
index 5ee0ec9..c139f58 100644
--- a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
+++ b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
@@ -1949,35 +1949,10 @@ public class RelBuilder {
* </blockquote>
*/
public RelBuilder antiJoin(Iterable<? extends RexNode> conditions) {
- // There is currently no "AntiJoin" relational expression, so we
- // simulate it using a LogicalCorrelate with SemiJoinType.ANTI.
- final RexBuilder rexBuilder = getRexBuilder();
- final RelNode right = build();
- final RelNode left = peek();
- final int leftFieldCount = left.getRowType().getFieldCount();
- final CorrelationId correlationId = cluster.createCorrel();
- final RexNode corrVar =
- rexBuilder.makeCorrel(left.getRowType(), correlationId);
- final ImmutableBitSet.Builder requiredColumns = ImmutableBitSet.builder();
-
- // Replace all references of left input with FieldAccess(corrVar, field)
- final RexNode condition = and(conditions).accept(new RexShuttle() {
- @Override public RexNode visitInputRef(RexInputRef input) {
- final int field = input.getIndex();
- if (field >= leftFieldCount) {
- return rexBuilder.makeInputRef(input.getType(), input.getIndex()
- - leftFieldCount);
- }
- requiredColumns.set(field);
- return rexBuilder.makeFieldAccess(corrVar, field);
- }
- });
-
- final RelNode right2 = push(right).filter(condition).build();
-
+ final Frame right = stack.pop();
final RelNode antiJoin =
- correlateFactory.createCorrelate(left, right2, correlationId,
- requiredColumns.build(), JoinRelType.ANTI);
+ joinFactory.createJoin(peek(), right.rel,
+ and(conditions), ImmutableSet.of(), JoinRelType.ANTI, false);
replaceTop(antiJoin);
return this;
}
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index 246479a..d786a6a 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -35,6 +35,7 @@ import org.apache.calcite.linq4j.Enumerable;
import org.apache.calcite.linq4j.EnumerableDefaults;
import org.apache.calcite.linq4j.Enumerator;
import org.apache.calcite.linq4j.ExtendedEnumerable;
+import org.apache.calcite.linq4j.JoinType;
import org.apache.calcite.linq4j.Linq4j;
import org.apache.calcite.linq4j.QueryProvider;
import org.apache.calcite.linq4j.Queryable;
@@ -165,9 +166,10 @@ public enum BuiltInMethod {
SLICE0(Enumerables.class, "slice0", Enumerable.class),
SEMI_JOIN(EnumerableDefaults.class, "semiJoin", Enumerable.class,
Enumerable.class, Function1.class, Function1.class),
+ ANTI_JOIN(EnumerableDefaults.class, "antiJoin", Enumerable.class,
+ Enumerable.class, Function1.class, Function1.class),
NESTED_LOOP_JOIN(EnumerableDefaults.class, "nestedLoopJoin",
Enumerable.class,
- Enumerable.class, Predicate2.class, Function2.class, boolean.class,
- boolean.class),
+ Enumerable.class, Predicate2.class, Function2.class, JoinType.class),
CORRELATE_JOIN(ExtendedEnumerable.class, "correlateJoin",
CorrelateJoinType.class, Function1.class, Function2.class),
SELECT(ExtendedEnumerable.class, "select", Function1.class),
diff --git a/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java
b/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java
index e33ef27..41fd445 100644
--- a/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java
+++ b/core/src/test/java/org/apache/calcite/runtime/EnumerablesTest.java
@@ -18,6 +18,7 @@ package org.apache.calcite.runtime;
import org.apache.calcite.linq4j.Enumerable;
import org.apache.calcite.linq4j.EnumerableDefaults;
+import org.apache.calcite.linq4j.JoinType;
import org.apache.calcite.linq4j.Linq4j;
import org.apache.calcite.linq4j.function.Function2;
import org.apache.calcite.linq4j.function.Functions;
@@ -154,14 +155,14 @@ public class EnumerablesTest {
@Test public void testThetaJoin() {
assertThat(
EnumerableDefaults.nestedLoopJoin(EMPS, DEPTS, EQUAL_DEPTNO,
- EMP_DEPT_TO_STRING, false, false).toList().toString(),
+ EMP_DEPT_TO_STRING, JoinType.INNER).toList().toString(),
equalTo("[{Theodore, 20, 20, Sales}, {Sebastian, 20, 20, Sales}]"));
}
@Test public void testThetaLeftJoin() {
assertThat(
EnumerableDefaults.nestedLoopJoin(EMPS, DEPTS, EQUAL_DEPTNO,
- EMP_DEPT_TO_STRING, false, true).toList().toString(),
+ EMP_DEPT_TO_STRING, JoinType.LEFT).toList().toString(),
equalTo("[{Fred, 10, null, null}, {Theodore, 20, 20, Sales}, "
+ "{Sebastian, 20, 20, Sales}, {Joe, 30, null, null}]"));
}
@@ -169,7 +170,7 @@ public class EnumerablesTest {
@Test public void testThetaRightJoin() {
assertThat(
EnumerableDefaults.nestedLoopJoin(EMPS, DEPTS, EQUAL_DEPTNO,
- EMP_DEPT_TO_STRING, true, false).toList().toString(),
+ EMP_DEPT_TO_STRING, JoinType.RIGHT).toList().toString(),
equalTo("[{Theodore, 20, 20, Sales}, {Sebastian, 20, 20, Sales}, "
+ "{null, null, 15, Marketing}]"));
}
@@ -177,7 +178,7 @@ public class EnumerablesTest {
@Test public void testThetaFullJoin() {
assertThat(
EnumerableDefaults.nestedLoopJoin(EMPS, DEPTS, EQUAL_DEPTNO,
- EMP_DEPT_TO_STRING, true, true).toList().toString(),
+ EMP_DEPT_TO_STRING, JoinType.FULL).toList().toString(),
equalTo("[{Fred, 10, null, null}, {Theodore, 20, 20, Sales}, "
+ "{Sebastian, 20, 20, Sales}, {Joe, 30, null, null}, "
+ "{null, null, 15, Marketing}]"));
@@ -186,7 +187,7 @@ public class EnumerablesTest {
@Test public void testThetaFullJoinLeftEmpty() {
assertThat(
EnumerableDefaults.nestedLoopJoin(EMPS.take(0), DEPTS, EQUAL_DEPTNO,
- EMP_DEPT_TO_STRING, true, true)
+ EMP_DEPT_TO_STRING, JoinType.FULL)
.orderBy(Functions.identitySelector()).toList().toString(),
equalTo("[{null, null, 15, Marketing}, {null, null, 20, Sales}]"));
}
@@ -194,7 +195,7 @@ public class EnumerablesTest {
@Test public void testThetaFullJoinRightEmpty() {
assertThat(
EnumerableDefaults.nestedLoopJoin(EMPS, DEPTS.take(0), EQUAL_DEPTNO,
- EMP_DEPT_TO_STRING, true, true).toList().toString(),
+ EMP_DEPT_TO_STRING, JoinType.FULL).toList().toString(),
equalTo("[{Fred, 10, null, null}, {Theodore, 20, null, null}, "
+ "{Sebastian, 20, null, null}, {Joe, 30, null, null}]"));
}
@@ -202,7 +203,7 @@ public class EnumerablesTest {
@Test public void testThetaFullJoinBothEmpty() {
assertThat(
EnumerableDefaults.nestedLoopJoin(EMPS.take(0), DEPTS.take(0),
EQUAL_DEPTNO,
- EMP_DEPT_TO_STRING, true, true).toList().toString(),
+ EMP_DEPT_TO_STRING, JoinType.FULL).toList().toString(),
equalTo("[]"));
}
diff --git a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
index 06ea18c..a4f28af 100644
--- a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
@@ -1586,12 +1586,10 @@ public class RelBuilderTest {
builder.field(2, 0, "DEPTNO"),
builder.field(2, 1, "DEPTNO")))
.build();
- // AntiJoin implemented as LogicalCorrelate with joinType=anti
final String expected = ""
- + "LogicalCorrelate(correlation=[$cor0], joinType=[anti],
requiredColumns=[{0}])\n"
+ + "LogicalJoin(condition=[=($0, $10)], joinType=[anti])\n"
+ " LogicalTableScan(table=[[scott, DEPT]])\n"
- + " LogicalFilter(condition=[=($cor0.DEPTNO, $7)])\n"
- + " LogicalTableScan(table=[[scott, EMP]])\n";
+ + " LogicalTableScan(table=[[scott, EMP]])\n";
assertThat(root, hasTree(expected));
}
diff --git
a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
index 65eafe2..30dec83 100644
---
a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
+++
b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
@@ -25,6 +25,7 @@ import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.rel.rules.FilterCorrelateRule;
import org.apache.calcite.rel.rules.JoinToCorrelateRule;
import org.apache.calcite.runtime.Hook;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.test.CalciteAssert;
import org.apache.calcite.test.JdbcTest;
@@ -170,6 +171,12 @@ public class EnumerableCorrelateTest {
@Test public void antiJoinCorrelate() {
tester(false, new JdbcTest.HrSchema())
.query("?")
+ .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner -> {
+ // force the antijoin to run via EnumerableCorrelate
+ // instead of EnumerableHashJoin(ANTI)
+ planner.addRule(JoinToCorrelateRule.INSTANCE);
+ planner.removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE);
+ })
.withRel(
// Retrieve departments without employees. Equivalent SQL:
// SELECT d.deptno, d.name FROM depts d
@@ -181,15 +188,51 @@ public class EnumerableCorrelateTest {
builder.equals(
builder.field(2, "d", "deptno"),
builder.field(2, "e", "deptno")))
- .project(
- builder.field("deptno"),
- builder.field("name"))
- .build())
+ .project(
+ builder.field("deptno"),
+ builder.field("name"))
+ .build())
.returnsUnordered(
"deptno=30; name=Marketing",
"deptno=40; name=HR");
}
+ @Test public void nonEquiAntiJoinCorrelate() {
+ tester(false, new JdbcTest.HrSchema())
+ .query("?")
+ .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner -> {
+ // force the antijoin to run via EnumerableCorrelate
+ // instead of EnumerableNestedLoopJoin
+ planner.addRule(JoinToCorrelateRule.INSTANCE);
+ planner.removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE);
+ })
+ .withRel(
+ // Retrieve employees with the top salary in their department.
Equivalent SQL:
+ // SELECT e.name, e.salary FROM emps e
+ // WHERE NOT EXISTS (
+ // SELECT 1 FROM emps e2
+ // WHERE e.deptno = e2.deptno AND e2.salary > e.salary)
+ builder -> builder
+ .scan("s", "emps").as("e")
+ .scan("s", "emps").as("e2")
+ .antiJoin(
+ builder.and(
+ builder.equals(
+ builder.field(2, "e", "deptno"),
+ builder.field(2, "e2", "deptno")),
+ builder.call(
+ SqlStdOperatorTable.GREATER_THAN,
+ builder.field(2, "e2", "salary"),
+ builder.field(2, "e", "salary"))))
+ .project(
+ builder.field("name"),
+ builder.field("salary"))
+ .build())
+ .returnsUnordered(
+ "name=Theodore; salary=11500.0",
+ "name=Eric; salary=8000.0");
+ }
+
/** Test case for
* <a
href="https://issues.apache.org/jira/browse/CALCITE-2920">[CALCITE-2920]
* RelBuilder: new method to create an antijoin</a> */
@@ -197,6 +240,12 @@ public class EnumerableCorrelateTest {
final Integer salesDeptNo = 10;
tester(false, new JdbcTest.HrSchema())
.query("?")
+ .withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) planner -> {
+ // force the antijoin to run via EnumerableCorrelate
+ // instead of EnumerableHashJoin(ANTI)
+ planner.addRule(JoinToCorrelateRule.INSTANCE);
+ planner.removeRule(EnumerableRules.ENUMERABLE_JOIN_RULE);
+ })
.withRel(
// Retrieve employees from any department other than Sales (deptno
10) whose
// commission is different from any Sales employee commission.
Since there
diff --git
a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableJoinTest.java
b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableJoinTest.java
new file mode 100644
index 0000000..e800eb9
--- /dev/null
+++
b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableJoinTest.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.test.enumerable;
+
+import org.apache.calcite.adapter.java.ReflectiveSchema;
+import org.apache.calcite.config.CalciteConnectionProperty;
+import org.apache.calcite.config.Lex;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.test.JdbcTest;
+
+import org.junit.Test;
+
+/**
+ * Unit tests for the different Enumerable Join implementations.
+ */
+public class EnumerableJoinTest {
+
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-2968">[CALCITE-2968]
+ * New AntiJoin relational expression</a>. */
+ @Test public void equiAntiJoin() {
+ tester(false, new JdbcTest.HrSchema())
+ .query("?")
+ .withRel(
+ // Retrieve departments without employees. Equivalent SQL:
+ // SELECT d.deptno, d.name FROM depts d
+ // WHERE NOT EXISTS (SELECT 1 FROM emps e WHERE e.deptno =
d.deptno)
+ builder -> builder
+ .scan("s", "depts").as("d")
+ .scan("s", "emps").as("e")
+ .antiJoin(
+ builder.equals(
+ builder.field(2, "d", "deptno"),
+ builder.field(2, "e", "deptno")))
+ .project(
+ builder.field("deptno"),
+ builder.field("name"))
+ .build())
+ .returnsUnordered(
+ "deptno=30; name=Marketing",
+ "deptno=40; name=HR");
+ }
+
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-2968">[CALCITE-2968]
+ * New AntiJoin relational expression</a>. */
+ @Test public void nonEquiAntiJoin() {
+ tester(false, new JdbcTest.HrSchema())
+ .query("?")
+ .withRel(
+ // Retrieve employees with the top salary in their department.
Equivalent SQL:
+ // SELECT e.name, e.salary FROM emps e
+ // WHERE NOT EXISTS (
+ // SELECT 1 FROM emps e2
+ // WHERE e.deptno = e2.deptno AND e2.salary > e.salary)
+ builder -> builder
+ .scan("s", "emps").as("e")
+ .scan("s", "emps").as("e2")
+ .antiJoin(
+ builder.and(
+ builder.equals(
+ builder.field(2, "e", "deptno"),
+ builder.field(2, "e2", "deptno")),
+ builder.call(
+ SqlStdOperatorTable.GREATER_THAN,
+ builder.field(2, "e2", "salary"),
+ builder.field(2, "e", "salary"))))
+ .project(
+ builder.field("name"),
+ builder.field("salary"))
+ .build())
+ .returnsUnordered(
+ "name=Theodore; salary=11500.0",
+ "name=Eric; salary=8000.0");
+ }
+
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-2968">[CALCITE-2968]
+ * New AntiJoin relational expression</a>. */
+ @Test public void equiAntiJoinWithNullValues() {
+ final Integer salesDeptNo = 10;
+ tester(false, new JdbcTest.HrSchema())
+ .query("?")
+ .withRel(
+ // Retrieve employees from any department other than Sales (deptno
10) whose
+ // commission is different from any Sales employee commission.
Since there
+ // is a Sales employee with null commission, the goal is to
validate that antiJoin
+ // behaves as a NOT EXISTS (and returns results), and not as a NOT
IN (which would
+ // not return any result due to its null handling). Equivalent SQL:
+ // SELECT empOther.empid, empOther.name FROM emps empOther
+ // WHERE empOther.deptno <> 10 AND NOT EXISTS
+ // (SELECT 1 FROM emps empSales
+ // WHERE empSales.deptno = 10 AND empSales.commission =
empOther.commission)
+ builder -> builder
+ .scan("s", "emps").as("empOther")
+ .filter(
+ builder.notEquals(
+ builder.field("empOther", "deptno"),
+ builder.literal(salesDeptNo)))
+ .scan("s", "emps").as("empSales")
+ .filter(
+ builder.equals(
+ builder.field("empSales", "deptno"),
+ builder.literal(salesDeptNo)))
+ .antiJoin(
+ builder.equals(
+ builder.field(2, "empOther", "commission"),
+ builder.field(2, "empSales", "commission")))
+ .project(
+ builder.field("empid"),
+ builder.field("name"))
+ .build())
+ .returnsUnordered("empid=200; name=Eric");
+ }
+
+ private CalciteAssert.AssertThat tester(boolean forceDecorrelate,
+ Object schema) {
+ return CalciteAssert.that()
+ .with(CalciteConnectionProperty.LEX, Lex.JAVA)
+ .with(CalciteConnectionProperty.FORCE_DECORRELATE, forceDecorrelate)
+ .withSchema("s", new ReflectiveSchema(schema));
+ }
+}
+
+// End EnumerableJoinTest.java
diff --git
a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
index cbda1e8..80bd6a6 100644
--- a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
+++ b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
@@ -1285,19 +1285,47 @@ public abstract class EnumerableDefaults {
final Enumerable<TSource> outer, final Enumerable<TInner> inner,
final Function1<TSource, TKey> outerKeySelector,
final Function1<TInner, TKey> innerKeySelector) {
- return semiJoin(outer, inner, outerKeySelector, innerKeySelector, null);
+ return semiJoin(outer, inner, outerKeySelector, innerKeySelector, null,
false);
+ }
+
+ public static <TSource, TInner, TKey> Enumerable<TSource> semiJoin(
+ final Enumerable<TSource> outer, final Enumerable<TInner> inner,
+ final Function1<TSource, TKey> outerKeySelector,
+ final Function1<TInner, TKey> innerKeySelector,
+ final EqualityComparer<TKey> comparer) {
+ return semiJoin(outer, inner, outerKeySelector, innerKeySelector,
comparer, false);
}
/**
- * Returns elements of {@code outer} for which there is a member of
- * {@code inner} with a matching key. A specified
- * {@code EqualityComparer<TSource>} is used to compare keys.
+ * Returns elements of {@code outer} for which there is NOT a member of
+ * {@code inner} with a matching key.
*/
- public static <TSource, TInner, TKey> Enumerable<TSource> semiJoin(
+ public static <TSource, TInner, TKey> Enumerable<TSource> antiJoin(
+ final Enumerable<TSource> outer, final Enumerable<TInner> inner,
+ final Function1<TSource, TKey> outerKeySelector,
+ final Function1<TInner, TKey> innerKeySelector) {
+ return semiJoin(outer, inner, outerKeySelector, innerKeySelector, null,
true);
+ }
+
+ public static <TSource, TInner, TKey> Enumerable<TSource> antiJoin(
final Enumerable<TSource> outer, final Enumerable<TInner> inner,
final Function1<TSource, TKey> outerKeySelector,
final Function1<TInner, TKey> innerKeySelector,
final EqualityComparer<TKey> comparer) {
+ return semiJoin(outer, inner, outerKeySelector, innerKeySelector,
comparer, true);
+ }
+
+ /**
+ * Returns elements of {@code outer} for which there is (semi-join) / is not
(anti-semi-join)
+ * a member of {@code inner} with a matching key. A specified
+ * {@code EqualityComparer<TSource>} is used to compare keys.
+ */
+ private static <TSource, TInner, TKey> Enumerable<TSource> semiJoin(
+ final Enumerable<TSource> outer, final Enumerable<TInner> inner,
+ final Function1<TSource, TKey> outerKeySelector,
+ final Function1<TInner, TKey> innerKeySelector,
+ final EqualityComparer<TKey> comparer,
+ final boolean anti) {
return new AbstractEnumerable<TSource>() {
public Enumerator<TSource> enumerator() {
// CALCITE-2909 Delay the computation of the innerLookup until the
moment when we are sure
@@ -1307,10 +1335,11 @@ public abstract class EnumerableDefaults {
? inner.select(innerKeySelector).distinct()
: inner.select(innerKeySelector).distinct(comparer));
- return EnumerableDefaults.where(outer.enumerator(), v0 -> {
- final TKey key = outerKeySelector.apply(v0);
- return innerLookup.get().contains(key);
- });
+ final Predicate1<TSource> predicate = anti
+ ? v0 -> !innerLookup.get().contains(outerKeySelector.apply(v0))
+ : v0 -> innerLookup.get().contains(outerKeySelector.apply(v0));
+
+ return EnumerableDefaults.where(outer.enumerator(), predicate);
}
};
}
@@ -1322,9 +1351,10 @@ public abstract class EnumerableDefaults {
final Enumerable<TSource> outer, final Enumerable<TInner> inner,
final Predicate2<TSource, TInner> predicate,
Function2<TSource, TInner, TResult> resultSelector,
- final boolean generateNullsOnLeft,
- final boolean generateNullsOnRight) {
+ final JoinType joinType) {
// Building the result as a list is easy but hogs memory. We should
iterate.
+ final boolean generateNullsOnLeft = joinType.generatesNullsOnLeft();
+ final boolean generateNullsOnRight = joinType.generatesNullsOnRight();
final List<TResult> result = new ArrayList<>();
final Enumerator<TSource> lefts = outer.enumerator();
final List<TInner> rightList = inner.toList();
@@ -1343,13 +1373,17 @@ public abstract class EnumerableDefaults {
TInner right = rights.current();
if (predicate.apply(left, right)) {
++leftMatchCount;
- if (rightUnmatched != null) {
- rightUnmatched.remove(right);
+ if (joinType == JoinType.ANTI) {
+ break;
+ } else {
+ if (rightUnmatched != null) {
+ rightUnmatched.remove(right);
+ }
+ result.add(resultSelector.apply(left, right));
}
- result.add(resultSelector.apply(left, right));
}
}
- if (generateNullsOnRight && leftMatchCount == 0) {
+ if (leftMatchCount == 0 && (generateNullsOnRight || joinType ==
JoinType.ANTI)) {
result.add(resultSelector.apply(left, null));
}
}
diff --git a/linq4j/src/main/java/org/apache/calcite/linq4j/JoinType.java
b/linq4j/src/main/java/org/apache/calcite/linq4j/JoinType.java
new file mode 100644
index 0000000..4e3ebca
--- /dev/null
+++ b/linq4j/src/main/java/org/apache/calcite/linq4j/JoinType.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.linq4j;
+
+/**
+ * Enumeration of join types.
+ */
+public enum JoinType {
+ /**
+ * Inner join.
+ */
+ INNER,
+
+ /**
+ * Left-outer join.
+ */
+ LEFT,
+
+ /**
+ * Right-outer join.
+ */
+ RIGHT,
+
+ /**
+ * Full-outer join.
+ */
+ FULL,
+
+ /**
+ * Semi-join.
+ *
+ * <p>For example, {@code EMP semi-join DEPT} finds all {@code EMP} records
+ * that have a corresponding {@code DEPT} record:
+ *
+ * <blockquote><pre>
+ * SELECT * FROM EMP
+ * WHERE EXISTS (SELECT 1 FROM DEPT
+ * WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre>
+ * </blockquote>
+ */
+ SEMI,
+
+ /**
+ * Anti-join (also known as Anti-semi-join).
+ *
+ * <p>For example, {@code EMP anti-join DEPT} finds all {@code EMP} records
+ * that do not have a corresponding {@code DEPT} record:
+ *
+ * <blockquote><pre>
+ * SELECT * FROM EMP
+ * WHERE NOT EXISTS (SELECT 1 FROM DEPT
+ * WHERE DEPT.DEPTNO = EMP.DEPTNO)</pre>
+ * </blockquote>
+ */
+ ANTI;
+
+ /**
+ * Returns whether a join of this type may generate NULL values on the
+ * right-hand side.
+ */
+ public boolean generatesNullsOnRight() {
+ return (this == LEFT) || (this == FULL);
+ }
+
+ /**
+ * Returns whether a join of this type may generate NULL values on the
+ * left-hand side.
+ */
+ public boolean generatesNullsOnLeft() {
+ return (this == RIGHT) || (this == FULL);
+ }
+
+}
+
+// End JoinType.java
diff --git
a/linq4j/src/test/java/org/apache/calcite/linq4j/test/JoinPreserveOrderTest.java
b/linq4j/src/test/java/org/apache/calcite/linq4j/test/JoinPreserveOrderTest.java
index 2f93289..683574d 100644
---
a/linq4j/src/test/java/org/apache/calcite/linq4j/test/JoinPreserveOrderTest.java
+++
b/linq4j/src/test/java/org/apache/calcite/linq4j/test/JoinPreserveOrderTest.java
@@ -19,6 +19,7 @@ package org.apache.calcite.linq4j.test;
import org.apache.calcite.linq4j.CorrelateJoinType;
import org.apache.calcite.linq4j.Enumerable;
import org.apache.calcite.linq4j.EnumerableDefaults;
+import org.apache.calcite.linq4j.JoinType;
import org.apache.calcite.linq4j.Linq4j;
import org.apache.calcite.linq4j.function.Function1;
import org.apache.calcite.linq4j.function.Function2;
@@ -172,21 +173,21 @@ public final class JoinPreserveOrderTest {
}
@Test public void testLeftNestedLoopJoinPreservesOrderOfLeftInput() {
- testJoin(nestedLoopJoin(false, true), AssertOrder.PRESERVED,
AssertOrder.IGNORED);
+ testJoin(nestedLoopJoin(JoinType.LEFT), AssertOrder.PRESERVED,
AssertOrder.IGNORED);
}
@Test public void testRightNestedLoopJoinPreservesOrderOfLeftInput() {
Assume.assumeFalse(leftColumn.isNullsFirst);
- testJoin(nestedLoopJoin(true, false), AssertOrder.PRESERVED,
AssertOrder.IGNORED);
+ testJoin(nestedLoopJoin(JoinType.RIGHT), AssertOrder.PRESERVED,
AssertOrder.IGNORED);
}
@Test public void testFullNestedLoopJoinPreservesOrderOfLeftInput() {
Assume.assumeFalse(leftColumn.isNullsFirst);
- testJoin(nestedLoopJoin(true, true), AssertOrder.PRESERVED,
AssertOrder.IGNORED);
+ testJoin(nestedLoopJoin(JoinType.FULL), AssertOrder.PRESERVED,
AssertOrder.IGNORED);
}
@Test public void testInnerNestedLoopJoinPreservesOrderOfLeftInput() {
- testJoin(nestedLoopJoin(false, false), AssertOrder.PRESERVED,
AssertOrder.IGNORED);
+ testJoin(nestedLoopJoin(JoinType.INNER), AssertOrder.PRESERVED,
AssertOrder.IGNORED);
}
@@ -210,6 +211,10 @@ public final class JoinPreserveOrderTest {
testJoin(semiJoin(), AssertOrder.PRESERVED, AssertOrder.IGNORED);
}
+ @Test public void testAntiDefaultJoinPreservesOrderOfLeftInput() {
+ testJoin(antiJoin(), AssertOrder.PRESERVED, AssertOrder.IGNORED);
+ }
+
private void testJoin(
JoinAlgorithm<Employee, Department, List<Integer>> joinAlgorithm,
AssertOrder assertLeftInput, AssertOrder assertRightInput) {
@@ -256,9 +261,7 @@ public final class JoinPreserveOrderTest {
generateNullsOnRight);
}
- private JoinAlgorithm<Employee, Department, List<Integer>> nestedLoopJoin(
- boolean generateNullsOnLeft,
- boolean generateNullsOnRight) {
+ private JoinAlgorithm<Employee, Department, List<Integer>>
nestedLoopJoin(JoinType joinType) {
return (left, right) ->
EnumerableDefaults.nestedLoopJoin(
left,
@@ -266,8 +269,7 @@ public final class JoinPreserveOrderTest {
(emp, dept) ->
emp.deptno != null && dept.deptno != null &&
emp.deptno.equals(dept.deptno),
RESULT_SELECTOR,
- generateNullsOnLeft,
- generateNullsOnRight);
+ joinType);
}
private JoinAlgorithm<Employee, Department, List<Integer>> semiJoin() {
@@ -279,6 +281,15 @@ public final class JoinPreserveOrderTest {
dept -> dept.deptno).select(emp -> Arrays.asList(emp.eid, null));
}
+ private JoinAlgorithm<Employee, Department, List<Integer>> antiJoin() {
+ return (left, right) ->
+ EnumerableDefaults.antiJoin(
+ left,
+ right,
+ emp -> emp.deptno,
+ dept -> dept.deptno).select(emp -> Arrays.asList(emp.eid, null));
+ }
+
/**
* Different assertions for the result of the join.
*/
diff --git a/site/_docs/algebra.md b/site/_docs/algebra.md
index a66eed2..ba07531 100644
--- a/site/_docs/algebra.md
+++ b/site/_docs/algebra.md
@@ -323,7 +323,7 @@ return the `RelBuilder`.
| `sortExchange(distribution, collation)` | Creates a [SortExchange]({{
site.apiRoot }}/org/apache/calcite/rel/core/SortExchange.html).
| `join(joinType, expr...)`<br/>`join(joinType, exprList)`<br/>`join(joinType,
fieldName...)` | Creates a [Join]({{ site.apiRoot
}}/org/apache/calcite/rel/core/Join.html) of the two most recent relational
expressions.<br/><br/>The first form joins on a boolean expression (multiple
conditions are combined using AND).<br/><br/>The last form joins on named
fields; each side must have a field of each name.
| `semiJoin(expr)` | Creates a [Join]({{ site.apiRoot
}}/org/apache/calcite/rel/core/Join.html) with SEMI join type of the two most
recent relational expressions.
-| `antiJoin(expr)` | Creates an AntiJoin of the two most recent relational
expressions.
+| `antiJoin(expr)` | Creates a [Join]({{ site.apiRoot
}}/org/apache/calcite/rel/core/Join.html) with ANTI join type of the two most
recent relational expressions.
| `union(all [, n])` | Creates a [Union]({{ site.apiRoot
}}/org/apache/calcite/rel/core/Union.html) of the `n` (default two) most recent
relational expressions.
| `intersect(all [, n])` | Creates an [Intersect]({{ site.apiRoot
}}/org/apache/calcite/rel/core/Intersect.html) of the `n` (default two) most
recent relational expressions.
| `minus(all)` | Creates a [Minus]({{ site.apiRoot
}}/org/apache/calcite/rel/core/Minus.html) of the two most recent relational
expressions.