This is an automated email from the ASF dual-hosted git repository.
zhenchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 7dec961ab9 [CALCITE-7029] Support DPhyp to handle various join types
7dec961ab9 is described below
commit 7dec961ab9204e0dc9f849a7ae0c0393f5b8dadf
Author: Silun Dong <[email protected]>
AuthorDate: Wed Jun 25 13:43:36 2025 +0800
[CALCITE-7029] Support DPhyp to handle various join types
---
.../adapter/enumerable/RexToLixTranslator.java | 5 +
.../apache/calcite/plan/PlanTooComplexError.java | 26 ++
.../calcite/rel/rules/ConflictDetectionHelper.java | 222 +++++++++++++++
.../org/apache/calcite/rel/rules/ConflictRule.java | 47 ++++
.../java/org/apache/calcite/rel/rules/DpHyp.java | 202 ++++++++++++--
.../calcite/rel/rules/DphypJoinReorderRule.java | 32 +--
.../org/apache/calcite/rel/rules/HyperEdge.java | 302 +++++++++++++++++++--
.../org/apache/calcite/rel/rules/HyperGraph.java | 276 +++++++++++--------
.../calcite/rel/rules/JoinToHyperGraphRule.java | 163 ++++++-----
.../java/org/apache/calcite/rex/RexBiVisitor.java | 2 +
.../org/apache/calcite/rex/RexBiVisitorImpl.java | 4 +
.../org/apache/calcite/rex/RexInterpreter.java | 4 +
.../apache/calcite/rex/RexNodeAndFieldIndex.java | 99 +++++++
.../java/org/apache/calcite/rex/RexShuttle.java | 4 +
.../java/org/apache/calcite/rex/RexSimplify.java | 4 +
.../org/apache/calcite/rex/RexUnaryBiVisitor.java | 4 +
.../main/java/org/apache/calcite/rex/RexUtil.java | 11 +-
.../java/org/apache/calcite/rex/RexVisitor.java | 2 +
.../org/apache/calcite/rex/RexVisitorImpl.java | 4 +
.../apache/calcite/util/trace/CalciteTrace.java | 8 +
.../apache/calcite/test/DphypJoinReorderTest.java | 294 ++++++++++++++++++++
.../org/apache/calcite/test/RelOptRulesTest.java | 122 +++++++++
core/src/test/resources/log4j2-test.xml | 1 +
.../org/apache/calcite/test/RelOptRulesTest.xml | 223 ++++++++++++++-
24 files changed, 1794 insertions(+), 267 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexToLixTranslator.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexToLixTranslator.java
index a251d8c618..7e29e6e78b 100644
---
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexToLixTranslator.java
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexToLixTranslator.java
@@ -45,6 +45,7 @@
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexLocalRef;
import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexNodeAndFieldIndex;
import org.apache.calcite.rex.RexOver;
import org.apache.calcite.rex.RexPatternFieldRef;
import org.apache.calcite.rex.RexProgram;
@@ -1784,6 +1785,10 @@ private Result toInnerStorageType(Result result, Type
storageType) {
return new Result(isNullVariable, valueVariable);
}
+ @Override public Result visitNodeAndFieldIndex(RexNodeAndFieldIndex
nodeAndFieldIndex) {
+ throw new RuntimeException("cannot translate expression " +
nodeAndFieldIndex);
+ }
+
Expression checkNull(Expression expr) {
if (Primitive.flavor(expr.getType())
== Primitive.Flavor.PRIMITIVE) {
diff --git
a/core/src/main/java/org/apache/calcite/plan/PlanTooComplexError.java
b/core/src/main/java/org/apache/calcite/plan/PlanTooComplexError.java
new file mode 100644
index 0000000000..f9b416cd33
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/plan/PlanTooComplexError.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.util.ControlFlowException;
+
+/**
+ * Exception to catch when optimizing the plan produces a result that is too
complex,
+ * either at the Rel or at the Rex level.
+ */
+public class PlanTooComplexError extends ControlFlowException {
+}
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/ConflictDetectionHelper.java
b/core/src/main/java/org/apache/calcite/rel/rules/ConflictDetectionHelper.java
new file mode 100644
index 0000000000..575f6f5c45
--- /dev/null
+++
b/core/src/main/java/org/apache/calcite/rel/rules/ConflictDetectionHelper.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.rules;
+
+import org.apache.calcite.rel.core.JoinRelType;
+
+import com.google.common.collect.ImmutableMap;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Conflict detection algorithm based on CD-C. More details are in paper:
+ * <a href="https://dl.acm.org/doi/pdf/10.1145/2463676.2465314">
+ * On the correct and complete enumeration of the core search space</a>.
+ */
+public class ConflictDetectionHelper {
+
+ private ConflictDetectionHelper() {
+ }
+
+ /**
+ * Tables in the paper involve 7 types of join: cross product, inner join,
semi join, anti join,
+ * left join, full join, group join. Cross product and inner join have the
same result in the
+ * associative/left_asscom/right_asscom tables. The inner join with true
condition is equivalent
+ * to the cross product, and there is no group join in Calcite, so cross
product and group join
+ * do not appear in the following table.
+ */
+ private static final ImmutableMap<JoinRelType, Integer> INDEX_OF_TABLE =
+ ImmutableMap.<JoinRelType, Integer>builder()
+ .put(JoinRelType.INNER, 0).put(JoinRelType.SEMI,
1).put(JoinRelType.ANTI, 2)
+ .put(JoinRelType.LEFT, 3).put(JoinRelType.FULL, 4).build();
+
+ /**
+ * For {@code e1 joinA e2 joinB e3}, whether joinA and joinB are associative
is shown in the
+ * following table. In particular, for left/full-A and left-B, we assume
that the predicates of
+ * left-B are not null-rejecting on e2; for full-A and full-B, we assume
that the predicates of
+ * full-A and full-B are not null-rejecting on e2. See table.2 in paper.
+ */
+ private static final boolean[][] ASSOCIATIVE_TABLE = {
+ // inner-B semi-B anti-B left-B full-B
+ /* inner-A */ {true, true, true, true, false},
+ /* semi-A */ {false, false, false, false, false},
+ /* anti-A */ {false, false, false, false, false},
+ /* left-A */ {false, false, false, false, false},
+ /* full-A */ {false, false, false, false, false}};
+
+ /**
+ * For {@code e1 joinA e2 joinB e3}, whether joinA and joinB are left asscom
is shown in the
+ * following table. In particular, for left-A and full-B, we assume that the
predicates of left-A
+ * are not null-rejecting on e1; for full-A and left-B, we assume that the
predicates of left-B
+ * are not null-rejecting on e3; for full-A and full-B, we assume that the
predicates of full-A
+ * and full-B are not null-rejecting on e1. See table.3 in paper.
+ */
+ private static final boolean[][] LEFT_ASSCOM_TABLE = {
+ // inner-B semi-B anti-B left-B full-B
+ /* inner-A */ {true, true, true, true, false},
+ /* semi-A */ {true, true, true, true, false},
+ /* anti-A */ {true, true, true, true, false},
+ /* left-A */ {true, true, true, true, false},
+ /* full-A */ {false, false, false, false, false}};
+
+ /**
+ * For {@code e1 joinA e2 joinB e3}, whether joinA and joinB are right
asscom is shown in the
+ * following table. In particular, for full-A and full-B, we assume that the
predicates of full-A
+ * and full-B are not null-rejecting on e3. See table.3 in paper.
+ */
+ private static final boolean[][] RIGHT_ASSCOM_TABLE = {
+ // inner-B semi-B anti-B left-B full-B
+ /* inner-A */ {true, false, false, false, false},
+ /* semi-A */ {false, false, false, false, false},
+ /* anti-A */ {false, false, false, false, false},
+ /* left-A */ {false, false, false, false, false},
+ /* full-A */ {false, false, false, false, false}};
+
+ /**
+ * Make conflict rules for join operator based on CD-C. See Figure.11 in
paper.
+ *
+ * @param leftSubEdges left sub operators
+ * @param rightSubEdges right sub operators
+ * @param joinType current join operator
+ * @return conflict rule list
+ */
+ public static List<ConflictRule> makeConflictRules(
+ List<HyperEdge> leftSubEdges,
+ List<HyperEdge> rightSubEdges,
+ JoinRelType joinType) {
+ List<ConflictRule> conflictRules = new ArrayList<>();
+ // o_b
+ // / \
+ // ... ...
+ // /
+ // o_a
+ //
+ // calculate the conflict rules for join_operator_b based on the all join
operator
+ // (join_operator_a) on the left side of join_operator_b
+ for (HyperEdge leftSubEdge : leftSubEdges) {
+ if (!isAssociative(leftSubEdge.getJoinType(), joinType)) {
+ // if (o_a, o_b) does not satisfy the associative law
+ if (leftSubEdge.getLeftNodeUsedInPredicate() != 0) {
+ // if the predicate of o_a does not reference the table on its left
side,
+ // a less restrictive conflict rule will be added
+ conflictRules.add(
+ new ConflictRule(
+ leftSubEdge.getInitialRightNodeBits(),
+ leftSubEdge.getLeftNodeUsedInPredicate()));
+ } else {
+ conflictRules.add(
+ new ConflictRule(
+ leftSubEdge.getInitialRightNodeBits(),
+ leftSubEdge.getInitialLeftNodeBits()));
+ }
+ }
+ if (!isLeftAsscom(leftSubEdge.getJoinType(), joinType)) {
+ // if (o_a, o_b) does not satisfy the left-asscom law
+ if (leftSubEdge.getRightNodeUsedInPredicate() != 0) {
+ // if the predicate of o_a does not reference the table on its right
side,
+ // a less restrictive conflict rule will be added
+ conflictRules.add(
+ new ConflictRule(
+ leftSubEdge.getInitialLeftNodeBits(),
+ leftSubEdge.getRightNodeUsedInPredicate()));
+ } else {
+ conflictRules.add(
+ new ConflictRule(
+ leftSubEdge.getInitialLeftNodeBits(),
+ leftSubEdge.getInitialRightNodeBits()));
+ }
+ }
+ }
+
+ // o_b
+ // / \
+ // ... ...
+ // \
+ // o_a
+ //
+ // calculate the conflict rules for join_operator_b based on the all join
operator
+ // (join_operator_a) on the right side of join_operator_b
+ for (HyperEdge rightSubEdge : rightSubEdges) {
+ if (!isAssociative(joinType, rightSubEdge.getJoinType())) {
+ // if (o_b, o_a) does not satisfy the associative law
+ if (rightSubEdge.getRightNodeUsedInPredicate() != 0) {
+ // if the predicate of o_a does not reference the table on its right
side,
+ // a less restrictive conflict rule will be added
+ conflictRules.add(
+ new ConflictRule(
+ rightSubEdge.getInitialLeftNodeBits(),
+ rightSubEdge.getRightNodeUsedInPredicate()));
+ } else {
+ conflictRules.add(
+ new ConflictRule(
+ rightSubEdge.getInitialLeftNodeBits(),
+ rightSubEdge.getInitialRightNodeBits()));
+ }
+ }
+ if (!isRightAsscom(joinType, rightSubEdge.getJoinType())) {
+ // if (o_b, o_a) does not satisfy the right-asscom law
+ if (rightSubEdge.getLeftNodeUsedInPredicate() != 0) {
+ // if the predicate of o_a does not reference the table on its left
side,
+ // a less restrictive conflict rule will be added
+ conflictRules.add(
+ new ConflictRule(
+ rightSubEdge.getInitialRightNodeBits(),
+ rightSubEdge.getLeftNodeUsedInPredicate()));
+ } else {
+ conflictRules.add(
+ new ConflictRule(
+ rightSubEdge.getInitialRightNodeBits(),
+ rightSubEdge.getInitialLeftNodeBits()));
+ }
+ }
+ }
+ return conflictRules;
+ }
+
+ public static boolean isCommutative(JoinRelType operator) {
+ return operator == JoinRelType.INNER || operator == JoinRelType.FULL;
+ }
+
+ public static boolean isAssociative(JoinRelType operatorA, JoinRelType
operatorB) {
+ Integer indexA = INDEX_OF_TABLE.get(operatorA);
+ Integer indexB = INDEX_OF_TABLE.get(operatorB);
+ if (indexA == null || indexB == null) {
+ return false;
+ }
+ return ASSOCIATIVE_TABLE[indexA][indexB];
+ }
+
+ public static boolean isLeftAsscom(JoinRelType operatorA, JoinRelType
operatorB) {
+ Integer indexA = INDEX_OF_TABLE.get(operatorA);
+ Integer indexB = INDEX_OF_TABLE.get(operatorB);
+ if (indexA == null || indexB == null) {
+ return false;
+ }
+ return LEFT_ASSCOM_TABLE[indexA][indexB];
+ }
+
+ public static boolean isRightAsscom(JoinRelType operatorA, JoinRelType
operatorB) {
+ Integer indexA = INDEX_OF_TABLE.get(operatorA);
+ Integer indexB = INDEX_OF_TABLE.get(operatorB);
+ if (indexA == null || indexB == null) {
+ return false;
+ }
+ return RIGHT_ASSCOM_TABLE[indexA][indexB];
+ }
+
+}
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/ConflictRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/ConflictRule.java
new file mode 100644
index 0000000000..88d03665a1
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/rel/rules/ConflictRule.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.rules;
+
+/**
+ * A conflict rule (CR) is a pair of table sets denoted by T1 → T2, which
means that if T1
+ * intersects with the table under the join operator, T2 must be included in
this join. With every
+ * hyperedge in hypergraph, we associate a set of conflict rules.
+ *
+ * @see HyperEdge
+ * @see ConflictDetectionHelper
+ */
+public class ConflictRule {
+
+ // 'from' and 'to' are bitmaps where index refer to inputs of hypergraph
+ final long from;
+
+ final long to;
+
+ public ConflictRule(long from, long to) {
+ this.from = from;
+ this.to = to;
+ }
+
+ @Override public String toString() {
+ return "Table" + LongBitmap.printBitmap(from) + " -> Table"
+ + LongBitmap.printBitmap(to);
+ }
+
+ public ConflictRule shift(int offset) {
+ return new ConflictRule(from << offset, to << offset);
+ }
+}
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/DpHyp.java
b/core/src/main/java/org/apache/calcite/rel/rules/DpHyp.java
index 63c82b9efc..3fc16e1a66 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/DpHyp.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/DpHyp.java
@@ -17,17 +17,27 @@
package org.apache.calcite.rel.rules;
import org.apache.calcite.linq4j.function.Experimental;
+import org.apache.calcite.plan.PlanTooComplexError;
import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.trace.CalciteTrace;
+
+import com.google.common.collect.ImmutableList;
import org.checkerframework.checker.nullness.qual.Nullable;
+import org.slf4j.Logger;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
/**
* The core process of dphyp enumeration algorithm.
@@ -35,25 +45,34 @@
@Experimental
public class DpHyp {
- private final HyperGraph hyperGraph;
+ private static final Logger LOGGER =
CalciteTrace.getDpHypJoinReorderTracer();
+
+ protected final HyperGraph hyperGraph;
- private final HashMap<Long, RelNode> dpTable;
+ private final Map<Long, RelNode> dpTable;
- private final RelBuilder builder;
+ // a map from subgraph to the node list of the best join tree. The node list
records the node
+ // index and whether it is projected, which is used to convert the
RexNodeAndFieldIndex in
+ // hyperedge to the RexInputRef in join condition
+ private final Map<Long, ImmutableList<HyperGraph.NodeState>>
resultInputOrder;
+
+ protected final RelBuilder builder;
private final RelMetadataQuery mq;
- public DpHyp(HyperGraph hyperGraph, RelBuilder builder, RelMetadataQuery
relMetadataQuery) {
+ private final int bloat;
+
+ public DpHyp(HyperGraph hyperGraph, RelBuilder builder, RelMetadataQuery
relMetadataQuery,
+ int bloat) {
this.hyperGraph =
hyperGraph.copy(
hyperGraph.getTraitSet(),
hyperGraph.getInputs());
this.dpTable = new HashMap<>();
+ this.resultInputOrder = new HashMap<>();
this.builder = builder;
this.mq = relMetadataQuery;
- // make all field name unique and convert the
- // HyperEdge condition from RexInputRef to RexInputFieldName
- this.hyperGraph.convertHyperEdgeCond(builder);
+ this.bloat = bloat;
}
/**
@@ -67,16 +86,26 @@ public void startEnumerateJoin() {
int size = hyperGraph.getInputs().size();
for (int i = 0; i < size; i++) {
long singleNode = LongBitmap.newBitmap(i);
+ LOGGER.debug("Initialize the dp table. Node {{}} is:\n {}",
+ i,
+ RelOptUtil.toString(hyperGraph.getInput(i)));
dpTable.put(singleNode, hyperGraph.getInput(i));
+ resultInputOrder.put(
+ singleNode,
+ ImmutableList.of(new HyperGraph.NodeState(i, true)));
hyperGraph.initEdgeBitMap(singleNode);
}
- // start enumerating from the second to last
- for (int i = size - 2; i >= 0; i--) {
- long csg = LongBitmap.newBitmap(i);
- long forbidden = csg - 1;
- emitCsg(csg);
- enumerateCsgRec(csg, forbidden);
+ try {
+ // start enumerating from the second to last
+ for (int i = size - 2; i >= 0; i--) {
+ long csg = LongBitmap.newBitmap(i);
+ long forbidden = csg - 1;
+ emitCsg(csg);
+ enumerateCsgRec(csg, forbidden);
+ }
+ } catch (PlanTooComplexError e) {
+ LOGGER.error("The dp table is too large, and the enumeration ends
automatically.");
}
}
@@ -173,42 +202,140 @@ private void enumerateCmpRec(long csg, long cmp, long
forbidden) {
private void emitCsgCmp(long csg, long cmp, List<HyperEdge> edges) {
RelNode child1 = dpTable.get(csg);
RelNode child2 = dpTable.get(cmp);
- if (child1 == null || child2 == null) {
- throw new IllegalArgumentException(
- "csg and cmp were not enumerated in the previous dp process");
- }
+ ImmutableList<HyperGraph.NodeState> csgOrder = resultInputOrder.get(csg);
+ ImmutableList<HyperGraph.NodeState> cmpOrder = resultInputOrder.get(cmp);
+ assert child1 != null && child2 != null && csgOrder != null && cmpOrder !=
null;
+ assert Long.bitCount(csg) == csgOrder.size() && Long.bitCount(cmp) ==
cmpOrder.size();
JoinRelType joinType = hyperGraph.extractJoinType(edges);
if (joinType == null) {
return;
}
- RexNode joinCond1 = hyperGraph.extractJoinCond(child1, child2, edges);
+ // verify whether the subgraph is legal by using the conflict rules in
hyperedges
+ if (!hyperGraph.applicable(csg | cmp, edges)) {
+ return;
+ }
+
+ List<HyperGraph.NodeState> unionOrder = new ArrayList<>(csgOrder);
+ unionOrder.addAll(cmpOrder);
+ // build join condition from hyperedges. e.g.
+ // case.1
+ // csg: node0_projected [field0, field1], node1_projected [field0, field1],
+ //
+ // join
+ // / \
+ // node0 node1
+ //
+ // cmp: node2_projected [field0, field1]
+ // hyperedge1: node0.field0 = node2.field0
+ // hyperedge2: node1.field1 = node2.field1
+ // we will get join condition: ($0 = $4) and ($3 = $5)
+ //
+ // new_join(condition=[AND(($0 = $4), ($3 = $5))])
+ // / \
+ // join node2
+ // / \
+ // node0 node1
+ //
+ // case.2
+ // csg: node0_projected [field0, field1], node1_not_projected [field0,
field1],
+ //
+ // join(joinType=semi/anti)
+ // / \
+ // node0 node1
+ //
+ // cmp: node2_projected [field0, field1]
+ // hyperedge1: node0.field0 = node2.field0
+ // hyperedge2: node0.field1 = node2.field1
+ // we will get join condition: ($0 = $2) and ($1 = $3)
+ //
+ // new_join(condition=[AND(($0 = $2), ($1 = $3))])
+ // / \
+ // join(joinType=semi/anti) node2
+ // / \
+ // node0 node1
+ RexNode joinCond1 = hyperGraph.extractJoinCond(unionOrder,
csgOrder.size(), edges, joinType);
RelNode newPlan1 = builder
.push(child1)
.push(child2)
.join(joinType, joinCond1)
.build();
+ RelNode winPlan = newPlan1;
+ ImmutableList<HyperGraph.NodeState> winOrder =
ImmutableList.copyOf(unionOrder);
+ assert verifyDpResultRowType(newPlan1, unionOrder);
- // swap left and right
- RexNode joinCond2 = hyperGraph.extractJoinCond(child2, child1, edges);
- RelNode newPlan2 = builder
- .push(child2)
- .push(child1)
- .join(joinType, joinCond2)
- .build();
- RelNode winPlan = chooseBetterPlan(newPlan1, newPlan2);
+ if (ConflictDetectionHelper.isCommutative(joinType)) {
+ // swap left and right
+ unionOrder = new ArrayList<>(cmpOrder);
+ unionOrder.addAll(csgOrder);
+ RexNode joinCond2 = hyperGraph.extractJoinCond(unionOrder,
cmpOrder.size(), edges, joinType);
+ RelNode newPlan2 = builder
+ .push(child2)
+ .push(child1)
+ .join(joinType, joinCond2)
+ .build();
+ winPlan = chooseBetterPlan(winPlan, newPlan2);
+ assert verifyDpResultRowType(newPlan2, unionOrder);
+ if (winPlan.equals(newPlan2)) {
+ winOrder = ImmutableList.copyOf(unionOrder);
+ }
+ }
+ LOGGER.debug("Found set {} and {}, connected by condition {}. [cost={},
rows={}]",
+ LongBitmap.printBitmap(csg),
+ LongBitmap.printBitmap(cmp),
+ RexUtil.composeConjunction(
+ builder.getRexBuilder(),
+ edges.stream()
+ .map(edge ->
edge.getCondition()).collect(Collectors.toList())),
+ mq.getCumulativeCost(winPlan),
+ mq.getRowCount(winPlan));
RelNode oriPlan = dpTable.get(csg | cmp);
+ boolean dpTableUpdated = true;
if (oriPlan != null) {
winPlan = chooseBetterPlan(winPlan, oriPlan);
+ if (winPlan.equals(oriPlan)) {
+ winOrder = resultInputOrder.get(csg | cmp);
+ dpTableUpdated = false;
+ }
+ } else {
+ // when enumerating a new connected subgraph, check whether the dpTable
size is too large
+ if (dpTable.size() > bloat) {
+ throw new PlanTooComplexError();
+ }
+ }
+
+ assert winOrder != null;
+ if (dpTableUpdated) {
+ LOGGER.debug("Dp table is updated. The better plan for subgraph {} now
is:\n {}",
+ LongBitmap.printBitmap(csg | cmp),
+ RelOptUtil.toString(winPlan));
}
dpTable.put(csg | cmp, winPlan);
+ resultInputOrder.put(csg | cmp, winOrder);
}
public @Nullable RelNode getBestPlan() {
int size = hyperGraph.getInputs().size();
long wholeGraph = LongBitmap.newBitmapBetween(0, size);
- return dpTable.get(wholeGraph);
+ RelNode orderedJoin = dpTable.get(wholeGraph);
+ if (orderedJoin == null) {
+ LOGGER.error("The optimal plan was not generated because the enumeration
ended prematurely");
+ return null;
+ }
+ LOGGER.debug("Enumeration completed. The best plan is:\n {}",
RelOptUtil.toString(orderedJoin));
+ ImmutableList<HyperGraph.NodeState> resultOrder =
resultInputOrder.get(wholeGraph);
+ assert resultOrder != null && resultOrder.size() == size;
+
+ // ensure that the fields produced by the reordered join are in the same
order as in the
+ // original plan.
+ List<RexNode> projects =
+ hyperGraph.restoreProjectionOrder(resultOrder,
+ orderedJoin.getRowType().getFieldList());
+ return builder
+ .push(orderedJoin)
+ .project(projects)
+ .build();
}
private RelNode chooseBetterPlan(RelNode plan1, RelNode plan2) {
@@ -223,4 +350,25 @@ private RelNode chooseBetterPlan(RelNode plan1, RelNode
plan2) {
}
}
+ /**
+ * Verify that the row type of plans generated by dphyp is equivalent to the
origin plan.
+ *
+ * @param plan plan generated by dphyp
+ * @param resultOrder node status ordered list
+ * @return true if the plan row type equivalent to the hyperGraph row type
+ */
+ protected boolean verifyDpResultRowType(RelNode plan,
List<HyperGraph.NodeState> resultOrder) {
+ // only verify the whole graph
+ if (resultOrder.size() != hyperGraph.getInputs().size()) {
+ return true;
+ }
+ List<RexNode> projects =
+ hyperGraph.restoreProjectionOrder(resultOrder,
+ plan.getRowType().getFieldList());
+ RelNode resultNode = builder
+ .push(plan)
+ .project(projects)
+ .build();
+ return RelOptUtil.areRowTypesEqual(resultNode.getRowType(),
hyperGraph.getRowType(), false);
+ }
}
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/DphypJoinReorderRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/DphypJoinReorderRule.java
index 8b4a517722..9ca893d120 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/DphypJoinReorderRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/DphypJoinReorderRule.java
@@ -21,15 +21,10 @@
import org.apache.calcite.plan.RelRule;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Join;
-import org.apache.calcite.rex.RexBuilder;
-import org.apache.calcite.rex.RexNode;
import org.apache.calcite.tools.RelBuilder;
import org.immutables.value.Value;
-import java.util.ArrayList;
-import java.util.List;
-
/** Rule that re-orders a {@link Join} tree using dphyp algorithm.
*
* @see CoreRules#HYPER_GRAPH_OPTIMIZE */
@@ -48,27 +43,13 @@ protected DphypJoinReorderRule(Config config) {
RelBuilder relBuilder = call.builder();
// enumerate by Dphyp
- DpHyp dpHyp = new DpHyp(hyperGraph, relBuilder, call.getMetadataQuery());
+ DpHyp dpHyp = new DpHyp(hyperGraph, relBuilder, call.getMetadataQuery(),
config.bloat());
dpHyp.startEnumerateJoin();
RelNode orderedJoin = dpHyp.getBestPlan();
if (orderedJoin == null) {
return;
}
-
- // permute field to origin order
- List<String> oriNames = hyperGraph.getRowType().getFieldNames();
- List<String> newNames = orderedJoin.getRowType().getFieldNames();
- List<RexNode> projects = new ArrayList<>();
- RexBuilder rexBuilder = hyperGraph.getCluster().getRexBuilder();
- for (String oriName : oriNames) {
- projects.add(rexBuilder.makeInputRef(orderedJoin,
newNames.indexOf(oriName)));
- }
-
- RelNode result = call.builder()
- .push(orderedJoin)
- .project(projects)
- .build();
- call.transformTo(result);
+ call.transformTo(orderedJoin);
}
/** Rule configuration. */
@@ -81,5 +62,14 @@ public interface Config extends RelRule.Config {
@Override default DphypJoinReorderRule toRule() {
return new DphypJoinReorderRule(this);
}
+
+ /**
+ * Limit to the size growth of the dpTable allowed during enumerating.
+ * If the graph with n inputs is fully connected and any combination is
legal, the size of
+ * dpTable is 2^n-1. The default value assumes n=7.
+ */
+ default int bloat() {
+ return 127;
+ }
}
}
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/HyperEdge.java
b/core/src/main/java/org/apache/calcite/rel/rules/HyperEdge.java
index f9f85a43e6..5e9a8fabbf 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/HyperEdge.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/HyperEdge.java
@@ -18,7 +18,16 @@
import org.apache.calcite.linq4j.function.Experimental;
import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexNodeAndFieldIndex;
+import org.apache.calcite.rex.RexShuttle;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
/**
* Edge in HyperGraph, that represents a join predicate.
@@ -26,41 +35,82 @@
@Experimental
public class HyperEdge {
- private final long leftNodeBits;
+ private final TotalEligibilitySet tes;
- private final long rightNodeBits;
+ private final ImmutableList<ConflictRule> conflictRules;
- private final JoinRelType joinType;
+ // equivalent to the T(left(o)) ∩ F_T(o) in CD-C paper
+ private final long leftNodeUsedInPredicate;
+
+ // equivalent to the T(right(o)) ∩ F_T(o) in CD-C paper
+ private final long rightNodeUsedInPredicate;
+
+ // bitmaps of the nodes on the left side of join operator, short for
T(left(o))
+ private final long initialLeftNodeBits;
- private final boolean isSimple;
+ // bitmaps of the nodes on the right side of join operator, short for
T(right(o))
+ private final long initialRightNodeBits;
+ private final JoinRelType joinType;
+
+ // converted from join condition, using RexNodeAndFieldIndex instead of
RexInputRef
private final RexNode condition;
- public HyperEdge(long leftNodeBits, long rightNodeBits, JoinRelType
joinType, RexNode condition) {
- this.leftNodeBits = leftNodeBits;
- this.rightNodeBits = rightNodeBits;
+ public HyperEdge(
+ TotalEligibilitySet tes,
+ List<ConflictRule> conflictRules,
+ long leftNodeUsedInPredicate,
+ long rightNodeUsedInPredicate,
+ long initialLeftNodeBits,
+ long initialRightNodeBits,
+ JoinRelType joinType,
+ RexNode condition) {
+ this.tes = tes;
+ this.conflictRules = ImmutableList.copyOf(conflictRules);
+ this.leftNodeUsedInPredicate = leftNodeUsedInPredicate;
+ this.rightNodeUsedInPredicate = rightNodeUsedInPredicate;
+ this.initialLeftNodeBits = initialLeftNodeBits;
+ this.initialRightNodeBits = initialRightNodeBits;
this.joinType = joinType;
this.condition = condition;
- boolean leftSimple = (leftNodeBits & (leftNodeBits - 1)) == 0;
- boolean rightSimple = (rightNodeBits & (rightNodeBits - 1)) == 0;
- this.isSimple = leftSimple && rightSimple;
}
- public long getNodeBitmap() {
- return leftNodeBits | rightNodeBits;
+ // use tes to generate hyperedge endpoint
+ public long getEndpoint() {
+ return tes.totalSet;
}
- public long getLeftNodeBitmap() {
- return leftNodeBits;
+ public long getLeftEndpoint() {
+ return tes.leftSet;
}
- public long getRightNodeBitmap() {
- return rightNodeBits;
+ public long getRightEndpoint() {
+ return tes.rightSet;
+ }
+
+ public long getLeftNodeUsedInPredicate() {
+ return leftNodeUsedInPredicate;
+ }
+
+ public long getRightNodeUsedInPredicate() {
+ return rightNodeUsedInPredicate;
+ }
+
+ public List<ConflictRule> getConflictRules() {
+ return conflictRules;
+ }
+
+ public long getInitialLeftNodeBits() {
+ return initialLeftNodeBits;
+ }
+
+ public long getInitialRightNodeBits() {
+ return initialRightNodeBits;
}
// hyperedge (u, v) is simple if |u| = |v| = 1
public boolean isSimple() {
- return isSimple;
+ return tes.isSimple();
}
public JoinRelType getJoinType() {
@@ -73,10 +123,224 @@ public RexNode getCondition() {
@Override public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append(LongBitmap.printBitmap(leftNodeBits))
+ sb.append(LongBitmap.printBitmap(getLeftEndpoint()))
.append("——[").append(joinType).append(",
").append(condition).append("]——")
- .append(LongBitmap.printBitmap(rightNodeBits));
+ .append(LongBitmap.printBitmap(getRightEndpoint()));
return sb.toString();
}
+ // It is convenient for the accept method in the HyperGraph class to call
+ public HyperEdge accept(RexShuttle shuttle) {
+ RexNode shuttleCondition = condition.accept(shuttle);
+ return new HyperEdge(
+ tes,
+ conflictRules,
+ leftNodeUsedInPredicate,
+ rightNodeUsedInPredicate,
+ initialLeftNodeBits,
+ initialRightNodeBits,
+ joinType,
+ shuttleCondition);
+ }
+
+ /**
+ * When {@link JoinToHyperGraphRule} constructs a hypergraph , if the right
child of Join is a
+ * HyperGraph class, all bitmaps related to inputs in this right child need
to be shifted
+ * according to the number of inputs in the left child of Join.
+ *
+ * @param nodeOffset number of inputs in the left child
+ * @return HyperEdge produced after shifting
+ */
+ public HyperEdge adjustNodeBit(int nodeOffset) {
+ RexShuttle shiftNodeIndexShuttle = new RexShuttle() {
+ @Override public RexNode visitNodeAndFieldIndex(RexNodeAndFieldIndex
nodeAndFieldIndex) {
+ return new RexNodeAndFieldIndex(
+ nodeAndFieldIndex.getNodeIndex() << nodeOffset,
+ nodeAndFieldIndex.getFieldIndex(),
+ nodeAndFieldIndex.getName(),
+ nodeAndFieldIndex.getType());
+ }
+ };
+ RexNode shiftedCondition = condition.accept(shiftNodeIndexShuttle);
+ List<ConflictRule> shiftedConflictRules = new ArrayList<>();
+ for (ConflictRule conflictRule : conflictRules) {
+ shiftedConflictRules.add(conflictRule.shift(nodeOffset));
+ }
+ return new HyperEdge(
+ tes.shift(nodeOffset),
+ shiftedConflictRules,
+ leftNodeUsedInPredicate << nodeOffset,
+ rightNodeUsedInPredicate << nodeOffset,
+ initialLeftNodeBits << nodeOffset,
+ initialRightNodeBits << nodeOffset,
+ joinType,
+ shiftedCondition);
+ }
+
+ /**
+ * Build hyperedges corresponding to a join operator. The construction of a
HyperEdge includes
+ * total eligibility set, conflict rules, node-related bitmaps, predicate
expressions (RexInputRef
+ * is replaced by RexNodeAndFieldIndex) and join type.
+ *
+ * @param inputRefToNodeIndexMap a map from inputRef to node index
+ * @param inputRefToFieldIndexMap a map from inputRef to field index
+ * @param joinConds join conditions
+ * @param conflictRules all conflict rules
+ * @param joinType join type
+ * @param leftNodeCount number of inputs in the left child of Join
+ * @param nodeCount number of inputs of Join
+ * @return HyperEdge list
+ */
+ public static List<HyperEdge> createHyperEdgesFromJoinConds(
+ Map<Integer, Integer> inputRefToNodeIndexMap,
+ Map<Integer, Integer> inputRefToFieldIndexMap,
+ List<RexNode> joinConds,
+ List<ConflictRule> conflictRules,
+ JoinRelType joinType,
+ int leftNodeCount,
+ int nodeCount) {
+ // for join operator o, generate bitmaps of the relations on the
left/right sides
+ long initialLeftNodeBits = LongBitmap.newBitmapBetween(0, leftNodeCount);
+ long initialRightNodeBits = LongBitmap.newBitmapBetween(leftNodeCount,
nodeCount);
+
+ List<HyperEdge> edges = new ArrayList<>();
+ for (RexNode joinCond : joinConds) {
+ List<Integer> leftRefs = new ArrayList<>();
+ List<Integer> rightRefs = new ArrayList<>();
+ RexShuttle inputRef2NodeAndFieldIndexShuttle = new RexShuttle() {
+ @Override public RexNode visitInputRef(RexInputRef inputRef) {
+ Integer nodeIndex = inputRefToNodeIndexMap.get(inputRef.getIndex());
+ assert nodeIndex != null;
+ // collect left input index and right input index
+ if (nodeIndex < leftNodeCount) {
+ leftRefs.add(nodeIndex);
+ } else {
+ rightRefs.add(nodeIndex);
+ }
+ Integer fieldIndex =
inputRefToFieldIndexMap.get(inputRef.getIndex());
+ assert fieldIndex != null;
+
+ return new RexNodeAndFieldIndex(
+ nodeIndex,
+ fieldIndex,
+ inputRef.getName(),
+ inputRef.getType());
+ }
+ };
+ RexNode hyperEdgeCondition =
joinCond.accept(inputRef2NodeAndFieldIndexShuttle);
+
+ long leftNodeUsedInPredicate = LongBitmap.newBitmapFromList(leftRefs);
+ long rightNodeUsedInPredicate = LongBitmap.newBitmapFromList(rightRefs);
+ TotalEligibilitySet tes;
+ List<ConflictRule> conflictRulesAfterAbsorb = new ArrayList<>();
+ if (leftNodeUsedInPredicate == 0 || rightNodeUsedInPredicate == 0) {
+ // when dealing with cross products or degenerate predicates,
construct total eligibility
+ // set using all nodes on the left and right sides (as the endpoint of
the hyperedge). This
+ // behavior constrains the search space and make sure a correct plan.
+ // See section 6.2 in CD-C paper
+
+ // TODO: In some rare case, cross product might be beneficially
introduced. Perhaps
+ // selectively add appropriate endpoints for the cross product
+ tes = new TotalEligibilitySet(initialLeftNodeBits,
initialRightNodeBits);
+ } else {
+ // Initialize total eligibility set with the nodes referenced by the
predicate
+ tes = new TotalEligibilitySet(leftNodeUsedInPredicate,
rightNodeUsedInPredicate);
+ // expand tes with conflict rules. This not only eliminates some
conflict rules, but also
+ // improves the efficiency of enumeration
+ tes =
+ tes.absorbFromConflictRules(
+ conflictRules,
+ conflictRulesAfterAbsorb,
+ initialLeftNodeBits,
+ initialRightNodeBits);
+ }
+ edges.add(
+ new HyperEdge(
+ tes,
+ conflictRulesAfterAbsorb,
+ leftNodeUsedInPredicate,
+ rightNodeUsedInPredicate,
+ initialLeftNodeBits,
+ initialRightNodeBits,
+ joinType,
+ hyperEdgeCondition));
+ }
+ return edges;
+ }
+
+ /**
+ * Total eligibility set (tes for short) is a set of relations (inputs of
HyperGraph). Each
+ * hyperedge has a tes. When building a join operator through a hyperedge,
the inputs
+ * (csg-cmp pair) must contain the tes of the hyperedge. For a hyperedge,
tes is its endpoint. Tes
+ * is initialized by the relations referenced in the predicate.
+ */
+ private static class TotalEligibilitySet {
+
+ final long leftSet;
+
+ final long rightSet;
+
+ final long totalSet;
+
+ // as the endpoint of the hyperedge, it indicates whether the hyperedge is
simple
+ final boolean isSimple;
+
+ TotalEligibilitySet(long leftSet, long rightSet) {
+ assert !LongBitmap.isOverlap(leftSet, rightSet);
+ this.leftSet = leftSet;
+ this.rightSet = rightSet;
+ this.totalSet = leftSet | rightSet;
+ boolean leftSimple = (leftSet & (leftSet - 1)) == 0;
+ boolean rightSimple = (rightSet & (rightSet - 1)) == 0;
+ this.isSimple = leftSimple && rightSimple;
+ }
+
+ TotalEligibilitySet shift(int offset) {
+ return new TotalEligibilitySet(leftSet << offset, rightSet << offset);
+ }
+
+ boolean isSimple() {
+ return isSimple;
+ }
+
+ /**
+ * For a conflict rule T1 → T2, if T1 ∩ tes != empty set, we can add T2 to
tes due to the
+ * nature of conflict rule. Further, if T2 ⊆ tes, we can safely eliminate
this conflict rule.
+ * This behavior has two benefits:
+ *
+ * <p> 1. decrease the number of conflicting rules
+ *
+ * <p> 2. larger tes means larger endpoints of the hyperedge, and
enlarging endpoints can
+ * decreases the number of csg-cmp-pairs when dphyp enumerating.
+ *
+ * <p> See section 5.5 and 6.1 in paper.
+ *
+ * @param conflictRules initial conflict rules
+ * @param conflictRulesAfterAbsorb conflict rules after absorbing
+ * @param initialLeftNodeBits relations on the left side of the join
operator
+ * @param initialRightNodeBits relations on the right side of the
join operator
+ * @return total eligibility set after absorbing conflict rules
+ */
+ TotalEligibilitySet absorbFromConflictRules(
+ List<ConflictRule> conflictRules,
+ List<ConflictRule> conflictRulesAfterAbsorb,
+ long initialLeftNodeBits,
+ long initialRightNodeBits) {
+ long totalSetAbsorb = totalSet;
+ for (ConflictRule conflictRule : conflictRules) {
+ if (LongBitmap.isOverlap(totalSetAbsorb, conflictRule.from)) {
+ totalSetAbsorb |= conflictRule.to;
+ continue;
+ }
+ conflictRulesAfterAbsorb.add(conflictRule);
+ }
+ assert LongBitmap.isSubSet(totalSetAbsorb, initialLeftNodeBits |
initialRightNodeBits);
+ // right endpoints = tes ∩ T(right(o)),
+ // left endpoints = tes \ right endpoints, see section 6.1 in paper
+ long rightSetAbsorb = totalSetAbsorb & initialRightNodeBits;
+ long leftSetAbsorb = totalSetAbsorb & ~rightSetAbsorb;
+ return new TotalEligibilitySet(leftSetAbsorb, rightSetAbsorb);
+ }
+ }
+
}
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/HyperGraph.java
b/core/src/main/java/org/apache/calcite/rel/rules/HyperGraph.java
index d8fe0fd2ff..3e785a19bd 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/HyperGraph.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/HyperGraph.java
@@ -26,17 +26,14 @@
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
-import org.apache.calcite.rex.RexBiVisitor;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexNodeAndFieldIndex;
import org.apache.calcite.rex.RexShuttle;
import org.apache.calcite.rex.RexUtil;
-import org.apache.calcite.rex.RexVariable;
-import org.apache.calcite.rex.RexVisitor;
-import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
-import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import org.checkerframework.checker.nullness.qual.Nullable;
@@ -45,6 +42,7 @@
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.stream.Collectors;
import static com.google.common.base.Preconditions.checkArgument;
@@ -57,6 +55,10 @@ public class HyperGraph extends AbstractRelNode {
private final List<RelNode> inputs;
+ // unprojected input (from the right child of the semi/anti join) bitmap.
Used to convert
+ // RexInputRef to RexNodeAndFieldIndex when building hypergraph
+ private final long notProjectInputs;
+
@SuppressWarnings("HidingField")
private final RelDataType rowType;
@@ -82,10 +84,12 @@ public class HyperGraph extends AbstractRelNode {
protected HyperGraph(RelOptCluster cluster,
RelTraitSet traitSet,
List<RelNode> inputs,
+ long notProjectInputs,
List<HyperEdge> edges,
RelDataType rowType) {
super(cluster, traitSet);
this.inputs = Lists.newArrayList(inputs);
+ this.notProjectInputs = notProjectInputs;
this.edges = Lists.newArrayList(edges);
this.rowType = rowType;
ImmutableBitSet.Builder bitSetBuilder = ImmutableBitSet.builder();
@@ -104,6 +108,7 @@ protected HyperGraph(RelOptCluster cluster,
protected HyperGraph(RelOptCluster cluster,
RelTraitSet traitSet,
List<RelNode> inputs,
+ long notProjectInputs,
List<HyperEdge> edges,
RelDataType rowType,
ImmutableBitSet complexEdgesBitmap,
@@ -113,6 +118,7 @@ protected HyperGraph(RelOptCluster cluster,
HashMap<Long, BitSet> overlapEdgesMap) {
super(cluster, traitSet);
this.inputs = Lists.newArrayList(inputs);
+ this.notProjectInputs = notProjectInputs;
this.edges = Lists.newArrayList(edges);
this.rowType = rowType;
this.complexEdgesBitmap = complexEdgesBitmap;
@@ -127,6 +133,7 @@ protected HyperGraph(RelOptCluster cluster,
getCluster(),
traitSet,
inputs,
+ notProjectInputs,
edges,
rowType,
complexEdgesBitmap,
@@ -165,11 +172,7 @@ protected HyperGraph(RelOptCluster cluster,
List<HyperEdge> shuttleEdges = new ArrayList<>();
for (HyperEdge edge : edges) {
HyperEdge shuttleEdge =
- new HyperEdge(
- edge.getLeftNodeBitmap(),
- edge.getRightNodeBitmap(),
- edge.getJoinType(),
- shuttle.apply(edge.getCondition()));
+ edge.accept(shuttle);
shuttleEdges.add(shuttleEdge);
}
@@ -177,6 +180,7 @@ protected HyperGraph(RelOptCluster cluster,
getCluster(),
traitSet,
inputs,
+ notProjectInputs,
shuttleEdges,
rowType,
complexEdgesBitmap,
@@ -192,13 +196,17 @@ public List<HyperEdge> getEdges() {
return edges;
}
+ public long getNotProjectInputs() {
+ return notProjectInputs;
+ }
+
public long getNeighborBitmap(long csg, long forbidden) {
long neighbors = 0L;
List<HyperEdge> simpleEdges = simpleEdgesMap.getOrDefault(csg, new
BitSet()).stream()
.mapToObj(edges::get)
.collect(Collectors.toList());
for (HyperEdge edge : simpleEdges) {
- neighbors |= edge.getNodeBitmap();
+ neighbors |= edge.getEndpoint();
}
forbidden = forbidden | csg;
@@ -209,8 +217,8 @@ public long getNeighborBitmap(long csg, long forbidden) {
.mapToObj(edges::get)
.collect(Collectors.toList());
for (HyperEdge edge : complexEdges) {
- long leftBitmap = edge.getLeftNodeBitmap();
- long rightBitmap = edge.getRightNodeBitmap();
+ long leftBitmap = edge.getLeftEndpoint();
+ long rightBitmap = edge.getRightEndpoint();
if (LongBitmap.isSubSet(leftBitmap, csg) &&
!LongBitmap.isOverlap(rightBitmap, forbidden)) {
neighbors |= Long.lowestOneBit(rightBitmap);
} else if (LongBitmap.isSubSet(rightBitmap, csg)
@@ -248,7 +256,7 @@ public List<HyperEdge> connectCsgCmp(long csg, long cmp) {
mayMissedEdges.stream()
.forEach(index -> {
HyperEdge edge = edges.get(index);
- if (LongBitmap.isSubSet(edge.getNodeBitmap(), csg | cmp)) {
+ if (LongBitmap.isSubSet(edge.getEndpoint(), csg | cmp)) {
connectedEdgesBitmap.set(index);
}
});
@@ -344,18 +352,18 @@ public void updateEdgesForUnion(long subset1, long
subset2) {
}
private static boolean isAccurateEdge(HyperEdge edge, long subset) {
- boolean isLeftEnd = LongBitmap.isSubSet(edge.getLeftNodeBitmap(), subset)
- && !LongBitmap.isOverlap(edge.getRightNodeBitmap(), subset);
- boolean isRightEnd = LongBitmap.isSubSet(edge.getRightNodeBitmap(), subset)
- && !LongBitmap.isOverlap(edge.getLeftNodeBitmap(), subset);
+ boolean isLeftEnd = LongBitmap.isSubSet(edge.getLeftEndpoint(), subset)
+ && !LongBitmap.isOverlap(edge.getRightEndpoint(), subset);
+ boolean isRightEnd = LongBitmap.isSubSet(edge.getRightEndpoint(), subset)
+ && !LongBitmap.isOverlap(edge.getLeftEndpoint(), subset);
return isLeftEnd || isRightEnd;
}
private static boolean isOverlapEdge(HyperEdge edge, long subset) {
- boolean isLeftEnd = LongBitmap.isOverlap(edge.getLeftNodeBitmap(), subset)
- && !LongBitmap.isOverlap(edge.getRightNodeBitmap(), subset);
- boolean isRightEnd = LongBitmap.isOverlap(edge.getRightNodeBitmap(),
subset)
- && !LongBitmap.isOverlap(edge.getLeftNodeBitmap(), subset);
+ boolean isLeftEnd = LongBitmap.isOverlap(edge.getLeftEndpoint(), subset)
+ && !LongBitmap.isOverlap(edge.getRightEndpoint(), subset);
+ boolean isRightEnd = LongBitmap.isOverlap(edge.getRightEndpoint(), subset)
+ && !LongBitmap.isOverlap(edge.getLeftEndpoint(), subset);
return isLeftEnd || isRightEnd;
}
@@ -369,124 +377,158 @@ private static boolean isOverlapEdge(HyperEdge edge,
long subset) {
return joinType;
}
- public RexNode extractJoinCond(RelNode left, RelNode right, List<HyperEdge>
edges) {
- List<RexNode> joinConds = new ArrayList<>();
- List<RelDataTypeField> fieldList = new
ArrayList<>(left.getRowType().getFieldList());
- fieldList.addAll(right.getRowType().getFieldList());
-
- List<String> names = new ArrayList<>(left.getRowType().getFieldNames());
- names.addAll(right.getRowType().getFieldNames());
-
- // convert the HyperEdge's condition from RexInputFieldName to RexInputRef
- RexShuttle inputName2InputRefShuttle = new RexShuttle() {
- @Override protected List<RexNode> visitList(
- List<? extends RexNode> exprs,
- boolean @Nullable [] update) {
- ImmutableList.Builder<RexNode> clonedOperands =
ImmutableList.builder();
- for (RexNode operand : exprs) {
- RexNode clonedOperand;
- if (operand instanceof RexInputFieldName) {
- int index = names.indexOf(((RexInputFieldName) operand).getName());
- clonedOperand = new RexInputRef(index,
fieldList.get(index).getType());
- } else {
- clonedOperand = operand.accept(this);
- }
- if ((clonedOperand != operand) && (update != null)) {
- update[0] = true;
- }
- clonedOperands.add(clonedOperand);
+ /**
+ * Check whether the csg-cmp pair is applicable through the conflict rule.
+ *
+ * @param csgcmp csg-cmp pair
+ * @param edges hyper edges with conflict rules
+ * @return true if the csg-cmp pair is applicable
+ */
+ public boolean applicable(long csgcmp, List<HyperEdge> edges) {
+ for (HyperEdge edge : edges) {
+ if (!LongBitmap.isSubSet(edge.getEndpoint(), csgcmp)) {
+ return false;
+ }
+ for (ConflictRule conflictRule : edge.getConflictRules()) {
+ if (LongBitmap.isOverlap(csgcmp, conflictRule.from)
+ && !LongBitmap.isSubSet(conflictRule.to, csgcmp)) {
+ // for conflict rule T1 → T2, if T1 ∩ csgcmp != empty set, then T2
must be
+ // included in csgcmp
+ return false;
}
- return clonedOperands.build();
}
- };
-
- for (HyperEdge edge : edges) {
- RexNode inputRefCond =
edge.getCondition().accept(inputName2InputRefShuttle);
- joinConds.add(inputRefCond);
}
- return RexUtil.composeConjunction(left.getCluster().getRexBuilder(),
joinConds);
+ return true;
}
/**
- * Before starting enumeration, add Project on every input, make all field
name unique.
- * Convert the HyperEdge condition from RexInputRef to RexInputFieldName
+ * Build an RexNode expression for the predicate corresponding to a set of
hyperedges.
+ *
+ * @param inputOrder node status ordered list for current csg-cmp
+ * @param leftCount number of tables in left child
+ * @param edges hyper edges
+ * @param joinType join type
+ * @return join condition
*/
- public void convertHyperEdgeCond(RelBuilder builder) {
- int fieldIndex = 0;
- List<RelDataTypeField> fieldList = rowType.getFieldList();
- for (int nodeIndex = 0; nodeIndex < inputs.size(); nodeIndex++) {
- RelNode input = inputs.get(nodeIndex);
- List<RexNode> projects = new ArrayList<>();
- List<String> names = new ArrayList<>();
- for (int i = 0; i < input.getRowType().getFieldCount(); i++) {
- projects.add(
- new RexInputRef(
- i,
- fieldList.get(fieldIndex).getType()));
- names.add(fieldList.get(fieldIndex).getName());
- fieldIndex++;
+ public RexNode extractJoinCond(
+ List<NodeState> inputOrder,
+ int leftCount,
+ List<HyperEdge> edges,
+ JoinRelType joinType) {
+ // a map from (node index, field index) to input ref
+ Map<Pair<Integer, Integer>, Integer> nodeAndFieldIndex2InputRefMap = new
HashMap<>();
+ int inputRef = 0;
+ for (int i = 0; i < inputOrder.size(); i++) {
+ NodeState nodeState = inputOrder.get(i);
+ if (nodeState.projected) {
+ int fieldCount =
inputs.get(nodeState.nodeIndex).getRowType().getFieldCount();
+ for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
+ nodeAndFieldIndex2InputRefMap.put(
+ Pair.of(nodeState.nodeIndex, fieldIndex), inputRef++);
+ }
}
-
- builder.push(input)
- .project(projects, names, true);
- replaceInput(nodeIndex, builder.build());
}
- RexShuttle inputRef2inputNameShuttle = new RexShuttle() {
- @Override public RexNode visitInputRef(RexInputRef inputRef) {
- int index = inputRef.getIndex();
- return new RexInputFieldName(
- fieldList.get(index).getName(),
- fieldList.get(index).getType());
+ RexShuttle nodeAndFieldIndex2InputRefShuttle = new RexShuttle() {
+ @Override public RexNode visitNodeAndFieldIndex(RexNodeAndFieldIndex
nodeAndFieldIndex) {
+ Integer inputRef =
+ nodeAndFieldIndex2InputRefMap.get(
+ Pair.of(nodeAndFieldIndex.getNodeIndex(),
nodeAndFieldIndex.getFieldIndex()));
+ assert inputRef != null;
+ return new RexInputRef(inputRef, nodeAndFieldIndex.getType());
}
};
+ List<RexNode> joinConds = new ArrayList<>();
+ for (HyperEdge edge : edges) {
+ RexNode inputRefCond =
edge.getCondition().accept(nodeAndFieldIndex2InputRefShuttle);
+ joinConds.add(inputRefCond);
+ }
- for (int i = 0; i < edges.size(); i++) {
- HyperEdge edge = edges.get(i);
- RexNode convertCond =
edge.getCondition().accept(inputRef2inputNameShuttle);
- HyperEdge convertEdge =
- new HyperEdge(
- edge.getLeftNodeBitmap(),
- edge.getRightNodeBitmap(),
- edge.getJoinType(),
- convertCond);
- edges.set(i, convertEdge);
+ // update the node status of subgraph(csg-cmp pair) according to the join
type.
+ if (!joinType.projectsRight()) {
+ for (int i = leftCount; i < inputOrder.size(); i++) {
+ int nodeIndex = inputOrder.get(i).nodeIndex;
+ inputOrder.set(i, new NodeState(nodeIndex, false));
+ }
}
+ return RexUtil.composeConjunction(getCluster().getRexBuilder(), joinConds);
}
/**
- * Adjusting RexInputRef in enumeration process is too complicated,
- * so use unique name replace input ref.
- * Before starting enumeration, convert RexInputRef to RexInputFieldName.
- * When connect csgcmp to Join, convert RexInputFieldName to RexInputRef.
+ * Ensure that the fields produced by the reordered join are in the same
order as in the original
+ * plan.
+ *
+ * @param resultOrder node status ordered list for the final result
+ * @param rowTypeList rowType of the final result
+ * @return list of RexInputRef
*/
- private static class RexInputFieldName extends RexVariable {
-
- RexInputFieldName(final String fieldName, final RelDataType type) {
- super(fieldName, type);
- }
-
- @Override public <R> R accept(RexVisitor<R> visitor) {
- throw new UnsupportedOperationException();
+ public List<RexNode> restoreProjectionOrder(
+ List<NodeState> resultOrder,
+ List<RelDataTypeField> rowTypeList) {
+ // a map from node index to the number of fields projected before this node
+ Map<Integer, Integer> nodeIndexToFieldCountBefore = new HashMap<>();
+ int projectedFieldCount = 0;
+ for (NodeState nodeState : resultOrder) {
+ nodeIndexToFieldCountBefore.put(nodeState.nodeIndex,
projectedFieldCount);
+ if (nodeState.projected) {
+ projectedFieldCount +=
inputs.get(nodeState.nodeIndex).getRowType().getFieldCount();
+ }
}
-
- @Override public <R, P> R accept(RexBiVisitor<R, P> visitor, P arg) {
- throw new UnsupportedOperationException();
+ // origin inputs order is [n0, n1, n2]
+ // n0_projected [field0, field1]
+ // n1_projected [field0, field1]
+ // n2_projected [field0, field1]
+ // rowType is [n0.field0, n0.field1, n1.field0, n1.field1, n2.field0,
n2.field1]
+ // assume that the original plan tree is:
+ // inner join
+ // / \
+ // inner join n2
+ // / \
+ // n0 n1
+ //
+ // if the node order of the final result is [n2, n1, n0], rowType is
+ // [n2.field0, n2.field1, n1.field0, n1.field1, n0.field0, n0.field1]
+ // assume that the reordered plan tree is:
+ // inner join
+ // / \
+ // inner join n0
+ // / \
+ // n2 n1
+ //
+ // we need a projection like [$4, $5, $2, $3, $0, $1]
+ List<RexNode> projects = new ArrayList<>();
+ for (int inputIndex = 0; inputIndex < inputs.size(); inputIndex++) {
+ if (LongBitmap.isOverlap(notProjectInputs,
LongBitmap.newBitmap(inputIndex))) {
+ continue;
+ }
+ int fieldCount = inputs.get(inputIndex).getRowType().getFieldCount();
+ for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
+ Integer fieldOffset = nodeIndexToFieldCountBefore.get(inputIndex);
+ if (fieldOffset == null) {
+ throw new AssertionError(
+ "The result order looses the " + inputIndex + "-th input");
+ }
+ int inputRef = fieldIndex + fieldOffset;
+ projects.add(
+ new RexInputRef(inputRef, rowTypeList.get(inputRef).getType()));
+ }
}
+ return projects;
+ }
- @Override public boolean equals(@Nullable Object obj) {
- return this == obj
- || obj instanceof RexInputFieldName
- && name == ((RexInputFieldName) obj).name
- && type.equals(((RexInputFieldName) obj).type);
- }
+ /**
+ * Record the projection state of vertices in the hypergraph during
enumerating. It is used to
+ * calculate the index of RexInputRef when building join conditions from
hyperedges.
+ */
+ public static class NodeState {
+ final int nodeIndex;
- @Override public int hashCode() {
- return name.hashCode();
- }
+ final boolean projected;
- @Override public String toString() {
- return name;
+ NodeState(int nodeIndex, boolean projected) {
+ this.nodeIndex = nodeIndex;
+ this.projected = projected;
}
}
+
}
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/JoinToHyperGraphRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/JoinToHyperGraphRule.java
index 3ed41e4d6e..eb18ae0354 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/JoinToHyperGraphRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/JoinToHyperGraphRule.java
@@ -24,16 +24,14 @@
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.logical.LogicalJoin;
-import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.rex.RexUtil;
-import org.apache.calcite.rex.RexVisitorImpl;
import org.immutables.value.Value;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.stream.Collectors;
/** Rule that flattens a tree of {@link LogicalJoin}s
@@ -55,119 +53,148 @@ protected JoinToHyperGraphRule(Config config) {
final Join origJoin = call.rel(0);
final RelNode left = call.rel(1);
final RelNode right = call.rel(2);
- if (origJoin.getJoinType() != JoinRelType.INNER) {
+ if (!supportedJoinType(origJoin.getJoinType())) {
return;
}
- HyperGraph result;
+ long notProjectInputs = 0;
List<RelNode> inputs = new ArrayList<>();
- List<HyperEdge> edges = new ArrayList<>();
+ List<HyperEdge> leftSubEdges = new ArrayList<>();
+ List<HyperEdge> rightSubEdges = new ArrayList<>();
List<RexNode> joinConds = new ArrayList<>();
- if (origJoin.getCondition().isAlwaysTrue()) {
+ if (origJoin.getCondition().isAlwaysTrue() || origJoin.getJoinType() !=
JoinRelType.INNER) {
joinConds.add(origJoin.getCondition());
} else {
+ // inner join can be rewritten as cross product + filter. Due to this
property, inner join
+ // condition can be split by conjunction and freely combine with other
inner join conditions.
+ // So we split inner join condition by conjunction and build multiple
hyperedges, which is
+ // helpful to obtain the potential optimal plan
RelOptUtil.decomposeConjunction(origJoin.getCondition(), joinConds);
}
- // when right is HyperGraph, need shift the leftNodeBit, rightNodeBit,
condition of HyperEdge
+ // when right is HyperGraph, need shift fields related to bitmap of
HyperEdge
int leftNodeCount;
- int leftFieldCount = left.getRowType().getFieldCount();
if (left instanceof HyperGraph && right instanceof HyperGraph) {
+ //
+ // LogicalJoin
+ // / \
+ // HyperGraph HyperGraph (need to shift bitmap)
+ //
leftNodeCount = left.getInputs().size();
inputs.addAll(left.getInputs());
inputs.addAll(right.getInputs());
- edges.addAll(((HyperGraph) left).getEdges());
- edges.addAll(
+ notProjectInputs |= ((HyperGraph) left).getNotProjectInputs();
+ notProjectInputs |= ((HyperGraph) right).getNotProjectInputs() <<
leftNodeCount;
+
+ leftSubEdges.addAll(((HyperGraph) left).getEdges());
+ rightSubEdges.addAll(
((HyperGraph) right).getEdges().stream()
- .map(hyperEdge -> adjustNodeBit(hyperEdge, leftNodeCount,
leftFieldCount))
+ .map(hyperEdge -> hyperEdge.adjustNodeBit(leftNodeCount))
.collect(Collectors.toList()));
} else if (left instanceof HyperGraph) {
+ //
+ // LogicalJoin
+ // / \
+ // HyperGraph RelNode
+ //
leftNodeCount = left.getInputs().size();
inputs.addAll(left.getInputs());
inputs.add(right);
- edges.addAll(((HyperGraph) left).getEdges());
+ notProjectInputs |= ((HyperGraph) left).getNotProjectInputs();
+
+ leftSubEdges.addAll(((HyperGraph) left).getEdges());
} else if (right instanceof HyperGraph) {
+ //
+ // LogicalJoin
+ // / \
+ // RelNode HyperGraph (need to shift bitmap)
+ //
leftNodeCount = 1;
inputs.add(left);
inputs.addAll(right.getInputs());
- edges.addAll(
+ notProjectInputs |= ((HyperGraph) right).getNotProjectInputs() <<
leftNodeCount;
+
+ rightSubEdges.addAll(
((HyperGraph) right).getEdges().stream()
- .map(hyperEdge -> adjustNodeBit(hyperEdge, leftNodeCount,
leftFieldCount))
+ .map(hyperEdge -> hyperEdge.adjustNodeBit(leftNodeCount))
.collect(Collectors.toList()));
} else {
+ //
+ // LogicalJoin
+ // / \
+ // RelNode RelNode
+ //
leftNodeCount = 1;
inputs.add(left);
inputs.add(right);
}
+ // the number of inputs cannot exceed 64 with long type as the bitmap of
hypergraph inputs.
+ if (inputs.size() > 64) {
+ return;
+ }
- HashMap<Integer, Integer> fieldIndexToNodeIndexMap = new HashMap<>();
- int fieldCount = 0;
- for (int i = 0; i < inputs.size(); i++) {
- for (int j = 0; j < inputs.get(i).getRowType().getFieldCount(); j++) {
- fieldIndexToNodeIndexMap.put(fieldCount++, i);
+ // calculate conflict rules
+ List<ConflictRule> conflictRules =
+ ConflictDetectionHelper.makeConflictRules(
+ leftSubEdges,
+ rightSubEdges,
+ origJoin.getJoinType());
+
+ // build the map from input ref to node index and field index. The
RexInputRef in the join
+ // condition will be converted to RexNodeAndFieldIndex and stored in the
hyperedge. The reason
+ // for the replacement is that the order of inputs will be constantly
adjusted during
+ // enumeration, and maintaining the correct RexInputRef is too complicated.
+ Map<Integer, Integer> inputRefToNodeIndexMap = new HashMap<>();
+ Map<Integer, Integer> inputRefToFieldIndexMap = new HashMap<>();
+ int inputRef = 0;
+ for (int nodeIndex = 0; nodeIndex < inputs.size(); nodeIndex++) {
+ if (LongBitmap.isOverlap(notProjectInputs,
LongBitmap.newBitmap(nodeIndex))) {
+ continue;
}
- }
- // convert current join condition to hyper edge condition
- for (RexNode joinCond : joinConds) {
- long leftNodeBits;
- long rightNodeBits;
- List<Integer> leftRefs = new ArrayList<>();
- List<Integer> rightRefs = new ArrayList<>();
-
- RexVisitorImpl visitor = new RexVisitorImpl<Void>(true) {
- @Override public Void visitInputRef(RexInputRef inputRef) {
- Integer nodeIndex =
fieldIndexToNodeIndexMap.get(inputRef.getIndex());
- if (nodeIndex == null) {
- throw new IllegalArgumentException("RexInputRef refers a dummy
field: "
- + inputRef + ", rowType is: " + origJoin.getRowType());
- }
- if (nodeIndex < leftNodeCount) {
- leftRefs.add(nodeIndex);
- } else {
- rightRefs.add(nodeIndex);
- }
- return null;
- }
- };
- joinCond.accept(visitor);
-
- // when cartesian product, make it to complex hyper edge
- if (leftRefs.isEmpty() || rightRefs.isEmpty()) {
- leftNodeBits = LongBitmap.newBitmapBetween(0, leftNodeCount);
- rightNodeBits = LongBitmap.newBitmapBetween(leftNodeCount,
inputs.size());
- } else {
- leftNodeBits = LongBitmap.newBitmapFromList(leftRefs);
- rightNodeBits = LongBitmap.newBitmapFromList(rightRefs);
+ int fieldCount = inputs.get(nodeIndex).getRowType().getFieldCount();
+ for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
+ inputRefToNodeIndexMap.put(inputRef, nodeIndex);
+ inputRefToFieldIndexMap.put(inputRef, fieldIndex);
+ inputRef++;
}
- edges.add(
- new HyperEdge(
- leftNodeBits,
- rightNodeBits,
- origJoin.getJoinType(),
- joinCond));
}
- result =
+
+ List<HyperEdge> edges =
+ HyperEdge.createHyperEdgesFromJoinConds(
+ inputRefToNodeIndexMap,
+ inputRefToFieldIndexMap,
+ joinConds,
+ conflictRules,
+ origJoin.getJoinType(),
+ leftNodeCount,
+ inputs.size());
+ edges.addAll(leftSubEdges);
+ edges.addAll(rightSubEdges);
+
+ if (!origJoin.getJoinType().projectsRight()) {
+ notProjectInputs |= LongBitmap.newBitmapBetween(leftNodeCount,
inputs.size());
+ }
+ HyperGraph hyperGraph =
new HyperGraph(
origJoin.getCluster(),
origJoin.getTraitSet(),
inputs,
+ notProjectInputs,
edges,
origJoin.getRowType());
-
- call.transformTo(result);
+ call.transformTo(hyperGraph);
}
- private static HyperEdge adjustNodeBit(HyperEdge hyperEdge, int nodeOffset,
int fieldOffset) {
- RexNode newCondition = RexUtil.shift(hyperEdge.getCondition(),
fieldOffset);
- return new HyperEdge(
- hyperEdge.getLeftNodeBitmap() << nodeOffset,
- hyperEdge.getRightNodeBitmap() << nodeOffset,
- hyperEdge.getJoinType(),
- newCondition);
+ private static boolean supportedJoinType(JoinRelType joinType) {
+ return joinType == JoinRelType.INNER
+ || joinType == JoinRelType.LEFT
+ || joinType == JoinRelType.FULL
+ || joinType == JoinRelType.SEMI
+ || joinType == JoinRelType.ANTI;
}
/** Rule configuration. */
diff --git a/core/src/main/java/org/apache/calcite/rex/RexBiVisitor.java
b/core/src/main/java/org/apache/calcite/rex/RexBiVisitor.java
index 11d93c9864..995b2e9727 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexBiVisitor.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexBiVisitor.java
@@ -59,6 +59,8 @@ public interface RexBiVisitor<R, P> {
R visitLambda(RexLambda lambda, P arg);
+ R visitNodeAndFieldIndex(RexNodeAndFieldIndex nodeAndFieldIndex, P arg);
+
/** Visits a list and writes the results to another list. */
default void visitList(Iterable<? extends RexNode> exprs, P arg,
List<R> out) {
diff --git a/core/src/main/java/org/apache/calcite/rex/RexBiVisitorImpl.java
b/core/src/main/java/org/apache/calcite/rex/RexBiVisitorImpl.java
index b5a4a2a86f..5a11bf771e 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexBiVisitorImpl.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexBiVisitorImpl.java
@@ -122,4 +122,8 @@ protected RexBiVisitorImpl(boolean deep) {
@Override public R visitLambda(RexLambda lambda, P arg) {
return null;
}
+
+ @Override public R visitNodeAndFieldIndex(RexNodeAndFieldIndex
nodeAndFieldIndex, P arg) {
+ return null;
+ }
}
diff --git a/core/src/main/java/org/apache/calcite/rex/RexInterpreter.java
b/core/src/main/java/org/apache/calcite/rex/RexInterpreter.java
index 38d43c6efb..9e2bd0e3a7 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexInterpreter.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexInterpreter.java
@@ -159,6 +159,10 @@ private Comparable getOrUnbound(RexNode e) {
throw unbound(lambdaRef);
}
+ @Override public Comparable visitNodeAndFieldIndex(RexNodeAndFieldIndex
nodeAndFieldIndex) {
+ throw unbound(nodeAndFieldIndex);
+ }
+
@Override public Comparable visitCall(RexCall call) {
final List<Comparable> values = visitList(call.operands);
switch (call.getKind()) {
diff --git
a/core/src/main/java/org/apache/calcite/rex/RexNodeAndFieldIndex.java
b/core/src/main/java/org/apache/calcite/rex/RexNodeAndFieldIndex.java
new file mode 100644
index 0000000000..b6b5e6c72d
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/rex/RexNodeAndFieldIndex.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rex;
+
+import org.apache.calcite.rel.type.RelDataType;
+
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+import java.util.Objects;
+
+/**
+ * RexNodeAndFieldIndex has the same meaning as {@link RexInputRef}, they are
both reference a
+ * field of an input relational expression. The difference is that,
RexNodeAndFieldIndex uses the
+ * input index and the relative position of field in input rowType.
+ *
+ * <p> For example, if the inputs to a join are
+ *
+ * <ul>
+ * <li>Input #0: EMP(EMPNO, ENAME, DEPTNO) and</li>
+ * <li>Input #1: DEPT(DEPTNO AS DEPTNO2, DNAME)</li>
+ * </ul>
+ *
+ * <p>then the fields are:
+ *
+ * <ul>
+ * <li>Node #0, Field #0: EMPNO</li>
+ * <li>Node #0, Field #1: ENAME</li>
+ * <li>Node #0, Field #2: DEPTNO (from EMP)</li>
+ * <li>Node #1, Field #0: DEPTNO2 (from DEPT)</li>
+ * <li>Node #1, Field #1: DNAME</li>
+ * </ul>
+ *
+ * <p> If in some cases, the inputs order of a relation is frequently adjusted
and it is difficult
+ * to maintain the correct RexInputRef, you can consider temporarily replacing
RexInputRef with
+ * RexNodeAndFieldIndex.
+ *
+ * @see org.apache.calcite.rel.rules.JoinToHyperGraphRule
+ * @see org.apache.calcite.rel.rules.HyperGraph#extractJoinCond
+ * @see org.apache.calcite.rel.rules.HyperEdge#createHyperEdgesFromJoinConds
+ */
+public class RexNodeAndFieldIndex extends RexVariable {
+
+ // the index of the node in relation inputs
+ private final int nodeIndex;
+
+ // the index of the field in the rowType of the node
+ private final int fieldIndex;
+
+ public RexNodeAndFieldIndex(int nodeIndex, int fieldIndex, String name,
RelDataType type) {
+ super(name, type);
+ this.nodeIndex = nodeIndex;
+ this.fieldIndex = fieldIndex;
+ }
+
+ public int getNodeIndex() {
+ return nodeIndex;
+ }
+
+ public int getFieldIndex() {
+ return fieldIndex;
+ }
+
+ @Override public <R> R accept(RexVisitor<R> visitor) {
+ return visitor.visitNodeAndFieldIndex(this);
+ }
+
+ @Override public <R, P> R accept(RexBiVisitor<R, P> visitor, P arg) {
+ return visitor.visitNodeAndFieldIndex(this, arg);
+ }
+
+ @Override public boolean equals(@Nullable Object obj) {
+ return this == obj
+ || obj instanceof RexNodeAndFieldIndex
+ && nodeIndex == ((RexNodeAndFieldIndex) obj).nodeIndex
+ && fieldIndex == ((RexNodeAndFieldIndex) obj).fieldIndex;
+ }
+
+ @Override public int hashCode() {
+ return Objects.hash(nodeIndex, fieldIndex);
+ }
+
+ @Override public String toString() {
+ return "node(" + nodeIndex + ")_field(" + fieldIndex + ")";
+ }
+}
diff --git a/core/src/main/java/org/apache/calcite/rex/RexShuttle.java
b/core/src/main/java/org/apache/calcite/rex/RexShuttle.java
index a5b0371a7e..2f6b4ec576 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexShuttle.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexShuttle.java
@@ -244,6 +244,10 @@ protected List<RexFieldCollation> visitFieldCollations(
return lambdaRef;
}
+ @Override public RexNode visitNodeAndFieldIndex(RexNodeAndFieldIndex
nodeAndFieldIndex) {
+ return nodeAndFieldIndex;
+ }
+
/**
* Applies this shuttle to each expression in a list.
*
diff --git a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
index bffcf60a1b..4710bb1f24 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexSimplify.java
@@ -1462,6 +1462,10 @@ enum SafeRexVisitor implements RexVisitor<Boolean> {
@Override public Boolean visitLambdaRef(RexLambdaRef lambdaRef) {
return true;
}
+
+ @Override public Boolean visitNodeAndFieldIndex(RexNodeAndFieldIndex
nodeAndFieldIndex) {
+ return true;
+ }
}
/** Analyzes a given {@link RexNode} and decides whenever it is safe to
diff --git a/core/src/main/java/org/apache/calcite/rex/RexUnaryBiVisitor.java
b/core/src/main/java/org/apache/calcite/rex/RexUnaryBiVisitor.java
index 227325d347..e59e3b06d9 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexUnaryBiVisitor.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexUnaryBiVisitor.java
@@ -87,4 +87,8 @@ protected R end(RexNode e, R arg) {
super.visitSubQuery(subQuery, arg);
return end(subQuery, arg);
}
+
+ @Override public R visitNodeAndFieldIndex(RexNodeAndFieldIndex
nodeAndFieldIndex, R arg) {
+ return end(nodeAndFieldIndex, arg);
+ }
}
diff --git a/core/src/main/java/org/apache/calcite/rex/RexUtil.java
b/core/src/main/java/org/apache/calcite/rex/RexUtil.java
index 94fa185555..cfa0f25b3c 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexUtil.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexUtil.java
@@ -18,6 +18,7 @@
import org.apache.calcite.DataContexts;
import org.apache.calcite.linq4j.function.Predicate1;
+import org.apache.calcite.plan.PlanTooComplexError;
import org.apache.calcite.plan.RelOptPredicateList;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelCollation;
@@ -776,6 +777,10 @@ static class ConstantFinder implements RexVisitor<Boolean>
{
@Override public Boolean visitLambdaRef(RexLambdaRef lambdaRef) {
return false;
}
+
+ @Override public Boolean visitNodeAndFieldIndex(RexNodeAndFieldIndex
nodeAndFieldIndex) {
+ return false;
+ }
}
/**
@@ -2970,12 +2975,6 @@ private static class RexShiftShuttle extends RexShuttle {
}
}
- /**
- * Exception to catch when optimizing the plan produces a result that is too
complex,
- * either at the Rel or at the Rex level.
- */
- private static class PlanTooComplexError extends ControlFlowException { }
-
/** Visitor that throws {@link org.apache.calcite.util.Util.FoundOne} if
* applied to an expression that contains a {@link RexCorrelVariable}. */
private static class CorrelationFinder extends RexVisitorImpl<Void> {
diff --git a/core/src/main/java/org/apache/calcite/rex/RexVisitor.java
b/core/src/main/java/org/apache/calcite/rex/RexVisitor.java
index f39e41e35d..01a3ff920f 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexVisitor.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexVisitor.java
@@ -61,6 +61,8 @@ public interface RexVisitor<R> {
R visitLambdaRef(RexLambdaRef lambdaRef);
+ R visitNodeAndFieldIndex(RexNodeAndFieldIndex nodeAndFieldIndex);
+
/** Visits a list and writes the results to another list. */
default void visitList(Iterable<? extends RexNode> exprs, List<R> out) {
for (RexNode expr : exprs) {
diff --git a/core/src/main/java/org/apache/calcite/rex/RexVisitorImpl.java
b/core/src/main/java/org/apache/calcite/rex/RexVisitorImpl.java
index a19f6b2cfd..6ebbe92679 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexVisitorImpl.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexVisitorImpl.java
@@ -126,6 +126,10 @@ protected RexVisitorImpl(boolean deep) {
return null;
}
+ @Override public R visitNodeAndFieldIndex(RexNodeAndFieldIndex
nodeAndFieldIndex) {
+ return null;
+ }
+
/**
* Visits an array of expressions, returning the logical 'and' of their
* results.
diff --git a/core/src/main/java/org/apache/calcite/util/trace/CalciteTrace.java
b/core/src/main/java/org/apache/calcite/util/trace/CalciteTrace.java
index 6a50dac5a6..75c6a3dfb8 100644
--- a/core/src/main/java/org/apache/calcite/util/trace/CalciteTrace.java
+++ b/core/src/main/java/org/apache/calcite/util/trace/CalciteTrace.java
@@ -22,6 +22,7 @@
import org.apache.calcite.plan.RelImplementor;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.prepare.Prepare;
+import org.apache.calcite.rel.rules.DpHyp;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.slf4j.Logger;
@@ -140,6 +141,13 @@ public static Logger getTestTracer(Class<?> testClass) {
return LoggerFactory.getLogger(testClass.getName());
}
+ /**
+ * The tracer reports enumeration process of DpHyp.
+ */
+ public static Logger getDpHypJoinReorderTracer() {
+ return LoggerFactory.getLogger(DpHyp.class);
+ }
+
/**
* Thread-local handler that is called with dynamically generated Java code.
* It exists for unit-testing.
diff --git
a/core/src/test/java/org/apache/calcite/test/DphypJoinReorderTest.java
b/core/src/test/java/org/apache/calcite/test/DphypJoinReorderTest.java
new file mode 100644
index 0000000000..95da1bc234
--- /dev/null
+++ b/core/src/test/java/org/apache/calcite/test/DphypJoinReorderTest.java
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.test;
+
+import org.apache.calcite.DataContexts;
+import org.apache.calcite.adapter.enumerable.EnumerableConvention;
+import org.apache.calcite.adapter.enumerable.EnumerableInterpretable;
+import org.apache.calcite.adapter.enumerable.EnumerableRel;
+import org.apache.calcite.adapter.enumerable.EnumerableRules;
+import org.apache.calcite.jdbc.CalcitePrepare;
+import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.Enumerator;
+import org.apache.calcite.plan.ConventionTraitDef;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRules;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepMatchOrder;
+import org.apache.calcite.plan.hep.HepPlanner;
+import org.apache.calcite.plan.hep.HepProgram;
+import org.apache.calcite.plan.hep.HepProgramBuilder;
+import org.apache.calcite.plan.volcano.VolcanoPlanner;
+import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.rules.CoreRules;
+import org.apache.calcite.rel.rules.DpHyp;
+import org.apache.calcite.rel.rules.HyperGraph;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.runtime.Bindable;
+import org.apache.calcite.tools.RelBuilder;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.function.Function;
+
+/**
+ * Tests for execution results of all candidate plans generated by dphyp.
+ */
+public class DphypJoinReorderTest {
+
+ @Test void testInnerLeftSemiJoinReorder() {
+ // select * from t1 inner join t2 on id1=id2
+ // left join t3 on id2=id3
+ // where exists (select * from t4 where id3=id4)
+ Function<RelBuilder, RelNode> function = builder ->
+ builder
+ .values(new String[]{"id1", "name1"}, 1, "Tom", 2, "Lucy", 3, "Li")
+ .values(new String[]{"id2", "name2"}, 1, "Tom", 2, "Lucy")
+ .join(
+ JoinRelType.INNER,
+ builder.equals(
+ builder.field(2, 0, "id1"),
+ builder.field(2, 1, "id2")))
+ .values(new String[]{"id3", "name3"}, 1, "Tom", 3, "Li", 4,
"Peter")
+ .join(
+ JoinRelType.LEFT,
+ builder.equals(
+ builder.field(2, 0, "id2"),
+ builder.field(2, 1, "id3")))
+ .values(new String[]{"id4", "name4"}, 1, "Tom", 3, "Li")
+ .semiJoin(
+ builder.equals(
+ builder.field(2, 0, "id3"),
+ builder.field(2, 1, "id4")))
+ .build();
+
+ String expectedResult = "id1=1; name1=Tom; id2=1; name2=Tom; id3=1;
name3=Tom\n";
+ run(function, expectedResult);
+ }
+
+ @Test void testChainInnerJoinReorder() {
+ // select * from t1 inner join t2 on id1=id2
+ // inner join t3 on id2=id3
+ // inner join t4 on id3=id4
+ Function<RelBuilder, RelNode> function = builder ->
+ builder
+ .values(new String[]{"id1", "name1"}, 1, "Tom", 2, null, 3, "Li")
+ .values(new String[]{"id2", "name2"}, 2, "Lucy")
+ .join(
+ JoinRelType.INNER,
+ builder.equals(
+ builder.field(2, 0, "id1"),
+ builder.field(2, 1, "id2")))
+ .values(new String[]{"id3", "name3"}, 2, "Lucy", 3, "Li", 4,
"Peter")
+ .join(
+ JoinRelType.INNER,
+ builder.equals(
+ builder.field(2, 0, "id2"),
+ builder.field(2, 1, "id3")))
+ .values(new String[]{"id4", "name4"}, 2, "Lucy", 5, "Andy")
+ .join(
+ JoinRelType.INNER,
+ builder.equals(
+ builder.field(2, 0, "id3"),
+ builder.field(2, 1, "id4")))
+ .build();
+
+ String expectedResult =
+ "id1=2; name1=null; id2=2; name2=Lucy; id3=2; name3=Lucy; id4=2;
name4=Lucy\n";
+ run(function, expectedResult);
+ }
+
+ @Test void testStarInnerJoinReorder() {
+ // select * from t1 inner join t2 on id1=id2
+ // inner join t3 on id1=id3
+ // inner join t4 on id1=id4
+ Function<RelBuilder, RelNode> function = builder ->
+ builder
+ .values(new String[]{"id1", "name1"}, 1, "Tom", 2, "Lucy", 3, "Li")
+ .values(new String[]{"id2", "name2"}, 1, "Tom", 2, "Lucy")
+ .join(
+ JoinRelType.INNER,
+ builder.equals(
+ builder.field(2, 0, "id1"),
+ builder.field(2, 1, "id2")))
+ .values(new String[]{"id3", "name3"}, 1, "Tom", 2, "Lucy", 3,
"Li", 4, "Peter")
+ .join(
+ JoinRelType.INNER,
+ builder.equals(
+ builder.field(2, 0, "id1"),
+ builder.field(2, 1, "id3")))
+ .values(new String[]{"id4", "name4"}, 2, "Lucy", 5, "Andy", 1,
"Tom")
+ .join(
+ JoinRelType.INNER,
+ builder.equals(
+ builder.field(2, 0, "id1"),
+ builder.field(2, 1, "id4")))
+ .build();
+
+ String expectedResult =
+ "id1=1; name1=Tom; id2=1; name2=Tom; id3=1; name3=Tom; id4=1;
name4=Tom\n"
+ + "id1=2; name1=Lucy; id2=2; name2=Lucy; id3=2; name3=Lucy; id4=2;
name4=Lucy\n";
+ run(function, expectedResult);
+ }
+
+ /**
+ * Run the customized plan and check whether its execution results meet
expectations
+ * (ignore row order).
+ *
+ * @param customPlanFunction a function that accepts one RelBuilder and
produces a RelNode
+ * @param expectedResult expected result
+ */
+ void run(Function<RelBuilder, RelNode> customPlanFunction, String
expectedResult) {
+ VolcanoPlanner volcanoPlanner = new VolcanoPlanner();
+ volcanoPlanner.addRelTraitDef(ConventionTraitDef.INSTANCE);
+ volcanoPlanner.addRelTraitDef(RelCollationTraitDef.INSTANCE);
+
+ // initialize plan
+ RelDataTypeFactory typeFactory =
+ new JavaTypeFactoryImpl();
+ RelOptCluster cluster = RelOptCluster.create(volcanoPlanner, new
RexBuilder(typeFactory));
+ RelBuilder builder = RelFactories.LOGICAL_BUILDER.create(cluster, null);
+ RelNode initRel = customPlanFunction.apply(builder);
+
+ // convert to HyperGraph
+ HepProgram program = new HepProgramBuilder()
+ .addMatchOrder(HepMatchOrder.BOTTOM_UP)
+ .addRuleInstance(CoreRules.JOIN_TO_HYPER_GRAPH)
+ .build();
+ HepPlanner hepPlanner = new HepPlanner(program);
+ hepPlanner.setRoot(initRel);
+ RelNode hyperGraph = hepPlanner.findBestExp();
+ assert hyperGraph instanceof HyperGraph : "The root node must be a
HyperGraph, "
+ + "please check the custom plan function. The root node now is:\n"
+ + RelOptUtil.toString(hyperGraph);
+
+ // start dphyp enumeration
+ DphypForTest dphyp =
+ new DphypForTest(
+ (HyperGraph) hyperGraph,
+ builder,
+ hyperGraph.getCluster().getMetadataQuery(),
+ 127);
+ dphyp.startEnumerateJoin();
+
+ // verify the execution results of each candidate plan enumerated by dphyp
+ for (RelNode candidatePlan : dphyp.candidateList) {
+ for (RelOptRule enumerableRule : EnumerableRules.rules()) {
+ volcanoPlanner.addRule(enumerableRule);
+ }
+ // EnumerableProject does not implement the 'implement' function.
+ // must replace Project with Calc to execute the plan.
+ for (RelOptRule calcRule : RelOptRules.CALC_RULES) {
+ volcanoPlanner.addRule(calcRule);
+ }
+ volcanoPlanner.removeRule(EnumerableRules.ENUMERABLE_PROJECT_RULE);
+
+ candidatePlan =
+ volcanoPlanner.changeTraits(
+ candidatePlan,
+ cluster.traitSet().replace(EnumerableConvention.INSTANCE));
+ volcanoPlanner.setRoot(candidatePlan);
+ RelNode physicalPlan = volcanoPlanner.findBestExp();
+ volcanoPlanner.clear();
+
+ Bindable bindable =
+ EnumerableInterpretable.toBindable(
+ Collections.emptyMap(),
+ CalcitePrepare.Dummy.getSparkHandler(false),
+ (EnumerableRel) physicalPlan,
+ EnumerableRel.Prefer.ARRAY);
+ Enumerable<Object[]> result = bindable.bind(DataContexts.EMPTY);
+ Set<String> realResult =
+ formatResult(result.enumerator(),
physicalPlan.getRowType().getFieldNames());
+
+ Set<String> expectedSet = new HashSet<>();
+ for (String row : expectedResult.split("\n")) {
+ expectedSet.add(row.trim());
+ }
+ assert realResult.equals(expectedSet) : "The result does not match the
expected result.\n"
+ + "Expected: " + expectedSet
+ + "Actual: " + realResult;
+ }
+ }
+
+ Set<String> formatResult(Enumerator<Object[]> enumerator, List<String>
fieldNames) {
+ Set<String> rowSet = new HashSet<>();
+ while (enumerator.moveNext()) {
+ StringBuilder output = new StringBuilder();
+ Object[] row = enumerator.current();
+ assert row.length == fieldNames.size() : "Result row length does not
match field names size";
+ for (int i = 0; i < fieldNames.size(); i++) {
+ output.append(fieldNames.get(i))
+ .append("=")
+ .append(row[i] != null ? row[i].toString() : "null");
+
+ if (i < fieldNames.size() - 1) {
+ output.append("; ");
+ }
+ }
+ rowSet.add(output.toString());
+ }
+ return rowSet;
+ }
+
+ /** A test class for {@link DpHyp} that collects candidate plans. */
+ class DphypForTest extends DpHyp {
+
+ // a list for candidate plan that include all tables in the hypergraph
+ final List<RelNode> candidateList;
+
+ DphypForTest(
+ HyperGraph hyperGraph,
+ RelBuilder builder,
+ RelMetadataQuery relMetadataQuery,
+ int bloat) {
+ super(hyperGraph, builder, relMetadataQuery, bloat);
+ candidateList = new ArrayList<>();
+ }
+
+ @Override protected boolean verifyDpResultRowType(
+ RelNode plan,
+ List<HyperGraph.NodeState> resultOrder) {
+ if (hyperGraph.getInputs().size() == resultOrder.size()) {
+ List<RexNode> projects =
+ hyperGraph.restoreProjectionOrder(resultOrder,
+ plan.getRowType().getFieldList());
+ RelNode resultNode = builder
+ .push(plan)
+ .project(projects)
+ .build();
+ candidateList.add(resultNode);
+ return RelOptUtil.areRowTypesEqual(resultNode.getRowType(),
hyperGraph.getRowType(), false);
+ }
+ return true;
+ }
+ }
+
+}
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index a92b57de42..d7fb23756a 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -10820,6 +10820,128 @@ private void
checkLoptOptimizeJoinRule(LoptOptimizeJoinRule rule) {
.check();
}
+ @Test void testOuterJoinForDphyp() {
+ HepProgram program = new HepProgramBuilder()
+ .addMatchOrder(HepMatchOrder.BOTTOM_UP)
+ .addRuleInstance(CoreRules.JOIN_TO_HYPER_GRAPH)
+ .build();
+
+ sql("select emp.empno from "
+ + "emp_address inner join emp on emp_address.empno = emp.empno "
+ + "left join dept on emp.deptno = dept.deptno "
+ + "inner join dept_nested on dept.deptno = dept_nested.deptno")
+ .withPre(program)
+ .withRule(CoreRules.HYPER_GRAPH_OPTIMIZE, CoreRules.PROJECT_REMOVE,
CoreRules.PROJECT_MERGE)
+ .check();
+ }
+
+ @Test void testSemiJoinForDphyp() {
+ HepProgram program = new HepProgramBuilder()
+ .addMatchOrder(HepMatchOrder.BOTTOM_UP)
+ .addRuleInstance(CoreRules.PROJECT_MERGE)
+ .addRuleInstance(CoreRules.PROJECT_TO_SEMI_JOIN)
+ .addRuleInstance(CoreRules.JOIN_TO_HYPER_GRAPH)
+ .build();
+
+ sql("select emp.empno from emp, emp_address, salgrade "
+ + "where emp_address.empno = emp.empno and emp.sal = salgrade.hisal "
+ + "and exists(select * from emp_b where emp_address.empno =
emp_b.empno)")
+ .withExpand(true)
+ .withDecorrelate(true)
+ .withPre(program)
+ .withRule(CoreRules.HYPER_GRAPH_OPTIMIZE, CoreRules.PROJECT_REMOVE,
CoreRules.PROJECT_MERGE)
+ .check();
+ }
+
+ @Test void testCartesianProductForDphyp() {
+ HepProgram program = new HepProgramBuilder()
+ .addMatchOrder(HepMatchOrder.BOTTOM_UP)
+ .addRuleInstance(CoreRules.FILTER_INTO_JOIN)
+ .addRuleInstance(CoreRules.JOIN_TO_HYPER_GRAPH)
+ .build();
+
+ sql("select emp.empno from emp_address, emp, emp_b, dept "
+ + "where emp_address.empno = emp.empno "
+ + "and dept.deptno = emp.deptno")
+ .withPre(program)
+ .withRule(CoreRules.HYPER_GRAPH_OPTIMIZE, CoreRules.PROJECT_REMOVE,
CoreRules.PROJECT_MERGE)
+ .check();
+ }
+
+ @Test void testComplexPredicateForDphyp() {
+ HepProgram program = new HepProgramBuilder()
+ .addMatchOrder(HepMatchOrder.BOTTOM_UP)
+ .addRuleInstance(CoreRules.FILTER_INTO_JOIN)
+ .addRuleInstance(CoreRules.JOIN_TO_HYPER_GRAPH)
+ .build();
+
+ sql("select emp.empno from "
+ + "emp, emp_b, dept, dept_nested "
+ + "where emp.deptno + emp_b.empno = dept.deptno + dept_nested.deptno "
+ + "and (emp.empno = emp_b.empno or emp.ename = emp_b.ename) "
+ + "and sqrt(dept.deptno + dept_nested.deptno) > 2")
+ .withPre(program)
+ .withRule(CoreRules.HYPER_GRAPH_OPTIMIZE, CoreRules.PROJECT_REMOVE,
CoreRules.PROJECT_MERGE)
+ .check();
+ }
+
+ @Test void testInnerLeftSemiJoinTypeForDphyp() {
+ HepProgram program = new HepProgramBuilder()
+ .addMatchOrder(HepMatchOrder.BOTTOM_UP)
+ .addRuleInstance(CoreRules.PROJECT_MERGE)
+ .addRuleInstance(CoreRules.PROJECT_TO_SEMI_JOIN)
+ .addRuleInstance(CoreRules.JOIN_TO_HYPER_GRAPH)
+ .build();
+
+ sql("select emp.empno from "
+ + "emp_address inner join emp on emp_address.empno = emp.empno "
+ + "left join dept on emp.deptno = dept.deptno "
+ + "where exists(select * from dept_nested where dept.deptno =
dept_nested.deptno)")
+ .withExpand(true)
+ .withDecorrelate(true)
+ .withPre(program)
+ .withRule(CoreRules.HYPER_GRAPH_OPTIMIZE, CoreRules.PROJECT_REMOVE,
CoreRules.PROJECT_MERGE)
+ .check();
+ }
+
+ @Test void testLeftInnerSemiJoinTypeForDphyp() {
+ HepProgram program = new HepProgramBuilder()
+ .addMatchOrder(HepMatchOrder.BOTTOM_UP)
+ .addRuleInstance(CoreRules.PROJECT_MERGE)
+ .addRuleInstance(CoreRules.PROJECT_TO_SEMI_JOIN)
+ .addRuleInstance(CoreRules.JOIN_TO_HYPER_GRAPH)
+ .build();
+
+ sql("select emp.empno from "
+ + "emp_address left join emp on emp_address.empno = emp.empno "
+ + "inner join dept on emp.deptno = dept.deptno "
+ + "where exists(select * from dept_nested where dept.deptno =
dept_nested.deptno)")
+ .withExpand(true)
+ .withDecorrelate(true)
+ .withPre(program)
+ .withRule(CoreRules.HYPER_GRAPH_OPTIMIZE, CoreRules.PROJECT_REMOVE,
CoreRules.PROJECT_MERGE)
+ .check();
+ }
+
+ @Test void testFullLeftSemiJoinTypeForDphyp() {
+ HepProgram program = new HepProgramBuilder()
+ .addMatchOrder(HepMatchOrder.BOTTOM_UP)
+ .addRuleInstance(CoreRules.PROJECT_MERGE)
+ .addRuleInstance(CoreRules.PROJECT_TO_SEMI_JOIN)
+ .addRuleInstance(CoreRules.JOIN_TO_HYPER_GRAPH)
+ .build();
+
+ sql("select emp.empno from "
+ + "emp_address full join emp on emp_address.empno = emp.empno "
+ + "left join dept on emp.deptno = dept.deptno "
+ + "where exists(select * from dept_nested where dept.deptno =
dept_nested.deptno)")
+ .withExpand(true)
+ .withDecorrelate(true)
+ .withPre(program)
+ .withRule(CoreRules.HYPER_GRAPH_OPTIMIZE, CoreRules.PROJECT_REMOVE,
CoreRules.PROJECT_MERGE)
+ .check();
+ }
+
/** Test case of
* <a
href="https://issues.apache.org/jira/browse/CALCITE-7047">[CALCITE-7047]
* Improve Volcano planner selection of sort conversion rules</a>. */
diff --git a/core/src/test/resources/log4j2-test.xml
b/core/src/test/resources/log4j2-test.xml
index b4ef459a8e..d051a39817 100644
--- a/core/src/test/resources/log4j2-test.xml
+++ b/core/src/test/resources/log4j2-test.xml
@@ -40,5 +40,6 @@
<logger name="org.apache.calcite.plan.RelOptPlanner" level="ERROR">
<MarkerFilter marker="FULL_PLAN" onMatch="DENY" onMismatch="NEUTRAL"/>
</logger>
+ <logger name="org.apache.calcite.rel.rules.DpHyp" level="ERROR"/>
</Loggers>
</Configuration>
diff --git
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index e3d2473a54..057c44c5df 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -1526,6 +1526,33 @@ LogicalJoin(condition=[=($1, $12)], joinType=[semi])
LogicalTableScan(table=[[scott, DEPT]])
LogicalTableScan(table=[[scott, EMP]])
LogicalTableScan(table=[[scott, BONUS]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testCartesianProductForDphyp">
+ <Resource name="sql">
+ <![CDATA[select emp.empno from emp_address, emp, emp_b, dept where
emp_address.empno = emp.empno and dept.deptno = emp.deptno]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$3])
+ HyperGraph(edges=[{1}——[INNER, =(node(3)_field(0),
node(1)_field(7))]——{3},{0, 1}——[INNER, true]——{2},{0}——[INNER,
=(node(0)_field(0), node(1)_field(0))]——{1}])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_B]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$12])
+ LogicalJoin(condition=[true], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_B]])
+ LogicalJoin(condition=[=($11, $2)], joinType=[inner])
+ LogicalJoin(condition=[=($0, $9)], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
]]>
</Resource>
</TestCase>
@@ -1626,7 +1653,7 @@ LogicalProject(T=[CASE(<(CAST(CASE(>($1, 'abc'), $1,
null:VARCHAR(20))):DOUBLE,
<Resource name="planBefore">
<![CDATA[
LogicalProject(EMPNO=[$0])
- HyperGraph(edges=[{0}——[INNER, =($0, $9)]——{1},{0, 1}——[INNER,
true]——{2},{0, 1, 2}——[INNER, =(+($7, $9), +($12, $14))]——{3},{2}——[INNER,
=($12, $14)]——{3}])
+ HyperGraph(edges=[{0, 1, 2}——[INNER, =(+(node(0)_field(7),
node(1)_field(0)), +(node(2)_field(0), node(3)_field(0)))]——{3},{2}——[INNER,
=(node(2)_field(0), node(3)_field(0))]——{3},{0, 1}——[INNER,
true]——{2},{0}——[INNER, =(node(0)_field(0), node(1)_field(0))]——{1}])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
@@ -1643,6 +1670,33 @@ LogicalProject(EMPNO=[$9])
LogicalJoin(condition=[=($3, $0)], joinType=[inner])
LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testComplexPredicateForDphyp">
+ <Resource name="sql">
+ <![CDATA[select emp.empno from emp, emp_b, dept, dept_nested where
emp.deptno + emp_b.empno = dept.deptno + dept_nested.deptno and (emp.empno =
emp_b.empno or emp.ename = emp_b.ename) and sqrt(dept.deptno +
dept_nested.deptno) > 2]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$0])
+ HyperGraph(edges=[{0, 1, 2}——[INNER, =(+(node(0)_field(7),
node(1)_field(0)), +(node(2)_field(0), node(3)_field(0)))]——{3},{2}——[INNER,
>(POWER(+(node(2)_field(0), node(3)_field(0)), 0.5:DECIMAL(2, 1)),
CAST(2):DOUBLE NOT NULL)]——{3},{0, 1}——[INNER, true]——{2},{0}——[INNER,
OR(=(node(0)_field(0), node(1)_field(0)), =(node(0)_field(1),
node(1)_field(1)))]——{1}])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_B]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$16])
+ LogicalJoin(condition=[=(+($23, $6), +($4, $0))], joinType=[inner])
+ LogicalJoin(condition=[>(POWER(+($4, $0), 0.5:DECIMAL(2, 1)), 2.0E0)],
joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalJoin(condition=[OR(=($10, $0), =($11, $1))], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_B]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
@@ -2142,7 +2196,7 @@ and e1.sal > (select avg(sal) from emp e2 where e1.empno
= e2.empno)]]>
<Resource name="planBefore">
<![CDATA[
LogicalProject(EMPNO=[$0])
- HyperGraph(edges=[{0}——[INNER, =($0, $9)]——{1},{1}——[INNER, =($16,
$19)]——{2},{2}——[INNER, =($20, $22)]——{3},{0}——[INNER, =($21, $7)]——{3},{0, 1,
2}——[INNER, =(+($5, $14), +($19, $21))]——{3}])
+ HyperGraph(edges=[{2}——[INNER, =(node(2)_field(1),
node(3)_field(1))]——{3},{0}——[INNER, =(node(3)_field(0),
node(0)_field(7))]——{3},{0, 1, 2}——[INNER, =(+(node(0)_field(5),
node(1)_field(5)), +(node(2)_field(0), node(3)_field(0)))]——{3},{1}——[INNER,
=(node(1)_field(7), node(2)_field(0))]——{2},{0}——[INNER, =(node(0)_field(0),
node(1)_field(0))]——{1}])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalTableScan(table=[[CATALOG, SALES, EMP_B]])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
@@ -2152,12 +2206,12 @@ LogicalProject(EMPNO=[$0])
<Resource name="planAfter">
<![CDATA[
LogicalProject(EMPNO=[$16])
- LogicalJoin(condition=[AND(=($16, $6), =($0, $23), =(+($21, $11), +($4,
$0)))], joinType=[inner])
- LogicalJoin(condition=[=($13, $4)], joinType=[inner])
+ LogicalJoin(condition=[AND(=($10, $23), =(+($21, $5), +($14, $10)), =($16,
$0))], joinType=[inner])
+ LogicalJoin(condition=[=($7, $14)], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_B]])
LogicalJoin(condition=[=($5, $1)], joinType=[inner])
LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
- LogicalTableScan(table=[[CATALOG, SALES, EMP_B]])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
@@ -9877,6 +9931,33 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2],
MGR=[$3], HIREDATE=[$4], SAL=[$
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], SLACKER=[$8])
LogicalFilter(condition=[AND(=($7, 20), >($5, 1000))])
LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testOuterJoinForDphyp">
+ <Resource name="sql">
+ <![CDATA[select emp.empno from emp_address inner join emp on
emp_address.empno = emp.empno left join dept on emp.deptno = dept.deptno inner
join dept_nested on dept.deptno = dept_nested.deptno]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$3])
+ HyperGraph(edges=[{1, 2}——[INNER, =(node(2)_field(0),
node(3)_field(0))]——{3},{1}——[LEFT, =(node(1)_field(7),
node(2)_field(0))]——{2},{0}——[INNER, =(node(0)_field(0),
node(1)_field(0))]——{1}])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$4])
+ LogicalJoin(condition=[=($15, $4)], joinType=[inner])
+ LogicalJoin(condition=[=($13, $0)], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
+ LogicalJoin(condition=[=($7, $9)], joinType=[left])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
]]>
</Resource>
</TestCase>
@@ -16215,6 +16296,37 @@ LogicalCorrelate(correlation=[$cor0], joinType=[semi],
requiredColumns=[{7}])
<Resource name="planAfter">
<![CDATA[
LogicalValues(tuples=[[]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSemiJoinForDphyp">
+ <Resource name="sql">
+ <![CDATA[select emp.empno from emp, emp_address, salgrade where
emp_address.empno = emp.empno and emp.sal = salgrade.hisal and exists(select *
from emp_b where emp_address.empno = emp_b.empno)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$0])
+ HyperGraph(edges=[{1}——[SEMI, =(node(1)_field(0),
node(3)_field(0))]——{3},{0}——[INNER, =(node(0)_field(5),
node(2)_field(2))]——{2},{0}——[INNER, =(node(1)_field(0),
node(0)_field(0))]——{1}])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(EMPNO=[$0], STREET=[$1.STREET], CITY=[$1.CITY],
ZIP=[$1.ZIP], STATE=[$1.STATE], STREET5=[$2.STREET], CITY6=[$2.CITY],
ZIP7=[$2.ZIP], STATE8=[$2.STATE])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
+ LogicalTableScan(table=[[CATALOG, SALES, SALGRADE]])
+ LogicalProject(EMPNO=[$0], $f0=[true])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_B]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$12])
+ LogicalJoin(condition=[=($0, $12)], joinType=[inner])
+ LogicalJoin(condition=[=($0, $9)], joinType=[semi])
+ LogicalProject(EMPNO=[$0], STREET=[$1.STREET], CITY=[$1.CITY],
ZIP=[$1.ZIP], STATE=[$1.STATE], STREET5=[$2.STREET], CITY6=[$2.CITY],
ZIP7=[$2.ZIP], STATE8=[$2.STATE])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
+ LogicalProject(EMPNO=[$0], $f0=[true])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_B]])
+ LogicalJoin(condition=[=($8, $2)], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, SALGRADE]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
@@ -18001,7 +18113,7 @@ LogicalProject(EXPR$0=[null:VARCHAR NOT NULL ARRAY])
<Resource name="planBefore">
<![CDATA[
LogicalProject(EMPNO=[$0])
- HyperGraph(edges=[{0}——[INNER, =($0, $9)]——{1},{0}——[INNER, =($0,
$19)]——{2},{0}——[INNER, =($7, $22)]——{3},{0}——[INNER, =($7, $24)]——{4},{1, 2,
3}——[INNER, =(+($14, $19), +($22, $24))]——{4}])
+ HyperGraph(edges=[{0}——[INNER, =(node(0)_field(7),
node(4)_field(0))]——{4},{1, 2, 3}——[INNER, =(+(node(1)_field(5),
node(2)_field(0)), +(node(3)_field(0), node(4)_field(0)))]——{4},{0}——[INNER,
=(node(0)_field(7), node(3)_field(0))]——{3},{0}——[INNER, =(node(0)_field(0),
node(2)_field(0))]——{2},{0}——[INNER, =(node(0)_field(0),
node(1)_field(0))]——{1}])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalTableScan(table=[[CATALOG, SALES, EMP_B]])
LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
@@ -18011,9 +18123,8 @@ LogicalProject(EMPNO=[$0])
</Resource>
<Resource name="planAfter">
<![CDATA[
-LogicalProject(EMPNO=[$19])
- LogicalJoin(condition=[AND(=($19, $0), =(+($8, $0), +($17, $13)))],
joinType=[inner])
- LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
+LogicalProject(EMPNO=[$16])
+ LogicalJoin(condition=[AND(=(+($5, $25), +($14, $10)), =($16, $25))],
joinType=[inner])
LogicalJoin(condition=[=($16, $0)], joinType=[inner])
LogicalTableScan(table=[[CATALOG, SALES, EMP_B]])
LogicalJoin(condition=[=($13, $0)], joinType=[inner])
@@ -18021,6 +18132,7 @@ LogicalProject(EMPNO=[$19])
LogicalJoin(condition=[=($9, $0)], joinType=[inner])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
]]>
</Resource>
</TestCase>
@@ -19377,6 +19489,99 @@ LogicalProject(DEPTNO=[$7])
<![CDATA[
LogicalProject(DEPTNO=[$0], NAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testInnerLeftSemiJoinTypeForDphyp">
+ <Resource name="sql">
+ <![CDATA[select emp.empno from emp_address inner join emp on
emp_address.empno = emp.empno left join dept on emp.deptno = dept.deptno where
exists(select * from dept_nested where dept.deptno = dept_nested.deptno)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$9])
+ HyperGraph(edges=[{1, 2}——[SEMI, =(node(2)_field(0),
node(3)_field(0))]——{3},{1}——[LEFT, =(node(1)_field(7),
node(2)_field(0))]——{2},{0}——[INNER, =(node(0)_field(0),
node(1)_field(0))]——{1}])
+ LogicalProject(EMPNO=[$0], STREET=[$1.STREET], CITY=[$1.CITY],
ZIP=[$1.ZIP], STATE=[$1.STATE], STREET5=[$2.STREET], CITY6=[$2.CITY],
ZIP7=[$2.ZIP], STATE8=[$2.STATE])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$0], $f0=[true])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$0])
+ LogicalJoin(condition=[=($11, $0)], joinType=[inner])
+ LogicalJoin(condition=[=($9, $11)], joinType=[semi])
+ LogicalJoin(condition=[=($7, $9)], joinType=[left])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$0], $f0=[true])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
+ LogicalProject(EMPNO=[$0], STREET=[$1.STREET], CITY=[$1.CITY],
ZIP=[$1.ZIP], STATE=[$1.STATE], STREET5=[$2.STREET], CITY6=[$2.CITY],
ZIP7=[$2.ZIP], STATE8=[$2.STATE])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testLeftInnerSemiJoinTypeForDphyp">
+ <Resource name="sql">
+ <![CDATA[select emp.empno from emp_address left join emp on
emp_address.empno = emp.empno inner join dept on emp.deptno = dept.deptno where
exists(select * from dept_nested where dept.deptno = dept_nested.deptno)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$9])
+ HyperGraph(edges=[{2}——[SEMI, =(node(2)_field(0),
node(3)_field(0))]——{3},{0, 1}——[INNER, =(node(1)_field(7),
node(2)_field(0))]——{2},{0}——[LEFT, =(node(0)_field(0),
node(1)_field(0))]——{1}])
+ LogicalProject(EMPNO=[$0], STREET=[$1.STREET], CITY=[$1.CITY],
ZIP=[$1.ZIP], STATE=[$1.STATE], STREET5=[$2.STREET], CITY6=[$2.CITY],
ZIP7=[$2.ZIP], STATE8=[$2.STATE])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$0], $f0=[true])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$11])
+ LogicalJoin(condition=[=($18, $0)], joinType=[inner])
+ LogicalJoin(condition=[=($0, $2)], joinType=[semi])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$0], $f0=[true])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
+ LogicalJoin(condition=[=($0, $9)], joinType=[left])
+ LogicalProject(EMPNO=[$0], STREET=[$1.STREET], CITY=[$1.CITY],
ZIP=[$1.ZIP], STATE=[$1.STATE], STREET5=[$2.STREET], CITY6=[$2.CITY],
ZIP7=[$2.ZIP], STATE8=[$2.STATE])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testFullLeftSemiJoinTypeForDphyp">
+ <Resource name="sql">
+ <![CDATA[select emp.empno from emp_address full join emp on
emp_address.empno = emp.empno left join dept on emp.deptno = dept.deptno where
exists(select * from dept_nested where dept.deptno = dept_nested.deptno)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$9])
+ HyperGraph(edges=[{0, 1, 2}——[SEMI, =(node(2)_field(0),
node(3)_field(0))]——{3},{0, 1}——[LEFT, =(node(1)_field(7),
node(2)_field(0))]——{2},{0}——[FULL, =(node(0)_field(0),
node(1)_field(0))]——{1}])
+ LogicalProject(EMPNO=[$0], STREET=[$1.STREET], CITY=[$1.CITY],
ZIP=[$1.ZIP], STATE=[$1.STATE], STREET5=[$2.STREET], CITY6=[$2.CITY],
ZIP7=[$2.ZIP], STATE8=[$2.STATE])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$0], $f0=[true])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$0])
+ LogicalJoin(condition=[=($18, $20)], joinType=[semi])
+ LogicalJoin(condition=[=($7, $18)], joinType=[left])
+ LogicalJoin(condition=[=($9, $0)], joinType=[full])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(EMPNO=[$0], STREET=[$1.STREET], CITY=[$1.CITY],
ZIP=[$1.ZIP], STATE=[$1.STATE], STREET5=[$2.STREET], CITY6=[$2.CITY],
ZIP7=[$2.ZIP], STATE8=[$2.STATE])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP_ADDRESS]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(DEPTNO=[$0], $f0=[true])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
]]>
</Resource>
</TestCase>