mihaibudiu commented on code in PR #4392: URL: https://github.com/apache/calcite/pull/4392#discussion_r2155719489
########## core/src/main/java/org/apache/calcite/rel/rules/ConflictRule.java: ########## @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.rules; + +/** + * A conflict rule (CR) is a pair of table sets denoted by T1 → T2, which means that if T1 + * intersects with the table under the join operator, T2 must be included in this join. With every + * hyperedge in hypergraph, we associate a set of conflict rules. + * + * @see HyperEdge + * @see ConflictDetectionHelper + */ +public class ConflictRule { + Review Comment: I would add a comment that this is a bitmap where indexes refer to an implicit hypergraph. (I assume this is right) ########## core/src/main/java/org/apache/calcite/rel/rules/ConflictRule.java: ########## @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.rules; + +/** + * A conflict rule (CR) is a pair of table sets denoted by T1 → T2, which means that if T1 + * intersects with the table under the join operator, T2 must be included in this join. With every + * hyperedge in hypergraph, we associate a set of conflict rules. + * + * @see HyperEdge + * @see ConflictDetectionHelper + */ +public class ConflictRule { + Review Comment: I would add a comment that this is a bitmap where indexes refer to an implicit hypergraph. (I assume this is right) ########## core/src/main/java/org/apache/calcite/rel/rules/HyperEdge.java: ########## @@ -73,10 +123,224 @@ public RexNode getCondition() { @Override public String toString() { StringBuilder sb = new StringBuilder(); - sb.append(LongBitmap.printBitmap(leftNodeBits)) + sb.append(LongBitmap.printBitmap(getLeftEndpoint())) .append("——[").append(joinType).append(", ").append(condition).append("]——") - .append(LongBitmap.printBitmap(rightNodeBits)); + .append(LongBitmap.printBitmap(getRightEndpoint())); return sb.toString(); } + // It is convenient for the accept method in the HyperGraph class to call + public HyperEdge accept(RexShuttle shuttle) { + RexNode shuttleCondition = condition.accept(shuttle); + return new HyperEdge( + tes, + conflictRules, + leftNodeUsedInPredicate, + rightNodeUsedInPredicate, + initialLeftNodeBits, + initialRightNodeBits, + joinType, + shuttleCondition); + } + + /** + * When {@link JoinToHyperGraphRule} constructs a hypergraph , if the right child of Join is a + * HyperGraph class, all bitmaps related to inputs in this right child need to be shifted + * according to the number of inputs in the left child of Join. + * + * @param nodeOffset number of inputs in the left child + * @return HyperEdge after shifted Review Comment: hyperedge produced after shifting ########## core/src/main/java/org/apache/calcite/rel/rules/JoinToHyperGraphRule.java: ########## @@ -70,106 +75,205 @@ protected JoinToHyperGraphRule(Config config) { RelOptUtil.decomposeConjunction(origJoin.getCondition(), joinConds); } - // when right is HyperGraph, need shift the leftNodeBit, rightNodeBit, condition of HyperEdge + // when right is HyperGraph, need shift fields related to bitmap of HyperEdge int leftNodeCount; - int leftFieldCount = left.getRowType().getFieldCount(); if (left instanceof HyperGraph && right instanceof HyperGraph) { leftNodeCount = left.getInputs().size(); inputs.addAll(left.getInputs()); inputs.addAll(right.getInputs()); - edges.addAll(((HyperGraph) left).getEdges()); - edges.addAll( + notProjectInputs |= ((HyperGraph) left).getNotProjectInputs(); + notProjectInputs |= ((HyperGraph) right).getNotProjectInputs() << leftNodeCount; + + leftSubEdges.addAll(((HyperGraph) left).getEdges()); + rightSubEdges.addAll( ((HyperGraph) right).getEdges().stream() - .map(hyperEdge -> adjustNodeBit(hyperEdge, leftNodeCount, leftFieldCount)) + .map(hyperEdge -> adjustNodeBit(hyperEdge, leftNodeCount)) .collect(Collectors.toList())); } else if (left instanceof HyperGraph) { leftNodeCount = left.getInputs().size(); inputs.addAll(left.getInputs()); inputs.add(right); - edges.addAll(((HyperGraph) left).getEdges()); + notProjectInputs |= ((HyperGraph) left).getNotProjectInputs(); + + leftSubEdges.addAll(((HyperGraph) left).getEdges()); } else if (right instanceof HyperGraph) { leftNodeCount = 1; inputs.add(left); inputs.addAll(right.getInputs()); - edges.addAll( + notProjectInputs |= ((HyperGraph) right).getNotProjectInputs() << leftNodeCount; + + rightSubEdges.addAll( ((HyperGraph) right).getEdges().stream() - .map(hyperEdge -> adjustNodeBit(hyperEdge, leftNodeCount, leftFieldCount)) + .map(hyperEdge -> adjustNodeBit(hyperEdge, leftNodeCount)) .collect(Collectors.toList())); } else { leftNodeCount = 1; inputs.add(left); inputs.add(right); } - HashMap<Integer, Integer> fieldIndexToNodeIndexMap = new HashMap<>(); + // calculate conflict rules + Map<Long, Long> conflictRules = + ConflictDetectionHelper.makeConflictRules( + leftSubEdges, + rightSubEdges, + origJoin.getJoinType()); + leftSubEdges.addAll(rightSubEdges); + + Map<Integer, Integer> fieldIndexToNodeIndexMap = new HashMap<>(); + // the map from input index to the count of fields before it, used to convert RexInputRef to + // RexNodeAndFieldIndex + Map<Integer, Integer> relativePositionInNode = new HashMap<>(); int fieldCount = 0; for (int i = 0; i < inputs.size(); i++) { + if (LongBitmap.isOverlap(notProjectInputs, LongBitmap.newBitmap(i))) { + continue; + } + relativePositionInNode.put(i, fieldCount); for (int j = 0; j < inputs.get(i).getRowType().getFieldCount(); j++) { fieldIndexToNodeIndexMap.put(fieldCount++, i); } } // convert current join condition to hyper edge condition for (RexNode joinCond : joinConds) { - long leftNodeBits; - long rightNodeBits; + long leftEndpoint; + long rightEndpoint; + long leftNodeUsedInPredicate; + long rightNodeUsedInPredicate; + long initialLeftNodeBits = LongBitmap.newBitmapBetween(0, leftNodeCount); + long initialRightNodeBits = LongBitmap.newBitmapBetween(leftNodeCount, inputs.size()); List<Integer> leftRefs = new ArrayList<>(); List<Integer> rightRefs = new ArrayList<>(); - RexVisitorImpl visitor = new RexVisitorImpl<Void>(true) { - @Override public Void visitInputRef(RexInputRef inputRef) { + RexShuttle shuttle = new RexShuttle() { + @Override public RexNode visitInputRef(RexInputRef inputRef) { Integer nodeIndex = fieldIndexToNodeIndexMap.get(inputRef.getIndex()); if (nodeIndex == null) { - throw new IllegalArgumentException("RexInputRef refers a dummy field: " - + inputRef + ", rowType is: " + origJoin.getRowType()); + throw new DpHyp.DphypOrHyperGraphException("When build hyper graph, RexInputRef refers " + + "a dummy field: " + inputRef + ", rowType is: " + origJoin.getRowType()); } if (nodeIndex < leftNodeCount) { leftRefs.add(nodeIndex); } else { rightRefs.add(nodeIndex); } - return null; + Integer fieldOffset = relativePositionInNode.get(nodeIndex); + if (fieldOffset == null) { + throw new DpHyp.DphypOrHyperGraphException("When build hyper graph, failed to map " + + "input index to field count before it"); + } + int fieldIndex = inputRef.getIndex() - fieldOffset; + if (fieldIndex < 0) { + throw new DpHyp.DphypOrHyperGraphException("When build hyper graph, failed to convert " + + "the input ref to the relative position of the field in the input"); + } + return new HyperGraph.RexNodeAndFieldIndex( + nodeIndex, + fieldIndex, + inputRef.getName(), + inputRef.getType()); } }; - joinCond.accept(visitor); + RexNode hyperEdgeCondition = joinCond.accept(shuttle); - // when cartesian product, make it to complex hyper edge + Map<Long, Long> conflictRulesAfterAbsorb = new HashMap<>(); + leftNodeUsedInPredicate = LongBitmap.newBitmapFromList(leftRefs); + rightNodeUsedInPredicate = LongBitmap.newBitmapFromList(rightRefs); if (leftRefs.isEmpty() || rightRefs.isEmpty()) { - leftNodeBits = LongBitmap.newBitmapBetween(0, leftNodeCount); - rightNodeBits = LongBitmap.newBitmapBetween(leftNodeCount, inputs.size()); + // when cartesian product or degenerate predicate, a complex hyperedge is generated to fix + // current join operator without exploring more possibilities. See section 6.2 in CD-C paper + leftEndpoint = LongBitmap.newBitmapBetween(0, leftNodeCount); + rightEndpoint = LongBitmap.newBitmapBetween(leftNodeCount, inputs.size()); } else { - leftNodeBits = LongBitmap.newBitmapFromList(leftRefs); - rightNodeBits = LongBitmap.newBitmapFromList(rightRefs); + // simplify conflict rules. See section 5.5 in CD-C paper + long tes = + ConflictDetectionHelper.absorbConflictRulesIntoTES( + leftNodeUsedInPredicate | rightNodeUsedInPredicate, + conflictRulesAfterAbsorb, + conflictRules); + rightEndpoint = tes & initialRightNodeBits; + leftEndpoint = tes & ~rightEndpoint; } - edges.add( + leftSubEdges.add( new HyperEdge( - leftNodeBits, - rightNodeBits, + leftEndpoint, + rightEndpoint, + leftNodeUsedInPredicate, + rightNodeUsedInPredicate, + conflictRulesAfterAbsorb, + initialLeftNodeBits, + initialRightNodeBits, origJoin.getJoinType(), - joinCond)); + hyperEdgeCondition)); + } + + if (!origJoin.getJoinType().projectsRight()) { + notProjectInputs |= LongBitmap.newBitmapBetween(leftNodeCount, inputs.size()); } result = new HyperGraph( origJoin.getCluster(), origJoin.getTraitSet(), inputs, - edges, + notProjectInputs, + leftSubEdges, origJoin.getRowType()); call.transformTo(result); } - private static HyperEdge adjustNodeBit(HyperEdge hyperEdge, int nodeOffset, int fieldOffset) { - RexNode newCondition = RexUtil.shift(hyperEdge.getCondition(), fieldOffset); + private static HyperEdge adjustNodeBit(HyperEdge hyperEdge, int nodeOffset) { + RexShuttle shiftNodeIndexShuttle = new RexShuttle() { + @Override protected List<RexNode> visitList( + List<? extends RexNode> exprs, + boolean @Nullable [] update) { + ImmutableList.Builder<RexNode> clonedOperands = ImmutableList.builder(); + for (RexNode operand : exprs) { + RexNode clonedOperand; + if (operand instanceof HyperGraph.RexNodeAndFieldIndex) { + clonedOperand = + new HyperGraph.RexNodeAndFieldIndex( + ((HyperGraph.RexNodeAndFieldIndex) operand).nodeIndex + nodeOffset, + ((HyperGraph.RexNodeAndFieldIndex) operand).fieldIndex, + ((HyperGraph.RexNodeAndFieldIndex) operand).getName(), + operand.getType()); + } else { + clonedOperand = operand.accept(this); + } + if ((clonedOperand != operand) && (update != null)) { + update[0] = true; + } + clonedOperands.add(clonedOperand); + } + return clonedOperands.build(); + } + }; + RexNode newCondition = hyperEdge.getCondition().accept(shiftNodeIndexShuttle); + Map<Long, Long> newConflictRules = new HashMap<>(); + for (Map.Entry<Long, Long> entry : hyperEdge.getConflictRules().entrySet()) { + newConflictRules.put(entry.getKey() << nodeOffset, entry.getValue() << nodeOffset); + } return new HyperEdge( - hyperEdge.getLeftNodeBitmap() << nodeOffset, - hyperEdge.getRightNodeBitmap() << nodeOffset, + hyperEdge.getLeftEndpoint() << nodeOffset, + hyperEdge.getRightEndpoint() << nodeOffset, + hyperEdge.getLeftNodeUsedInPredicate() << nodeOffset, + hyperEdge.getRightNodeUsedInPredicate() << nodeOffset, + newConflictRules, + hyperEdge.getInitialLeftNodeBits() << nodeOffset, + hyperEdge.getInitialRightNodeBits() << nodeOffset, hyperEdge.getJoinType(), newCondition); } + private static boolean unSupportedJoinType(JoinRelType joinType) { + return joinType == JoinRelType.RIGHT Review Comment: Maybe in the future you will consider creating a preprocessing step to be run before this one which converts right joins into left joins. (Is there one already?) ########## core/src/main/java/org/apache/calcite/rex/RexNodeAndFieldIndex.java: ########## @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rex; + +import org.apache.calcite.rel.type.RelDataType; + +import org.checkerframework.checker.nullness.qual.Nullable; + +import java.util.Objects; + +/** + * RexNodeAndFieldIndex has the same meaning as {@link RexInputRef}, they are both reference a + * field of an input relational expression. The difference is that, RexNodeAndFieldIndex uses the + * input index and the relative position of field in input rowType. + * + * <p> For example, if the inputs to a join are + * + * <ul> + * <li>Input #0: EMP(EMPNO, ENAME, DEPTNO) and</li> + * <li>Input #1: DEPT(DEPTNO AS DEPTNO2, DNAME)</li> + * </ul> + * + * <p>then the fields are: + * + * <ul> + * <li>Node #0, Field #0: EMPNO</li> + * <li>Node #0, Field #1: ENAME</li> + * <li>Node #0, Field #2: DEPTNO (from EMP)</li> + * <li>Node #1, Field #0: DEPTNO2 (from DEPT)</li> + * <li>Node #1, Field #1: DNAME</li> + * </ul> + * + * <p> If in some cases, the inputs order of a relation is frequently adjusted and it is difficult + * to maintain the correct RexInputRef, you can consider temporarily replacing RexInputRef with + * RexNodeAndFieldIndex. + * + * @see org.apache.calcite.rel.rules.JoinToHyperGraphRule + * @see org.apache.calcite.rel.rules.HyperGraph#extractJoinCond + * @see org.apache.calcite.rel.rules.HyperEdge#createHyperEdgesFromJoinConds + */ +public class RexNodeAndFieldIndex extends RexVariable { Review Comment: Well, the class may prove useful outside of the hypergraph code. It does not seem related to the hypergraph really. But if this class is used in code outside this optimization pass it will probably need a lot of other implementations. ########## core/src/main/java/org/apache/calcite/rel/rules/HyperEdge.java: ########## @@ -73,10 +123,224 @@ public RexNode getCondition() { @Override public String toString() { StringBuilder sb = new StringBuilder(); - sb.append(LongBitmap.printBitmap(leftNodeBits)) + sb.append(LongBitmap.printBitmap(getLeftEndpoint())) .append("——[").append(joinType).append(", ").append(condition).append("]——") - .append(LongBitmap.printBitmap(rightNodeBits)); + .append(LongBitmap.printBitmap(getRightEndpoint())); return sb.toString(); } + // It is convenient for the accept method in the HyperGraph class to call + public HyperEdge accept(RexShuttle shuttle) { + RexNode shuttleCondition = condition.accept(shuttle); + return new HyperEdge( + tes, + conflictRules, + leftNodeUsedInPredicate, + rightNodeUsedInPredicate, + initialLeftNodeBits, + initialRightNodeBits, + joinType, + shuttleCondition); + } + + /** + * When {@link JoinToHyperGraphRule} constructs a hypergraph , if the right child of Join is a + * HyperGraph class, all bitmaps related to inputs in this right child need to be shifted + * according to the number of inputs in the left child of Join. + * + * @param nodeOffset number of inputs in the left child + * @return HyperEdge after shifted + */ + public HyperEdge adjustNodeBit(int nodeOffset) { + RexShuttle shiftNodeIndexShuttle = new RexShuttle() { + @Override public RexNode visitNodeAndFieldIndex(RexNodeAndFieldIndex nodeAndFieldIndex) { + return new RexNodeAndFieldIndex( + nodeAndFieldIndex.getNodeIndex() << nodeOffset, + nodeAndFieldIndex.getFieldIndex(), + nodeAndFieldIndex.getName(), + nodeAndFieldIndex.getType()); + } + }; + RexNode shiftedCondition = condition.accept(shiftNodeIndexShuttle); + List<ConflictRule> shiftedConflictRules = new ArrayList<>(); + for (ConflictRule conflictRule : conflictRules) { + shiftedConflictRules.add(conflictRule.shift(nodeOffset)); + } + return new HyperEdge( + tes.shift(nodeOffset), + shiftedConflictRules, + leftNodeUsedInPredicate << nodeOffset, + rightNodeUsedInPredicate << nodeOffset, + initialLeftNodeBits << nodeOffset, + initialRightNodeBits << nodeOffset, + joinType, + shiftedCondition); + } + + /** + * Build hyperedges corresponding to a join operator. The construction of a HyperEdge includes + * total eligibility set, conflict rules, node-related bitmaps, predicate expressions (RexInputRef + * is replaced by RexNodeAndFieldIndex) and join type. + * + * @param inputRefToNodeIndexMap a map from inputRef to node index + * @param inputRefToFieldIndexMap a map from inputRef to field index + * @param joinConds join conditions + * @param conflictRules all conflict rules + * @param joinType join type + * @param leftNodeCount number of inputs in the left child of Join + * @param nodeCount number of inputs of Join + * @return HyperEdge list + */ + public static List<HyperEdge> createHyperEdgesFromJoinConds( + Map<Integer, Integer> inputRefToNodeIndexMap, + Map<Integer, Integer> inputRefToFieldIndexMap, + List<RexNode> joinConds, + List<ConflictRule> conflictRules, + JoinRelType joinType, + int leftNodeCount, + int nodeCount) { + // for join operator o, generate bitmaps of the relations on the left/right sides + long initialLeftNodeBits = LongBitmap.newBitmapBetween(0, leftNodeCount); + long initialRightNodeBits = LongBitmap.newBitmapBetween(leftNodeCount, nodeCount); + + List<HyperEdge> edges = new ArrayList<>(); + for (RexNode joinCond : joinConds) { + List<Integer> leftRefs = new ArrayList<>(); + List<Integer> rightRefs = new ArrayList<>(); + RexShuttle inputRef2NodeAndFieldIndexShuttle = new RexShuttle() { + @Override public RexNode visitInputRef(RexInputRef inputRef) { + Integer nodeIndex = inputRefToNodeIndexMap.get(inputRef.getIndex()); + assert nodeIndex != null; + // collect left input index and right input index + if (nodeIndex < leftNodeCount) { + leftRefs.add(nodeIndex); + } else { + rightRefs.add(nodeIndex); + } + Integer fieldIndex = inputRefToFieldIndexMap.get(inputRef.getIndex()); + assert fieldIndex != null; + + return new RexNodeAndFieldIndex( + nodeIndex, + fieldIndex, + inputRef.getName(), + inputRef.getType()); + } + }; + RexNode hyperEdgeCondition = joinCond.accept(inputRef2NodeAndFieldIndexShuttle); + + long leftNodeUsedInPredicate = LongBitmap.newBitmapFromList(leftRefs); + long rightNodeUsedInPredicate = LongBitmap.newBitmapFromList(rightRefs); + TotalEligibilitySet tes; + List<ConflictRule> conflictRulesAfterAbsorb = new ArrayList<>(); + if (leftNodeUsedInPredicate == 0 || rightNodeUsedInPredicate == 0) { + // when dealing with cross products or degenerate predicates, construct total eligibility + // set using all nodes on the left and right sides (as the endpoint of the hyperedge). Cross + // products and degenerate predicates should be rare in real queries, this behavior Review Comment: People write many queries using cross-products (see TPC-H). So I suspect that what this means is that in practice few real optimized queries will contain cross-products. This is an important distinction, because you want to handle programs with cross-products. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
