silundong commented on code in PR #4540:
URL: https://github.com/apache/calcite/pull/4540#discussion_r2378621304
##########
core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java:
##########
@@ -64,212 +78,361 @@ protected RelMdFunctionalDependency() {}
return BuiltInMetadata.FunctionalDependency.DEF;
}
+ /**
+ * Determines if column is functionally dependent on key for a given rel
node.
+ */
public @Nullable Boolean determines(RelNode rel, RelMetadataQuery mq,
int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ return determinesSet(rel, mq, ImmutableBitSet.of(key),
ImmutableBitSet.of(column));
}
- public @Nullable Boolean determines(SetOp rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ /**
+ * Determines if a set of columns functionally determines another set of
columns.
+ */
+ public Boolean determinesSet(RelNode rel, RelMetadataQuery mq,
+ ImmutableBitSet determinants, ImmutableBitSet dependents) {
+ FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel);
+ return fdSet.implies(determinants, dependents);
}
- public @Nullable Boolean determines(Join rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ /**
+ * Returns the closure of a set of columns under all functional dependencies.
+ */
+ public ImmutableBitSet closure(RelNode rel, RelMetadataQuery mq,
ImmutableBitSet attrs) {
+ FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel);
+ return fdSet.closure(attrs);
}
- public @Nullable Boolean determines(Correlate rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ /**
+ * Returns candidate keys for the relation within the specified set of
attributes.
+ */
+ public Set<ImmutableBitSet> candidateKeys(
+ RelNode rel, RelMetadataQuery mq, ImmutableBitSet attributes) {
+ FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel);
+ return fdSet.findCandidateKeys(attributes);
}
- public @Nullable Boolean determines(Aggregate rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
+ /**
+ * Main dispatch method for getFunctionalDependencies.
+ * Routes to appropriate handler based on RelNode type.
+ */
+ public FunctionalDependencySet getFunctionalDependencies(RelNode rel,
RelMetadataQuery mq) {
+ if (rel instanceof TableScan) {
+ return getTableScanFD((TableScan) rel);
+ } else if (rel instanceof Project) {
+ return getProjectFD((Project) rel, mq);
+ } else if (rel instanceof Aggregate) {
+ return getAggregateFD((Aggregate) rel, mq);
+ } else if (rel instanceof Join) {
+ return getJoinFD((Join) rel, mq);
+ } else if (rel instanceof Calc) {
+ return getCalcFD((Calc) rel, mq);
+ } else if (rel instanceof Filter) {
+ return getFilterFD((Filter) rel, mq);
+ } else if (rel instanceof SetOp) {
+ // TODO: Handle UNION, INTERSECT, EXCEPT functional dependencies
+ return new FunctionalDependencySet();
+ } else if (rel instanceof Correlate) {
+ // TODO: Handle CORRELATE functional dependencies
+ return new FunctionalDependencySet();
+ }
+ return getFD(rel.getInputs(), mq);
}
- public @Nullable Boolean determines(Calc rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
+ private static FunctionalDependencySet getFD(List<RelNode> inputs,
RelMetadataQuery mq) {
+ if (inputs.size() > 1) {
+ // Conservative approach for multi-input nodes without specific logic
+ return new FunctionalDependencySet();
+ }
+ return mq.getFunctionalDependencies(inputs.get(0));
}
- public @Nullable Boolean determines(Project rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
+ private static FunctionalDependencySet getTableScanFD(TableScan rel) {
+ FunctionalDependencySet fdSet = new FunctionalDependencySet();
+
+ RelOptTable table = rel.getTable();
+ List<ImmutableBitSet> keys = table.getKeys();
+ if (keys == null || keys.isEmpty()) {
+ return fdSet;
+ }
+
+ for (ImmutableBitSet key : keys) {
+ ImmutableBitSet allColumns =
ImmutableBitSet.range(rel.getRowType().getFieldCount());
+ ImmutableBitSet dependents = allColumns.except(key);
+ if (!dependents.isEmpty()) {
+ fdSet.addFD(key, dependents);
+ }
+ }
+
+ return fdSet;
+ }
+
+ private static FunctionalDependencySet getProjectFD(Project rel,
RelMetadataQuery mq) {
+ return getProjectionFD(rel.getInput(), rel.getProjects(), mq);
}
/**
- * Checks if a column is functionally determined by a key column through
expression analysis.
+ * Common method to compute functional dependencies for projection
operations.
+ * Used by both Project and Calc nodes.
*
- * @param rel The input relation
- * @param mq Metadata query instance
- * @param key Index of the determinant expression
- * @param column Index of the dependent expression
- * @return TRUE if column is determined by key,
- * FALSE if not determined,
- * NULL if undetermined
+ * @param input the input relation
+ * @param projections the list of projection expressions
+ * @param mq the metadata query
+ * @return the functional dependency set for the projection
*/
- private static @Nullable Boolean determinesImpl(RelNode rel,
RelMetadataQuery mq,
- int key, int column) {
- if (preCheck(rel, key, column)) {
- return true;
- }
-
- ImmutableBitSet keyInputIndices = null;
- ImmutableBitSet columnInputIndices = null;
- if (rel instanceof Project || rel instanceof Calc) {
- List<RexNode> exprs = null;
- if (rel instanceof Project) {
- Project project = (Project) rel;
- exprs = project.getProjects();
- } else {
- Calc calc = (Calc) rel;
- final RexProgram program = calc.getProgram();
- exprs = program.expandList(program.getProjectList());
+ private static FunctionalDependencySet getProjectionFD(
+ RelNode input, List<RexNode> projections, RelMetadataQuery mq) {
+ FunctionalDependencySet inputFdSet = mq.getFunctionalDependencies(input);
+ FunctionalDependencySet projectionFdSet = new FunctionalDependencySet();
+
+ // Create mapping from input column indices to project column indices
+ Mappings.TargetMapping inputToOutputMap =
+ RelOptUtil.permutation(projections, input.getRowType()).inverse();
+
+ // Map input functional dependencies to project dependencies
+ mapInputFDs(inputFdSet, inputToOutputMap, projectionFdSet);
+
+ int fieldCount = projections.size();
+ final ImmutableBitSet[] inputBits = new ImmutableBitSet[fieldCount];
+ final Map<RexNode, Integer> uniqueExprToIndex = new HashMap<>();
+ final Map<RexLiteral, Integer> literalToIndex = new HashMap<>();
+ final Map<RexInputRef, Integer> refToIndex = new HashMap<>();
+ for (int i = 0; i < fieldCount; i++) {
+ RexNode expr = projections.get(i);
+
+ // Skip non-deterministic expressions
+ if (!RexUtil.isDeterministic(expr)) {
+ continue;
}
- // TODO: Supports dependency analysis for all types of expressions
- if (!(exprs.get(column) instanceof RexInputRef)) {
- return false;
+ // Handle identical expressions in the projection list
+ Integer prev = uniqueExprToIndex.putIfAbsent(expr, i);
+ if (prev != null) {
+ projectionFdSet.addBidirectionalFD(prev, i);
}
- RexNode keyExpr = exprs.get(key);
- RexNode columnExpr = exprs.get(column);
-
- // Identical expressions imply functional dependency
- if (keyExpr.equals(columnExpr)) {
- return true;
+ // Track literal constants to handle them specially
+ if (expr instanceof RexLiteral) {
+ literalToIndex.put((RexLiteral) expr, i);
+ continue;
}
- keyInputIndices = extractDeterministicRefs(keyExpr);
- columnInputIndices = extractDeterministicRefs(columnExpr);
- } else if (rel instanceof Aggregate) {
- Aggregate aggregate = (Aggregate) rel;
-
- int groupByCnt = aggregate.getGroupCount();
- if (key < groupByCnt && column >= groupByCnt) {
- return false;
+ if (expr instanceof RexInputRef) {
+ refToIndex.put((RexInputRef) expr, i);
+ inputBits[i] = ImmutableBitSet.of(((RexInputRef) expr).getIndex());
}
- keyInputIndices = extractDeterministicRefs(aggregate, key);
- columnInputIndices = extractDeterministicRefs(aggregate, column);
- } else {
- throw new UnsupportedOperationException("Unsupported RelNode type: "
- + rel.getClass().getSimpleName());
+ inputBits[i] = RelOptUtil.InputFinder.bits(expr);
}
- // Early return if invalid cases
- if (keyInputIndices.isEmpty()
- || columnInputIndices.isEmpty()) {
- return false;
+ // Remove literals from uniqueExprToIndex to avoid redundant processing
+ uniqueExprToIndex.keySet().removeIf(key -> key instanceof RexLiteral);
+
+ for (Map.Entry<RexNode, Integer> entry : uniqueExprToIndex.entrySet()) {
+ RexNode expr = entry.getKey();
+ Integer i = entry.getValue();
+
+ // All columns determine literals
+ literalToIndex.values()
+ .forEach(l -> projectionFdSet.addFD(ImmutableBitSet.of(i),
ImmutableBitSet.of(l)));
+
+ // Input columns determine identical expressions
+ refToIndex.forEach((k, v) -> {
+ ImmutableBitSet refIndex = ImmutableBitSet.of(k.getIndex());
+ ImmutableBitSet bitSet = expr instanceof RexInputRef
+ ? ImmutableBitSet.of(((RexInputRef) expr).getIndex())
+ : inputBits[i];
+ if (inputFdSet.implies(refIndex, bitSet)) {
+ projectionFdSet.addFD(v, i);
+ }
+ });
}
- // Currently only supports multiple (keyInputIndices) to one
(columnInputIndices)
- // dependency detection
- for (Integer keyRef : keyInputIndices) {
- if (Boolean.FALSE.equals(
- mq.determines(rel.getInput(0), keyRef,
- columnInputIndices.nextSetBit(0)))) {
- return false;
+ return projectionFdSet;
+ }
+
+ /**
+ * Maps input functional dependencies to output dependencies based on column
mapping.
+ */
+ private static void mapInputFDs(FunctionalDependencySet inputFdSet,
+ Mappings.TargetMapping mapping, FunctionalDependencySet outputFdSet) {
+ for (FunctionalDependency inputFd : inputFdSet.getFDs()) {
+ ImmutableBitSet determinants = inputFd.getDeterminants();
+ ImmutableBitSet dependents = inputFd.getDependents();
+
+ // Skip this FD if any determinant column is unmappable
+ boolean allMappable =
+ determinants.stream().allMatch(col -> mapping.getTargetOpt(col) >=
0);
+ if (!allMappable) {
+ continue;
+ }
+
+ // Map all determinant columns
+ ImmutableBitSet mappedDeterminants = mapAllCols(determinants, mapping);
+ if (mappedDeterminants.isEmpty()) {
+ continue;
+ }
+
+ // Map only the dependent columns that can be mapped
+ ImmutableBitSet mappedDependents = mapAvailableCols(dependents, mapping);
+ if (!mappedDependents.isEmpty()) {
+ outputFdSet.addFD(mappedDeterminants, mappedDependents);
}
}
+ }
- return true;
+ /**
+ * Maps all columns in the set. Returns empty set if any column cannot be
mapped.
+ */
+ private static ImmutableBitSet mapAllCols(
+ ImmutableBitSet columns, Mappings.TargetMapping mapping) {
+ ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+ for (int col : columns) {
+ int mappedCol = mapping.getTargetOpt(col);
+ if (mappedCol < 0) {
+ return ImmutableBitSet.of();
+ }
+ builder.set(mappedCol);
+ }
+ return builder.build();
}
/**
- * determinesImpl2is similar to determinesImpl, but it doesn't need to
handle the
- * mapping between output and input columns.
+ * Maps only the columns that can be mapped, ignoring unmappable ones.
*/
- private static @Nullable Boolean determinesImpl2(RelNode rel,
RelMetadataQuery mq,
- int key, int column) {
- if (preCheck(rel, key, column)) {
- return true;
+ private static ImmutableBitSet mapAvailableCols(
+ ImmutableBitSet columns, Mappings.TargetMapping mapping) {
+ ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+ for (int col : columns) {
+ int mappedCol = mapping.getTargetOpt(col);
+ if (mappedCol >= 0) {
+ builder.set(mappedCol);
+ }
}
+ return builder.build();
+ }
- if (rel instanceof TableScan) {
- TableScan tableScan = (TableScan) rel;
- RelOptTable table = tableScan.getTable();
- List<ImmutableBitSet> keys = table.getKeys();
- return keys != null
- && keys.size() == 1
- && keys.get(0).equals(ImmutableBitSet.of(column));
- } else if (rel instanceof Join) {
- Join join = (Join) rel;
- // TODO Considering column mapping based on equality conditions in join
- int leftFieldCnt = join.getLeft().getRowType().getFieldCount();
- if (key < leftFieldCnt && column < leftFieldCnt) {
- return mq.determines(join.getLeft(), key, column);
- } else if (key >= leftFieldCnt && column >= leftFieldCnt) {
- return mq.determines(join.getRight(), key - leftFieldCnt, column -
leftFieldCnt);
+ private static FunctionalDependencySet getAggregateFD(Aggregate rel,
RelMetadataQuery mq) {
+ FunctionalDependencySet fdSet = new FunctionalDependencySet();
+ FunctionalDependencySet inputFdSet =
mq.getFunctionalDependencies(rel.getInput());
+
+ // Group set columns in the output
+ ImmutableBitSet groupSet = rel.getGroupSet();
+
+ // 1. Preserve input FDs that only involve group columns
+ if (Aggregate.isSimple(rel)) {
+ for (FunctionalDependency inputFd : inputFdSet.getFDs()) {
+ ImmutableBitSet determinants = inputFd.getDeterminants();
+ ImmutableBitSet dependents = inputFd.getDependents();
+
+ // Only preserve if both determinants and dependents are within group
columns
+ if (groupSet.contains(determinants) && groupSet.contains(dependents)) {
+ fdSet.addFD(determinants, dependents);
+ }
}
- return false;
- } else if (rel instanceof Correlate) {
- // TODO Support Correlate.
- return false;
- } else if (rel instanceof SetOp) {
- // TODO Support SetOp
- return false;
}
- return mq.determines(rel.getInput(0), key, column);
+ // 2. Group keys determine all aggregate columns
+ if (!groupSet.isEmpty() && !rel.getAggCallList().isEmpty()) {
+ for (int i = rel.getGroupCount(); i < rel.getRowType().getFieldCount();
i++) {
+ fdSet.addFD(groupSet, ImmutableBitSet.of(i));
+ }
+ }
+
+ return fdSet;
}
- private static Boolean preCheck(RelNode rel, int key, int column) {
- verifyIndex(rel, key, column);
+ private static FunctionalDependencySet getFilterFD(Filter rel,
RelMetadataQuery mq) {
+ FunctionalDependencySet inputSet =
mq.getFunctionalDependencies(rel.getInput());
+ FunctionalDependencySet filterFdSet = new
FunctionalDependencySet(inputSet.getFDs());
+ addFDsFromEquiCondition(rel.getCondition(), filterFdSet);
+ return filterFdSet;
+ }
- // Equal index values indicate the same expression reference
- if (key == column) {
- return true;
+ private static FunctionalDependencySet getJoinFD(Join rel, RelMetadataQuery
mq) {
+ FunctionalDependencySet leftFdSet =
mq.getFunctionalDependencies(rel.getLeft());
+ FunctionalDependencySet rightFdSet =
mq.getFunctionalDependencies(rel.getRight());
+
+ int leftFieldCount = rel.getLeft().getRowType().getFieldCount();
+ JoinRelType joinType = rel.getJoinType();
+
+ switch (joinType) {
+ case INNER:
+ // Inner join: preserve all FDs and derive cross-table dependencies
+ FunctionalDependencySet innerJoinFdSet
+ = leftFdSet.union(shiftFdSet(rightFdSet, leftFieldCount));
+ addFDsFromEquiCondition(rel.getCondition(), innerJoinFdSet);
+ return innerJoinFdSet.computeTransitiveClosure();
+ case LEFT:
+ // Left join: preserve left FDs, right FDs may be invalidated by NULLs
+ FunctionalDependencySet leftJoinFdSet = new
FunctionalDependencySet(leftFdSet.getFDs());
+ addFDsFromEquiCondition(rel.getCondition(), leftJoinFdSet);
+ return leftJoinFdSet.computeTransitiveClosure();
+ case RIGHT:
+ // Right join: preserve right FDs, left FDs may be invalidated by NULLs
+ FunctionalDependencySet rightJoinFdSet = shiftFdSet(rightFdSet,
leftFieldCount);
+ addFDsFromEquiCondition(rel.getCondition(), rightJoinFdSet);
+ return rightJoinFdSet.computeTransitiveClosure();
+ case FULL:
+ // Full join: both sides may have NULLs, very conservative approach
+ return new FunctionalDependencySet();
+ case SEMI:
+ // Semi join: only left table columns in result, preserve left FDs only
+ return new FunctionalDependencySet(leftFdSet.getFDs());
+ case ANTI:
+ // Anti join: only left table columns in result, preserve left FDs only
+ return new FunctionalDependencySet(leftFdSet.getFDs());
+ default:
+ // Conservative fallback for unknown join types
+ return new FunctionalDependencySet();
}
+ }
- return false;
+ private static FunctionalDependencySet getCalcFD(Calc rel, RelMetadataQuery
mq) {
+ List<RexNode> projections =
rel.getProgram().expandList(rel.getProgram().getProjectList());
+ return getProjectionFD(rel.getInput(), projections, mq);
}
- private static void verifyIndex(RelNode rel, int... indices) {
- for (int index : indices) {
- if (index < 0 || index >= rel.getRowType().getFieldCount()) {
- throw new IndexOutOfBoundsException(
- "Column index " + index + " is out of bounds. "
- + "Valid range is [0, " + rel.getRowType().getFieldCount() +
")");
- }
+ private static FunctionalDependencySet shiftFdSet(FunctionalDependencySet
fdSet, int offset) {
+ FunctionalDependencySet shiftedFdSet = new FunctionalDependencySet();
+ for (FunctionalDependency fd : fdSet.getFunctionalDependencies()) {
+ ImmutableBitSet shiftedDeterminants = fd.getDeterminants().shift(offset);
+ ImmutableBitSet shiftedDependents = fd.getDependents().shift(offset);
+ shiftedFdSet.addFD(shiftedDeterminants, shiftedDependents);
}
+ return shiftedFdSet;
}
/**
- * Extracts input indices referenced by an output column in an Aggregate.
- * For group-by columns, returns the column index itself since they directly
- * reference input columns. For aggregate function columns, returns the input
- * column indices used by the aggregate call.
- *
- * @param aggregate The Aggregate relational expression to analyze
- * @param index Index of the output column in the Aggregate (0-based)
- * @return ImmutableBitSet of input column indices referenced by the output
column.
- * For group-by columns, returns a singleton set of the column index.
- * For aggregate columns, returns the argument indices of the
aggregate call.
+ * Extracts FDs from equality and AND conditions.
*/
- private static ImmutableBitSet extractDeterministicRefs(Aggregate aggregate,
int index) {
- int groupByCnt = aggregate.getGroupCount();
- if (index < groupByCnt) {
- return ImmutableBitSet.of(index);
+ private static void addFDsFromEquiCondition(RexNode condition,
FunctionalDependencySet result) {
+ if (!(condition instanceof RexCall)) {
+ return;
}
- List<AggregateCall> aggCalls = aggregate.getAggCallList();
- AggregateCall call = aggCalls.get(index - groupByCnt);
- return ImmutableBitSet.of(call.getArgList());
- }
-
- /**
- * Extracts input indices referenced by a deterministic RexNode expression.
- *
- * @param rex The expression to analyze
- * @return referenced input indices if deterministic
- */
- private static ImmutableBitSet extractDeterministicRefs(RexNode rex) {
- if (rex instanceof RexCall && !RexUtil.isDeterministic(rex)) {
- return ImmutableBitSet.of();
+ RexCall call = (RexCall) condition;
+ // Handle equality condition: col1 = col2 or col1 IS NOT DISTINCT FROM col2
+ if (call.getOperator().getKind() == SqlKind.EQUALS
+ || call.getOperator().getKind() == SqlKind.IS_NOT_DISTINCT_FROM) {
+ List<RexNode> operands = call.getOperands();
+ if (operands.size() == 2) {
+ RexNode left = operands.get(0);
+ RexNode right = operands.get(1);
+
+ if (left instanceof RexInputRef && right instanceof RexInputRef) {
+ int leftRef = ((RexInputRef) left).getIndex();
+ int rightRef = ((RexInputRef) right).getIndex();
+
+ result.addFD(ImmutableBitSet.of(leftRef),
ImmutableBitSet.of(rightRef));
+ result.addFD(ImmutableBitSet.of(rightRef),
ImmutableBitSet.of(leftRef));
Review Comment:
You defined `addBidirectionalFD` method, I think it can be used here
##########
core/src/main/java/org/apache/calcite/rel/metadata/FunctionalDependencySet.java:
##########
@@ -0,0 +1,403 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.metadata;
+
+import org.apache.calcite.util.ImmutableBitSet;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+/**
+ * A set of functional dependencies with closure and minimal cover operations.
+ * This class implements standard algorithms for functional dependency
reasoning.
+ */
+public class FunctionalDependencySet {
+ // Maximum number of transitive closure iterations to prevent infinite loops
+ public static final int MAX_TRANSITIVE_CLOSURE_LOOPS = 10;
+ // Maximum number of attributes supported in closure computation
+ private static final int MAX_CLOSURE_ATTRS = 10000;
+
+ private final Set<FunctionalDependency> fdSet = new HashSet<>();
+
+ public FunctionalDependencySet() {}
+
+ public FunctionalDependencySet(Set<FunctionalDependency> fds) {
+ this.fdSet.addAll(fds);
+ }
+
+ public void addFD(FunctionalDependency fd) {
+ if (!fd.isTrivial()) {
+ fdSet.add(fd);
+ }
+ }
+
+ public void addFD(ImmutableBitSet determinants, ImmutableBitSet dependents) {
+ addFD(FunctionalDependency.of(determinants, dependents));
+ }
+
+ public void addBidirectionalFD(ImmutableBitSet determinants, ImmutableBitSet
dependents) {
+ addFD(determinants, dependents);
+ addFD(dependents, determinants);
+ }
+
+ public void addFD(int determinant, int dependent) {
+ addFD(ImmutableBitSet.of(determinant), ImmutableBitSet.of(dependent));
+ }
+
+ public void addBidirectionalFD(int determinant, int dependent) {
+ addFD(ImmutableBitSet.of(determinant), ImmutableBitSet.of(dependent));
+ addFD(ImmutableBitSet.of(dependent), ImmutableBitSet.of(determinant));
+ }
+
+ public void removeFD(FunctionalDependency fd) {
+ fdSet.remove(fd);
+ }
+
+ public Set<FunctionalDependency> getFDs() {
+ return Collections.unmodifiableSet(fdSet);
+ }
+
+ /**
+ * Returns an ImmutableBitSet containing all attribute indexes that appear
in any FD in the set.
+ */
+ public static ImmutableBitSet allAttributesFromFDs(FunctionalDependencySet
fds) {
+ ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+ Set<FunctionalDependency> fdSet = fds.getFDs();
+ for (FunctionalDependency fd : fdSet) {
+ builder.addAll(fd.getDeterminants());
+ builder.addAll(fd.getDependents());
+ }
+ return builder.build();
+ }
+
+ /**
+ * Computes the closure of a set of attributes under this functional
dependency set.
+ * The closure of X, denoted X+, is the set of all attributes that can be
functionally
+ * determined by X using the functional dependencies in this set and
+ * <a href="https://en.wikipedia.org/wiki/Armstrong%27s_axioms">Armstrong's
axioms</a>
+ *
+ * @param attributes the input attribute set
+ * @return the closure of the input attributes
+ */
+ public ImmutableBitSet closure(ImmutableBitSet attributes) {
+ if (attributes.isEmpty()) {
+ return ImmutableBitSet.of();
+ }
+
+ if (attributes.cardinality() > MAX_CLOSURE_ATTRS) {
+ throw new IllegalArgumentException(
Review Comment:
When there are too many elements in `attributes`, it may not be necessary to
throw an exception. How about returning it itself?
##########
core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java:
##########
@@ -64,212 +76,424 @@ protected RelMdFunctionalDependency() {}
return BuiltInMetadata.FunctionalDependency.DEF;
}
+ /**
+ * Determines if column is functionally dependent on key for a given rel
node.
+ */
public @Nullable Boolean determines(RelNode rel, RelMetadataQuery mq,
int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ return determinesSet(rel, mq, ImmutableBitSet.of(key),
ImmutableBitSet.of(column));
}
- public @Nullable Boolean determines(SetOp rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ /**
+ * Determines if a set of columns functionally determines another set of
columns.
+ */
+ public Boolean determinesSet(RelNode rel, RelMetadataQuery mq,
+ ImmutableBitSet determinants, ImmutableBitSet dependents) {
+ FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel);
+ return fdSet.implies(determinants, dependents);
}
- public @Nullable Boolean determines(Join rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ /**
+ * Returns the closure of a set of columns under all functional dependencies.
+ */
+ public ImmutableBitSet closure(RelNode rel, RelMetadataQuery mq,
ImmutableBitSet attrs) {
+ FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel);
+ return fdSet.closure(attrs);
}
- public @Nullable Boolean determines(Correlate rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ /**
+ * Returns candidate keys for the relation within the specified set of
attributes.
+ */
+ public Set<ImmutableBitSet> candidateKeys(
+ RelNode rel, RelMetadataQuery mq, ImmutableBitSet attributes) {
+ FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel);
+ return fdSet.findCandidateKeys(attributes);
}
- public @Nullable Boolean determines(Aggregate rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
+ /**
+ * Main dispatch method for getFunctionalDependencies.
+ * Routes to appropriate handler based on RelNode type.
+ */
+ public FunctionalDependencySet getFunctionalDependencies(RelNode rel,
RelMetadataQuery mq) {
+ if (rel instanceof TableScan) {
+ return getTableScanFD((TableScan) rel);
+ } else if (rel instanceof Project) {
+ return getProjectFD((Project) rel, mq);
+ } else if (rel instanceof Aggregate) {
+ return getAggregateFD((Aggregate) rel, mq);
+ } else if (rel instanceof Join) {
+ return getJoinFD((Join) rel, mq);
+ } else if (rel instanceof Calc) {
+ return getCalcFD((Calc) rel, mq);
+ } else if (rel instanceof SetOp) {
+ // TODO: Handle UNION, INTERSECT, EXCEPT functional dependencies
+ return new FunctionalDependencySet();
+ } else if (rel instanceof Correlate) {
+ // TODO: Handle CORRELATE functional dependencies
+ return new FunctionalDependencySet();
+ }
+ return getFD(rel.getInputs(), mq);
}
- public @Nullable Boolean determines(Calc rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
+ private static FunctionalDependencySet getFD(List<RelNode> inputs,
RelMetadataQuery mq) {
+ if (inputs.size() > 1) {
+ // Conservative approach for multi-input nodes without specific logic
+ return new FunctionalDependencySet();
+ }
+ return mq.getFunctionalDependencies(inputs.get(0));
}
- public @Nullable Boolean determines(Project rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
- }
+ private static FunctionalDependencySet getTableScanFD(TableScan rel) {
+ FunctionalDependencySet fdSet = new FunctionalDependencySet();
- /**
- * Checks if a column is functionally determined by a key column through
expression analysis.
- *
- * @param rel The input relation
- * @param mq Metadata query instance
- * @param key Index of the determinant expression
- * @param column Index of the dependent expression
- * @return TRUE if column is determined by key,
- * FALSE if not determined,
- * NULL if undetermined
- */
- private static @Nullable Boolean determinesImpl(RelNode rel,
RelMetadataQuery mq,
- int key, int column) {
- if (preCheck(rel, key, column)) {
- return true;
+ RelOptTable table = rel.getTable();
+ List<ImmutableBitSet> keys = table.getKeys();
+ if (keys == null || keys.isEmpty()) {
+ return fdSet;
}
- ImmutableBitSet keyInputIndices = null;
- ImmutableBitSet columnInputIndices = null;
- if (rel instanceof Project || rel instanceof Calc) {
- List<RexNode> exprs = null;
- if (rel instanceof Project) {
- Project project = (Project) rel;
- exprs = project.getProjects();
- } else {
- Calc calc = (Calc) rel;
- final RexProgram program = calc.getProgram();
- exprs = program.expandList(program.getProjectList());
+ for (ImmutableBitSet key : keys) {
+ ImmutableBitSet allColumns =
ImmutableBitSet.range(rel.getRowType().getFieldCount());
+ ImmutableBitSet dependents = allColumns.except(key);
+ if (!dependents.isEmpty()) {
+ fdSet.addFD(key, dependents);
}
+ }
- // TODO: Supports dependency analysis for all types of expressions
- if (!(exprs.get(column) instanceof RexInputRef)) {
- return false;
- }
+ return fdSet;
+ }
- RexNode keyExpr = exprs.get(key);
- RexNode columnExpr = exprs.get(column);
+ private static FunctionalDependencySet getProjectFD(Project rel,
RelMetadataQuery mq) {
+ return getProjectionFD(rel.getInput(), rel.getProjects(), mq);
+ }
- // Identical expressions imply functional dependency
- if (keyExpr.equals(columnExpr)) {
- return true;
+ /**
+ * Common method to compute functional dependencies for projection
operations.
+ * Used by both Project and Calc nodes.
+ *
+ * @param input the input relation
+ * @param projections the list of projection expressions
+ * @param mq the metadata query
+ * @return the functional dependency set for the projection
+ */
+ private static FunctionalDependencySet getProjectionFD(
+ RelNode input, List<RexNode> projections, RelMetadataQuery mq) {
+ FunctionalDependencySet inputFdSet = mq.getFunctionalDependencies(input);
+ FunctionalDependencySet projectionFdSet = new FunctionalDependencySet();
+ int fieldCount = projections.size();
+
+ // Create mapping from input column indices to project column indices
+ Mappings.TargetMapping inputToOutputMap =
+ RelOptUtil.permutation(projections, input.getRowType()).inverse();
+
+ // Map input functional dependencies to project dependencies
+ mapInputFDs(inputFdSet, inputToOutputMap, projectionFdSet);
+
+ for (int i = 0; i < fieldCount; i++) {
+ for (int j = i + 1; j < fieldCount; j++) {
+ RexNode expr1 = projections.get(i);
+ RexNode expr2 = projections.get(j);
+
+ // Handle identical expressions, they determine each other
+ if (expr1.equals(expr2) && RexUtil.isDeterministic(expr1)) {
+ projectionFdSet.addFD(i, j);
+ projectionFdSet.addFD(j, i);
+ continue;
+ }
+
+ // Handle literal constants, all columns determine literals
+ if (expr1 instanceof RexLiteral) {
+ projectionFdSet.addFD(j, i);
+ }
+ if (expr2 instanceof RexLiteral) {
+ projectionFdSet.addFD(i, j);
+ }
+
+ // For complex expressions, only allow FD if one side is RexInputRef
+ // and the other is a deterministic expression
+ if (RexUtil.isDeterministic(expr1) && RexUtil.isDeterministic(expr2)) {
+ boolean expr1IsInputRef = expr1 instanceof RexInputRef;
+ boolean expr2IsInputRef = expr2 instanceof RexInputRef;
+ if (expr1IsInputRef && expr2IsInputRef) {
+ // Both are input refs, check FD between them
+ int idx1 = ((RexInputRef) expr1).getIndex();
+ int idx2 = ((RexInputRef) expr2).getIndex();
+ if (inputFdSet.implies(ImmutableBitSet.of(idx1),
ImmutableBitSet.of(idx2))) {
+ projectionFdSet.addFD(i, j);
+ }
+ if (inputFdSet.implies(ImmutableBitSet.of(idx2),
ImmutableBitSet.of(idx1))) {
+ projectionFdSet.addFD(j, i);
+ }
+ } else {
+ // Only one side is input ref, the other is a deterministic expr
+ if (expr1IsInputRef) {
+ int idx1 = ((RexInputRef) expr1).getIndex();
+ ImmutableBitSet inputs2 = RelOptUtil.InputFinder.bits(expr2);
+ if (!inputs2.isEmpty() &&
inputFdSet.implies(ImmutableBitSet.of(idx1), inputs2)) {
+ projectionFdSet.addFD(i, j);
+ }
+ } else if (expr2IsInputRef) {
+ ImmutableBitSet inputs1 = RelOptUtil.InputFinder.bits(expr1);
+ int idx2 = ((RexInputRef) expr2).getIndex();
+ if (!inputs1.isEmpty() &&
inputFdSet.implies(ImmutableBitSet.of(idx2), inputs1)) {
+ projectionFdSet.addFD(j, i);
+ }
+ }
+ }
+ }
}
+ }
- keyInputIndices = extractDeterministicRefs(keyExpr);
- columnInputIndices = extractDeterministicRefs(columnExpr);
- } else if (rel instanceof Aggregate) {
- Aggregate aggregate = (Aggregate) rel;
+ return projectionFdSet;
+ }
- int groupByCnt = aggregate.getGroupCount();
- if (key < groupByCnt && column >= groupByCnt) {
- return false;
+ /**
+ * Maps input functional dependencies to output dependencies based on column
mapping.
+ */
+ private static void mapInputFDs(FunctionalDependencySet inputFdSet,
+ Mappings.TargetMapping mapping, FunctionalDependencySet outputFdSet) {
+ for (FunctionalDependency inputFd : inputFdSet.getFDs()) {
+ ImmutableBitSet determinants = inputFd.getDeterminants();
+ ImmutableBitSet dependents = inputFd.getDependents();
+
+ // Skip this FD if any determinant column is unmappable
+ boolean allMappable =
+ determinants.stream().allMatch(col -> mapping.getTargetOpt(col) >=
0);
+ if (!allMappable) {
+ continue;
}
- keyInputIndices = extractDeterministicRefs(aggregate, key);
- columnInputIndices = extractDeterministicRefs(aggregate, column);
- } else {
- throw new UnsupportedOperationException("Unsupported RelNode type: "
- + rel.getClass().getSimpleName());
- }
+ // Map all determinant columns
+ ImmutableBitSet mappedDeterminants = mapAllCols(determinants, mapping);
Review Comment:
I mean that `allMappable` does not need to be calculated, just keep the
`mappedDeterminants.isEmpty()`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]