julianhyde commented on code in PR #4566:
URL: https://github.com/apache/calcite/pull/4566#discussion_r2399942807
##########
core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java:
##########
@@ -901,21 +901,70 @@ default RelDataTypeFactory getTypeFactory() {
}
}
- /** Metadata about the functional dependency of columns. */
+ /** Metadata about the functional dependency of column ordinals. */
public interface FunctionalDependency extends Metadata {
MetadataDef<FunctionalDependency> DEF =
MetadataDef.of(FunctionalDependency.class,
FunctionalDependency.Handler.class,
- BuiltInMethod.FUNCTIONAL_DEPENDENCY.method);
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_SET.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_CLOSURE.method,
+
BuiltInMethod.FUNCTIONAL_DEPENDENCY_CANDIDATE_KEYS_OR_SUPER_KEYS.method);
/**
- * Returns whether column is functionally dependent on column.
+ * Returns whether column ordinal {@code determinant} functionally
determines
+ * column ordinal {@code dependent}.
+ *
+ * @param determinant 0-based ordinal of determinant column
+ * @param dependent 0-based ordinal of dependent column
+ * @return {@code true} if determinant uniquely determines dependent;
+ * {@code false} if not;
+ * {@code null} if unknown
*/
- @Nullable Boolean determines(int key, int column);
+ @Nullable Boolean determines(int determinant, int dependent);
+
+ /**
+ * Returns whether column ordinals in {@code determinants} functionally
determine
Review Comment:
Returns whether a set of columns functionally determines another set of
columns.
For example, `empno, deptno` functionally determines `sal, job` because the
determinant (`empno, deptno`) is a superset of a key.
If we know that `deptno, job` functionally determines `sal` then we cannot
deduce that `deptno` alone determines `sal`.
##########
core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java:
##########
@@ -901,21 +901,70 @@ default RelDataTypeFactory getTypeFactory() {
}
}
- /** Metadata about the functional dependency of columns. */
+ /** Metadata about the functional dependency of column ordinals. */
public interface FunctionalDependency extends Metadata {
MetadataDef<FunctionalDependency> DEF =
MetadataDef.of(FunctionalDependency.class,
FunctionalDependency.Handler.class,
- BuiltInMethod.FUNCTIONAL_DEPENDENCY.method);
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_SET.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_CLOSURE.method,
+
BuiltInMethod.FUNCTIONAL_DEPENDENCY_CANDIDATE_KEYS_OR_SUPER_KEYS.method);
/**
- * Returns whether column is functionally dependent on column.
Review Comment:
"Returns whether one column functionally determines another."
For example, "empno" functionally determines "sal", because "empno" is the
primary key.
##########
core/src/main/java/org/apache/calcite/util/Arrow.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+import java.util.Objects;
+
+/**
+ * Represents one functional dependency (Arrow) between two sets of columns,
+ * where each column is identified by its ordinal index.
+ *
+ * <p>{@link Arrow} models the functional dependency such that the values of
the
+ * {@link #determinants} columns uniquely determine the values of the
+ * {@link #dependents} columns. In other words, if two rows have the same
values
+ * for all determinant columns, they must also have the same values for all
+ * dependent columns. Both {@link #determinants} and {@link #dependents} are
+ * ImmutableBitSet column ordinals.
+ *
+ * <p>This structure supports arbitrary cardinality for both determinant and
+ * dependent column sets, allowing the representation relationships:
+ * <ul>
+ * <li>One-to-one: {@code {0} → {1}}
+ * <li>One-to-many: {@code {0} → {1, 2}}
+ * <li>Many-to-one: {@code {0, 1} → {2}}
+ * <li>Many-to-many: {@code {0, 1} → {2, 3}}
+ * </ul>
+ *
+ * <p>Example:
+ *
+ * <blockquote>
+ * <pre>
+ * Table schema: [emp_id, name, dept, salary] // ordinals: 0, 1, 2, 3
+ * Arrow: {0} → {1, 2}
+ * Functional dependency: emp_id → {name, dept}
+ * </pre>
+ * </blockquote>
+ *
+ * <p>This indicates that the employee ID uniquely determines both the name
+ * and department attributes.
+ */
+public class Arrow {
+ // The set of column ordinals that are the determinants in the functional
dependency.
+ private final ImmutableBitSet determinants;
+
+ // The set of column ordinals that are determined by the determinants.
+ private final ImmutableBitSet dependents;
+
+ private Arrow(ImmutableBitSet determinants, ImmutableBitSet dependents) {
+ this.determinants = determinants;
+ this.dependents = dependents;
Review Comment:
add `requireNonNull` for both fields
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies. Each functional dependency is
an {@link Arrow}.
+ *
+ * <p>An {@link ArrowSet} models a set of functional dependencies that may
hold in a relation.
+ * This class provides implementations for several core algorithms in
functional dependency theory,
+ * such as closure computation and candidate key discovery.
+ * For theory background, see:
+ * <a href="https://en.wikipedia.org/wiki/Functional_dependency">
+ * Functional dependency (Wikipedia)</a>
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // Maps each determinant set to the dependent set it functionally determines.
+ private final Map<ImmutableBitSet, ImmutableBitSet> dependencyGraph = new
HashMap<>();
+
+ // Maps each column ordinal to the determinant sets (keys of dependencyGraph)
+ // that contain this column, for efficient reverse lookup.
+ private final Map<Integer, Set<ImmutableBitSet>> reverseIndex = new
HashMap<>();
+
+ // All arrows in this ArrowSet.
+ private final Set<Arrow> arrowSet = new HashSet<>();
+
+ public ArrowSet(Set<Arrow> arrows) {
+ arrowSet.addAll(arrows);
+ for (Arrow arrow : arrows) {
+ ImmutableBitSet determinants = arrow.getDeterminants();
+ ImmutableBitSet dependents = arrow.getDependents();
+ dependencyGraph.merge(determinants, dependents, ImmutableBitSet::union);
+ for (int ordinal : determinants) {
+ reverseIndex.computeIfAbsent(ordinal, k -> new
HashSet<>()).add(determinants);
+ }
+ }
+ }
+
+ public Set<ImmutableBitSet> getDeterminants(int ordinal) {
+ return reverseIndex.getOrDefault(ordinal, ImmutableSet.of());
+ }
+
+ public ImmutableBitSet getDependents(ImmutableBitSet determinants) {
+ return dependencyGraph.getOrDefault(determinants, ImmutableBitSet.of());
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * Computes the closure of a given set of column ordinals with respect to
this ArrowSet.
+ *
+ * <p>The closure of a set of ordinals is defined as the set of all column
ordinals
+ * that can be functionally determined from the specified set by applying
zero or more
+ * functional dependencies present in this collection. This concept is
fundamental
+ * in identifying keys, candidate keys, and in the normalization of
relational schemas.
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * {0} → {1}
+ * {1} → {2}
+ *
+ * // Closure result:
+ * closure({0}) = {0, 1, 2}
+ * </pre>
+ * </blockquote>
+ *
+ * @param ordinals the set of column ordinals whose closure is to be computed
+ * @return an immutable set of column ordinals representing the closure of
ordinals
+ * under the current set of functional dependencies
+ *
+ * <p>Time complexity: O(m * n), where m is the number of arrows
+ * and n is the number of ordinals.
+ *
+ * <p>Recommended: For interactive use, n (number of ordinals) is best kept
+ * below a few hundred.
+ */
+ public ImmutableBitSet computeClosure(ImmutableBitSet ordinals) {
+ if (ordinals.isEmpty()) {
+ return ImmutableBitSet.of();
+ }
+
+ ImmutableBitSet closure = ordinals;
+ Set<Integer> processed = new HashSet<>(ordinals.asList());
Review Comment:
`BitSet` might be more efficient than `HashSet`
Similarly, `BitSet` (with `nextSetBit(int)`) might be more efficient than
`ArrayDeque`.
##########
core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java:
##########
@@ -64,212 +78,395 @@ protected RelMdFunctionalDependency() {}
return BuiltInMetadata.FunctionalDependency.DEF;
}
+ /**
+ * Determines whether the specified column is functionally dependent on the
given key.
+ *
+ * @param rel Relational node
+ * @param mq Metadata query
+ * @param determinant Determinant column ordinal
+ * @param dependent Dependent column ordinal
+ * @return true if column is functionally dependent on key, false otherwise
+ */
public @Nullable Boolean determines(RelNode rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ int determinant, int dependent) {
+ return determinesSet(rel, mq, ImmutableBitSet.of(determinant),
ImmutableBitSet.of(dependent));
+ }
+
+ /**
+ * Determines whether a set of columns functionally determines another set
of columns.
+ *
+ * @param rel Relational node
+ * @param mq Metadata query
+ * @param determinants Determinant column set
+ * @param dependents Dependent column set
+ * @return true if dependents are functionally determined by determinants,
false otherwise
+ */
+ public Boolean determinesSet(RelNode rel, RelMetadataQuery mq,
+ ImmutableBitSet determinants, ImmutableBitSet dependents) {
+ ArrowSet fdSet = getFDs(rel, mq);
+ return fdSet.implies(determinants, dependents);
}
- public @Nullable Boolean determines(SetOp rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ /**
+ * Computes the closure of a set of column ordinals under all functional
dependencies.
+ *
+ * @param rel Relational node
+ * @param mq Metadata query
+ * @param ordinals Column ordinals
+ * @return Closure of the column ordinals
+ */
+ public ImmutableBitSet computeClosure(RelNode rel, RelMetadataQuery mq,
+ ImmutableBitSet ordinals) {
+ ArrowSet fdSet = getFDs(rel, mq);
+ return fdSet.computeClosure(ordinals);
}
- public @Nullable Boolean determines(Join rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ /**
+ * Finds candidate keys or super keys within the specified attribute set.
+ *
+ * @param rel Relational node
+ * @param mq Metadata query
+ * @param ordinals Column ordinals
+ * @param onlyMinimalKeys Whether to return only minimal candidate keys
+ * @return Set of candidate keys or super keys
+ */
+ public Set<ImmutableBitSet> findCandidateKeysOrSuperKeys(
+ RelNode rel, RelMetadataQuery mq, ImmutableBitSet ordinals, boolean
onlyMinimalKeys) {
+ ArrowSet fdSet = getFDs(rel, mq);
+ return fdSet.findCandidateKeysOrSuperKeys(ordinals, onlyMinimalKeys);
}
- public @Nullable Boolean determines(Correlate rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ /**
+ * Gets the functional dependency set for the specified relational node.
+ * Dispatches to the appropriate handler based on node type.
+ */
+ public ArrowSet getFDs(RelNode rel, RelMetadataQuery mq) {
+ rel = rel.stripped();
+ if (rel instanceof TableScan) {
+ return getTableScanFD((TableScan) rel);
+ } else if (rel instanceof Project) {
+ return getProjectFD((Project) rel, mq);
+ } else if (rel instanceof Aggregate) {
+ return getAggregateFD((Aggregate) rel, mq);
+ } else if (rel instanceof Join) {
+ return getJoinFD((Join) rel, mq);
+ } else if (rel instanceof Calc) {
+ return getCalcFD((Calc) rel, mq);
+ } else if (rel instanceof Filter) {
+ return getFilterFD((Filter) rel, mq);
+ } else if (rel instanceof SetOp) {
+ // TODO: Handle UNION, INTERSECT, EXCEPT functional dependencies
+ return ArrowSet.EMPTY;
+ } else if (rel instanceof Correlate) {
+ // TODO: Handle CORRELATE functional dependencies
+ return ArrowSet.EMPTY;
+ }
+ return getFD(rel.getInputs(), mq);
}
- public @Nullable Boolean determines(Aggregate rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
+ /**
+ * Gets the functional dependencies of input nodes.
+ * For multi-input nodes without specific logic, returns an empty set.
+ */
+ private ArrowSet getFD(List<RelNode> inputs, RelMetadataQuery mq) {
+ if (inputs.size() != 1) {
+ // Conservative approach for multi-input nodes without specific logic
+ return ArrowSet.EMPTY;
+ }
+ return getFDs(inputs.get(0), mq);
}
- public @Nullable Boolean determines(Calc rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
+ /**
+ * Gets the functional dependencies for a TableScan node.
+ * The table's primary key determines all other columns.
+ */
+ private static ArrowSet getTableScanFD(TableScan rel) {
+ ArrowSet.Builder fdBuilder = new ArrowSet.Builder();
+
+ RelOptTable table = rel.getTable();
+ List<ImmutableBitSet> keys = table.getKeys();
+ if (keys == null || keys.isEmpty()) {
+ return fdBuilder.build();
+ }
+
+ for (ImmutableBitSet key : keys) {
+ ImmutableBitSet allColumns =
ImmutableBitSet.range(rel.getRowType().getFieldCount());
+ ImmutableBitSet dependents = allColumns.except(key);
+ if (!dependents.isEmpty()) {
+ fdBuilder.addArrow(key, dependents);
+ }
+ }
+
+ return fdBuilder.build();
}
- public @Nullable Boolean determines(Project rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
+ /**
+ * Gets the functional dependencies for a Project node.
+ * Maps input dependencies through projection expressions.
+ */
+ private ArrowSet getProjectFD(Project rel, RelMetadataQuery mq) {
+ return getProjectionFD(rel.getInput(), rel.getProjects(), mq);
}
/**
- * Checks if a column is functionally determined by a key column through
expression analysis.
+ * Computes the functional dependencies for projection operations
(Project/Calc).
*
- * @param rel The input relation
- * @param mq Metadata query instance
- * @param key Index of the determinant expression
- * @param column Index of the dependent expression
- * @return TRUE if column is determined by key,
- * FALSE if not determined,
- * NULL if undetermined
+ * @param input Input relation
+ * @param projections List of projection expressions
+ * @param mq Metadata query
+ * @return Functional dependency set after projection
*/
- private static @Nullable Boolean determinesImpl(RelNode rel,
RelMetadataQuery mq,
- int key, int column) {
- if (preCheck(rel, key, column)) {
- return true;
- }
-
- ImmutableBitSet keyInputIndices = null;
- ImmutableBitSet columnInputIndices = null;
- if (rel instanceof Project || rel instanceof Calc) {
- List<RexNode> exprs = null;
- if (rel instanceof Project) {
- Project project = (Project) rel;
- exprs = project.getProjects();
- } else {
- Calc calc = (Calc) rel;
- final RexProgram program = calc.getProgram();
- exprs = program.expandList(program.getProjectList());
+ private ArrowSet getProjectionFD(
+ RelNode input, List<RexNode> projections, RelMetadataQuery mq) {
+ ArrowSet inputFdSet = getFDs(input, mq);
+ ArrowSet.Builder fdBuilder = new ArrowSet.Builder();
+
+ // Create mapping from input column indices to project column indices
+ Mappings.TargetMapping inputToOutputMap =
+ RelOptUtil.permutation(projections, input.getRowType()).inverse();
+
+ // Map input functional dependencies to project dependencies
+ mapInputFDs(inputFdSet, inputToOutputMap, fdBuilder);
+
+ int fieldCount = projections.size();
+ final ImmutableBitSet[] inputBits = new ImmutableBitSet[fieldCount];
+ final Map<RexNode, Integer> uniqueExprToIndex = new HashMap<>();
+ final Map<RexLiteral, Integer> literalToIndex = new HashMap<>();
+ final Map<RexInputRef, Integer> refToIndex = new HashMap<>();
+ for (int i = 0; i < fieldCount; i++) {
+ RexNode expr = projections.get(i);
+
+ // Skip non-deterministic expressions
+ if (!RexUtil.isDeterministic(expr)) {
+ continue;
}
- // TODO: Supports dependency analysis for all types of expressions
- if (!(exprs.get(column) instanceof RexInputRef)) {
- return false;
+ // Handle identical expressions in the projection list
+ Integer prev = uniqueExprToIndex.putIfAbsent(expr, i);
+ if (prev != null) {
+ fdBuilder.addBidirectionalArrow(prev, i);
}
- RexNode keyExpr = exprs.get(key);
- RexNode columnExpr = exprs.get(column);
+ // Track literal constants to handle them specially
+ if (expr instanceof RexLiteral) {
+ literalToIndex.put((RexLiteral) expr, i);
+ continue;
+ }
- // Identical expressions imply functional dependency
- if (keyExpr.equals(columnExpr)) {
- return true;
+ if (expr instanceof RexInputRef) {
+ refToIndex.put((RexInputRef) expr, i);
+ inputBits[i] = ImmutableBitSet.of(((RexInputRef) expr).getIndex());
}
- keyInputIndices = extractDeterministicRefs(keyExpr);
- columnInputIndices = extractDeterministicRefs(columnExpr);
- } else if (rel instanceof Aggregate) {
- Aggregate aggregate = (Aggregate) rel;
+ inputBits[i] = RelOptUtil.InputFinder.bits(expr);
+ }
+
+ // Remove literals from uniqueExprToIndex to avoid redundant processing
+ uniqueExprToIndex.keySet().removeIf(key -> key instanceof RexLiteral);
+
+ for (Map.Entry<RexNode, Integer> entry : uniqueExprToIndex.entrySet()) {
+ RexNode expr = entry.getKey();
+ Integer i = entry.getValue();
+
+ // All columns determine literals
+ literalToIndex.values()
+ .forEach(l -> fdBuilder.addArrow(i, l));
+
+ // Input columns determine identical expressions
+ refToIndex.forEach((k, v) -> {
+ ImmutableBitSet refIndex = ImmutableBitSet.of(k.getIndex());
+ ImmutableBitSet bitSet = expr instanceof RexInputRef
+ ? ImmutableBitSet.of(((RexInputRef) expr).getIndex())
+ : inputBits[i];
+ if (inputFdSet.implies(refIndex, bitSet)) {
+ fdBuilder.addArrow(v, i);
+ }
+ });
+ }
+
+ return fdBuilder.build();
+ }
- int groupByCnt = aggregate.getGroupCount();
- if (key < groupByCnt && column >= groupByCnt) {
- return false;
+ /**
+ * Maps input functional dependencies to output dependencies based on column
mapping.
+ */
+ private static void mapInputFDs(ArrowSet inputFdSet,
+ Mappings.TargetMapping mapping, ArrowSet.Builder outputFdBuilder) {
+ for (Arrow inputFd : inputFdSet.getArrows()) {
+ ImmutableBitSet determinants = inputFd.getDeterminants();
+ ImmutableBitSet dependents = inputFd.getDependents();
+
+ // Map all determinant columns
+ ImmutableBitSet mappedDeterminants = mapAllCols(determinants, mapping);
+ if (mappedDeterminants.isEmpty()) {
+ continue;
}
- keyInputIndices = extractDeterministicRefs(aggregate, key);
- columnInputIndices = extractDeterministicRefs(aggregate, column);
- } else {
- throw new UnsupportedOperationException("Unsupported RelNode type: "
- + rel.getClass().getSimpleName());
+ // Map only the dependent columns that can be mapped
+ ImmutableBitSet mappedDependents = mapAvailableCols(dependents, mapping);
+ if (!mappedDependents.isEmpty()) {
+ outputFdBuilder.addArrow(mappedDeterminants, mappedDependents);
+ }
}
+ }
- // Early return if invalid cases
- if (keyInputIndices.isEmpty()
- || columnInputIndices.isEmpty()) {
- return false;
+ /**
+ * Maps all column ordinals in the set. Returns an empty set if any column
cannot be mapped.
+ */
+ private static ImmutableBitSet mapAllCols(
+ ImmutableBitSet ordinals, Mappings.TargetMapping mapping) {
+ ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+ for (int ord : ordinals) {
+ int mappedOrd = mapping.getTargetOpt(ord);
+ if (mappedOrd < 0) {
+ return ImmutableBitSet.of();
+ }
+ builder.set(mappedOrd);
}
+ return builder.build();
+ }
- // Currently only supports multiple (keyInputIndices) to one
(columnInputIndices)
- // dependency detection
- for (Integer keyRef : keyInputIndices) {
- if (Boolean.FALSE.equals(
- mq.determines(rel.getInput(0), keyRef,
- columnInputIndices.nextSetBit(0)))) {
- return false;
+ /**
+ * Maps only the column ordinals that can be mapped, ignoring unmappable
ones.
+ */
+ private static ImmutableBitSet mapAvailableCols(
+ ImmutableBitSet ordinals, Mappings.TargetMapping mapping) {
+ ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+ for (int ord : ordinals) {
+ int mappedOrd = mapping.getTargetOpt(ord);
+ if (mappedOrd >= 0) {
+ builder.set(mappedOrd);
}
}
-
- return true;
+ return builder.build();
}
/**
- * determinesImpl2is similar to determinesImpl, but it doesn't need to
handle the
- * mapping between output and input columns.
+ * Gets the functional dependencies for an Aggregate node.
+ * Group keys determine all aggregate columns.
+ * Preserves input dependencies that only involve group columns.
*/
- private static @Nullable Boolean determinesImpl2(RelNode rel,
RelMetadataQuery mq,
- int key, int column) {
- if (preCheck(rel, key, column)) {
- return true;
+ private ArrowSet getAggregateFD(Aggregate rel, RelMetadataQuery mq) {
+ ArrowSet.Builder fdBuilder = new ArrowSet.Builder();
+ ArrowSet inputFdSet = getFDs(rel.getInput(), mq);
+
+ ImmutableBitSet groupSet = rel.getGroupSet();
+
+ // Preserve input FDs that only involve group columns
+ if (Aggregate.isSimple(rel)) {
+ for (Arrow inputFd : inputFdSet.getArrows()) {
+ ImmutableBitSet determinants = inputFd.getDeterminants();
+ ImmutableBitSet dependents = inputFd.getDependents();
+
+ // Only preserve if both determinants and dependents are within group
columns
+ if (groupSet.contains(determinants) && groupSet.contains(dependents)) {
+ fdBuilder.addArrow(determinants, dependents);
+ }
+ }
}
- if (rel instanceof TableScan) {
- TableScan tableScan = (TableScan) rel;
- RelOptTable table = tableScan.getTable();
- List<ImmutableBitSet> keys = table.getKeys();
- return keys != null
- && keys.size() == 1
- && keys.get(0).equals(ImmutableBitSet.of(column));
- } else if (rel instanceof Join) {
- Join join = (Join) rel;
- // TODO Considering column mapping based on equality conditions in join
- int leftFieldCnt = join.getLeft().getRowType().getFieldCount();
- if (key < leftFieldCnt && column < leftFieldCnt) {
- return mq.determines(join.getLeft(), key, column);
- } else if (key >= leftFieldCnt && column >= leftFieldCnt) {
- return mq.determines(join.getRight(), key - leftFieldCnt, column -
leftFieldCnt);
+ // Group keys determine all aggregate columns
+ if (!groupSet.isEmpty() && !rel.getAggCallList().isEmpty()) {
+ for (int i = rel.getGroupCount(); i < rel.getRowType().getFieldCount();
i++) {
+ fdBuilder.addArrow(groupSet, ImmutableBitSet.of(i));
Review Comment:
This is one obvious place where you are creating a lot of arrows 'a, b ->
c', 'a, b -> d', 'a, b -> e' where you could instead create one arrow 'a, b ->
c, d, e'.
Code like this should continue to operate naively, but to do that,
ArrowBuilder should minimize.
Another kind of minimization is that if I add 'a, b -> c' and also 'b -> c'
then I can remove 'a, b -> c'. Perhaps minimization is expensive, so maybe it
can be best-effort (i.e. miss a few things, but cover the important cases). I
suggest adding a follow-up ticket to minimize.
##########
core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java:
##########
@@ -64,212 +78,395 @@ protected RelMdFunctionalDependency() {}
return BuiltInMetadata.FunctionalDependency.DEF;
}
+ /**
+ * Determines whether the specified column is functionally dependent on the
given key.
+ *
+ * @param rel Relational node
+ * @param mq Metadata query
+ * @param determinant Determinant column ordinal
+ * @param dependent Dependent column ordinal
+ * @return true if column is functionally dependent on key, false otherwise
+ */
public @Nullable Boolean determines(RelNode rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ int determinant, int dependent) {
+ return determinesSet(rel, mq, ImmutableBitSet.of(determinant),
ImmutableBitSet.of(dependent));
+ }
+
+ /**
+ * Determines whether a set of columns functionally determines another set
of columns.
+ *
+ * @param rel Relational node
+ * @param mq Metadata query
+ * @param determinants Determinant column set
+ * @param dependents Dependent column set
+ * @return true if dependents are functionally determined by determinants,
false otherwise
+ */
+ public Boolean determinesSet(RelNode rel, RelMetadataQuery mq,
+ ImmutableBitSet determinants, ImmutableBitSet dependents) {
+ ArrowSet fdSet = getFDs(rel, mq);
Review Comment:
Because `getFDs` is not part of the metadata API, it will not benefit from
caching. It will be recomputed each time, possibly several times for each
metadata query. This might become a serious performance issue.
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies. Each functional dependency is
an {@link Arrow}.
+ *
+ * <p>An {@link ArrowSet} models a set of functional dependencies that may
hold in a relation.
+ * This class provides implementations for several core algorithms in
functional dependency theory,
+ * such as closure computation and candidate key discovery.
+ * For theory background, see:
+ * <a href="https://en.wikipedia.org/wiki/Functional_dependency">
+ * Functional dependency (Wikipedia)</a>
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // Maps each determinant set to the dependent set it functionally determines.
+ private final Map<ImmutableBitSet, ImmutableBitSet> dependencyGraph = new
HashMap<>();
+
+ // Maps each column ordinal to the determinant sets (keys of dependencyGraph)
+ // that contain this column, for efficient reverse lookup.
+ private final Map<Integer, Set<ImmutableBitSet>> reverseIndex = new
HashMap<>();
+
+ // All arrows in this ArrowSet.
+ private final Set<Arrow> arrowSet = new HashSet<>();
Review Comment:
Since `ArrowSet` is immutable, the 3 underlying collections should probably
be immutable.
I suspect that `ImmutableList` is the best data structure for `arrowSet`.
(The builder can take care of eliminating duplicates. You don't seem to do
anything with the collection besides iteration.)
The other two collections could be computed on demand. (You will create a
lot of `ArrowSet` instances during optimization, most of these instances will
not have many operations performed on them.)
##########
core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java:
##########
@@ -905,17 +905,67 @@ default RelDataTypeFactory getTypeFactory() {
public interface FunctionalDependency extends Metadata {
MetadataDef<FunctionalDependency> DEF =
MetadataDef.of(FunctionalDependency.class,
FunctionalDependency.Handler.class,
- BuiltInMethod.FUNCTIONAL_DEPENDENCY.method);
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY.method,
Review Comment:
Rather than "functional dependency of column ordinals" I would write
"functional dependencies among columns".
##########
core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java:
##########
@@ -901,21 +901,70 @@ default RelDataTypeFactory getTypeFactory() {
}
}
- /** Metadata about the functional dependency of columns. */
+ /** Metadata about the functional dependency of column ordinals. */
public interface FunctionalDependency extends Metadata {
MetadataDef<FunctionalDependency> DEF =
MetadataDef.of(FunctionalDependency.class,
FunctionalDependency.Handler.class,
- BuiltInMethod.FUNCTIONAL_DEPENDENCY.method);
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_SET.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_CLOSURE.method,
+
BuiltInMethod.FUNCTIONAL_DEPENDENCY_CANDIDATE_KEYS_OR_SUPER_KEYS.method);
/**
- * Returns whether column is functionally dependent on column.
+ * Returns whether column ordinal {@code determinant} functionally
determines
+ * column ordinal {@code dependent}.
+ *
+ * @param determinant 0-based ordinal of determinant column
+ * @param dependent 0-based ordinal of dependent column
+ * @return {@code true} if determinant uniquely determines dependent;
+ * {@code false} if not;
+ * {@code null} if unknown
*/
- @Nullable Boolean determines(int key, int column);
+ @Nullable Boolean determines(int determinant, int dependent);
+
+ /**
+ * Returns whether column ordinals in {@code determinants} functionally
determine
+ * column ordinals in {@code dependents}.
+ *
+ * @param determinants 0-based ordinals of determinant columns
+ * @param dependents 0-based ordinals of dependent columns
+ * @return true if determinants uniquely determine dependents
+ */
+ Boolean determinesSet(ImmutableBitSet determinants, ImmutableBitSet
dependents);
+
+ /**
+ * Returns the closure of {@code ordinals} under the functional dependency
set.
+ * The closure is the set of column ordinals uniquely determined by {@code
ordinals}.
+ *
+ * @param ordinals 0-based ordinals of determinant columns
+ * @return closure: columns functionally determined by {@code ordinals}
+ */
+ ImmutableBitSet computeClosure(ImmutableBitSet ordinals);
+
+ /**
+ * Finds candidate keys or superkeys in {@code ordinals} under the
functional dependency set.
Review Comment:
I don't agree with this use of the term 'candidate key'.
If a table contains duplicates then NOTHING we know about functional
dependencies will allow us to deduce keys. (To fix this, you could introduce a
special attribute that represents row-identity.)
You also don't define 'super key'. And you use it inconsistently:
'superkeys', 'SuperKeys')
I think you could just rename this method to `determinants(ImmutableBitSet
dependents)`.
I think you should remove the `boolean onlyMinimalKeys` parameter (and set
it to `true`). The set of non-minimal keys might be very large. I assume that
it is straightforward to build a minimal set and omit keys that are not minimal.
Give a few examples.
##########
core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java:
##########
@@ -64,212 +78,395 @@ protected RelMdFunctionalDependency() {}
return BuiltInMetadata.FunctionalDependency.DEF;
}
+ /**
+ * Determines whether the specified column is functionally dependent on the
given key.
Review Comment:
Very confusing use of the word "Determines".
Instead, "Returns whether `determinant` determines `dependent`."
##########
core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java:
##########
@@ -901,21 +901,70 @@ default RelDataTypeFactory getTypeFactory() {
}
}
- /** Metadata about the functional dependency of columns. */
+ /** Metadata about the functional dependency of column ordinals. */
public interface FunctionalDependency extends Metadata {
MetadataDef<FunctionalDependency> DEF =
MetadataDef.of(FunctionalDependency.class,
FunctionalDependency.Handler.class,
- BuiltInMethod.FUNCTIONAL_DEPENDENCY.method);
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_SET.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_CLOSURE.method,
+
BuiltInMethod.FUNCTIONAL_DEPENDENCY_CANDIDATE_KEYS_OR_SUPER_KEYS.method);
/**
- * Returns whether column is functionally dependent on column.
+ * Returns whether column ordinal {@code determinant} functionally
determines
+ * column ordinal {@code dependent}.
+ *
+ * @param determinant 0-based ordinal of determinant column
+ * @param dependent 0-based ordinal of dependent column
+ * @return {@code true} if determinant uniquely determines dependent;
+ * {@code false} if not;
+ * {@code null} if unknown
*/
- @Nullable Boolean determines(int key, int column);
+ @Nullable Boolean determines(int determinant, int dependent);
+
+ /**
+ * Returns whether column ordinals in {@code determinants} functionally
determine
+ * column ordinals in {@code dependents}.
+ *
+ * @param determinants 0-based ordinals of determinant columns
+ * @param dependents 0-based ordinals of dependent columns
+ * @return true if determinants uniquely determine dependents
+ */
+ Boolean determinesSet(ImmutableBitSet determinants, ImmutableBitSet
dependents);
+
+ /**
+ * Returns the closure of {@code ordinals} under the functional dependency
set.
+ * The closure is the set of column ordinals uniquely determined by {@code
ordinals}.
+ *
+ * @param ordinals 0-based ordinals of determinant columns
+ * @return closure: columns functionally determined by {@code ordinals}
+ */
+ ImmutableBitSet computeClosure(ImmutableBitSet ordinals);
Review Comment:
Rename this method to `dependents(ImmutableBitSet determinants)`.
Give an example.
##########
core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java:
##########
@@ -20,33 +20,47 @@
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
-import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.Calc;
import org.apache.calcite.rel.core.Correlate;
+import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.SetOp;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.rex.RexProgram;
import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.util.Arrow;
+import org.apache.calcite.util.ArrowSet;
import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.mapping.Mappings;
import org.checkerframework.checker.nullness.qual.Nullable;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+import java.util.Set;
/**
- * Default implementation of
- * {@link RelMetadataQuery#determines(RelNode, int, int)}
+ * Default implementation of {@link BuiltInMetadata.FunctionalDependency}
metadata handler
* for the standard logical algebra.
*
- * <p>The goal of this provider is to determine whether
- * key is functionally dependent on column.
+ * <p>Analyzes functional dependencies for relational operators using {@link
ArrowSet}.
*
Review Comment:
Rather than "Analyzes functional dependencies for relational operators using
{@link ArrowSet}." just say "The implementation uses ArrowSet."
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]