This is an automated email from the ASF dual-hosted git repository.
zhenchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new e2d8ce0ce5 [CALCITE-5913] Support to get functional dependency
metadata in RelMetadataQuery
e2d8ce0ce5 is described below
commit e2d8ce0ce571effc1ae68adcf702c8739826e4c5
Author: Zhen Chen <[email protected]>
AuthorDate: Wed Oct 8 20:04:52 2025 +0800
[CALCITE-5913] Support to get functional dependency metadata in
RelMetadataQuery
---
.../calcite/rel/metadata/BuiltInMetadata.java | 82 ++-
.../rel/metadata/RelMdFunctionalDependency.java | 502 ++++++++++-----
.../calcite/rel/metadata/RelMetadataQuery.java | 54 ++
.../main/java/org/apache/calcite/util/Arrow.java | 118 ++++
.../java/org/apache/calcite/util/ArrowSet.java | 339 +++++++++++
.../org/apache/calcite/util/BuiltInMethod.java | 9 +-
.../org/apache/calcite/test/RelMetadataTest.java | 678 +++++++++++++++++----
.../java/org/apache/calcite/util/ArrowSetTest.java | 167 +++++
8 files changed, 1663 insertions(+), 286 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java
b/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java
index 0f289aecbf..b0bc96b39c 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java
@@ -30,6 +30,7 @@
import org.apache.calcite.rex.RexTableInputRef.RelTableRef;
import org.apache.calcite.sql.SqlExplainLevel;
import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.ArrowSet;
import org.apache.calcite.util.BuiltInMethod;
import org.apache.calcite.util.ImmutableBitSet;
@@ -901,21 +902,94 @@ default RelDataTypeFactory getTypeFactory() {
}
}
- /** Metadata about the functional dependency of columns. */
+ /** Metadata about the functional dependencies among columns. */
public interface FunctionalDependency extends Metadata {
MetadataDef<FunctionalDependency> DEF =
MetadataDef.of(FunctionalDependency.class,
FunctionalDependency.Handler.class,
- BuiltInMethod.FUNCTIONAL_DEPENDENCY.method);
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_SET.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_DEPENDENTS.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_DETERMINANTS.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_GET_FDS.method);
/**
- * Returns whether column is functionally dependent on column.
+ * Returns whether one column functionally determines another.
+ *
+ * <p>For example,
+ * {@code empno} functionally determines {@code sal},
+ * because {@code empno} is the primary key.
+ *
+ * @param determinant 0-based ordinal of determinant column
+ * @param dependent 0-based ordinal of dependent column
+ * @return {@code true} if determinant uniquely determines dependent;
+ * {@code false} if not;
+ * {@code null} if unknown
+ */
+ @Nullable Boolean determines(int determinant, int dependent);
+
+ /**
+ * Returns whether a set of columns functionally determines another set of
columns.
+ *
+ * <p>For example,
+ * {{@code empno}, {@code deptno}} functionally determines {{@code sal},
{@code job}},
+ * because the determinant ({@code empno}, {@code deptno}) is a superset
of a key.
+ * If we know that {{@code deptno}, {@code job}} functionally determines
{{@code sal}}
+ * then we cannot deduce that {@code deptno} alone determines {@code sal}.
+ *
+ * @param determinants 0-based ordinals of determinant columns
+ * @param dependents 0-based ordinals of dependent columns
+ * @return true if determinants uniquely determine dependents
+ */
+ Boolean determinesSet(ImmutableBitSet determinants, ImmutableBitSet
dependents);
+
+ /**
+ * Returns the closure of {@code ordinals} under the functional dependency
set.
+ * The closure is the set of column ordinals uniquely determined by {@code
ordinals}.
+ *
+ * <p>For example,
+ * if input is {{@code empno}, {@code deptno}},
+ * and functional dependencies contains
+ * {{@code empno}, {@code deptno}} determines {{@code sal}, {@code job}},
+ * then returns {{@code empno}, {@code deptno}, {@code sal}, {@code job}}.
+ *
+ * @param ordinals 0-based ordinals of determinant columns
+ * @return closure: columns functionally determined by {@code ordinals}
+ */
+ ImmutableBitSet dependents(ImmutableBitSet ordinals);
+
+ /**
+ * Finds minimal determinant sets for the given dependent columns
+ * under the functional dependency set.
+ *
+ * <p>For example,
+ * if input is {{@code empno}, {@code deptno}, {@code sal}, {@code job}},
+ * and functional dependencies contains
+ * {{@code empno} determines {@code deptno}},
+ * {{@code empno} determines {{@code sal},
+ * {{@code empno} determines {@code job}},
+ * then returns {{@code empno}}.
+ *
+ * @param ordinals 0-based ordinals of dependent columns
+ * @return sets of minimal determinant columns
*/
- @Nullable Boolean determines(int key, int column);
+ Set<ImmutableBitSet> determinants(ImmutableBitSet ordinals);
+
+ /** Returns the full set of functional dependencies. */
+ ArrowSet getFDs();
/** Handler API. */
interface Handler extends MetadataHandler<FunctionalDependency> {
@Nullable Boolean determines(RelNode r, RelMetadataQuery mq, int key,
int column);
+ Boolean determinesSet(RelNode r, RelMetadataQuery mq,
+ ImmutableBitSet determinants, ImmutableBitSet dependents);
+
+ ImmutableBitSet dependents(RelNode r, RelMetadataQuery mq,
ImmutableBitSet ordinals);
+
+ Set<ImmutableBitSet> determinants(RelNode r, RelMetadataQuery mq,
ImmutableBitSet ordinals);
+
+ ArrowSet getFDs(RelNode r, RelMetadataQuery mq);
+
@Override default MetadataDef<FunctionalDependency> getDef() {
return DEF;
}
diff --git
a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java
b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java
index 02aaa51d68..ea6d58cd23 100644
---
a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java
+++
b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java
@@ -20,33 +20,45 @@
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
-import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.Calc;
import org.apache.calcite.rel.core.Correlate;
+import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.SetOp;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.rex.RexProgram;
import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.util.Arrow;
+import org.apache.calcite.util.ArrowSet;
import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.mapping.Mappings;
import org.checkerframework.checker.nullness.qual.Nullable;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+import java.util.Set;
/**
- * Default implementation of
- * {@link RelMetadataQuery#determines(RelNode, int, int)}
- * for the standard logical algebra.
+ * Default implementation of {@link BuiltInMetadata.FunctionalDependency}
metadata handler
+ * for relational algebra nodes. Uses {@link ArrowSet} to represent functional
dependencies.
*
- * <p>The goal of this provider is to determine whether
- * key is functionally dependent on column.
+ * <p>Core functionalities:
+ * <ul>
+ * <li>Detects functional dependencies ({@link #determines}, {@link
#determinesSet})</li>
+ * <li>Computes all columns functionally determined by a given set ({@link
#dependents})</li>
+ * <li>Finds minimal determinant sets ({@link #determinants})</li>
+ * </ul>
*
- * <p>If the functional dependency cannot be determined, we return false.
+ * @see Arrow
+ * @see ArrowSet
*/
public class RelMdFunctionalDependency
implements MetadataHandler<BuiltInMetadata.FunctionalDependency> {
@@ -64,212 +76,392 @@ protected RelMdFunctionalDependency() {}
return BuiltInMetadata.FunctionalDependency.DEF;
}
+ /**
+ * Returns whether one column functionally determines another.
+ *
+ * @param rel Relational node
+ * @param mq Metadata query
+ * @param determinant Determinant column index
+ * @param dependent Dependent column index
+ * @return true if determinant determines dependent
+ */
public @Nullable Boolean determines(RelNode rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
- }
-
- public @Nullable Boolean determines(SetOp rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ int determinant, int dependent) {
+ return determinesSet(rel, mq, ImmutableBitSet.of(determinant),
ImmutableBitSet.of(dependent));
}
- public @Nullable Boolean determines(Join rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ /**
+ * Returns whether a set of columns functionally determines another set.
+ *
+ * @param rel Relational node
+ * @param mq Metadata query
+ * @param determinants Indices of determinant columns
+ * @param dependents Indices of dependent columns
+ * @return true if determinants determine dependents
+ */
+ public Boolean determinesSet(RelNode rel, RelMetadataQuery mq,
+ ImmutableBitSet determinants, ImmutableBitSet dependents) {
+ ArrowSet fdSet = getFDs(rel, mq);
+ return fdSet.implies(determinants, dependents);
}
- public @Nullable Boolean determines(Correlate rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl2(rel, mq, key, column);
+ /**
+ * Returns all columns functionally determined by the given columns.
+ *
+ * @param rel Relational node
+ * @param mq Metadata query
+ * @param ordinals Indices of input columns
+ * @return Indices of determined columns
+ */
+ public ImmutableBitSet dependents(RelNode rel, RelMetadataQuery mq,
+ ImmutableBitSet ordinals) {
+ ArrowSet fdSet = getFDs(rel, mq);
+ return fdSet.dependents(ordinals);
}
- public @Nullable Boolean determines(Aggregate rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
+ /**
+ * Returns all minimal determinant sets for the given columns.
+ *
+ * @param rel Relational node
+ * @param mq Metadata query
+ * @param ordinals Indices of columns
+ * @return Minimal determinant sets
+ */
+ public Set<ImmutableBitSet> determinants(
+ RelNode rel, RelMetadataQuery mq, ImmutableBitSet ordinals) {
+ ArrowSet fdSet = getFDs(rel, mq);
+ return fdSet.determinants(ordinals);
}
- public @Nullable Boolean determines(Calc rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
+ /**
+ * Returns all functional dependencies for the given relational node.
+ */
+ public ArrowSet getFDs(RelNode rel, RelMetadataQuery mq) {
+ rel = rel.stripped();
+ if (rel instanceof TableScan) {
+ return getTableScanFD((TableScan) rel);
+ } else if (rel instanceof Project) {
+ return getProjectFD((Project) rel, mq);
+ } else if (rel instanceof Aggregate) {
+ return getAggregateFD((Aggregate) rel, mq);
+ } else if (rel instanceof Join) {
+ return getJoinFD((Join) rel, mq);
+ } else if (rel instanceof Calc) {
+ return getCalcFD((Calc) rel, mq);
+ } else if (rel instanceof Filter) {
+ return getFilterFD((Filter) rel, mq);
+ } else if (rel instanceof SetOp) {
+ // TODO: Handle UNION, INTERSECT, EXCEPT functional dependencies
+ return ArrowSet.EMPTY;
+ } else if (rel instanceof Correlate) {
+ // TODO: Handle CORRELATE functional dependencies
+ return ArrowSet.EMPTY;
+ }
+ return getFD(rel.getInputs(), mq);
}
- public @Nullable Boolean determines(Project rel, RelMetadataQuery mq,
- int key, int column) {
- return determinesImpl(rel, mq, key, column);
+ /**
+ * Returns functional dependencies for input nodes;
+ * returns empty if multiple inputs.
+ */
+ private ArrowSet getFD(List<RelNode> inputs, RelMetadataQuery mq) {
+ if (inputs.size() != 1) {
+ // Conservative approach for multi-input nodes without specific logic
+ return ArrowSet.EMPTY;
+ }
+ return getFDs(inputs.get(0), mq);
}
/**
- * Checks if a column is functionally determined by a key column through
expression analysis.
- *
- * @param rel The input relation
- * @param mq Metadata query instance
- * @param key Index of the determinant expression
- * @param column Index of the dependent expression
- * @return TRUE if column is determined by key,
- * FALSE if not determined,
- * NULL if undetermined
+ * Returns functional dependencies for a TableScan node.
*/
- private static @Nullable Boolean determinesImpl(RelNode rel,
RelMetadataQuery mq,
- int key, int column) {
- if (preCheck(rel, key, column)) {
- return true;
+ private static ArrowSet getTableScanFD(TableScan rel) {
+ ArrowSet.Builder fdBuilder = new ArrowSet.Builder();
+
+ RelOptTable table = rel.getTable();
+ List<ImmutableBitSet> keys = table.getKeys();
+ if (keys == null || keys.isEmpty()) {
+ return fdBuilder.build();
}
- ImmutableBitSet keyInputIndices = null;
- ImmutableBitSet columnInputIndices = null;
- if (rel instanceof Project || rel instanceof Calc) {
- List<RexNode> exprs = null;
- if (rel instanceof Project) {
- Project project = (Project) rel;
- exprs = project.getProjects();
- } else {
- Calc calc = (Calc) rel;
- final RexProgram program = calc.getProgram();
- exprs = program.expandList(program.getProjectList());
+ for (ImmutableBitSet key : keys) {
+ ImmutableBitSet allColumns =
ImmutableBitSet.range(rel.getRowType().getFieldCount());
+ ImmutableBitSet dependents = allColumns.except(key);
+ if (!dependents.isEmpty()) {
+ fdBuilder.addArrow(key, dependents);
}
+ }
- // TODO: Supports dependency analysis for all types of expressions
- if (!(exprs.get(column) instanceof RexInputRef)) {
- return false;
- }
+ return fdBuilder.build();
+ }
- RexNode keyExpr = exprs.get(key);
- RexNode columnExpr = exprs.get(column);
+ /**
+ * Returns functional dependencies for a Project node.
+ */
+ private ArrowSet getProjectFD(Project rel, RelMetadataQuery mq) {
+ return getProjectionFD(rel.getInput(), rel.getProjects(), mq);
+ }
+
+ /**
+ * Returns functional dependencies after projection.
+ *
+ * @param input Input relation
+ * @param projections Projection expressions
+ * @param mq Metadata query
+ * @return Functional dependencies after projection
+ */
+ private ArrowSet getProjectionFD(
+ RelNode input, List<RexNode> projections, RelMetadataQuery mq) {
+ ArrowSet inputFdSet = getFDs(input, mq);
+ ArrowSet.Builder fdBuilder = new ArrowSet.Builder();
+
+ // Create mapping from input column indices to project column indices
+ Mappings.TargetMapping inputToOutputMap =
+ RelOptUtil.permutation(projections, input.getRowType()).inverse();
+
+ // Map input functional dependencies to project dependencies
+ mapInputFDs(inputFdSet, inputToOutputMap, fdBuilder);
+
+ int fieldCount = projections.size();
+ final ImmutableBitSet[] inputBits = new ImmutableBitSet[fieldCount];
+ final Map<RexNode, Integer> uniqueExprToIndex = new HashMap<>();
+ final Map<RexLiteral, Integer> literalToIndex = new HashMap<>();
+ final Map<RexInputRef, Integer> refToIndex = new HashMap<>();
+ for (int i = 0; i < fieldCount; i++) {
+ RexNode expr = projections.get(i);
+
+ // Skip non-deterministic expressions
+ if (!RexUtil.isDeterministic(expr)) {
+ continue;
+ }
- // Identical expressions imply functional dependency
- if (keyExpr.equals(columnExpr)) {
- return true;
+ // Handle identical expressions in the projection list
+ Integer prev = uniqueExprToIndex.putIfAbsent(expr, i);
+ if (prev != null) {
+ fdBuilder.addBidirectionalArrow(prev, i);
}
- keyInputIndices = extractDeterministicRefs(keyExpr);
- columnInputIndices = extractDeterministicRefs(columnExpr);
- } else if (rel instanceof Aggregate) {
- Aggregate aggregate = (Aggregate) rel;
+ // Track literal constants to handle them specially
+ if (expr instanceof RexLiteral) {
+ literalToIndex.put((RexLiteral) expr, i);
+ continue;
+ }
- int groupByCnt = aggregate.getGroupCount();
- if (key < groupByCnt && column >= groupByCnt) {
- return false;
+ if (expr instanceof RexInputRef) {
+ refToIndex.put((RexInputRef) expr, i);
+ inputBits[i] = ImmutableBitSet.of(((RexInputRef) expr).getIndex());
}
- keyInputIndices = extractDeterministicRefs(aggregate, key);
- columnInputIndices = extractDeterministicRefs(aggregate, column);
- } else {
- throw new UnsupportedOperationException("Unsupported RelNode type: "
- + rel.getClass().getSimpleName());
+ inputBits[i] = RelOptUtil.InputFinder.bits(expr);
}
- // Early return if invalid cases
- if (keyInputIndices.isEmpty()
- || columnInputIndices.isEmpty()) {
- return false;
+ // Remove literals from uniqueExprToIndex to avoid redundant processing
+ uniqueExprToIndex.keySet().removeIf(key -> key instanceof RexLiteral);
+
+ for (Map.Entry<RexNode, Integer> entry : uniqueExprToIndex.entrySet()) {
+ RexNode expr = entry.getKey();
+ Integer i = entry.getValue();
+
+ // All columns determine literals
+ literalToIndex.values()
+ .forEach(l -> fdBuilder.addArrow(i, l));
+
+ // Input columns determine identical expressions
+ refToIndex.forEach((k, v) -> {
+ ImmutableBitSet refIndex = ImmutableBitSet.of(k.getIndex());
+ ImmutableBitSet bitSet = expr instanceof RexInputRef
+ ? ImmutableBitSet.of(((RexInputRef) expr).getIndex())
+ : inputBits[i];
+ if (inputFdSet.implies(refIndex, bitSet)) {
+ fdBuilder.addArrow(v, i);
+ }
+ });
}
- // Currently only supports multiple (keyInputIndices) to one
(columnInputIndices)
- // dependency detection
- for (Integer keyRef : keyInputIndices) {
- if (Boolean.FALSE.equals(
- mq.determines(rel.getInput(0), keyRef,
- columnInputIndices.nextSetBit(0)))) {
- return false;
+ return fdBuilder.build();
+ }
+
+ /**
+ * Maps input dependencies to output dependencies using column mapping.
+ */
+ private static void mapInputFDs(ArrowSet inputFdSet,
+ Mappings.TargetMapping mapping, ArrowSet.Builder outputFdBuilder) {
+ for (Arrow inputFd : inputFdSet.getArrows()) {
+ ImmutableBitSet determinants = inputFd.getDeterminants();
+ ImmutableBitSet dependents = inputFd.getDependents();
+
+ // Map all determinant columns
+ ImmutableBitSet mappedDeterminants = mapAllCols(determinants, mapping);
+ if (mappedDeterminants.isEmpty()) {
+ continue;
+ }
+
+ // Map only the dependent columns that can be mapped
+ ImmutableBitSet mappedDependents = mapAvailableCols(dependents, mapping);
+ if (!mappedDependents.isEmpty()) {
+ outputFdBuilder.addArrow(mappedDeterminants, mappedDependents);
}
}
+ }
- return true;
+ /**
+ * Maps all columns;
+ * returns empty if any cannot be mapped.
+ */
+ private static ImmutableBitSet mapAllCols(
+ ImmutableBitSet ordinals, Mappings.TargetMapping mapping) {
+ ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+ for (int ord : ordinals) {
+ int mappedOrd = mapping.getTargetOpt(ord);
+ if (mappedOrd < 0) {
+ return ImmutableBitSet.of();
+ }
+ builder.set(mappedOrd);
+ }
+ return builder.build();
}
/**
- * determinesImpl2is similar to determinesImpl, but it doesn't need to
handle the
- * mapping between output and input columns.
+ * Maps only columns that can be mapped;
+ * ignores others.
*/
- private static @Nullable Boolean determinesImpl2(RelNode rel,
RelMetadataQuery mq,
- int key, int column) {
- if (preCheck(rel, key, column)) {
- return true;
+ private static ImmutableBitSet mapAvailableCols(
+ ImmutableBitSet ordinals, Mappings.TargetMapping mapping) {
+ ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+ for (int ord : ordinals) {
+ int mappedOrd = mapping.getTargetOpt(ord);
+ if (mappedOrd >= 0) {
+ builder.set(mappedOrd);
+ }
}
+ return builder.build();
+ }
- if (rel instanceof TableScan) {
- TableScan tableScan = (TableScan) rel;
- RelOptTable table = tableScan.getTable();
- List<ImmutableBitSet> keys = table.getKeys();
- return keys != null
- && keys.size() == 1
- && keys.get(0).equals(ImmutableBitSet.of(column));
- } else if (rel instanceof Join) {
- Join join = (Join) rel;
- // TODO Considering column mapping based on equality conditions in join
- int leftFieldCnt = join.getLeft().getRowType().getFieldCount();
- if (key < leftFieldCnt && column < leftFieldCnt) {
- return mq.determines(join.getLeft(), key, column);
- } else if (key >= leftFieldCnt && column >= leftFieldCnt) {
- return mq.determines(join.getRight(), key - leftFieldCnt, column -
leftFieldCnt);
+ /**
+ * Returns functional dependencies for Aggregate.
+ */
+ private ArrowSet getAggregateFD(Aggregate rel, RelMetadataQuery mq) {
+ ArrowSet.Builder fdBuilder = new ArrowSet.Builder();
+ ArrowSet inputFdSet = getFDs(rel.getInput(), mq);
+
+ ImmutableBitSet groupSet = rel.getGroupSet();
+
+ // Preserve input FDs that only involve group columns
+ if (Aggregate.isSimple(rel)) {
+ for (Arrow inputFd : inputFdSet.getArrows()) {
+ ImmutableBitSet determinants = inputFd.getDeterminants();
+ ImmutableBitSet dependents = inputFd.getDependents();
+
+ // Only preserve if both determinants and dependents are within group
columns
+ if (groupSet.contains(determinants) && groupSet.contains(dependents)) {
+ fdBuilder.addArrow(determinants, dependents);
+ }
}
- return false;
- } else if (rel instanceof Correlate) {
- // TODO Support Correlate.
- return false;
- } else if (rel instanceof SetOp) {
- // TODO Support SetOp
- return false;
}
- return mq.determines(rel.getInput(0), key, column);
+ // Group keys determine all aggregate columns
+ if (!groupSet.isEmpty() && !rel.getAggCallList().isEmpty()) {
+ ImmutableBitSet aggCols =
+ ImmutableBitSet.range(rel.getGroupCount(),
rel.getRowType().getFieldCount());
+ fdBuilder.addArrow(groupSet, aggCols);
+ }
+
+ return fdBuilder.build();
}
- private static Boolean preCheck(RelNode rel, int key, int column) {
- verifyIndex(rel, key, column);
+ /**
+ * Returns functional dependencies for Filter.
+ */
+ private ArrowSet getFilterFD(Filter rel, RelMetadataQuery mq) {
+ ArrowSet inputSet = getFDs(rel.getInput(), mq);
+ ArrowSet.Builder fdBuilder = new ArrowSet.Builder();
- // Equal index values indicate the same expression reference
- if (key == column) {
- return true;
- }
+ // Adds equality dependencies from filter conditions.
+ addFDsFromEqualityCondition(rel.getCondition(), fdBuilder);
- return false;
+ return fdBuilder.build().union(inputSet);
}
- private static void verifyIndex(RelNode rel, int... indices) {
- for (int index : indices) {
- if (index < 0 || index >= rel.getRowType().getFieldCount()) {
- throw new IndexOutOfBoundsException(
- "Column index " + index + " is out of bounds. "
- + "Valid range is [0, " + rel.getRowType().getFieldCount() +
")");
- }
+ /**
+ * Returns functional dependencies for Join.
+ * Preserves and combines dependencies based on join type.
+ */
+ private ArrowSet getJoinFD(Join rel, RelMetadataQuery mq) {
+ ArrowSet leftFdSet = getFDs(rel.getLeft(), mq);
+ ArrowSet rightFdSet = getFDs(rel.getRight(), mq);
+
+ int leftFieldCount = rel.getLeft().getRowType().getFieldCount();
+ JoinRelType joinType = rel.getJoinType();
+
+ switch (joinType) {
+ case INNER:
+ case LEFT:
+ case RIGHT:
+ ArrowSet.Builder joinFdBuilder = new ArrowSet.Builder()
+ .addArrowSet(leftFdSet.union(shiftFdSet(rightFdSet,
leftFieldCount)));
+ addFDsFromEqualityCondition(rel.getCondition(), joinFdBuilder);
+ return joinFdBuilder.build();
+ case SEMI:
+ case ANTI:
+ return leftFdSet.clone();
+ default:
+ return ArrowSet.EMPTY;
}
}
/**
- * Extracts input indices referenced by an output column in an Aggregate.
- * For group-by columns, returns the column index itself since they directly
- * reference input columns. For aggregate function columns, returns the input
- * column indices used by the aggregate call.
+ * Returns functional dependencies for Calc.
+ */
+ private ArrowSet getCalcFD(Calc rel, RelMetadataQuery mq) {
+ List<RexNode> projections =
rel.getProgram().expandList(rel.getProgram().getProjectList());
+ return getProjectionFD(rel.getInput(), projections, mq);
+ }
+
+ /**
+ * Shifts column indices in functional dependencies (for right table in
Joins).
*
- * @param aggregate The Aggregate relational expression to analyze
- * @param index Index of the output column in the Aggregate (0-based)
- * @return ImmutableBitSet of input column indices referenced by the output
column.
- * For group-by columns, returns a singleton set of the column index.
- * For aggregate columns, returns the argument indices of the
aggregate call.
+ * @param fdSet Functional dependency set
+ * @param offset Index offset
+ * @return Shifted functional dependency set
*/
- private static ImmutableBitSet extractDeterministicRefs(Aggregate aggregate,
int index) {
- int groupByCnt = aggregate.getGroupCount();
- if (index < groupByCnt) {
- return ImmutableBitSet.of(index);
+ private ArrowSet shiftFdSet(ArrowSet fdSet, int offset) {
+ ArrowSet.Builder shiftedFdSetBuilder = new ArrowSet.Builder();
+ for (Arrow fd : fdSet.getArrows()) {
+ ImmutableBitSet shiftedDeterminants = fd.getDeterminants().shift(offset);
+ ImmutableBitSet shiftedDependents = fd.getDependents().shift(offset);
+ shiftedFdSetBuilder.addArrow(shiftedDeterminants, shiftedDependents);
}
-
- List<AggregateCall> aggCalls = aggregate.getAggCallList();
- AggregateCall call = aggCalls.get(index - groupByCnt);
- return ImmutableBitSet.of(call.getArgList());
+ return shiftedFdSetBuilder.build();
}
/**
- * Extracts input indices referenced by a deterministic RexNode expression.
- *
- * @param rex The expression to analyze
- * @return referenced input indices if deterministic
+ * Extracts functional dependencies from equality and AND conditions.
+ * Handles col1 = col2, col1 IS NOT DISTINCT FROM col2, and AND conditions.
*/
- private static ImmutableBitSet extractDeterministicRefs(RexNode rex) {
- if (rex instanceof RexCall && !RexUtil.isDeterministic(rex)) {
- return ImmutableBitSet.of();
+ private static void addFDsFromEqualityCondition(RexNode condition,
ArrowSet.Builder builder) {
+ if (!(condition instanceof RexCall)) {
+ return;
+ }
+
+ RexCall call = (RexCall) condition;
+ if (call.getOperator().getKind() == SqlKind.EQUALS
+ || call.getOperator().getKind() == SqlKind.IS_NOT_DISTINCT_FROM) {
+ List<RexNode> operands = call.getOperands();
+ if (operands.size() == 2) {
+ RexNode left = operands.get(0);
+ RexNode right = operands.get(1);
+
+ if (left instanceof RexInputRef && right instanceof RexInputRef) {
+ int leftRef = ((RexInputRef) left).getIndex();
+ int rightRef = ((RexInputRef) right).getIndex();
+
+ builder.addBidirectionalArrow(leftRef, rightRef);
+ }
+ }
+ } else if (call.getOperator().getKind() == SqlKind.AND) {
+ for (RexNode operand : call.getOperands()) {
+ addFDsFromEqualityCondition(operand, builder);
+ }
}
- return RelOptUtil.InputFinder.bits(rex);
}
}
diff --git
a/core/src/main/java/org/apache/calcite/rel/metadata/RelMetadataQuery.java
b/core/src/main/java/org/apache/calcite/rel/metadata/RelMetadataQuery.java
index 1caefacdf8..4a5d8ddff0 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMetadataQuery.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMetadataQuery.java
@@ -27,6 +27,7 @@
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexTableInputRef.RelTableRef;
import org.apache.calcite.sql.SqlExplainLevel;
+import org.apache.calcite.util.ArrowSet;
import org.apache.calcite.util.ImmutableBitSet;
import com.google.common.base.Suppliers;
@@ -1004,4 +1005,57 @@ public Boolean isVisibleInExplain(RelNode rel,
}
}
}
+
+ /**
+ * Determines whether a set of columns functionally determines another set
of columns.
+ */
+ public Boolean determinesSet(RelNode rel, ImmutableBitSet determinants,
+ ImmutableBitSet dependents) {
+ for (;;) {
+ try {
+ return functionalDependencyHandler.determinesSet(rel, this,
determinants, dependents);
+ } catch (MetadataHandlerProvider.NoHandler e) {
+ functionalDependencyHandler =
revise(BuiltInMetadata.FunctionalDependency.Handler.class);
+ }
+ }
+ }
+
+ /**
+ * Returns the closure of a set of columns under all functional dependencies.
+ */
+ public ImmutableBitSet dependents(RelNode rel, ImmutableBitSet ordinals) {
+ for (;;) {
+ try {
+ return functionalDependencyHandler.dependents(rel, this, ordinals);
+ } catch (MetadataHandlerProvider.NoHandler e) {
+ functionalDependencyHandler =
revise(BuiltInMetadata.FunctionalDependency.Handler.class);
+ }
+ }
+ }
+
+ /**
+ * Returns minimal determinant sets for the relation within the specified
set of attributes.
+ */
+ public Set<ImmutableBitSet> determinants(RelNode rel, ImmutableBitSet
ordinals) {
+ for (;;) {
+ try {
+ return functionalDependencyHandler.determinants(rel, this, ordinals);
+ } catch (MetadataHandlerProvider.NoHandler e) {
+ functionalDependencyHandler =
revise(BuiltInMetadata.FunctionalDependency.Handler.class);
+ }
+ }
+ }
+
+ /**
+ * Returns all functional dependencies for the relational expression.
+ */
+ public ArrowSet getFDs(RelNode rel) {
+ for (;;) {
+ try {
+ return functionalDependencyHandler.getFDs(rel, this);
+ } catch (MetadataHandlerProvider.NoHandler e) {
+ functionalDependencyHandler =
revise(BuiltInMetadata.FunctionalDependency.Handler.class);
+ }
+ }
+ }
}
diff --git a/core/src/main/java/org/apache/calcite/util/Arrow.java
b/core/src/main/java/org/apache/calcite/util/Arrow.java
new file mode 100644
index 0000000000..01584264c9
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/util/Arrow.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+import java.util.Objects;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents one functional dependency (Arrow) between two sets of columns,
+ * where each column is identified by its ordinal index.
+ *
+ * <p>{@link Arrow} models the functional dependency such that the values of
the
+ * {@link #determinants} columns uniquely determine the values of the
+ * {@link #dependents} columns. In other words, if two rows have the same
values
+ * for all determinant columns, they must also have the same values for all
+ * dependent columns. Both {@link #determinants} and {@link #dependents} are
+ * ImmutableBitSet column ordinals.
+ *
+ * <p>This structure supports arbitrary cardinality for both determinant and
+ * dependent column sets, allowing the representation relationships:
+ * <ul>
+ * <li>One-to-one: {@code {0} → {1}}
+ * <li>One-to-many: {@code {0} → {1, 2}}
+ * <li>Many-to-one: {@code {0, 1} → {2}}
+ * <li>Many-to-many: {@code {0, 1} → {2, 3}}
+ * </ul>
+ *
+ * <p>Example:
+ *
+ * <blockquote>
+ * <pre>
+ * Table schema: [emp_id, name, dept, salary] // ordinals: 0, 1, 2, 3
+ * Arrow: {0} → {1, 2}
+ * Functional dependency: emp_id → {name, dept}
+ * </pre>
+ * </blockquote>
+ *
+ * <p>This indicates that the employee ID uniquely determines both the name
+ * and department attributes.
+ */
+public class Arrow {
+ // The set of column ordinals that are the determinants in the functional
dependency.
+ private final ImmutableBitSet determinants;
+
+ // The set of column ordinals that are determined by the determinants.
+ private final ImmutableBitSet dependents;
+
+ private Arrow(ImmutableBitSet determinants, ImmutableBitSet dependents) {
+ this.determinants = requireNonNull(determinants, "determinants must not be
null");
+ this.dependents = requireNonNull(dependents, "dependents must not be
null");
+ }
+
+ /**
+ * Create Arrow from determinant set to dependent set.
+ */
+ public static Arrow of(ImmutableBitSet determinants, ImmutableBitSet
dependents) {
+ return new Arrow(determinants, dependents);
+ }
+
+ /**
+ * Create Arrow from single determinant to single dependent.
+ */
+ public static Arrow of(int determinant, int dependent) {
+ return Arrow.of(ImmutableBitSet.of(determinant),
ImmutableBitSet.of(dependent));
+ }
+
+ public ImmutableBitSet getDeterminants() {
+ return determinants;
+ }
+
+ public ImmutableBitSet getDependents() {
+ return dependents;
+ }
+
+ /**
+ * Returns true if this Arrow is trivial (dependents ⊆ determinants).
+ */
+ public boolean isTrivial() {
+ return determinants.contains(dependents);
+ }
+
+ @Override public boolean equals(@Nullable Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof Arrow)) {
+ return false;
+ }
+ Arrow that = (Arrow) o;
+ return determinants.equals(that.determinants)
+ && dependents.equals(that.dependents);
+ }
+
+ @Override public int hashCode() {
+ return Objects.hash(determinants, dependents);
+ }
+
+ @Override public String toString() {
+ return determinants + " -> " + dependents;
+ }
+}
diff --git a/core/src/main/java/org/apache/calcite/util/ArrowSet.java
b/core/src/main/java/org/apache/calcite/util/ArrowSet.java
new file mode 100644
index 0000000000..7bc1b3d290
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/util/ArrowSet.java
@@ -0,0 +1,339 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.BitSet;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies. Each functional dependency is
an {@link Arrow}.
+ *
+ * <p>An {@link ArrowSet} models a set of functional dependencies that may
hold in a relation.
+ * This class provides implementations for several core algorithms in
functional dependency theory,
+ * such as closure computation and candidate key discovery.
+ * For theory background, see:
+ * <a href="https://en.wikipedia.org/wiki/Functional_dependency">
+ * Functional dependency (Wikipedia)</a>
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // All arrows in this ArrowSet.
+ private ImmutableList<Arrow> arrowSet;
+
+ // Maps each determinant set to the dependent set it functionally determines
(for fast lookup).
+ private final ImmutableMap<ImmutableBitSet, ImmutableBitSet>
determinantsToDependentsMap;
+
+ // Maps each column ordinal to the determinant sets (keys of
determinantsToDependentsMap).
+ private final ImmutableMap<Integer, ImmutableSet<Arrow>> ordinalToArrows;
+
+ public ArrowSet(Set<Arrow> arrows) {
+ arrowSet = ImmutableList.copyOf(arrows);
+ Map<ImmutableBitSet, ImmutableBitSet> detToDep = new HashMap<>();
+ Map<Integer, Set<Arrow>> ordToArrows = new HashMap<>();
+ for (Arrow arrow : arrows) {
+ ImmutableBitSet determinants = arrow.getDeterminants();
+ ImmutableBitSet dependents = arrow.getDependents();
+ detToDep.merge(determinants, dependents, ImmutableBitSet::union);
+ for (int det : determinants) {
+ ordToArrows.computeIfAbsent(det, k -> new HashSet<>()).add(arrow);
+ }
+ }
+ determinantsToDependentsMap = ImmutableMap.copyOf(detToDep);
+ ImmutableMap.Builder<Integer, ImmutableSet<Arrow>> builder =
ImmutableMap.builder();
+ for (Map.Entry<Integer, Set<Arrow>> entry : ordToArrows.entrySet()) {
+ builder.put(entry.getKey(), ImmutableSet.copyOf(entry.getValue()));
+ }
+ ordinalToArrows = builder.build();
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * Computes the closure of a given set of column ordinals with respect to
this ArrowSet.
+ *
+ * <p>The closure is the maximal set of attributes such that X → X⁺ can be
inferred
+ * through transitive application of
+ * <a href="https://en.wikipedia.org/wiki/Armstrong%27s_axioms">Armstrong's
axioms</a>
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * // {0} → {1}
+ * // {1} → {2}
+ * // dependents(ImmutableBitSet.of(0)) = {0, 1, 2}
+ * </pre>
+ * </blockquote>
+ *
+ * <p>Time complexity: O(m + n), m = arrow count, n = ordinal count.
+ * For interactive use, keep n below a few hundred for performance.
+ *
+ * @param ordinals the set of column ordinals whose closure is to be computed
+ * @return an immutable set of column ordinals that can be determined from
the input
+ */
+ public ImmutableBitSet dependents(ImmutableBitSet ordinals) {
+ if (ordinals.isEmpty()) {
+ return ImmutableBitSet.of();
+ }
+
+ BitSet closureSet = new BitSet();
+ Queue<Integer> queue = new ArrayDeque<>();
+ for (int attr : ordinals) {
+ closureSet.set(attr);
+ queue.add(attr);
+ }
+
+ Map<Arrow, Integer> missingCount = new HashMap<>();
+ for (Arrow arrow : arrowSet) {
+ missingCount.put(arrow, arrow.getDeterminants().cardinality());
+ }
+
+ while (!queue.isEmpty()) {
+ int attr =
+ requireNonNull(queue.poll(), "Queue returned null while computing
dependents");
+ Set<Arrow> arrows = ordinalToArrows.get(attr);
+ if (arrows == null) {
+ continue;
+ }
+ for (Arrow arrow : arrows) {
+ int count =
+ requireNonNull(missingCount.get(arrow),
+ "missingCount returned null for Arrow " + arrow);
+ if (count > 0) {
+ count--;
+ missingCount.put(arrow, count);
+ if (count == 0) {
+ for (int dep : arrow.getDependents()) {
+ if (!closureSet.get(dep)) {
+ closureSet.set(dep);
+ queue.add(dep);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return ImmutableBitSet.of(closureSet.stream().toArray());
+ }
+
+ /**
+ * Finds all minimal determinant sets for a given set of column ordinals
based on this ArrowSet.
+ *
+ * <p>Example:
+ * <pre>{@code
+ * // Given functional dependencies:
+ * // {0} → {1}
+ * // {1} → {2}
+ * // {2} → {3}
+ * // The ordinals is {0, 1, 2, 3}:
+ * // determinants(ImmutableBitSet.of(0, 1, 2, 3)) returns [{0}]
+ * }
+ * </pre>
+ *
+ * @param ordinals a set of attribute ordinals for which to find determinant
sets
+ * @return the determinant sets, each represented as an ImmutableBitSet
+ */
+ public Set<ImmutableBitSet> determinants(ImmutableBitSet ordinals) {
+ if (arrowSet.isEmpty()) {
+ return ImmutableSet.of(ordinals);
+ }
+
+ ImmutableBitSet nonDependentOrdinals =
findNonDependentAttributes(ordinals);
+ if (dependents(nonDependentOrdinals).contains(ordinals)) {
+ return ImmutableSet.of(nonDependentOrdinals);
+ }
+
+ Set<ImmutableBitSet> keys = new HashSet<>();
+ int minSize = Integer.MAX_VALUE;
+ PriorityQueue<ImmutableBitSet> queue =
+ new
PriorityQueue<>(Comparator.comparingInt(ImmutableBitSet::cardinality));
+ Set<ImmutableBitSet> visited = new HashSet<>();
+ queue.add(nonDependentOrdinals);
+
+ while (!queue.isEmpty()) {
+ ImmutableBitSet ords = requireNonNull(queue.poll(), "queue.poll()
returned null");
+ if (visited.contains(ords)) {
+ continue;
+ }
+ visited.add(ords);
+ if (ords.cardinality() > minSize) {
+ continue;
+ }
+ boolean covered = false;
+ for (ImmutableBitSet key : keys) {
+ if (ords.contains(key)) {
+ covered = true;
+ break;
+ }
+ }
+ if (covered) {
+ continue;
+ }
+ ImmutableBitSet closure = dependents(ords);
+ if (closure.contains(ordinals)) {
+ keys.add(ords);
+ minSize = ords.cardinality();
+ continue;
+ }
+ // Try adding more attributes from ordinals
+ for (int attr : ordinals) {
+ if (!ords.get(attr)) {
+ ImmutableBitSet next = ords.union(ImmutableBitSet.of(attr));
+ if (!visited.contains(next)) {
+ queue.add(next);
+ }
+ }
+ }
+ }
+ return keys.isEmpty() ? ImmutableSet.of(ordinals) : keys;
+ }
+
+ /**
+ * Find ordinals in the given set that never appear as dependents in any
functional dependency.
+ * These are the "source" ordinals that cannot be derived from others.
+ */
+ private ImmutableBitSet findNonDependentAttributes(ImmutableBitSet ordinals)
{
+ ImmutableBitSet dependentsAttrs =
determinantsToDependentsMap.values().stream()
+ .reduce(ImmutableBitSet.of(), ImmutableBitSet::union);
+ return ordinals.except(dependentsAttrs);
+ }
+
+ /**
+ * Returns a new ArrowSet that is the union of this and another ArrowSet.
+ */
+ public ArrowSet union(ArrowSet other) {
+ Set<Arrow> unionSet = new HashSet<>();
+ unionSet.addAll(this.getArrows());
+ unionSet.addAll(other.getArrows());
+ return new ArrowSet(unionSet);
+ }
+
+ /**
+ * Returns all arrows (functional dependencies) in this ArrowSet.
+ */
+ public ImmutableList<Arrow> getArrows() {
+ return arrowSet;
+ }
+
+ @Override public ArrowSet clone() {
+ return new ArrowSet(new HashSet<>(this.arrowSet));
+ }
+
+ public boolean equalTo(ArrowSet other) {
+ for (Arrow arrow : arrowSet) {
+ if (!other.implies(arrow.getDeterminants(), arrow.getDependents())) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("ArrowSet{");
+ boolean first = true;
+ for (Arrow arrow : getArrows()) {
+ if (!first) {
+ sb.append(", ");
+ }
+ sb.append(arrow);
+ first = false;
+ }
+ sb.append('}');
+ return sb.toString();
+ }
+
+ /**
+ * Returns true if, from this ArrowSet, one can deduce that {@code
determinants}
+ * determine {@code dependents}. That is,
+ * if {@code dependents} ⊆ closure({@code determinants}).
+ */
+ public boolean implies(ImmutableBitSet determinants, ImmutableBitSet
dependents) {
+ ImmutableBitSet dets = determinantsToDependentsMap.get(determinants);
+ if (dets != null && dets.contains(dependents)) {
+ return true;
+ }
+ return dependents(determinants).contains(dependents);
+ }
+
+ /**
+ * Builder for ArrowSet.
+ */
+ public static class Builder {
+ private final Set<Arrow> arrowSet = new HashSet<>();
+
+ /**
+ * Add an Arrow from determinant set to dependent set.
+ */
+ public Builder addArrow(ImmutableBitSet lhs, ImmutableBitSet rhs) {
+ arrowSet.add(Arrow.of(lhs, rhs));
+ return this;
+ }
+
+ /**
+ * Add an Arrow from a single determinant to a single dependent.
+ */
+ public Builder addArrow(int lhs, int rhs) {
+ arrowSet.add(Arrow.of(lhs, rhs));
+ return this;
+ }
+
+ public Builder addBidirectionalArrow(int lhs, int rhs) {
+ addArrow(lhs, rhs);
+ addArrow(rhs, lhs);
+ return this;
+ }
+
+ public Builder addBidirectionalArrow(ImmutableBitSet lhs, ImmutableBitSet
rhs) {
+ addArrow(lhs, rhs);
+ addArrow(rhs, lhs);
+ return this;
+ }
+
+ public Builder addArrowSet(ArrowSet set) {
+ for (Arrow arrow : set.getArrows()) {
+ addArrow(arrow.getDeterminants(), arrow.getDependents());
+ }
+ return this;
+ }
+
+ /**
+ * Build the ArrowSet instance and compute the functional dependency graph.
+ */
+ public ArrowSet build() {
+ return new ArrowSet(arrowSet);
+ }
+ }
+}
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index 3477322715..5fec119d65 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -971,7 +971,14 @@ public enum BuiltInMethod {
VARIANT_ITEM(SqlFunctions.class, "item", VariantValue.class, Object.class),
VARIANTNULL(VariantNull.class, "getInstance"),
FUNCTIONAL_DEPENDENCY(FunctionalDependency.class, "determines",
- int.class, int.class);
+ int.class, int.class),
+ FUNCTIONAL_DEPENDENCY_SET(FunctionalDependency.class, "determinesSet",
+ ImmutableBitSet.class, ImmutableBitSet.class),
+ FUNCTIONAL_DEPENDENCY_DEPENDENTS(FunctionalDependency.class, "dependents",
+ ImmutableBitSet.class),
+ FUNCTIONAL_DEPENDENCY_DETERMINANTS(FunctionalDependency.class,
"determinants",
+ ImmutableBitSet.class),
+ FUNCTIONAL_DEPENDENCY_GET_FDS(FunctionalDependency.class, "getFDs");
@SuppressWarnings("ImmutableEnumChecker")
public final Method method;
diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
index c1afc5f92d..bbf8676f0f 100644
--- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
@@ -295,133 +295,550 @@ final RelMetadataFixture sql(String sql) {
/ (DEPT_SIZE + EMP_SIZE)));
}
- @Test void textFunctionDependencySimple() {
- final String sql = "select empno, deptno, deptno + 1, rand() as r"
+ // ----------------------------------------------------------------------
+ // Tests for functional dependency metadata in RelMdFunctionalDependency
+ // ----------------------------------------------------------------------
+
+ @Test void testFunctionalDependencyProject() {
+ final String sql = "select empno, deptno, deptno + 1 as deptNoPlus1,
rand() as rand"
+ " from emp where deptno < 20";
- // Plan is
- // LogicalProject(EMPNO=[$0], DEPTNO=[$7], EXPR$2=[+($7, 1)], R=[RAND()])
- // LogicalFilter(condition=[<($7, 20)])
- // LogicalTableScan(table=[[CATALOG, SALES, EMP]])
final RelNode relNode = sql(sql).toRel();
+
+ // Plan is:
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$0], DEPTNO=[$7], DEPTNOPLUS1=[+($7, 1)],
RAND=[RAND()])\n"
+ + " LogicalFilter(condition=[<($7, 20)])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
- Project project = (Project) relNode;
- int empNo = 0;
- int deptNo = 1;
- int deptNoPlus1 = 2;
- int r = 3;
-
- assertThat(mq.determines(project, deptNoPlus1, deptNo), is(Boolean.TRUE));
- assertThat(mq.determines(project, deptNo, deptNoPlus1), is(Boolean.FALSE));
- assertThat(mq.determines(project, deptNo, empNo), is(Boolean.TRUE));
- assertThat(mq.determines(project, empNo, deptNoPlus1), is(Boolean.FALSE));
- assertThat(mq.determines(project, deptNoPlus1, empNo), is(Boolean.TRUE));
- assertThat(mq.determines(project, r, empNo), is(Boolean.FALSE));
- }
-
- @Test void textFunctionDependencyJoin() {
- final String sql = "SELECT e.job, e.deptno, e.deptno + 1, d.d1, d.d2\n"
- + "FROM emp e JOIN (SELECT deptno AS d1, deptno AS d2 FROM dept) d\n"
- + "ON e.deptno = d.d1";
-
- // Plan is
- // LogicalProject(JOB=[$2], DEPTNO=[$7], EXPR$2=[+($7, 1)], D1=[$9],
D2=[$10])
- // LogicalJoin(condition=[=($7, $9)], joinType=[inner])
- // LogicalTableScan(table=[[CATALOG, SALES, EMP]])
- // LogicalProject(D1=[$0], D2=[$0])
- // LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+
+ int empNo = 0; // empno - primary key
+ int deptNo = 1; // deptno
+ int deptNoPlus1 = 2; // deptno + 1
+ int rand = 3; // rand()
+
+ // deptno should determine deptno + 1 (expression is deterministic)
+ assertThat(mq.determines(relNode, deptNo, deptNoPlus1), is(Boolean.TRUE));
+
+ // deptno should NOT determine empno (deptno is not unique)
+ assertThat(mq.determines(relNode, deptNo, empNo), is(Boolean.FALSE));
+
+ // empno should determine deptno + 1 (primary key determines everything)
+ assertThat(mq.determines(relNode, empNo, deptNoPlus1), is(Boolean.TRUE));
+
+ // deptno + 1 should NOT determine empno (deptno + 1 is not unique)
+ assertThat(mq.determines(relNode, deptNoPlus1, empNo), is(Boolean.FALSE));
+
+ // rand() should not determine anything (non-deterministic)
+ assertThat(mq.determines(relNode, rand, empNo), is(Boolean.FALSE));
+
+ // Nothing should determine rand() (non-deterministic)
+ assertThat(mq.determines(relNode, empNo, rand), is(Boolean.FALSE));
+ }
+
+ @Test void testFunctionalDependencyAggregate() {
+ final String sql = "select deptno, count(*) as \"count\", sum(sal) as
sumSal"
+ + " from emp group by deptno";
+
final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalAggregate(group=[{0}], count=[COUNT()],
SUMSAL=[SUM($1)])\n"
+ + " LogicalProject(DEPTNO=[$7], SAL=[$5])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
- Project project = (Project) relNode;
- int eJob = 0;
- int eDeptNo = 1;
- int eDeptNoPlus1 = 2;
- int dD1 = 3;
- int dD2 = 4;
- assertThat(mq.determines(project, eJob, eDeptNo), is(Boolean.FALSE));
- assertThat(mq.determines(project, eDeptNoPlus1, eDeptNo),
is(Boolean.TRUE));
- assertThat(mq.determines(project, eDeptNoPlus1, dD1), is(Boolean.FALSE));
- assertThat(mq.determines(project, dD2, dD1), is(Boolean.TRUE));
+ int deptNo = 0; // deptno (group by column)
+ int count = 1; // count(*)
+ int sumSal = 2; // sum(sal)
+
+ // Group by column should determine aggregate columns
+ assertThat(mq.determines(relNode, deptNo, count), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, deptNo, sumSal), is(Boolean.TRUE));
+
+ // Aggregate columns should not determine group by column
+ assertThat(mq.determines(relNode, count, deptNo), is(Boolean.FALSE));
+ assertThat(mq.determines(relNode, sumSal, deptNo), is(Boolean.FALSE));
+
+ // Aggregate columns should not determine each other
+ assertThat(mq.determines(relNode, count, sumSal), is(Boolean.FALSE));
+ assertThat(mq.determines(relNode, sumSal, count), is(Boolean.FALSE));
}
- @Test void textFunctionDependencyDoulbePK() {
- // Table double_pk with pk (id1, id2)
- final String sql = "select id1, id2, sum(age) as z"
- + " from double_pk group by id1, id2";
+ @Test void testFunctionalDependencyWithIdenticalExpressions() {
+ final String sql = "select deptno, deptno as deptno2, deptno + 1 as
deptno3 from emp";
- // Plan is
- // LogicalAggregate(group=[{0, 1}], Z=[SUM($2)])
- // LogicalProject(ID1=[$0], ID2=[$1], AGE=[$3])
- // LogicalTableScan(table=[[CATALOG, SALES, DOUBLE_PK]])
final RelNode relNode = sql(sql).toRel();
- System.out.println(RelOptUtil.toString(relNode));
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(DEPTNO=[$7], DEPTNO2=[$7], DEPTNO3=[+($7, 1)])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
- Aggregate aggregate = (Aggregate) relNode;
- int id1 = 0;
- int id2 = 1;
- int z = 2;
- assertThat(mq.determines(aggregate, id2, id1), is(Boolean.FALSE));
- assertThat(mq.determines(aggregate, id1, id2), is(Boolean.FALSE));
- assertThat(mq.determines(aggregate, z, id1), is(Boolean.FALSE));
- assertThat(mq.determines(aggregate, z, id2), is(Boolean.FALSE));
+ int deptNo = 0; // deptno
+ int deptNo2 = 1; // deptno (identical)
+ int deptNo3 = 2; // deptno + 1
+
+ // Identical expressions should determine each other
+ assertThat(mq.determines(relNode, deptNo, deptNo2), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, deptNo2, deptNo), is(Boolean.TRUE));
+
+ // deptno should determine deptno + 1 (deterministic expression)
+ assertThat(mq.determines(relNode, deptNo, deptNo3), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, deptNo2, deptNo3), is(Boolean.TRUE));
}
- @Test void testFunctionDependencyComplex() {
- final String sql = "SELECT deptno, sal1Sum, sal2Sum\n"
- + "FROM (\n"
- + " SELECT deptno,"
- + " SUM(sal1) AS sal1Sum,"
- + " SUM(sal2) AS sal2Sum,"
- + " job\n"
- + " FROM (\n"
- + " SELECT deptno,"
- + " sal AS sal1,"
- + " sal AS sal2,"
- + " job\n"
- + " FROM emp\n"
- + " ) t\n"
- + " GROUP BY deptno, job\n"
- + ") t2\n"
- + "ORDER BY sal1Sum, job, sal2Sum + sal1Sum + 1";
+ @Test void testFunctionalDependencyJoin() {
+ final String sql = "select e.empno, e.ename, e.deptno, d.name"
+ + " from emp e join dept d on e.deptno = d.deptno";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$0], ENAME=[$1], DEPTNO=[$7],
NAME=[$10])\n"
+ + " LogicalJoin(condition=[=($7, $9)], joinType=[inner])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, DEPT]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0; // e.empno
+ int ename = 1; // e.ename
+ int deptno = 2; // e.deptno
+ int dname = 3; // d.name
+
+ // Left side functional dependencies should be preserved
+ // empno determines ename and deptno (primary key from emp table)
+ assertThat(mq.determines(relNode, empNo, ename), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE));
- // Plan is
- // LogicalSort(sort0=[$1], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC],
dir2=[ASC])
- // LogicalProject(DEPTNO=[$0], SAL1SUM=[$2], SAL2SUM=[$3], JOB=[$1],
- // EXPR$4=[+(+($3, $2), 1)])
- // LogicalAggregate(group=[{0, 1}], SAL1SUM=[SUM($2)],
SAL2SUM=[SUM($3)])
- // LogicalProject(DEPTNO=[$7], JOB=[$2], SAL1=[$5], SAL2=[$5])
- // LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ // Right side functional dependencies should be preserved with shifted
indices
+ // deptno determines dname (primary key from dept table)
+ assertThat(mq.determines(relNode, deptno, dname), is(Boolean.TRUE));
+
+ // Cross-table dependencies through join condition
+ // empno should determine dname (through deptno)
+ assertThat(mq.determines(relNode, empNo, dname), is(Boolean.TRUE));
+
+ // Reverse dependencies should not hold
+ assertThat(mq.determines(relNode, ename, empNo), is(Boolean.FALSE));
+ assertThat(mq.determines(relNode, dname, empNo), is(Boolean.FALSE));
+ }
+
+ @Test void testFunctionalDependencyFilter() {
+ final String sql = "select empno, ename, sal from emp where sal > 1000";
final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$0], ENAME=[$1], SAL=[$5])\n"
+ + " LogicalFilter(condition=[>($5, 1000)])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
- // check sort
- final Sort sort = (Sort) relNode;
- List<RelFieldCollation> collations =
sort.getCollation().getFieldCollations();
- int sal1Sum = collations.get(0).getFieldIndex();
- int job = collations.get(1).getFieldIndex();
- int sal2SumPlusSal1SumPlus1 = collations.get(2).getFieldIndex();
+ int empNo = 0; // empno
+ int ename = 1; // ename
+ int sal = 2; // sal
- assertThat(mq.determines(sort, sal1Sum, sal1Sum), is(Boolean.TRUE));
- assertThat(mq.determines(sort, job, sal1Sum), is(Boolean.FALSE));
- assertThat(mq.determines(sort, sal2SumPlusSal1SumPlus1, sal1Sum),
is(Boolean.TRUE));
- assertThat(mq.determines(sort, sal1Sum, sal2SumPlusSal1SumPlus1),
is(Boolean.FALSE));
- assertThat(mq.determines(sort, sal2SumPlusSal1SumPlus1,
sal2SumPlusSal1SumPlus1),
- is(Boolean.TRUE));
- assertThat(mq.determines(sort, sal2SumPlusSal1SumPlus1, job),
is(Boolean.FALSE));
+ // Filter should preserve functional dependencies from base table
+ // empno should still determine ename and sal
+ assertThat(mq.determines(relNode, empNo, ename), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, empNo, sal), is(Boolean.TRUE));
- // check aggregate
- Aggregate aggregate = (Aggregate) relNode.getInput(0).getInput(0);
- int deptNoGroupByKey = 0;
- int jobGroupByKey = 1;
- int sal1SumAggCall = 2;
- int sal2SumAggCall = 3;
+ // sal should not determine empno
+ assertThat(mq.determines(relNode, sal, empNo), is(Boolean.FALSE));
+ }
- assertThat(mq.determines(aggregate, jobGroupByKey, deptNoGroupByKey),
is(Boolean.FALSE));
- assertThat(mq.determines(aggregate, jobGroupByKey, sal1SumAggCall),
is(Boolean.FALSE));
- assertThat(mq.determines(aggregate, sal2SumAggCall, sal1SumAggCall),
is(Boolean.TRUE));
+ @Test void testFunctionalDependencyComplexExpressions() {
+ final String sql = "select empno, sal, sal * 1.1 as raised_sal, "
+ + "case when sal > 2000 then 'high' else 'low' end as sal_category"
+ + " from emp";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$0], SAL=[$5], RAISED_SAL=[*($5,
1.1:DECIMAL(2, 1))],"
+ + " SAL_CATEGORY=[CASE(>($5, 2000), 'high', 'low ')])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0; // empno
+ int sal = 1; // sal
+ int raisedSal = 2; // sal * 1.1
+ int salCategory = 3; // case expression
+
+ // empno determines all other columns (primary key)
+ assertThat(mq.determines(relNode, empNo, sal), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, empNo, raisedSal), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, empNo, salCategory), is(Boolean.TRUE));
+
+ // sal determines computed columns based on sal
+ assertThat(mq.determines(relNode, sal, raisedSal), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, sal, salCategory), is(Boolean.TRUE));
+ }
+
+ @Test void testFunctionalDependencyUnion() {
+ final String sql = "select empno, deptno from emp where deptno = 10"
+ + " union all "
+ + " select empno, deptno from emp where deptno = 20";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalUnion(all=[true])\n"
+ + " LogicalProject(EMPNO=[$0], DEPTNO=[$7])\n"
+ + " LogicalFilter(condition=[=($7, 10)])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"
+ + " LogicalProject(EMPNO=[$0], DEPTNO=[$7])\n"
+ + " LogicalFilter(condition=[=($7, 20)])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0; // empno
+ int deptno = 1; // deptno
+
+ // Union handling is not yet fully implemented
+ // For now, expect this might not work
+ // assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE));
+
+ // deptno should not determine empno
+ assertThat(mq.determines(relNode, deptno, empNo), is(Boolean.FALSE));
+ }
+
+ @Test void testFunctionalDependencyMultipleKeys() {
+ final String sql = "select empno, deptno, empno * 100 + deptno as
composite from emp";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$0], DEPTNO=[$7], COMPOSITE=[+(*($0,
100), $7)])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0; // empno
+ int deptno = 1; // deptno
+ int composite = 2; // empno * 100 + deptno
+
+ // empno determines all
+ assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, empNo, composite), is(Boolean.TRUE));
+
+ // Multiple keys handling is not yet fully implemented
+ }
+
+ @Test void testFunctionalDependencyConstants() {
+ final String sql = "select empno, 100 as constant_col, deptno"
+ + " from emp";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$0], CONSTANT_COL=[100], DEPTNO=[$7])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0; // empno
+ int constant = 1; // 100 (constant)
+ int deptno = 2; // deptno
+
+ // Constants should be functionally dependent on everything
+ assertThat(mq.determines(relNode, empNo, constant), is(Boolean.TRUE));
+
+ // Original dependencies should be preserved
+ assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE));
+
+ // Everything determines constants
+ assertThat(mq.determines(relNode, deptno, constant), is(Boolean.TRUE));
+ }
+
+ @Test void testFunctionalDependencyNonDeterministicFunctions() {
+ final String sql = "select empno, rand() as random1, rand() as random2
from emp";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$0], RANDOM1=[RAND()],
RANDOM2=[RAND()])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0; // empno
+ int random1 = 1; // random1
+ int random2 = 2; // random2
+
+ // Basic functional dependencies should still work
+ assertThat(mq.determines(relNode, empNo, empNo), is(Boolean.TRUE));
+
+ // Non-deterministic functions should not determine anything
+ assertThat(mq.determines(relNode, random1, empNo), is(Boolean.FALSE));
+ assertThat(mq.determines(relNode, random1, random2), is(Boolean.FALSE));
+ assertThat(mq.determines(relNode, random2, empNo), is(Boolean.FALSE));
+ }
+
+ @Test void testFunctionalDependencyInnerJoin() {
+ final String sql = "SELECT e.empno, e.deptno, d.name\n"
+ + "FROM emp e\n"
+ + "JOIN dept d ON e.deptno = d.deptno";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$0], DEPTNO=[$7], NAME=[$10])\n"
+ + " LogicalJoin(condition=[=($7, $9)], joinType=[inner])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, DEPT]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0; // e.empno
+ int deptno = 1; // e.deptno
+ int dname = 2; // d.name
+
+ assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, empNo, dname), is(Boolean.TRUE));
+ }
+
+ @Test void testFunctionalDependencyLeftJoin() {
+ final String sql = "SELECT e.empno, e.deptno, d.name\n"
+ + "FROM emp e\n"
+ + "LEFT JOIN dept d ON e.deptno = d.deptno";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$0], DEPTNO=[$7], NAME=[$10])\n"
+ + " LogicalJoin(condition=[=($7, $9)], joinType=[left])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, DEPT]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0; // e.empno
+ int deptno = 1; // e.deptno
+ int dname = 2; // d.name
+
+ assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, empNo, dname), is(Boolean.TRUE));
+ }
+
+ @Test void testFunctionalDependencyRightJoin() {
+ final String sql = "SELECT e.empno, e.deptno, d.name\n"
+ + "FROM dept d\n"
+ + "RIGHT JOIN emp e ON e.deptno = d.deptno";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$2], DEPTNO=[$9], NAME=[$1])\n"
+ + " LogicalJoin(condition=[=($9, $0)], joinType=[right])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, DEPT]])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0; // e.empno
+ int deptno = 1; // e.deptno
+ int dname = 2; // d.name
+
+ assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, empNo, dname), is(Boolean.TRUE));
+ }
+
+ @Test void testFunctionalDependencyFullOuterJoin() {
+ final String sql = "SELECT e.empno, e.deptno, d.name\n"
+ + "FROM emp e\n"
+ + "FULL JOIN dept d ON e.deptno = d.deptno";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$0], DEPTNO=[$7], NAME=[$10])\n"
+ + " LogicalJoin(condition=[=($7, $9)], joinType=[full])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, DEPT]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0; // e.empno
+ int deptno = 1; // e.deptno
+ int dname = 2; // d.name
+
+ // Left side dependencies should not be preserved
+ assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.FALSE));
+
+ // Cross-join dependencies might not hold due to nulls from outer join
+ assertThat(mq.determines(relNode, empNo, dname), is(Boolean.FALSE));
+ }
+
+ @Test void testFunctionalDependencySort() {
+ final String sql = "select empno, ename, deptno, sal"
+ + " from emp order by empno, deptno";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalSort(sort0=[$0], sort1=[$2], dir0=[ASC], dir1=[ASC])\n"
+ + " LogicalProject(EMPNO=[$0], ENAME=[$1], DEPTNO=[$7],
SAL=[$5])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0; // empno
+ int ename = 1; // ename
+ int deptno = 2; // deptno
+ int sal = 3; // sal
+
+ // empno is primary key, so it should determine all other columns
+ assertThat(mq.determines(relNode, empNo, ename), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, empNo, sal), is(Boolean.TRUE));
+
+ // Non-key columns should not determine key column
+ assertThat(mq.determines(relNode, ename, empNo), is(Boolean.FALSE));
+ assertThat(mq.determines(relNode, deptno, empNo), is(Boolean.FALSE));
+ assertThat(mq.determines(relNode, sal, empNo), is(Boolean.FALSE));
+
+ // Test candidate keys functionality for sort keys
+ ImmutableBitSet sortKeys = ImmutableBitSet.of(empNo, deptno);
+ Set<ImmutableBitSet> candidateKeys = mq.determinants(relNode, sortKeys);
+
+ // Should find that empno alone is a candidate key within the sort keys
+ assertThat(candidateKeys.isEmpty(), is(Boolean.FALSE));
+ assertThat(candidateKeys.contains(ImmutableBitSet.of(empNo)),
is(Boolean.TRUE));
+ }
+
+ @Test void testFunctionalDependencySortDuplicateKeys() {
+ final String sql = "select * from (select deptno as d1, deptno as d2 from
emp) as t1\n"
+ + " join emp t2 on t1.d1 = t2.deptno order by t1.d1, t1.d2, t1.d1 DESC
NULLS FIRST";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])\n"
+ + " LogicalProject(D1=[$0], D2=[$1], EMPNO=[$2], ENAME=[$3],
JOB=[$4], MGR=[$5],"
+ + " HIREDATE=[$6], SAL=[$7], COMM=[$8], DEPTNO=[$9],
SLACKER=[$10])\n"
+ + " LogicalJoin(condition=[=($0, $9)], joinType=[inner])\n"
+ + " LogicalProject(D1=[$7], D2=[$7])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ Sort sort = (Sort) relNode;
+ ImmutableIntList keys = sort.getCollation().getKeys();
+
+ int d1 = keys.get(0); // t1.d1
+ int d2 = keys.get(1); // t1.d2
+
+ // Identical expressions should determine each other
+ assertThat(mq.determines(relNode, d1, d2), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, d2, d1), is(Boolean.TRUE));
+
+ // Test candidate keys for the sort keys (d1, d2)
+ ImmutableBitSet sortKeys = ImmutableBitSet.of(d1, d2);
+ Set<ImmutableBitSet> candidateKeys = mq.determinants(relNode, sortKeys);
+
+ // Either {d1} or {d2} should be a candidate key since they are identical
+ assertThat(candidateKeys.contains(ImmutableBitSet.of(d1)),
is(Boolean.TRUE));
+ assertThat(candidateKeys.contains(ImmutableBitSet.of(d2)),
is(Boolean.TRUE));
+ }
+
+ @Test void testFunctionalDependencyFilterEqualityCondition() {
+ final String sql = "SELECT e1.empno, e1.sal, e2.mgr, e2.deptno\n"
+ + "FROM emp e1\n"
+ + "JOIN emp e2\n"
+ + "ON e2.mgr IS NOT DISTINCT FROM e2.deptno\n"
+ + "WHERE e1.empno = e1.sal";
+
+ final RelNode relNode = sql(sql).toRel();
+
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalProject(EMPNO=[$0], SAL=[$5], MGR=[$12], DEPTNO=[$16])\n"
+ + " LogicalFilter(condition=[=($0, $5)])\n"
+ + " LogicalJoin(condition=[IS NOT DISTINCT FROM($12, $16)],
joinType=[inner])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+
+ int empNo = 0;
+ int sal = 1;
+ int mgr = 2;
+ int deptno = 3;
+
+ // empno = sal should infer empno <-> sal
+ assertThat(mq.determines(relNode, empNo, sal), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, sal, empNo), is(Boolean.TRUE));
+ // mgr IS NOT DISTINCT FROM deptno should infer mgr <-> deptno
+ assertThat(mq.determines(relNode, mgr, deptno), is(Boolean.TRUE));
+ assertThat(mq.determines(relNode, deptno, mgr), is(Boolean.TRUE));
+ }
+
+ @Test void testFunctionalDependencyDoublePK() {
+ // double_pk keys is {0}, {0, 1}
+ final String sql = "select id1, id2, sum(age) as z"
+ + " from double_pk group by id1, id2";
+
+ final RelNode relNode = sql(sql).toRel();
+ assertThat(
+ RelOptUtil.toString(relNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalAggregate(group=[{0, 1}], Z=[SUM($2)])\n"
+ + " LogicalProject(ID1=[$0], ID2=[$1], AGE=[$3])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, DOUBLE_PK]])\n"));
+
+ final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
+ Aggregate aggregate = (Aggregate) relNode;
+ int id1 = 0; // id1 (primary key part)
+ int id2 = 1; // id2 (primary key part)
+ int z = 2; // sum(age)
+
+ assertThat(mq.determines(aggregate, id2, id1), is(Boolean.FALSE));
+ assertThat(mq.determines(aggregate, id1, id2), is(Boolean.TRUE));
+ assertThat(mq.determines(aggregate, z, id1), is(Boolean.FALSE));
+ assertThat(mq.determines(aggregate, z, id2), is(Boolean.FALSE));
}
@Test void testFunctionDependencyCalc() {
@@ -442,25 +859,29 @@ final RelMetadataFixture sql(String sql) {
+ ") t2\n"
+ "ORDER BY sal1Sum, job, sal2Sum + sal1Sum + 1";
- // Plan is
- // LogicalSort(sort0=[$1], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC],
dir2=[ASC])
- // LogicalCalc(expr#0..3=[{inputs}], expr#4=[+($t3, $t2)], expr#5=[1],
expr#6=[+($t4, $t5)],
- // DEPTNO=[$t0], SAL1SUM=[$t2], SAL2SUM=[$t3], JOB=[$t1],
EXPR$4=[$t6])
- // LogicalAggregate(group=[{0, 1}], SAL1SUM=[SUM($2)],
SAL2SUM=[SUM($3)])
- // LogicalCalc(expr#0..8=[{inputs}], DEPTNO=[$t7], JOB=[$t2],
SAL1=[$t5], SAL2=[$t5])
- // LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-
- RelNode relNode = sql(sql).toRel();
- final HepProgram program = new HepProgramBuilder().
- addRuleInstance(CoreRules.PROJECT_TO_CALC).build();
+ final RelNode relNode = sql(sql).toRel();
+ final HepProgram program = new HepProgramBuilder()
+ .addRuleInstance(CoreRules.PROJECT_TO_CALC).build();
final HepPlanner planner = new HepPlanner(program);
planner.setRoot(relNode);
- relNode = planner.findBestExp();
- System.out.println(RelOptUtil.toString(relNode));
- final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery();
-
- // check sort
- final Sort sort = (Sort) relNode;
+ final RelNode plannedNode = planner.findBestExp();
+ assertThat(
+ RelOptUtil.toString(plannedNode).replace("\r\n", "\n"),
+ is(""
+ + "LogicalSort(sort0=[$1], sort1=[$3], sort2=[$4], dir0=[ASC],
dir1=[ASC],"
+ + " dir2=[ASC])\n"
+ + " LogicalCalc(expr#0..3=[{inputs}], expr#4=[+($t3, $t2)],
expr#5=[1],"
+ + " expr#6=[+($t4, $t5)], DEPTNO=[$t0], SAL1SUM=[$t2],
SAL2SUM=[$t3],"
+ + " JOB=[$t1], EXPR$4=[$t6])\n"
+ + " LogicalAggregate(group=[{0, 1}], SAL1SUM=[SUM($2)],
SAL2SUM=[SUM($3)])\n"
+ + " LogicalCalc(expr#0..8=[{inputs}], DEPTNO=[$t7],
JOB=[$t2], SAL1=[$t5],"
+ + " SAL2=[$t5])\n"
+ + " LogicalTableScan(table=[[CATALOG, SALES, EMP]])\n"));
+
+ final RelMetadataQuery mq = plannedNode.getCluster().getMetadataQuery();
+
+ // Check Sort
+ final Sort sort = (Sort) plannedNode;
List<RelFieldCollation> collations =
sort.getCollation().getFieldCollations();
int sal1Sum = collations.get(0).getFieldIndex();
int job = collations.get(1).getFieldIndex();
@@ -468,21 +889,26 @@ final RelMetadataFixture sql(String sql) {
assertThat(mq.determines(sort, sal1Sum, sal1Sum), is(Boolean.TRUE));
assertThat(mq.determines(sort, job, sal1Sum), is(Boolean.FALSE));
- assertThat(mq.determines(sort, sal2SumPlusSal1SumPlus1, sal1Sum),
is(Boolean.TRUE));
+ assertThat(mq.determines(sort, sal2SumPlusSal1SumPlus1, sal1Sum),
is(Boolean.FALSE));
assertThat(mq.determines(sort, sal1Sum, sal2SumPlusSal1SumPlus1),
is(Boolean.FALSE));
assertThat(mq.determines(sort, sal2SumPlusSal1SumPlus1,
sal2SumPlusSal1SumPlus1),
is(Boolean.TRUE));
assertThat(mq.determines(sort, sal2SumPlusSal1SumPlus1, job),
is(Boolean.FALSE));
- // check aggregate
- Aggregate aggregate = (Aggregate) relNode.getInput(0).getInput(0);
+
+ // Check Aggregate
+ Aggregate aggregate = (Aggregate) sort.getInput().getInput(0);
int deptNoGroupByKey = 0;
int jobGroupByKey = 1;
int sal1SumAggCall = 2;
int sal2SumAggCall = 3;
assertThat(mq.determines(aggregate, jobGroupByKey, deptNoGroupByKey),
is(Boolean.FALSE));
- assertThat(mq.determines(aggregate, jobGroupByKey, sal1SumAggCall),
is(Boolean.FALSE));
- assertThat(mq.determines(aggregate, sal2SumAggCall, sal1SumAggCall),
is(Boolean.TRUE));
+ assertThat(
+ mq.determinesSet(aggregate, ImmutableBitSet.of(deptNoGroupByKey,
jobGroupByKey),
+ ImmutableBitSet.of(sal1SumAggCall)), is(Boolean.TRUE));
+ assertThat(
+ mq.determinesSet(aggregate, ImmutableBitSet.of(deptNoGroupByKey,
jobGroupByKey),
+ ImmutableBitSet.of(sal2SumAggCall)), is(Boolean.TRUE));
}
// ----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/ArrowSetTest.java
b/core/src/test/java/org/apache/calcite/util/ArrowSetTest.java
new file mode 100644
index 0000000000..42c33d7f58
--- /dev/null
+++ b/core/src/test/java/org/apache/calcite/util/ArrowSetTest.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.Set;
+import java.util.stream.IntStream;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasSize;
+
+/**
+ * Test fot ArrowSet.
+ */
+public class ArrowSetTest {
+ @Test void testFDEqualTo() {
+ ArrowSet fds1 = new ArrowSet.Builder()
+ .addArrow(0, 1)
+ .addArrow(1, 2)
+ .build();
+
+ ArrowSet fds2 = new ArrowSet.Builder()
+ .addArrow(0, 1)
+ .addArrow(1, 2)
+ .addArrow(0, 2)
+ .build();
+
+ ArrowSet fds3 = new ArrowSet.Builder()
+ .addArrow(1, 0)
+ .addArrow(2, 1)
+ .build();
+
+ assertThat(fds1.equalTo(fds2), is(true));
+ assertThat(fds2.equalTo(fds1), is(true));
+ assertThat(fds1.equalTo(fds3), is(false));
+ assertThat(fds3.equalTo(fds1), is(false));
+ }
+
+ @Test void testFDUnion() {
+ ArrowSet fds1 = new ArrowSet.Builder()
+ .addArrow(0, 1)
+ .addArrow(1, 2)
+ .build();
+
+ ArrowSet fds2 = new ArrowSet.Builder()
+ .addArrow(2, 3)
+ .addArrow(3, 4)
+ .build();
+
+ ArrowSet union = fds1.union(fds2);
+
+ assertThat(union.implies(ImmutableBitSet.of(0), ImmutableBitSet.of(1)),
is(true));
+ assertThat(union.implies(ImmutableBitSet.of(1), ImmutableBitSet.of(2)),
is(true));
+ assertThat(union.implies(ImmutableBitSet.of(2), ImmutableBitSet.of(3)),
is(true));
+ assertThat(union.implies(ImmutableBitSet.of(3), ImmutableBitSet.of(4)),
is(true));
+ assertThat(union.implies(ImmutableBitSet.of(0), ImmutableBitSet.of(3)),
is(true));
+ assertThat(union.implies(ImmutableBitSet.of(0), ImmutableBitSet.of(4)),
is(true));
+ }
+
+ @Test void testDeterminantsNoFD() {
+ // FD: empty
+ ArrowSet fds = new ArrowSet.Builder().build();
+ ImmutableBitSet ordinals = ImmutableBitSet.of(0, 1);
+ Set<ImmutableBitSet> keys = fds.determinants(ordinals);
+ assertThat(ImmutableSet.of(ImmutableBitSet.of(ordinals)).equals(keys),
is(true));
+ }
+
+ @Test void testDeterminantsLargeStarKey() {
+ // FD: 0 -> i (i = 1 .. n - 1)
+ int n = 1024;
+ ImmutableBitSet ordinals = ImmutableBitSet.of(IntStream.range(0,
n).toArray());
+ ArrowSet.Builder builder = new ArrowSet.Builder();
+ for (int i = 1; i < n; i++) {
+ builder.addArrow(0, i);
+ }
+ builder.addArrow(88, 0);
+ ArrowSet fds = builder.build();
+ Set<ImmutableBitSet> keys = fds.determinants(ordinals);
+ assertThat(ImmutableSet.of(ImmutableBitSet.of(0),
ImmutableBitSet.of(88)).equals(keys),
+ is(true));
+ }
+
+ @Test public void testDeterminantsLargeAttributeSet() {
+ // FD: 0 -> 1, 1 -> 2, ..., n - 2 -> n - 1
+ int n = 1024;
+ ImmutableBitSet ordinals = ImmutableBitSet.of(IntStream.range(0,
n).toArray());
+ ArrowSet.Builder builder = new ArrowSet.Builder();
+ for (int i = 0; i < n; i++) {
+ builder.addArrow(i, i + 1);
+ }
+ ArrowSet fds = builder.build();
+ Set<ImmutableBitSet> keys = fds.determinants(ordinals);
+ assertThat(keys, hasSize(1));
+ assertThat(keys.iterator().next(), equalTo(ImmutableBitSet.of(0)));
+ }
+
+ @Test void testDependentsWithLargeRelation() {
+ int numAttrs = 1024;
+ ArrowSet.Builder builder = new ArrowSet.Builder();
+ for (int i = 0; i < numAttrs; i++) {
+ builder.addArrow(i, i + 1);
+ }
+ ArrowSet fds = builder.build();
+ ImmutableBitSet closure = fds.dependents(ImmutableBitSet.of(0));
+ for (int i = 0; i <= numAttrs; i++) {
+ assertThat(closure.get(i), is(true));
+ }
+ }
+
+ @Test void testTransitive() {
+ // Given axioms:
+ // {a, b} -> {c}
+ // {a, e} -> {d}
+ // {c} -> {e}
+ // We can prove that:
+ // {a, b} -> {e}
+ // {a, b} -> {c, e}
+ // {a, b} -> {a, c, e}
+ // But not that:
+ // {a} -> {c}
+ // {a} -> {f}
+ // {a, e} -> {c}
+ // {b} -> {c}
+ final ImmutableBitSet a = ImmutableBitSet.of(0);
+ final ImmutableBitSet ab = ImmutableBitSet.of(0, 1);
+ final ImmutableBitSet ace = ImmutableBitSet.of(0, 2, 4);
+ final ImmutableBitSet b = ImmutableBitSet.of(1);
+ final ImmutableBitSet c = ImmutableBitSet.of(2);
+ final ImmutableBitSet ce = ImmutableBitSet.of(2, 4);
+ final ImmutableBitSet d = ImmutableBitSet.of(3);
+ final ImmutableBitSet e = ImmutableBitSet.of(4);
+ final ImmutableBitSet ae = ImmutableBitSet.of(0, 4);
+ final ImmutableBitSet f = ImmutableBitSet.of(5);
+
+ ArrowSet fds = new ArrowSet.Builder()
+ .addArrow(ab, c)
+ .addArrow(ae, d)
+ .addArrow(c, e)
+ .build();
+ assertThat(fds.implies(ab, e), is(true));
+ assertThat(fds.implies(ab, ce), is(true));
+ assertThat(fds.implies(ab, ace), is(true));
+ assertThat(fds.implies(a, c), is(false));
+ assertThat(fds.implies(a, f), is(false));
+ assertThat(fds.implies(ae, c), is(false));
+ assertThat(fds.implies(b, c), is(false));
+ }
+}