mihaibudiu commented on code in PR #4566:
URL: https://github.com/apache/calcite/pull/4566#discussion_r2396483935
##########
core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java:
##########
@@ -905,17 +905,67 @@ default RelDataTypeFactory getTypeFactory() {
public interface FunctionalDependency extends Metadata {
MetadataDef<FunctionalDependency> DEF =
MetadataDef.of(FunctionalDependency.class,
FunctionalDependency.Handler.class,
- BuiltInMethod.FUNCTIONAL_DEPENDENCY.method);
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY.method,
Review Comment:
I think you should update the JavaDoc of the `FunctionalDependency`
interface too.
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies.
+ * Each functional dependency is represented as an {@link Arrow} from a set of
Review Comment:
In general I don't think you want to talk about implementation in a JavaDoc.
I would just say "is an Arrow".
##########
core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java:
##########
@@ -905,17 +905,67 @@ default RelDataTypeFactory getTypeFactory() {
public interface FunctionalDependency extends Metadata {
MetadataDef<FunctionalDependency> DEF =
MetadataDef.of(FunctionalDependency.class,
FunctionalDependency.Handler.class,
- BuiltInMethod.FUNCTIONAL_DEPENDENCY.method);
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_SET.method,
+ BuiltInMethod.FUNCTIONAL_DEPENDENCY_CLOSURE.method,
+
BuiltInMethod.FUNCTIONAL_DEPENDENCY_CANDIDATE_KEYS_OR_SUPER_KEYS.method);
/**
- * Returns whether column is functionally dependent on column.
+ * Checks if a single determinant column functionally determines another
dependent column.
+ * Equivalent to checking if there is an Arrow (functional dependency)
determinant → dependent.
+ *
+ * @param determinant the determining column ordinal
+ * @param dependent the dependent column ordinal
+ * @return true if determinant uniquely determines dependent
+ */
+ @Nullable Boolean determines(int determinant, int dependent);
+
+ /**
+ * Checks if a set of determinant columns functionally determines
+ * another set of dependent columns. Equivalent to checking if
+ * there is an Arrow (functional dependency) determinants → dependents.
+ *
+ * @param determinants ordinals of the determining columns
+ * @param dependents ordinals of the dependent columns
+ * @return true if determinants uniquely determine dependents
*/
- @Nullable Boolean determines(int key, int column);
+ Boolean determinesSet(ImmutableBitSet determinants, ImmutableBitSet
dependents);
+
+ /**
+ * Computes the closure of a set of determinant columns under
+ * all functional dependencies (ArrowSet). The closure is the
Review Comment:
What does "all functional dependencies" mean here?
I suspect the `this` object has to appear somewhere in this description.
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies.
+ * Each functional dependency is represented as an {@link Arrow} from a set of
+ * determinant columns to a set of dependent columns.
+ *
+ * <p>An {@link ArrowSet} captures the complete set of functional dependencies
+ * that hold in a relation. Provides core algorithms in functional dependency
theory.
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // Maps each determinant set to the dependent set it functionally determines.
+ private final Map<ImmutableBitSet, ImmutableBitSet> dependencyGraph = new
HashMap<>();
+
+ // Maps each column ordinal to its containing determinant sets for efficient
reverse lookup.
+ private final Map<Integer, Set<ImmutableBitSet>> reverseIndex = new
HashMap<>();
+
+ // All arrows in this ArrowSet.
+ private final Set<Arrow> arrowSet = new HashSet<>();
+
+ public ArrowSet(Set<Arrow> arrows) {
+ arrowSet.addAll(arrows);
+ for (Arrow arrow : arrows) {
+ ImmutableBitSet determinants = arrow.getDeterminants();
+ ImmutableBitSet dependents = arrow.getDependents();
+ dependencyGraph.merge(determinants, dependents, ImmutableBitSet::union);
+ for (int attr : determinants) {
+ reverseIndex.computeIfAbsent(attr, k -> new
HashSet<>()).add(determinants);
+ }
+ }
+ }
+
+ public Set<ImmutableBitSet> getDeterminants(int attr) {
+ return reverseIndex.getOrDefault(attr, ImmutableSet.of());
+ }
+
+ public ImmutableBitSet getDependents(ImmutableBitSet determinants) {
+ return dependencyGraph.getOrDefault(determinants, ImmutableBitSet.of());
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * Computes the closure of a given set of column ordinals with respect to
the current
+ * collection of functional dependencies.
+ *
+ * <p>The closure of a set of ordinals is defined as the set of all column
ordinals
+ * that can be functionally determined from the specified set by applying
zero or more
+ * functional dependencies present in this collection. This concept is
fundamental
+ * in identifying keys, candidate keys, and in the normalization of
relational schemas.
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * {0} → {1}
+ * {1} → {2}
+ *
+ * // Closure computation:
+ * closure({0}) = {0, 1, 2}
+ * </pre>
+ * </blockquote>
+ *
+ * <p>Explanation:
Review Comment:
this is the implementation; this does not have to be in the JavaDoc, but in
a normal comment.
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies.
+ * Each functional dependency is represented as an {@link Arrow} from a set of
+ * determinant columns to a set of dependent columns.
+ *
+ * <p>An {@link ArrowSet} captures the complete set of functional dependencies
+ * that hold in a relation. Provides core algorithms in functional dependency
theory.
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // Maps each determinant set to the dependent set it functionally determines.
+ private final Map<ImmutableBitSet, ImmutableBitSet> dependencyGraph = new
HashMap<>();
+
+ // Maps each column ordinal to its containing determinant sets for efficient
reverse lookup.
+ private final Map<Integer, Set<ImmutableBitSet>> reverseIndex = new
HashMap<>();
+
+ // All arrows in this ArrowSet.
+ private final Set<Arrow> arrowSet = new HashSet<>();
+
+ public ArrowSet(Set<Arrow> arrows) {
+ arrowSet.addAll(arrows);
+ for (Arrow arrow : arrows) {
+ ImmutableBitSet determinants = arrow.getDeterminants();
+ ImmutableBitSet dependents = arrow.getDependents();
+ dependencyGraph.merge(determinants, dependents, ImmutableBitSet::union);
+ for (int attr : determinants) {
+ reverseIndex.computeIfAbsent(attr, k -> new
HashSet<>()).add(determinants);
+ }
+ }
+ }
+
+ public Set<ImmutableBitSet> getDeterminants(int attr) {
+ return reverseIndex.getOrDefault(attr, ImmutableSet.of());
+ }
+
+ public ImmutableBitSet getDependents(ImmutableBitSet determinants) {
+ return dependencyGraph.getOrDefault(determinants, ImmutableBitSet.of());
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * Computes the closure of a given set of column ordinals with respect to
the current
+ * collection of functional dependencies.
+ *
+ * <p>The closure of a set of ordinals is defined as the set of all column
ordinals
+ * that can be functionally determined from the specified set by applying
zero or more
+ * functional dependencies present in this collection. This concept is
fundamental
+ * in identifying keys, candidate keys, and in the normalization of
relational schemas.
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * {0} → {1}
+ * {1} → {2}
+ *
+ * // Closure computation:
Review Comment:
this is not the computation, this is the result of the computation.
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies.
+ * Each functional dependency is represented as an {@link Arrow} from a set of
+ * determinant columns to a set of dependent columns.
+ *
+ * <p>An {@link ArrowSet} captures the complete set of functional dependencies
+ * that hold in a relation. Provides core algorithms in functional dependency
theory.
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // Maps each determinant set to the dependent set it functionally determines.
+ private final Map<ImmutableBitSet, ImmutableBitSet> dependencyGraph = new
HashMap<>();
+
+ // Maps each column ordinal to its containing determinant sets for efficient
reverse lookup.
+ private final Map<Integer, Set<ImmutableBitSet>> reverseIndex = new
HashMap<>();
+
+ // All arrows in this ArrowSet.
+ private final Set<Arrow> arrowSet = new HashSet<>();
+
+ public ArrowSet(Set<Arrow> arrows) {
+ arrowSet.addAll(arrows);
+ for (Arrow arrow : arrows) {
+ ImmutableBitSet determinants = arrow.getDeterminants();
+ ImmutableBitSet dependents = arrow.getDependents();
+ dependencyGraph.merge(determinants, dependents, ImmutableBitSet::union);
+ for (int attr : determinants) {
+ reverseIndex.computeIfAbsent(attr, k -> new
HashSet<>()).add(determinants);
+ }
+ }
+ }
+
+ public Set<ImmutableBitSet> getDeterminants(int attr) {
+ return reverseIndex.getOrDefault(attr, ImmutableSet.of());
+ }
+
+ public ImmutableBitSet getDependents(ImmutableBitSet determinants) {
+ return dependencyGraph.getOrDefault(determinants, ImmutableBitSet.of());
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * Computes the closure of a given set of column ordinals with respect to
the current
+ * collection of functional dependencies.
+ *
+ * <p>The closure of a set of ordinals is defined as the set of all column
ordinals
+ * that can be functionally determined from the specified set by applying
zero or more
+ * functional dependencies present in this collection. This concept is
fundamental
+ * in identifying keys, candidate keys, and in the normalization of
relational schemas.
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * {0} → {1}
+ * {1} → {2}
+ *
+ * // Closure computation:
+ * closure({0}) = {0, 1, 2}
+ * </pre>
+ * </blockquote>
+ *
+ * <p>Explanation:
+ * <ul>
+ * <li>Starting with {0}
+ * <li>Apply {0} → {1}: set becomes {0, 1}
+ * <li>Apply {1} → {2}: set becomes {0, 1, 2}
+ * <li>No more dependencies can be applied → closure is {0, 1, 2}
+ * </ul>
+ *
+ * @param ordinals the set of column ordinals whose closure is to be computed
+ * @return an immutable set of column ordinals representing the closure of
ordinals
+ * under the current set of functional dependencies
+ */
+ public ImmutableBitSet computeClosure(ImmutableBitSet ordinals) {
+ if (ordinals.isEmpty()) {
+ return ImmutableBitSet.of();
+ }
+
+ ImmutableBitSet closure = ordinals;
+ Set<Integer> processed = new HashSet<>(ordinals.asList());
+ Queue<Integer> queue = new ArrayDeque<>(ordinals.asList());
+
+ while (!queue.isEmpty()) {
+ Integer currentAttr =
+ requireNonNull(queue.poll(), "Queue returned null while computing
closure");
+ Set<ImmutableBitSet> relatedDeterminants = getDeterminants(currentAttr);
+ for (ImmutableBitSet determinants : relatedDeterminants) {
+ if (closure.contains(determinants)) {
+ ImmutableBitSet dependents = getDependents(determinants);
+ ImmutableBitSet newOrdinals = dependents.except(closure);
+ if (!newOrdinals.isEmpty()) {
+ closure = closure.union(newOrdinals);
+ for (int ordinal : newOrdinals) {
+ if (!processed.contains(ordinal)) {
+ queue.add(ordinal);
+ processed.add(ordinal);
+ }
+ }
+ }
+ }
+ }
+ }
+ return closure;
+ }
+
+ /**
+ * Finds all candidate keys or superkeys for a relation based on the current
set of
+ * functional dependencies.
+ *
+ * <p>A <b>candidate key</b> is a minimal set of ordinals that functionally
determines
+ * all other ordinals in the relation.
+ *
+ * <p>A <b>superkey</b> is any superset of a candidate key that also
determines all ordinals.
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * {0} → {1}
+ * {1} → {2}
+ * {2} → {3}
+ *
+ * // For relation with ordinals {0, 1, 2, 3}:
+ * Candidate keys: [{0}] // since {0}⁺ = {0, 1, 2, 3}
+ * Superkeys: [{0}, {0,1}, {0,2}, {0,3}, {0,1,2}, ...]
+ * </pre>
+ * </blockquote>
+ *
+ * <p>When {@code onlyMinimalKeys} is true, the method returns only minimal
+ * candidate keys. When false, it may return all superkeys (which includes
Review Comment:
"may" or "will"?
if it's just "may", what else can it return?
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies.
+ * Each functional dependency is represented as an {@link Arrow} from a set of
+ * determinant columns to a set of dependent columns.
+ *
+ * <p>An {@link ArrowSet} captures the complete set of functional dependencies
Review Comment:
This thing about "complete" is also a strong commitment. This implies, for
example, that it cannot have approximations.
Which is not necessarily true.
For the theory you need to provide a link.
This class provides implementations for several useful functional dependency
computation algorithms, e.g., ....
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies.
+ * Each functional dependency is represented as an {@link Arrow} from a set of
+ * determinant columns to a set of dependent columns.
+ *
+ * <p>An {@link ArrowSet} captures the complete set of functional dependencies
+ * that hold in a relation. Provides core algorithms in functional dependency
theory.
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // Maps each determinant set to the dependent set it functionally determines.
+ private final Map<ImmutableBitSet, ImmutableBitSet> dependencyGraph = new
HashMap<>();
+
+ // Maps each column ordinal to its containing determinant sets for efficient
reverse lookup.
+ private final Map<Integer, Set<ImmutableBitSet>> reverseIndex = new
HashMap<>();
+
+ // All arrows in this ArrowSet.
+ private final Set<Arrow> arrowSet = new HashSet<>();
+
+ public ArrowSet(Set<Arrow> arrows) {
+ arrowSet.addAll(arrows);
+ for (Arrow arrow : arrows) {
+ ImmutableBitSet determinants = arrow.getDeterminants();
+ ImmutableBitSet dependents = arrow.getDependents();
+ dependencyGraph.merge(determinants, dependents, ImmutableBitSet::union);
+ for (int attr : determinants) {
+ reverseIndex.computeIfAbsent(attr, k -> new
HashSet<>()).add(determinants);
+ }
+ }
+ }
+
+ public Set<ImmutableBitSet> getDeterminants(int attr) {
+ return reverseIndex.getOrDefault(attr, ImmutableSet.of());
+ }
+
+ public ImmutableBitSet getDependents(ImmutableBitSet determinants) {
+ return dependencyGraph.getOrDefault(determinants, ImmutableBitSet.of());
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * Computes the closure of a given set of column ordinals with respect to
the current
+ * collection of functional dependencies.
+ *
+ * <p>The closure of a set of ordinals is defined as the set of all column
ordinals
+ * that can be functionally determined from the specified set by applying
zero or more
+ * functional dependencies present in this collection. This concept is
fundamental
+ * in identifying keys, candidate keys, and in the normalization of
relational schemas.
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * {0} → {1}
+ * {1} → {2}
+ *
+ * // Closure computation:
+ * closure({0}) = {0, 1, 2}
+ * </pre>
+ * </blockquote>
+ *
+ * <p>Explanation:
+ * <ul>
+ * <li>Starting with {0}
+ * <li>Apply {0} → {1}: set becomes {0, 1}
+ * <li>Apply {1} → {2}: set becomes {0, 1, 2}
+ * <li>No more dependencies can be applied → closure is {0, 1, 2}
+ * </ul>
+ *
+ * @param ordinals the set of column ordinals whose closure is to be computed
+ * @return an immutable set of column ordinals representing the closure of
ordinals
+ * under the current set of functional dependencies
Review Comment:
If you have a bound on the cost of computing this, it can be in the JavaDoc.
##########
core/src/main/java/org/apache/calcite/util/Arrow.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+import java.util.Objects;
+
+/**
+ * Represents a functional dependency (Arrow) between two sets of columns,
Review Comment:
Replace "a" with "one"
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies.
+ * Each functional dependency is represented as an {@link Arrow} from a set of
+ * determinant columns to a set of dependent columns.
+ *
+ * <p>An {@link ArrowSet} captures the complete set of functional dependencies
+ * that hold in a relation. Provides core algorithms in functional dependency
theory.
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // Maps each determinant set to the dependent set it functionally determines.
+ private final Map<ImmutableBitSet, ImmutableBitSet> dependencyGraph = new
HashMap<>();
+
+ // Maps each column ordinal to its containing determinant sets for efficient
reverse lookup.
+ private final Map<Integer, Set<ImmutableBitSet>> reverseIndex = new
HashMap<>();
+
+ // All arrows in this ArrowSet.
+ private final Set<Arrow> arrowSet = new HashSet<>();
+
+ public ArrowSet(Set<Arrow> arrows) {
+ arrowSet.addAll(arrows);
+ for (Arrow arrow : arrows) {
+ ImmutableBitSet determinants = arrow.getDeterminants();
+ ImmutableBitSet dependents = arrow.getDependents();
+ dependencyGraph.merge(determinants, dependents, ImmutableBitSet::union);
+ for (int attr : determinants) {
+ reverseIndex.computeIfAbsent(attr, k -> new
HashSet<>()).add(determinants);
+ }
+ }
+ }
+
+ public Set<ImmutableBitSet> getDeterminants(int attr) {
+ return reverseIndex.getOrDefault(attr, ImmutableSet.of());
+ }
+
+ public ImmutableBitSet getDependents(ImmutableBitSet determinants) {
+ return dependencyGraph.getOrDefault(determinants, ImmutableBitSet.of());
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * Computes the closure of a given set of column ordinals with respect to
the current
+ * collection of functional dependencies.
Review Comment:
should "current collection" be replaced with "'this'"?
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies.
+ * Each functional dependency is represented as an {@link Arrow} from a set of
+ * determinant columns to a set of dependent columns.
+ *
+ * <p>An {@link ArrowSet} captures the complete set of functional dependencies
+ * that hold in a relation. Provides core algorithms in functional dependency
theory.
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // Maps each determinant set to the dependent set it functionally determines.
+ private final Map<ImmutableBitSet, ImmutableBitSet> dependencyGraph = new
HashMap<>();
+
+ // Maps each column ordinal to its containing determinant sets for efficient
reverse lookup.
+ private final Map<Integer, Set<ImmutableBitSet>> reverseIndex = new
HashMap<>();
+
+ // All arrows in this ArrowSet.
+ private final Set<Arrow> arrowSet = new HashSet<>();
+
+ public ArrowSet(Set<Arrow> arrows) {
+ arrowSet.addAll(arrows);
+ for (Arrow arrow : arrows) {
+ ImmutableBitSet determinants = arrow.getDeterminants();
+ ImmutableBitSet dependents = arrow.getDependents();
+ dependencyGraph.merge(determinants, dependents, ImmutableBitSet::union);
+ for (int attr : determinants) {
+ reverseIndex.computeIfAbsent(attr, k -> new
HashSet<>()).add(determinants);
+ }
+ }
+ }
+
+ public Set<ImmutableBitSet> getDeterminants(int attr) {
+ return reverseIndex.getOrDefault(attr, ImmutableSet.of());
+ }
+
+ public ImmutableBitSet getDependents(ImmutableBitSet determinants) {
+ return dependencyGraph.getOrDefault(determinants, ImmutableBitSet.of());
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * Computes the closure of a given set of column ordinals with respect to
the current
+ * collection of functional dependencies.
+ *
+ * <p>The closure of a set of ordinals is defined as the set of all column
ordinals
+ * that can be functionally determined from the specified set by applying
zero or more
+ * functional dependencies present in this collection. This concept is
fundamental
+ * in identifying keys, candidate keys, and in the normalization of
relational schemas.
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * {0} → {1}
+ * {1} → {2}
+ *
+ * // Closure computation:
+ * closure({0}) = {0, 1, 2}
+ * </pre>
+ * </blockquote>
+ *
+ * <p>Explanation:
+ * <ul>
+ * <li>Starting with {0}
+ * <li>Apply {0} → {1}: set becomes {0, 1}
+ * <li>Apply {1} → {2}: set becomes {0, 1, 2}
+ * <li>No more dependencies can be applied → closure is {0, 1, 2}
+ * </ul>
+ *
+ * @param ordinals the set of column ordinals whose closure is to be computed
+ * @return an immutable set of column ordinals representing the closure of
ordinals
+ * under the current set of functional dependencies
+ */
+ public ImmutableBitSet computeClosure(ImmutableBitSet ordinals) {
+ if (ordinals.isEmpty()) {
+ return ImmutableBitSet.of();
+ }
+
+ ImmutableBitSet closure = ordinals;
+ Set<Integer> processed = new HashSet<>(ordinals.asList());
+ Queue<Integer> queue = new ArrayDeque<>(ordinals.asList());
+
+ while (!queue.isEmpty()) {
+ Integer currentAttr =
+ requireNonNull(queue.poll(), "Queue returned null while computing
closure");
+ Set<ImmutableBitSet> relatedDeterminants = getDeterminants(currentAttr);
+ for (ImmutableBitSet determinants : relatedDeterminants) {
+ if (closure.contains(determinants)) {
+ ImmutableBitSet dependents = getDependents(determinants);
+ ImmutableBitSet newOrdinals = dependents.except(closure);
+ if (!newOrdinals.isEmpty()) {
+ closure = closure.union(newOrdinals);
+ for (int ordinal : newOrdinals) {
+ if (!processed.contains(ordinal)) {
+ queue.add(ordinal);
+ processed.add(ordinal);
+ }
+ }
+ }
+ }
+ }
+ }
+ return closure;
+ }
+
+ /**
+ * Finds all candidate keys or superkeys for a relation based on the current
set of
+ * functional dependencies.
+ *
+ * <p>A <b>candidate key</b> is a minimal set of ordinals that functionally
determines
+ * all other ordinals in the relation.
+ *
+ * <p>A <b>superkey</b> is any superset of a candidate key that also
determines all ordinals.
Review Comment:
Can "that also" be replaced with, ", which implies that it also"?
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies.
+ * Each functional dependency is represented as an {@link Arrow} from a set of
+ * determinant columns to a set of dependent columns.
+ *
+ * <p>An {@link ArrowSet} captures the complete set of functional dependencies
+ * that hold in a relation. Provides core algorithms in functional dependency
theory.
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // Maps each determinant set to the dependent set it functionally determines.
+ private final Map<ImmutableBitSet, ImmutableBitSet> dependencyGraph = new
HashMap<>();
+
+ // Maps each column ordinal to its containing determinant sets for efficient
reverse lookup.
+ private final Map<Integer, Set<ImmutableBitSet>> reverseIndex = new
HashMap<>();
+
+ // All arrows in this ArrowSet.
+ private final Set<Arrow> arrowSet = new HashSet<>();
+
+ public ArrowSet(Set<Arrow> arrows) {
+ arrowSet.addAll(arrows);
+ for (Arrow arrow : arrows) {
+ ImmutableBitSet determinants = arrow.getDeterminants();
+ ImmutableBitSet dependents = arrow.getDependents();
+ dependencyGraph.merge(determinants, dependents, ImmutableBitSet::union);
+ for (int attr : determinants) {
+ reverseIndex.computeIfAbsent(attr, k -> new
HashSet<>()).add(determinants);
+ }
+ }
+ }
+
+ public Set<ImmutableBitSet> getDeterminants(int attr) {
+ return reverseIndex.getOrDefault(attr, ImmutableSet.of());
+ }
+
+ public ImmutableBitSet getDependents(ImmutableBitSet determinants) {
+ return dependencyGraph.getOrDefault(determinants, ImmutableBitSet.of());
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * Computes the closure of a given set of column ordinals with respect to
the current
+ * collection of functional dependencies.
+ *
+ * <p>The closure of a set of ordinals is defined as the set of all column
ordinals
+ * that can be functionally determined from the specified set by applying
zero or more
+ * functional dependencies present in this collection. This concept is
fundamental
+ * in identifying keys, candidate keys, and in the normalization of
relational schemas.
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * {0} → {1}
+ * {1} → {2}
+ *
+ * // Closure computation:
+ * closure({0}) = {0, 1, 2}
+ * </pre>
+ * </blockquote>
+ *
+ * <p>Explanation:
+ * <ul>
+ * <li>Starting with {0}
+ * <li>Apply {0} → {1}: set becomes {0, 1}
+ * <li>Apply {1} → {2}: set becomes {0, 1, 2}
+ * <li>No more dependencies can be applied → closure is {0, 1, 2}
+ * </ul>
+ *
+ * @param ordinals the set of column ordinals whose closure is to be computed
+ * @return an immutable set of column ordinals representing the closure of
ordinals
+ * under the current set of functional dependencies
+ */
+ public ImmutableBitSet computeClosure(ImmutableBitSet ordinals) {
+ if (ordinals.isEmpty()) {
+ return ImmutableBitSet.of();
+ }
+
+ ImmutableBitSet closure = ordinals;
+ Set<Integer> processed = new HashSet<>(ordinals.asList());
+ Queue<Integer> queue = new ArrayDeque<>(ordinals.asList());
+
+ while (!queue.isEmpty()) {
+ Integer currentAttr =
+ requireNonNull(queue.poll(), "Queue returned null while computing
closure");
+ Set<ImmutableBitSet> relatedDeterminants = getDeterminants(currentAttr);
+ for (ImmutableBitSet determinants : relatedDeterminants) {
+ if (closure.contains(determinants)) {
+ ImmutableBitSet dependents = getDependents(determinants);
+ ImmutableBitSet newOrdinals = dependents.except(closure);
+ if (!newOrdinals.isEmpty()) {
+ closure = closure.union(newOrdinals);
+ for (int ordinal : newOrdinals) {
+ if (!processed.contains(ordinal)) {
+ queue.add(ordinal);
+ processed.add(ordinal);
+ }
+ }
+ }
+ }
+ }
+ }
+ return closure;
+ }
+
+ /**
+ * Finds all candidate keys or superkeys for a relation based on the current
set of
+ * functional dependencies.
+ *
+ * <p>A <b>candidate key</b> is a minimal set of ordinals that functionally
determines
+ * all other ordinals in the relation.
+ *
+ * <p>A <b>superkey</b> is any superset of a candidate key that also
determines all ordinals.
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * {0} → {1}
+ * {1} → {2}
+ * {2} → {3}
+ *
+ * // For relation with ordinals {0, 1, 2, 3}:
+ * Candidate keys: [{0}] // since {0}⁺ = {0, 1, 2, 3}
+ * Superkeys: [{0}, {0,1}, {0,2}, {0,3}, {0,1,2}, ...]
+ * </pre>
+ * </blockquote>
+ *
+ * <p>When {@code onlyMinimalKeys} is true, the method returns only minimal
+ * candidate keys. When false, it may return all superkeys (which includes
+ * candidate keys and their supersets).
+ *
+ * @param ordinals the complete set of attribute indices in the relation
schema
+ * @param onlyMinimalKeys if true, returns only minimal candidate keys;
+ * if false, returns all superkeys
+ * @return a set of candidate keys or superkeys, each represented as an
{@link ImmutableBitSet}
+ *
+ * @see #computeClosure(ImmutableBitSet)
+ */
+ public Set<ImmutableBitSet> findCandidateKeysOrSuperKeys(
+ ImmutableBitSet ordinals, boolean onlyMinimalKeys) {
+ if (dependencyGraph.isEmpty()) {
+ return ImmutableSet.of(ordinals);
+ }
+
+ ImmutableBitSet nonDependentOrdinals =
findNonDependentAttributes(ordinals);
+ if (computeClosure(nonDependentOrdinals).contains(ordinals)) {
+ return ImmutableSet.of(nonDependentOrdinals);
+ }
+
+ Set<ImmutableBitSet> result = new HashSet<>();
+ int minKeySize = Integer.MAX_VALUE;
+ PriorityQueue<ImmutableBitSet> queue =
+ new
PriorityQueue<>(Comparator.comparingInt(ImmutableBitSet::cardinality));
+ Set<ImmutableBitSet> visited = new HashSet<>();
+ queue.add(nonDependentOrdinals);
+ while (!queue.isEmpty()) {
+ ImmutableBitSet keys = requireNonNull(queue.poll(), "queue.poll()
returned null");
+ if (visited.contains(keys)) {
+ continue;
+ }
+ visited.add(keys);
+ if (onlyMinimalKeys && keys.cardinality() > minKeySize) {
+ break;
+ }
+ boolean covered = false;
+ for (ImmutableBitSet key : result) {
+ if (keys.contains(key)) {
+ covered = true;
+ break;
+ }
+ }
+ if (covered) {
+ continue;
+ }
+ ImmutableBitSet keysClosure = computeClosure(keys);
+ if (keysClosure.contains(ordinals)) {
+ result.add(keys);
+ if (onlyMinimalKeys) {
+ minKeySize = keys.cardinality();
+ }
+ continue;
+ }
+ ImmutableBitSet remain = ordinals.except(keys);
+ for (int attr : remain) {
+ if (keysClosure.get(attr)) {
+ continue;
+ }
+ ImmutableBitSet next = keys.union(ImmutableBitSet.of(attr));
+ if (!visited.contains(next)) {
+ queue.add(next);
+ }
+ }
+ }
+ return result.isEmpty() ? ImmutableSet.of(ordinals) : result;
+ }
+
+ /**
+ * Find ordinals in the given set that never appear as dependents in any
functional dependency.
+ * These are the "source" ordinals that cannot be derived from others.
+ */
+ private ImmutableBitSet findNonDependentAttributes(ImmutableBitSet ordinals)
{
+ ImmutableBitSet dependentsAttrs = dependencyGraph.values().stream()
+ .reduce(ImmutableBitSet.of(), ImmutableBitSet::union);
+ return ordinals.except(dependentsAttrs);
+ }
+
+ /**
+ * Returns a new ArrowSet that is the union of this and another ArrowSet.
+ */
+ public ArrowSet union(ArrowSet other) {
+ Set<Arrow> unionSet = new HashSet<>();
+ unionSet.addAll(this.getArrows());
+ unionSet.addAll(other.getArrows());
+ return new ArrowSet(unionSet);
+ }
+
+ /**
+ * Returns all arrows (functional dependencies) in this ArrowSet.
+ *
+ * @return a set of Arrow objects representing all functional dependencies
+ */
+ public Set<Arrow> getArrows() {
+ return arrowSet;
+ }
+
+ @Override public ArrowSet clone() {
+ return new ArrowSet(new HashSet<>(this.arrowSet));
+ }
+
+ public boolean equalTo(ArrowSet other) {
+ for (Map.Entry<ImmutableBitSet, ImmutableBitSet> entry :
dependencyGraph.entrySet()) {
+ if (!other.implies(entry.getKey(), entry.getValue())) {
+ return false;
+ }
+ }
+ for (Map.Entry<ImmutableBitSet, ImmutableBitSet> entry :
other.dependencyGraph.entrySet()) {
+ if (!implies(entry.getKey(), entry.getValue())) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("ArrowSet{");
+ boolean first = true;
+ for (Arrow arrow : getArrows()) {
+ if (!first) {
+ sb.append(", ");
+ }
+ sb.append(arrow);
+ first = false;
+ }
+ sb.append('}');
+ return sb.toString();
+ }
+
+ /**
+ * Returns true if this ArrowSet implies that determinants determine
dependents.
Review Comment:
if from this ArrowSet one can deduce that 'determinants' determine
'dependents'.
This is a proper use of 'this ArrowSet'.
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies.
+ * Each functional dependency is represented as an {@link Arrow} from a set of
+ * determinant columns to a set of dependent columns.
+ *
+ * <p>An {@link ArrowSet} captures the complete set of functional dependencies
+ * that hold in a relation. Provides core algorithms in functional dependency
theory.
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // Maps each determinant set to the dependent set it functionally determines.
+ private final Map<ImmutableBitSet, ImmutableBitSet> dependencyGraph = new
HashMap<>();
+
+ // Maps each column ordinal to its containing determinant sets for efficient
reverse lookup.
+ private final Map<Integer, Set<ImmutableBitSet>> reverseIndex = new
HashMap<>();
Review Comment:
"its containing" is not clear.
Do you mean "the keys of the dependencyGraph which contain this column?"
##########
core/src/main/java/org/apache/calcite/util/ArrowSet.java:
##########
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.ArrayDeque;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a set of functional dependencies.
+ * Each functional dependency is represented as an {@link Arrow} from a set of
+ * determinant columns to a set of dependent columns.
+ *
+ * <p>An {@link ArrowSet} captures the complete set of functional dependencies
+ * that hold in a relation. Provides core algorithms in functional dependency
theory.
+ *
+ * @see Arrow
+ * @see ImmutableBitSet
+ */
+public class ArrowSet {
+ public static final ArrowSet EMPTY = new ArrowSet(ImmutableSet.of());
+
+ // Maps each determinant set to the dependent set it functionally determines.
+ private final Map<ImmutableBitSet, ImmutableBitSet> dependencyGraph = new
HashMap<>();
+
+ // Maps each column ordinal to its containing determinant sets for efficient
reverse lookup.
+ private final Map<Integer, Set<ImmutableBitSet>> reverseIndex = new
HashMap<>();
+
+ // All arrows in this ArrowSet.
+ private final Set<Arrow> arrowSet = new HashSet<>();
+
+ public ArrowSet(Set<Arrow> arrows) {
+ arrowSet.addAll(arrows);
+ for (Arrow arrow : arrows) {
+ ImmutableBitSet determinants = arrow.getDeterminants();
+ ImmutableBitSet dependents = arrow.getDependents();
+ dependencyGraph.merge(determinants, dependents, ImmutableBitSet::union);
+ for (int attr : determinants) {
+ reverseIndex.computeIfAbsent(attr, k -> new
HashSet<>()).add(determinants);
+ }
+ }
+ }
+
+ public Set<ImmutableBitSet> getDeterminants(int attr) {
+ return reverseIndex.getOrDefault(attr, ImmutableSet.of());
+ }
+
+ public ImmutableBitSet getDependents(ImmutableBitSet determinants) {
+ return dependencyGraph.getOrDefault(determinants, ImmutableBitSet.of());
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ /**
+ * Computes the closure of a given set of column ordinals with respect to
the current
+ * collection of functional dependencies.
+ *
+ * <p>The closure of a set of ordinals is defined as the set of all column
ordinals
+ * that can be functionally determined from the specified set by applying
zero or more
+ * functional dependencies present in this collection. This concept is
fundamental
+ * in identifying keys, candidate keys, and in the normalization of
relational schemas.
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * {0} → {1}
+ * {1} → {2}
+ *
+ * // Closure computation:
+ * closure({0}) = {0, 1, 2}
+ * </pre>
+ * </blockquote>
+ *
+ * <p>Explanation:
+ * <ul>
+ * <li>Starting with {0}
+ * <li>Apply {0} → {1}: set becomes {0, 1}
+ * <li>Apply {1} → {2}: set becomes {0, 1, 2}
+ * <li>No more dependencies can be applied → closure is {0, 1, 2}
+ * </ul>
+ *
+ * @param ordinals the set of column ordinals whose closure is to be computed
+ * @return an immutable set of column ordinals representing the closure of
ordinals
+ * under the current set of functional dependencies
+ */
+ public ImmutableBitSet computeClosure(ImmutableBitSet ordinals) {
+ if (ordinals.isEmpty()) {
+ return ImmutableBitSet.of();
+ }
+
+ ImmutableBitSet closure = ordinals;
+ Set<Integer> processed = new HashSet<>(ordinals.asList());
+ Queue<Integer> queue = new ArrayDeque<>(ordinals.asList());
+
+ while (!queue.isEmpty()) {
+ Integer currentAttr =
+ requireNonNull(queue.poll(), "Queue returned null while computing
closure");
+ Set<ImmutableBitSet> relatedDeterminants = getDeterminants(currentAttr);
+ for (ImmutableBitSet determinants : relatedDeterminants) {
+ if (closure.contains(determinants)) {
+ ImmutableBitSet dependents = getDependents(determinants);
+ ImmutableBitSet newOrdinals = dependents.except(closure);
+ if (!newOrdinals.isEmpty()) {
+ closure = closure.union(newOrdinals);
+ for (int ordinal : newOrdinals) {
+ if (!processed.contains(ordinal)) {
+ queue.add(ordinal);
+ processed.add(ordinal);
+ }
+ }
+ }
+ }
+ }
+ }
+ return closure;
+ }
+
+ /**
+ * Finds all candidate keys or superkeys for a relation based on the current
set of
+ * functional dependencies.
+ *
+ * <p>A <b>candidate key</b> is a minimal set of ordinals that functionally
determines
+ * all other ordinals in the relation.
+ *
+ * <p>A <b>superkey</b> is any superset of a candidate key that also
determines all ordinals.
+ *
+ * <p>Example:
+ * <blockquote>
+ * <pre>
+ * // Given functional dependencies:
+ * {0} → {1}
+ * {1} → {2}
+ * {2} → {3}
+ *
+ * // For relation with ordinals {0, 1, 2, 3}:
+ * Candidate keys: [{0}] // since {0}⁺ = {0, 1, 2, 3}
+ * Superkeys: [{0}, {0,1}, {0,2}, {0,3}, {0,1,2}, ...]
+ * </pre>
+ * </blockquote>
+ *
+ * <p>When {@code onlyMinimalKeys} is true, the method returns only minimal
+ * candidate keys. When false, it may return all superkeys (which includes
+ * candidate keys and their supersets).
+ *
+ * @param ordinals the complete set of attribute indices in the relation
schema
+ * @param onlyMinimalKeys if true, returns only minimal candidate keys;
+ * if false, returns all superkeys
+ * @return a set of candidate keys or superkeys, each represented as an
{@link ImmutableBitSet}
+ *
+ * @see #computeClosure(ImmutableBitSet)
+ */
+ public Set<ImmutableBitSet> findCandidateKeysOrSuperKeys(
+ ImmutableBitSet ordinals, boolean onlyMinimalKeys) {
+ if (dependencyGraph.isEmpty()) {
+ return ImmutableSet.of(ordinals);
+ }
+
+ ImmutableBitSet nonDependentOrdinals =
findNonDependentAttributes(ordinals);
+ if (computeClosure(nonDependentOrdinals).contains(ordinals)) {
+ return ImmutableSet.of(nonDependentOrdinals);
+ }
+
+ Set<ImmutableBitSet> result = new HashSet<>();
+ int minKeySize = Integer.MAX_VALUE;
+ PriorityQueue<ImmutableBitSet> queue =
+ new
PriorityQueue<>(Comparator.comparingInt(ImmutableBitSet::cardinality));
+ Set<ImmutableBitSet> visited = new HashSet<>();
+ queue.add(nonDependentOrdinals);
+ while (!queue.isEmpty()) {
+ ImmutableBitSet keys = requireNonNull(queue.poll(), "queue.poll()
returned null");
+ if (visited.contains(keys)) {
+ continue;
+ }
+ visited.add(keys);
+ if (onlyMinimalKeys && keys.cardinality() > minKeySize) {
+ break;
+ }
+ boolean covered = false;
+ for (ImmutableBitSet key : result) {
+ if (keys.contains(key)) {
+ covered = true;
+ break;
+ }
+ }
+ if (covered) {
+ continue;
+ }
+ ImmutableBitSet keysClosure = computeClosure(keys);
+ if (keysClosure.contains(ordinals)) {
+ result.add(keys);
+ if (onlyMinimalKeys) {
+ minKeySize = keys.cardinality();
+ }
+ continue;
+ }
+ ImmutableBitSet remain = ordinals.except(keys);
+ for (int attr : remain) {
+ if (keysClosure.get(attr)) {
+ continue;
+ }
+ ImmutableBitSet next = keys.union(ImmutableBitSet.of(attr));
+ if (!visited.contains(next)) {
+ queue.add(next);
+ }
+ }
+ }
+ return result.isEmpty() ? ImmutableSet.of(ordinals) : result;
+ }
+
+ /**
+ * Find ordinals in the given set that never appear as dependents in any
functional dependency.
+ * These are the "source" ordinals that cannot be derived from others.
+ */
+ private ImmutableBitSet findNonDependentAttributes(ImmutableBitSet ordinals)
{
+ ImmutableBitSet dependentsAttrs = dependencyGraph.values().stream()
+ .reduce(ImmutableBitSet.of(), ImmutableBitSet::union);
+ return ordinals.except(dependentsAttrs);
+ }
+
+ /**
+ * Returns a new ArrowSet that is the union of this and another ArrowSet.
+ */
+ public ArrowSet union(ArrowSet other) {
+ Set<Arrow> unionSet = new HashSet<>();
+ unionSet.addAll(this.getArrows());
+ unionSet.addAll(other.getArrows());
+ return new ArrowSet(unionSet);
+ }
+
+ /**
+ * Returns all arrows (functional dependencies) in this ArrowSet.
+ *
+ * @return a set of Arrow objects representing all functional dependencies
+ */
+ public Set<Arrow> getArrows() {
+ return arrowSet;
+ }
+
+ @Override public ArrowSet clone() {
+ return new ArrowSet(new HashSet<>(this.arrowSet));
+ }
+
+ public boolean equalTo(ArrowSet other) {
+ for (Map.Entry<ImmutableBitSet, ImmutableBitSet> entry :
dependencyGraph.entrySet()) {
+ if (!other.implies(entry.getKey(), entry.getValue())) {
+ return false;
+ }
+ }
+ for (Map.Entry<ImmutableBitSet, ImmutableBitSet> entry :
other.dependencyGraph.entrySet()) {
+ if (!implies(entry.getKey(), entry.getValue())) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("ArrowSet{");
+ boolean first = true;
+ for (Arrow arrow : getArrows()) {
+ if (!first) {
+ sb.append(", ");
+ }
+ sb.append(arrow);
+ first = false;
+ }
+ sb.append('}');
+ return sb.toString();
+ }
+
+ /**
+ * Returns true if this ArrowSet implies that determinants determine
dependents.
+ * That is, if dependents ⊆ closure(determinants).
+ */
+ public boolean implies(ImmutableBitSet determinants, ImmutableBitSet
dependents) {
+ ImmutableBitSet dets = dependencyGraph.get(determinants);
+ if (dets != null && dets.contains(dependents)) {
+ return true;
+ }
+ return computeClosure(determinants).contains(dependents);
+ }
+
+ /**
+ * Builder for ArrowSet.
+ * Supports fluent Arrow addition and builds the dependency graph.
Review Comment:
What is the dependency graph?
Isn't it the ArrowSet itself?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]