[ 
https://issues.apache.org/jira/browse/CALCITE-7205?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18024344#comment-18024344
 ] 

Julian Hyde commented on CALCITE-7205:
--------------------------------------

Can you add the following test? It checks a number of important things, like 
reflexivity and transitivity. It passes already.
{code:java}
@Test void testTransitive() {
  // Given axioms:
  //   {a, b} -> {c}
  //   {a, e} -> {d}
  //   {c} -> {e}
  // We can prove that:
  //   {a, b} -> {e}
  //   {a, b} -> {c, e}
  //   {a, b} -> {a, c, e}
  // But not that:
  //   {a} -> {c}
  //   {a} -> {f}
  //   {a, e} -> {c}
  //   {b} -> {c}
  final Set<Integer> a = ImmutableSet.of(0);
  final Set<Integer> ab = ImmutableSet.of(0, 1);
  final Set<Integer> ace = ImmutableSet.of(0, 2, 4);
  final Set<Integer> b = ImmutableSet.of(1);
  final Set<Integer> c = ImmutableSet.of(2);
  final Set<Integer> ce = ImmutableSet.of(2, 4);
  final Set<Integer> d = ImmutableSet.of(3);
  final Set<Integer> e = ImmutableSet.of(4);
  final Set<Integer> ae = ImmutableSet.of(0, 4);
  final Set<Integer> f = ImmutableSet.of(5);

  FunctionalDependencies fds = new FunctionalDependencies.Builder()
      .addFD(ab, c)
      .addFD(ae, d)
      .addFD(c, e)
      .build();
  assertThat(fds.implies(ab, e), is(true));
  assertThat(fds.implies(ab, ce), is(true));
  assertThat(fds.implies(ab, ace), is(true));
  assertThat(fds.implies(a, c), is(false));
  assertThat(fds.implies(a, f), is(false));
  assertThat(fds.implies(ae, c), is(false));
  assertThat(fds.implies(b, c), is(false));
}
 {code}

> Implement Core FD Algorithm Library for Functional Dependencies
> ---------------------------------------------------------------
>
>                 Key: CALCITE-7205
>                 URL: https://issues.apache.org/jira/browse/CALCITE-7205
>             Project: Calcite
>          Issue Type: Task
>          Components: core
>    Affects Versions: 1.40.0
>            Reporter: Zhen Chen
>            Assignee: Zhen Chen
>            Priority: Minor
>              Labels: pull-request-available
>
> This JIRA primarily provides foundational capabilities for *relational 
> algebra and metadata reasoning*.
> *Main Implementations:*
> * Encapsulates functional dependency (FD) sets, supporting basic operations 
> such as addition, deletion, and query.
> * Provides algorithms for closure computation, equivalence judgment, and 
> candidate key discovery.
> * Supports efficient attribute reasoning and dependency reasoning, 
> facilitating use by upper-layer optimizers and metadata modules.
> Please refer to the discussion in the PR for [more 
> details|https://github.com/apache/calcite/pull/4540#discussion_r2384265382].
> ​​The naming design for the relevant classes is as follows:​​
> {code:java}
> /**
>  * Represents a functional dependency between two sets of columns (by their 
> indices)
>  * in a relational schema.
>  * <p>
>  * A functional dependency specifies that the values of the {@code 
> determinants}
>  * uniquely determine the values of the {@code dependents}. In other words, 
> for any two rows
>  * in the relation, if the values of the determinant columns are equal, then 
> the values
>  * of the dependent columns must also be equal.
>  * </p>
>  *
>  * <p>
>  * Notation: {@code determinants} &rarr; {@code dependents}
>  * </p>
>  *
>  * <p>
>  * <b>Example:</b>
>  * <pre>
>  * Given a table with columns [emp_id, name, dept]
>  * If determinants = {0} (emp_id) and dependents = {1, 2} (name, dept),
>  * this means: emp_id &rarr; {name, dept}
>  * That is, each emp_id uniquely determines both name and dept.
>  * </pre>
>  * </p>
>  *
>  * <p>
>  * Both determinants and dependents are sets of column indices, allowing this 
> class
>  * to represent one-to-one, one-to-many, many-to-one, and many-to-many 
> relationships.
>  * </p>
>  */
> public class FunctionalDependence {
>     // The set of column indices that are the determinants in the functional 
> dependency.
>     private final ImmutableSet<Integer> determinants;
>     // The set of column indices that are functionally determined by the 
> {@link #determinants}.
>     private final ImmutableSet<Integer> dependents;
>     // Constructor, getters, and other methods would go here...
> }
> {code}
> {code:java}
> /**
>  * Represents a collection of functional dependency relationships in a 
> relational schema.
>  *
>  * <p>
>  * Each element in this collection is a mapping from a set of determinant 
> columns to a set of dependent columns,
>  * capturing how the values of the determinants uniquely determine the values 
> of the dependents.
>  * This class provides convenient methods to manage, query, and analyze 
> multiple functional dependencies
>  * associated with a table or relational expression.
>  * </p>
>  *
>  * <p>
>  * This structure can represent one-to-one, one-to-many, many-to-one, and 
> many-to-many
>  * functional dependencies, and supports efficient dependency analysis and 
> closure computation.
>  * </p>
>  */
> public class FunctionalDependencies {
>     /** Maps each determinant set to the set of dependent columns it 
> functionally determines. */
>     private final Map<Set<Integer>, Set<Integer>> dependencyGraph = new 
> HashMap<>();
>     /** Maps each attribute (column index) to all determinant sets containing 
> this attribute,
>      *  for efficient reverse lookups and dependency analysis. */
>     private final Map<Integer, Set<Set<Integer>>> reverseIndex = new 
> HashMap<>();
>     
>     /**
>      * Computes the closure of a set of attributes with respect to the 
> current collection of functional dependencies.
>      *
>      * <p>
>      * The closure of a set of attributes is the set of all attributes that 
> can be functionally determined
>      * from the given set via zero or more applications of the functional 
> dependencies in this collection.
>      * </p>
>      *
>      * <p>
>      * <b>Example:</b> If the dependencies are {0} -> {1}, {1} -> {2}, then 
> the closure of {0} is {0, 1, 2}.
>      * </p>
>      *
>      * @param attributes the set of attribute indices whose closure is to be 
> computed
>      * @return the closure of {@code attributes} under the current set of 
> functional dependencies
>      */
>     public Set<Integer> computeClosure(Set<Integer> attributes) {}
>     /**
>      * Finds all candidate keys (or superkeys) for a relation given a set of 
> attributes and the current functional dependencies.
>      *
>      * <p>
>      * A candidate key is a minimal set of attributes that can uniquely 
> determine all attributes in the relation
>      * (i.e., its closure contains all attributes), and no proper subset of 
> it has this property.
>      * If {@code onlyMinimalKeys} is false, the method may return all 
> superkeys (not necessarily minimal).
>      * </p>
>      *
>      * <p>
>      * <b>Note:</b> This method may be computationally expensive for large 
> attribute sets.
>      * </p>
>      *
>      * @param attributes the set of all attribute indices in the relation
>      * @param onlyMinimalKeys if true, only minimal candidate keys are 
> returned; if false, all superkeys may be returned
>      * @return a set of candidate keys (or superkeys), each represented as a 
> set of attribute indices
>      */
>     public Set<Set<Integer>> findCandidateKeys(Set<Integer> attributes, 
> boolean onlyMinimalKeys) {}
> }
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to