mihaibudiu commented on code in PR #4540: URL: https://github.com/apache/calcite/pull/4540#discussion_r2353984213
########## core/src/main/java/org/apache/calcite/rel/metadata/FunctionalDependency.java: ########## @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import org.checkerframework.checker.nullness.qual.Nullable; + +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; + +import static java.util.Objects.requireNonNull; + +/** + * Represents a functional dependency X determines Y in relational database theory. + * + * <p>A functional dependency X determines Y holds in a relation R if and only if: + * for any two tuples t1 and t2 in R, if t1[X] = t2[X], then t1[Y] = t2[Y]. + * + * <p>In other words, the values of attributes X uniquely determine the values + * of attributes Y. X is called the determinant and Y is called the dependent. + * + * <p>This class is immutable and thread-safe. + */ +public class FunctionalDependency { + private final ImmutableBitSet determinants; + private final ImmutableBitSet dependents; + + public FunctionalDependency(ImmutableBitSet determinants, ImmutableBitSet dependents) { + this.determinants = requireNonNull(determinants, "determinants"); + this.dependents = requireNonNull(dependents, "dependents"); + } + + /** + * Create FD from column indices. + */ + public static FunctionalDependency of(int[] determinantColumns, int[] dependentColumns) { + return new FunctionalDependency( + ImmutableBitSet.of(determinantColumns), + ImmutableBitSet.of(dependentColumns)); + } + + /** + * Create FD from single determinant to single dependent. + */ + public static FunctionalDependency of(int determinant, int dependent) { + return new FunctionalDependency( + ImmutableBitSet.of(determinant), + ImmutableBitSet.of(dependent)); + } + + /** + * Create FD from determinant set to dependent set. + */ + public static FunctionalDependency of(ImmutableBitSet determinants, ImmutableBitSet dependents) { + return new FunctionalDependency(determinants, dependents); + } + + public ImmutableBitSet getDeterminants() { + return determinants; + } + + public ImmutableBitSet getDependents() { + return dependents; + } + + /** + * Returns true if this FD is trivial (dependents ⊆ determinants). + */ + public boolean isTrivial() { + return determinants.contains(dependents); + } + + /** + * Split this FD into multiple FDs, each with a single dependent column. + */ + public Set<FunctionalDependency> split() { Review Comment: are these "canonical" FDs? ########## core/src/main/java/org/apache/calcite/rel/metadata/FunctionalDependency.java: ########## @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import org.checkerframework.checker.nullness.qual.Nullable; + +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; + +import static java.util.Objects.requireNonNull; + +/** + * Represents a functional dependency X determines Y in relational database theory. + * + * <p>A functional dependency X determines Y holds in a relation R if and only if: + * for any two tuples t1 and t2 in R, if t1[X] = t2[X], then t1[Y] = t2[Y]. + * + * <p>In other words, the values of attributes X uniquely determine the values + * of attributes Y. X is called the determinant and Y is called the dependent. + * + * <p>This class is immutable and thread-safe. + */ +public class FunctionalDependency { + private final ImmutableBitSet determinants; Review Comment: In this implementation these are column indexes, and not attribute names. Clearly, relation R mentioned above is not part of this data structure. ########## core/src/main/java/org/apache/calcite/rel/metadata/FunctionalDependencySet.java: ########## @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import com.google.common.collect.ImmutableSet; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** + * A set of functional dependencies with closure and minimal cover operations. + * This class implements standard algorithms for functional dependency reasoning. + */ +public class FunctionalDependencySet { + private final Set<FunctionalDependency> fdSet = new HashSet<>(); + + public FunctionalDependencySet() {} + + public FunctionalDependencySet(Set<FunctionalDependency> fds) { + this.fdSet.addAll(fds); + } + + public void addFD(FunctionalDependency fd) { + if (!fd.isTrivial()) { + fdSet.add(fd); + } + } + + public void addFD(ImmutableBitSet determinants, ImmutableBitSet dependents) { + addFD(new FunctionalDependency(determinants, dependents)); + } + + public void addFD(int determinant, int dependent) { + addFD(ImmutableBitSet.of(determinant), ImmutableBitSet.of(dependent)); + } + + public void removeFD(FunctionalDependency fd) { + fdSet.remove(fd); + } + + public Set<FunctionalDependency> getFDs() { + return Collections.unmodifiableSet(fdSet); + } + + public boolean isEmpty() { + return fdSet.isEmpty(); + } + + public int size() { + return fdSet.size(); + } + + /** + * Computes the closure of a set of attributes under this functional dependency set. + * The closure of X, denoted X+, is the set of all attributes that can be functionally + * determined by X using the functional dependencies in this set and Armstrong's axioms. + * + * @param attrs the input attribute set + * @return the closure of the input attributes + */ + public ImmutableBitSet closure(ImmutableBitSet attrs) { Review Comment: I am guessing this can be very large - exponential in the size of the attributes ########## core/src/test/java/org/apache/calcite/rel/metadata/FunctionalDependencyTest.java: ########## @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + +/** + * Tests for {@link FunctionalDependency} and {@link FunctionalDependencySet}. + */ +public class FunctionalDependencyTest { + + @Test void testFunctionalDependencyBasic() { + // Test FD creation and basic properties + FunctionalDependency fd = FunctionalDependency.of(new int[]{0}, new int[]{1}); + assertThat(fd.getDeterminants(), equalTo(ImmutableBitSet.of(0))); + assertThat(fd.getDependents(), equalTo(ImmutableBitSet.of(1))); + assertThat(fd.isTrivial(), is(false)); + + // Test trivial FD + FunctionalDependency trivialFd = FunctionalDependency.of(new int[]{0, 1}, new int[]{0}); + assertThat(trivialFd.isTrivial(), is(true)); + } + + @Test void testFunctionalDependencySet() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Add some FDs: A -> B, B -> C + fdSet.addFD(0, 1); // A -> B + fdSet.addFD(1, 2); // B -> C + + // Test closure: {A}+ should include A, B, C + ImmutableBitSet closure = fdSet.closure(ImmutableBitSet.of(0)); + assertThat(closure.get(0), is(true)); // A + assertThat(closure.get(1), is(true)); // B (A -> B) + assertThat(closure.get(2), is(true)); // C (A -> B, B -> C, so A -> C by transitivity) + + // Test determines + assertThat(fdSet.determines(0, 1), is(true)); // A -> B (direct) + assertThat(fdSet.determines(0, 2), is(true)); // A -> C (transitive) + assertThat(fdSet.determines(2, 0), is(false)); // C does not determine A + } + + @Test void testMinimalCover() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Add redundant FDs + fdSet.addFD(ImmutableBitSet.of(0), ImmutableBitSet.of(1)); // A -> B + fdSet.addFD(ImmutableBitSet.of(1), ImmutableBitSet.of(2)); // B -> C + fdSet.addFD(ImmutableBitSet.of(0), ImmutableBitSet.of(2)); // A -> C + + FunctionalDependencySet minimal = fdSet.minimalCover(); + + // The minimal cover should not contain A -> C since it's implied by A -> B and B -> C + assertThat( + minimal.implies(ImmutableBitSet.of(0), + ImmutableBitSet.of(1)), is(true)); // A -> B + assertThat( + minimal.implies(ImmutableBitSet.of(1), + ImmutableBitSet.of(2)), is(true)); // B -> C + assertThat( + minimal.implies(ImmutableBitSet.of(0), + ImmutableBitSet.of(2)), is(true)); // A -> C (derived) + + // Should be equivalent to original + assertThat(fdSet.equalTo(minimal), is(true)); + } + + @Test void testKeyFinding() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Schema: A, B, C, D with FDs: A -> B, BC -> D + fdSet.addFD(0, 1); // A -> B + fdSet.addFD(ImmutableBitSet.of(1, 2), ImmutableBitSet.of(3)); // BC -> D + + ImmutableBitSet allAttributes = ImmutableBitSet.of(0, 1, 2, 3); Review Comment: The method which generates all attributes should be a static method of the FunctionalDependencySet. ########## core/src/test/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependencyIntegrationTest.java: ########## @@ -0,0 +1,527 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.rules.CoreRules; +import org.apache.calcite.test.SqlToRelTestBase; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.ImmutableIntList; + +import com.google.common.collect.ImmutableList; + +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +/** + * Tests for functional dependency metadata in RelMdFunctionalDependency. + */ +public class RelMdFunctionalDependencyIntegrationTest extends SqlToRelTestBase { + + @Test void testFunctionalDependencyProject() { + final String sql = "select empno, deptno, deptno + 1 as deptNoPlus1, rand() as rand" + + " from emp where deptno < 20"; + + // Plan is: + // LogicalProject(EMPNO=[$0], DEPTNO=[$7], DEPTNOPLUS1=[+($7, 1)], RAND=[RAND()]) + // LogicalFilter(condition=[<($7, 20)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno - primary key + int deptNo = 1; // deptno + int deptNoPlus1 = 2; // deptno + 1 + int rand = 3; // rand() + + // deptno + 1 should determine deptno (can derive deptno from deptno+1) + assertThat(mq.determines(relNode, deptNoPlus1, deptNo), is(Boolean.TRUE)); + + // deptno should determine deptno + 1 (expression is deterministic) + assertThat(mq.determines(relNode, deptNo, deptNoPlus1), is(Boolean.TRUE)); + + // deptno should NOT determine empno (deptno is not unique) + assertThat(mq.determines(relNode, deptNo, empNo), is(Boolean.FALSE)); + + // empno should determine deptno + 1 (primary key determines everything) + assertThat(mq.determines(relNode, empNo, deptNoPlus1), is(Boolean.TRUE)); + + // deptno + 1 should NOT determine empno (deptno + 1 is not unique) + assertThat(mq.determines(relNode, deptNoPlus1, empNo), is(Boolean.FALSE)); + + // rand() should not determine anything (non-deterministic) + assertThat(mq.determines(relNode, rand, empNo), is(Boolean.FALSE)); + + // Nothing should determine rand() (non-deterministic) + assertThat(mq.determines(relNode, empNo, rand), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyAggregate() { + final String sql = "select deptno, count(*) as \"count\", sum(sal) as sumSal" + + " from emp group by deptno"; + + // Plan is: + // LogicalAggregate(group=[{0}], count=[COUNT()], SUMSAL=[SUM($1)]) + // LogicalProject(DEPTNO=[$7], SAL=[$5]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int deptNo = 0; // deptno (group by column) + int count = 1; // count(*) + int sumSal = 2; // sum(sal) + + // Group by column should determine aggregate columns + assertThat(mq.determines(relNode, deptNo, count), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, deptNo, sumSal), is(Boolean.TRUE)); + + // Aggregate columns should not determine group by column + assertThat(mq.determines(relNode, count, deptNo), is(Boolean.FALSE)); + assertThat(mq.determines(relNode, sumSal, deptNo), is(Boolean.FALSE)); + + // Aggregate columns should not determine each other + assertThat(mq.determines(relNode, count, sumSal), is(Boolean.FALSE)); + assertThat(mq.determines(relNode, sumSal, count), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyWithIdenticalExpressions() { + final String sql = "select deptno, deptno as deptno2, deptno + 1 as deptno3 from emp"; + + // Plan is: + // LogicalProject(DEPTNO=[$7], DEPTNO2=[$7], DEPTNO3=[+($7, 1)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int deptNo = 0; // deptno + int deptNo2 = 1; // deptno (identical) + int deptNo3 = 2; // deptno + 1 + + // Identical expressions should determine each other + assertThat(mq.determines(relNode, deptNo, deptNo2), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, deptNo2, deptNo), is(Boolean.TRUE)); + + // deptno should determine deptno + 1 (deterministic expression) + assertThat(mq.determines(relNode, deptNo, deptNo3), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, deptNo2, deptNo3), is(Boolean.TRUE)); + + // deptno + 1 should determine deptno (can derive original from increment) + assertThat(mq.determines(relNode, deptNo3, deptNo), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, deptNo3, deptNo2), is(Boolean.TRUE)); + } + + @Test void testFunctionalDependencyJoin() { + // Test inner join with emp and dept tables + final String sql = "select e.empno, e.ename, e.deptno, d.name" + + " from emp e join dept d on e.deptno = d.deptno"; + + // Plan is: + // LogicalProject(EMPNO=[$0], ENAME=[$1], DEPTNO=[$7], NAME=[$10]) + // LogicalJoin(condition=[=($7, $9)], joinType=[inner]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + // LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // e.empno + int ename = 1; // e.ename + int deptno = 2; // e.deptno + int dname = 3; // d.name + + // Left side functional dependencies should be preserved + // empno determines ename and deptno (primary key from emp table) + assertThat(mq.determines(relNode, empNo, ename), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + + // Right side functional dependencies should be preserved with shifted indices + // deptno determines dname (primary key from dept table) + assertThat(mq.determines(relNode, deptno, dname), is(Boolean.TRUE)); + + // Cross-table dependencies through join condition + // empno should determine dname (through deptno) + assertThat(mq.determines(relNode, empNo, dname), is(Boolean.TRUE)); + + // Reverse dependencies should not hold + assertThat(mq.determines(relNode, ename, empNo), is(Boolean.FALSE)); + assertThat(mq.determines(relNode, dname, empNo), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyFilter() { + final String sql = "select empno, ename, sal from emp where sal > 1000"; + + // Plan is: + // LogicalProject(EMPNO=[$0], ENAME=[$1], SAL=[$5]) + // LogicalFilter(condition=[>($5, 1000)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int ename = 1; // ename + int sal = 2; // sal + + // Filter should preserve functional dependencies from base table + // empno should still determine ename and sal + assertThat(mq.determines(relNode, empNo, ename), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, empNo, sal), is(Boolean.TRUE)); + + // sal should not determine empno + assertThat(mq.determines(relNode, sal, empNo), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyComplexExpressions() { + final String sql = "select empno, sal, sal * 1.1 as raised_sal, " + + "case when sal > 2000 then 'high' else 'low' end as sal_category" + + " from emp"; + + // Plan is: + // LogicalProject(EMPNO=[$0], SAL=[$5], RAISED_SAL=[*($5, 1.1:DECIMAL(2, 1))], + // SAL_CATEGORY=[CASE(>($5, 2000), 'high', 'low ')]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int sal = 1; // sal + int raisedSal = 2; // sal * 1.1 + int salCategory = 3; // case expression + + // empno determines all other columns (primary key) + assertThat(mq.determines(relNode, empNo, sal), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, empNo, raisedSal), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, empNo, salCategory), is(Boolean.TRUE)); + + // sal determines computed columns based on sal + assertThat(mq.determines(relNode, sal, raisedSal), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, sal, salCategory), is(Boolean.TRUE)); + + // Computed columns should also determine sal (deterministic computation) + assertThat(mq.determines(relNode, raisedSal, sal), is(Boolean.TRUE)); + } + + @Test void testFunctionalDependencyUnion() { + final String sql = "select empno, deptno from emp where deptno = 10" + + " union all " + + " select empno, deptno from emp where deptno = 20"; + + // Plan is: + // LogicalUnion(all=[true]) + // LogicalProject(EMPNO=[$0], DEPTNO=[$7]) + // LogicalFilter(condition=[=($7, 10)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + // LogicalProject(EMPNO=[$0], DEPTNO=[$7]) + // LogicalFilter(condition=[=($7, 20)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int deptno = 1; // deptno + + // Union handling is not yet fully implemented + // For now, expect this might not work + // assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + + // deptno should not determine empno + assertThat(mq.determines(relNode, deptno, empNo), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyMultipleKeys() { + final String sql = "select empno, deptno, empno * 100 + deptno as composite from emp"; + + // Plan is: + // LogicalProject(EMPNO=[$0], DEPTNO=[$7], COMPOSITE=[+(*($0, 100), $7)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int deptno = 1; // deptno + int composite = 2; // empno * 100 + deptno + + // empno determines all + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, empNo, composite), is(Boolean.TRUE)); + + // Multiple keys handling is not yet fully implemented + } + + @Test void testFunctionalDependencyConstants() { + final String sql = "select empno, 100 as constant_col, deptno" + + " from emp"; + + // Plan is: + // LogicalProject(EMPNO=[$0], CONSTANT_COL=[100], DEPTNO=[$7]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int constant = 1; // 100 (constant) + int deptno = 2; // deptno + + // Constants should be functionally dependent on everything + assertThat(mq.determines(relNode, empNo, constant), is(Boolean.TRUE)); + + // Original dependencies should be preserved + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + + // Everything determines constants + assertThat(mq.determines(relNode, deptno, constant), is(Boolean.TRUE)); + } + + @Test void testFunctionalDependencyNonDeterministicFunctions() { + final String sql = "select empno, rand() as random1, rand() as random2 from emp"; + + // Plan is: + // LogicalProject(EMPNO=[$0], RANDOM1=[RAND()], RANDOM2=[RAND()]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int random1 = 1; // random1 + int random2 = 2; // random2 + + // Basic functional dependencies should still work + assertThat(mq.determines(relNode, empNo, empNo), is(Boolean.TRUE)); + + // Non-deterministic functions should not determine anything + assertThat(mq.determines(relNode, random1, empNo), is(Boolean.FALSE)); + assertThat(mq.determines(relNode, random1, random2), is(Boolean.FALSE)); + assertThat(mq.determines(relNode, random2, empNo), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyLeftJoin() { + final String sql = "SELECT e.empno, e.deptno, d.name\n" + + "FROM emp e\n" + + "LEFT JOIN dept d ON e.deptno = d.deptno"; + + // Plan is: + // LogicalProject(EMPNO=[$0], DEPTNO=[$7], NAME=[$10]) + // LogicalJoin(condition=[=($7, $9)], joinType=[left]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + // LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // e.empno + int deptno = 1; // e.deptno + int dname = 2; // d.name + + // Left side dependencies should be preserved + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + + // Cross-join dependencies might not hold due to nulls from outer join + assertThat(mq.determines(relNode, empNo, dname), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyInnerJoin() { + final String sql = "SELECT e.empno, e.deptno, d.name\n" + + "FROM emp e\n" + + "JOIN dept d ON e.deptno = d.deptno"; + + // Plan is: + // LogicalProject(EMPNO=[$0], DEPTNO=[$7], NAME=[$10]) + // LogicalJoin(condition=[=($7, $9)], joinType=[inner]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + // LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // e.empno + int deptno = 1; // e.deptno + int dname = 2; // d.name + + // Left side dependencies should be preserved + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + + // Cross-join dependencies should be preserved in inner join + assertThat(mq.determines(relNode, empNo, dname), is(Boolean.TRUE)); + } + + @Test void testFunctionalDependencyRightJoin() { + final String sql = "SELECT e.empno, e.deptno, d.name\n" + + "FROM dept d\n" + + "RIGHT JOIN emp e ON e.deptno = d.deptno"; + + // Plan is: + // LogicalProject(EMPNO=[$2], DEPTNO=[$9], NAME=[$1]) + // LogicalJoin(condition=[=($9, $0)], joinType=[right]) + // LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + System.out.println(RelOptUtil.toString(relNode)); Review Comment: should this be here? ########## core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java: ########## @@ -64,212 +76,445 @@ protected RelMdFunctionalDependency() {} return BuiltInMetadata.FunctionalDependency.DEF; } + /** + * Determines if column is functionally dependent on key for a given rel node. + */ public @Nullable Boolean determines(RelNode rel, RelMetadataQuery mq, int key, int column) { - return determinesImpl2(rel, mq, key, column); + return determinesSet(rel, mq, ImmutableBitSet.of(key), ImmutableBitSet.of(column)); } - public @Nullable Boolean determines(SetOp rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Determines if a set of columns functionally determines another set of columns. + */ + public Boolean determinesSet(RelNode rel, RelMetadataQuery mq, + ImmutableBitSet determinants, ImmutableBitSet dependents) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.implies(determinants, dependents); } - public @Nullable Boolean determines(Join rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Returns the closure of a set of columns under all functional dependencies. + */ + public ImmutableBitSet closure(RelNode rel, RelMetadataQuery mq, ImmutableBitSet attrs) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.closure(attrs); } - public @Nullable Boolean determines(Correlate rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Returns candidate keys for the relation within the specified set of attributes. + */ + public Set<ImmutableBitSet> candidateKeys( + RelNode rel, RelMetadataQuery mq, ImmutableBitSet attributes) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.findKeys(attributes); } - public @Nullable Boolean determines(Aggregate rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); + /** + * Main dispatch method for getFunctionalDependencies. + * Routes to appropriate handler based on RelNode type. + */ + public FunctionalDependencySet getFunctionalDependencies(RelNode rel, RelMetadataQuery mq) { + if (rel instanceof TableScan) { + return getTableScanFD((TableScan) rel); + } else if (rel instanceof Project) { + return getProjectFD((Project) rel, mq); + } else if (rel instanceof Aggregate) { + return getAggregateFD((Aggregate) rel, mq); + } else if (rel instanceof Join) { + return getJoinFD((Join) rel, mq); + } else if (rel instanceof Calc) { + return getCalcFD((Calc) rel, mq); + } else if (rel instanceof SetOp) { + // TODO: Handle UNION, INTERSECT, EXCEPT functional dependencies + return new FunctionalDependencySet(); + } else if (rel instanceof Correlate) { + // TODO: Handle CORRELATE functional dependencies + return new FunctionalDependencySet(); + } + return getFD(rel.getInputs(), mq); } - public @Nullable Boolean determines(Calc rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); + private static FunctionalDependencySet getFD(List<RelNode> inputs, RelMetadataQuery mq) { + FunctionalDependencySet result = new FunctionalDependencySet(); + for (RelNode input : inputs) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(input); + result = result.union(fdSet); + } + return result; } - public @Nullable Boolean determines(Project rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); - } + private static FunctionalDependencySet getTableScanFD(TableScan rel) { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); - /** - * Checks if a column is functionally determined by a key column through expression analysis. - * - * @param rel The input relation - * @param mq Metadata query instance - * @param key Index of the determinant expression - * @param column Index of the dependent expression - * @return TRUE if column is determined by key, - * FALSE if not determined, - * NULL if undetermined - */ - private static @Nullable Boolean determinesImpl(RelNode rel, RelMetadataQuery mq, - int key, int column) { - if (preCheck(rel, key, column)) { - return true; + RelOptTable table = rel.getTable(); + List<ImmutableBitSet> keys = table.getKeys(); + if (keys == null || keys.isEmpty()) { + return fdSet; } - ImmutableBitSet keyInputIndices = null; - ImmutableBitSet columnInputIndices = null; - if (rel instanceof Project || rel instanceof Calc) { - List<RexNode> exprs = null; - if (rel instanceof Project) { - Project project = (Project) rel; - exprs = project.getProjects(); - } else { - Calc calc = (Calc) rel; - final RexProgram program = calc.getProgram(); - exprs = program.expandList(program.getProjectList()); + for (ImmutableBitSet key : keys) { + ImmutableBitSet allColumns = ImmutableBitSet.range(rel.getRowType().getFieldCount()); + ImmutableBitSet dependents = allColumns.except(key); + if (!dependents.isEmpty()) { + fdSet.addFD(key, dependents); } + } - // TODO: Supports dependency analysis for all types of expressions - if (!(exprs.get(column) instanceof RexInputRef)) { - return false; - } + return fdSet; + } - RexNode keyExpr = exprs.get(key); - RexNode columnExpr = exprs.get(column); + private static FunctionalDependencySet getProjectFD(Project rel, RelMetadataQuery mq) { + return getProjectionFD(rel.getInput(), rel.getProjects(), mq); + } - // Identical expressions imply functional dependency - if (keyExpr.equals(columnExpr)) { - return true; + /** + * Common method to compute functional dependencies for projection operations. + * Used by both Project and Calc nodes. + * + * @param input the input relation + * @param projections the list of projection expressions + * @param mq the metadata query + * @return the functional dependency set for the projection + */ + private static FunctionalDependencySet getProjectionFD( + RelNode input, List<RexNode> projections, RelMetadataQuery mq) { + FunctionalDependencySet inputFdSet = mq.getFunctionalDependencies(input); + FunctionalDependencySet projectionFdSet = new FunctionalDependencySet(); + int fieldCount = projections.size(); + + // Create mapping from input column indices to project column indices + Mappings.TargetMapping inputToOutputMap = + RelOptUtil.permutation(projections, input.getRowType()); + + // Map input functional dependencies to project dependencies + mapInputFDs(inputFdSet, inputToOutputMap, projectionFdSet); + + // For each pair of output columns, determine if one determines the other + for (int i = 0; i < fieldCount; i++) { + for (int j = i + 1; j < fieldCount; j++) { + RexNode expr1 = projections.get(i); + RexNode expr2 = projections.get(j); + + // Handle identical expressions, they determine each other + if (expr1.equals(expr2) && RexUtil.isDeterministic(expr1)) { + projectionFdSet.addFD(i, j); + projectionFdSet.addFD(j, i); + continue; + } + + // Handle literal constants, all columns determine literals + if (expr1 instanceof RexLiteral) { + projectionFdSet.addFD(j, i); + } + if (expr2 instanceof RexLiteral) { + projectionFdSet.addFD(i, j); + } + + // For complex expressions, check if they have functional dependencies + if (!(expr1 instanceof RexLiteral) && !(expr2 instanceof RexLiteral) + && RexUtil.isDeterministic(expr1) && RexUtil.isDeterministic(expr2)) { + ImmutableBitSet inputs1 = RelOptUtil.InputFinder.bits(expr1); + ImmutableBitSet inputs2 = RelOptUtil.InputFinder.bits(expr2); + + if (!inputs1.isEmpty() && !inputs2.isEmpty()) { + if (inputFdSet.implies(inputs1, inputs2)) { + projectionFdSet.addFD(i, j); + } + if (inputFdSet.implies(inputs2, inputs1)) { + projectionFdSet.addFD(j, i); + } + } + } } + } - keyInputIndices = extractDeterministicRefs(keyExpr); - columnInputIndices = extractDeterministicRefs(columnExpr); - } else if (rel instanceof Aggregate) { - Aggregate aggregate = (Aggregate) rel; + return projectionFdSet; + } - int groupByCnt = aggregate.getGroupCount(); - if (key < groupByCnt && column >= groupByCnt) { - return false; + /** + * Maps input functional dependencies to output dependencies based on column mapping. + */ + private static void mapInputFDs(FunctionalDependencySet inputFdSet, + Mappings.TargetMapping mapping, FunctionalDependencySet outputFdSet) { + for (FunctionalDependency inputFd : inputFdSet.getFDs()) { + ImmutableBitSet determinants = inputFd.getDeterminants(); + ImmutableBitSet dependents = inputFd.getDependents(); + + // Skip this FD if any determinant column is unmappable + boolean allMappable = + determinants.stream().allMatch(col -> col >= 0 + && col < mapping.getSourceCount() + && mapping.getTargetOpt(col) >= 0); + if (!allMappable) { + continue; } - keyInputIndices = extractDeterministicRefs(aggregate, key); - columnInputIndices = extractDeterministicRefs(aggregate, column); - } else { - throw new UnsupportedOperationException("Unsupported RelNode type: " - + rel.getClass().getSimpleName()); - } + // Map all determinant columns + ImmutableBitSet mappedDeterminants = mapAllCols(determinants, mapping); + if (mappedDeterminants.isEmpty()) { + continue; + } - // Early return if invalid cases - if (keyInputIndices.isEmpty() - || columnInputIndices.isEmpty()) { - return false; + // Map only the dependent columns that can be mapped + ImmutableBitSet mappedDependents = mapAvailableCols(dependents, mapping); + if (!mappedDependents.isEmpty()) { + outputFdSet.addFD(mappedDeterminants, mappedDependents); + } } + } - // Currently only supports multiple (keyInputIndices) to one (columnInputIndices) - // dependency detection - for (Integer keyRef : keyInputIndices) { - if (Boolean.FALSE.equals( - mq.determines(rel.getInput(0), keyRef, - columnInputIndices.nextSetBit(0)))) { - return false; + /** + * Maps all columns in the set. Returns empty set if any column cannot be mapped. + */ + private static ImmutableBitSet mapAllCols( + ImmutableBitSet columns, Mappings.TargetMapping mapping) { + ImmutableBitSet.Builder builder = ImmutableBitSet.builder(); + for (int col : columns) { + if (col < 0 || col >= mapping.getSourceCount()) { + return ImmutableBitSet.of(); + } + int mappedCol = mapping.getTargetOpt(col); + if (mappedCol >= 0) { + builder.set(mappedCol); + } else { + return ImmutableBitSet.of(); } } - - return true; + return builder.build(); } /** - * determinesImpl2is similar to determinesImpl, but it doesn't need to handle the - * mapping between output and input columns. + * Maps only the columns that can be mapped, ignoring unmappable ones. */ - private static @Nullable Boolean determinesImpl2(RelNode rel, RelMetadataQuery mq, - int key, int column) { - if (preCheck(rel, key, column)) { - return true; + private static ImmutableBitSet mapAvailableCols( + ImmutableBitSet columns, Mappings.TargetMapping mapping) { + ImmutableBitSet.Builder builder = ImmutableBitSet.builder(); + for (int col : columns) { + if (col < 0 || col >= mapping.getSourceCount()) { + continue; + } + int mappedCol = mapping.getTargetOpt(col); + if (mappedCol >= 0) { + builder.set(mappedCol); + } } + return builder.build(); + } - if (rel instanceof TableScan) { - TableScan tableScan = (TableScan) rel; - RelOptTable table = tableScan.getTable(); - List<ImmutableBitSet> keys = table.getKeys(); - return keys != null - && keys.size() == 1 - && keys.get(0).equals(ImmutableBitSet.of(column)); - } else if (rel instanceof Join) { - Join join = (Join) rel; - // TODO Considering column mapping based on equality conditions in join - int leftFieldCnt = join.getLeft().getRowType().getFieldCount(); - if (key < leftFieldCnt && column < leftFieldCnt) { - return mq.determines(join.getLeft(), key, column); - } else if (key >= leftFieldCnt && column >= leftFieldCnt) { - return mq.determines(join.getRight(), key - leftFieldCnt, column - leftFieldCnt); + private static FunctionalDependencySet getAggregateFD(Aggregate rel, RelMetadataQuery mq) { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + FunctionalDependencySet inputFdSet = mq.getFunctionalDependencies(rel.getInput()); + + // Group set columns in the output + ImmutableBitSet groupSet = rel.getGroupSet(); + + // 1. Preserve input FDs that only involve group columns + for (FunctionalDependency inputFd : inputFdSet.getFDs()) { + ImmutableBitSet determinants = inputFd.getDeterminants(); + ImmutableBitSet dependents = inputFd.getDependents(); + + // Only preserve if both determinants and dependents are within group columns + if (groupSet.contains(determinants) && groupSet.contains(dependents)) { + fdSet.addFD(determinants, dependents); } - return false; - } else if (rel instanceof Correlate) { - // TODO Support Correlate. - return false; - } else if (rel instanceof SetOp) { - // TODO Support SetOp - return false; } - return mq.determines(rel.getInput(0), key, column); + // 2. Group keys determine all aggregate columns + if (!groupSet.isEmpty() && !rel.getAggCallList().isEmpty()) { + for (int i = rel.getGroupCount(); i < rel.getRowType().getFieldCount(); i++) { + fdSet.addFD(groupSet, ImmutableBitSet.of(i)); + } + } + + return fdSet; + } + + private static FunctionalDependencySet getJoinFD(Join rel, RelMetadataQuery mq) { + FunctionalDependencySet leftFdSet = mq.getFunctionalDependencies(rel.getLeft()); + FunctionalDependencySet rightFdSet = mq.getFunctionalDependencies(rel.getRight()); + + int leftFieldCount = rel.getLeft().getRowType().getFieldCount(); + JoinRelType joinType = rel.getJoinType(); + + switch (joinType) { + case INNER: + // Inner join: preserve all FDs and derive cross-table dependencies + FunctionalDependencySet innerJoinFdSet + = leftFdSet.union(shiftFdSet(rightFdSet, leftFieldCount)); + deriveTransitiveFDs(rel, innerJoinFdSet, leftFieldCount); + return innerJoinFdSet; + case LEFT: + // Left join: preserve left FDs, right FDs may be invalidated by NULLs + FunctionalDependencySet leftJoinFdSet = new FunctionalDependencySet(leftFdSet.getFDs()); + deriveTransitiveFDs(rel, leftJoinFdSet, leftFieldCount); + return leftJoinFdSet; + case RIGHT: { + // Right join: preserve right FDs, left FDs may be invalidated by NULLs + // 只保留右表字段决定右表字段的 FD,不做任何跨表 transitive 推导 + FunctionalDependencySet shiftedRightFdSet = shiftFdSet(rightFdSet, leftFieldCount); + FunctionalDependencySet filteredFdSet = new FunctionalDependencySet(); Review Comment: i don't understand why LEFT and RIGHT are not symmetric ########## core/src/main/java/org/apache/calcite/rel/metadata/FunctionalDependencySet.java: ########## @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import com.google.common.collect.ImmutableSet; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** + * A set of functional dependencies with closure and minimal cover operations. + * This class implements standard algorithms for functional dependency reasoning. + */ +public class FunctionalDependencySet { + private final Set<FunctionalDependency> fdSet = new HashSet<>(); + + public FunctionalDependencySet() {} Review Comment: I think you need to represent the size of the relation in the Set (i.e. number of columns). You can then check that all members are well-formed (have at most this many columns). ########## core/src/main/java/org/apache/calcite/rel/metadata/FunctionalDependencySet.java: ########## @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import com.google.common.collect.ImmutableSet; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** + * A set of functional dependencies with closure and minimal cover operations. + * This class implements standard algorithms for functional dependency reasoning. + */ +public class FunctionalDependencySet { + private final Set<FunctionalDependency> fdSet = new HashSet<>(); + + public FunctionalDependencySet() {} + + public FunctionalDependencySet(Set<FunctionalDependency> fds) { + this.fdSet.addAll(fds); + } + + public void addFD(FunctionalDependency fd) { + if (!fd.isTrivial()) { + fdSet.add(fd); + } + } + + public void addFD(ImmutableBitSet determinants, ImmutableBitSet dependents) { + addFD(new FunctionalDependency(determinants, dependents)); + } + + public void addFD(int determinant, int dependent) { + addFD(ImmutableBitSet.of(determinant), ImmutableBitSet.of(dependent)); + } + + public void removeFD(FunctionalDependency fd) { + fdSet.remove(fd); + } + + public Set<FunctionalDependency> getFDs() { + return Collections.unmodifiableSet(fdSet); + } + + public boolean isEmpty() { + return fdSet.isEmpty(); + } + + public int size() { + return fdSet.size(); + } + + /** + * Computes the closure of a set of attributes under this functional dependency set. + * The closure of X, denoted X+, is the set of all attributes that can be functionally + * determined by X using the functional dependencies in this set and Armstrong's axioms. + * + * @param attrs the input attribute set + * @return the closure of the input attributes + */ + public ImmutableBitSet closure(ImmutableBitSet attrs) { + ImmutableBitSet closure = attrs; + boolean changed; + do { + changed = false; + for (FunctionalDependency fd : fdSet) { + if (closure.contains(fd.getDeterminants())) { + ImmutableBitSet newClosure = closure.union(fd.getDependents()); + if (!newClosure.equals(closure)) { + closure = newClosure; + changed = true; + } + } + } + } while (changed); + return closure; + } + + /** + * Check if X determined Y is implied by this FD set. + */ + public boolean implies(ImmutableBitSet determinants, ImmutableBitSet dependents) { + return closure(determinants).contains(dependents); + } + + /** + * Check if a single column is functionally determined by another column. + */ + public boolean determines(int determinant, int dependent) { + return closure(ImmutableBitSet.of(determinant)).get(dependent); + } + + /** + * Compute the minimal cover of this functional dependency set. + * Returns an equivalent set with minimal dependencies. + */ + public FunctionalDependencySet minimalCover() { + // Split multi-attribute right sides into single attributes + Set<FunctionalDependency> splitFDs = new HashSet<>(); + for (FunctionalDependency fd : fdSet) { + splitFDs.addAll(fd.split()); + } + splitFDs.removeIf(FunctionalDependency::isTrivial); + + // Remove redundant attributes from left sides + Set<FunctionalDependency> reducedFDs = new HashSet<>(); + for (FunctionalDependency fd : splitFDs) { + FunctionalDependencySet tempSet = new FunctionalDependencySet(splitFDs); + tempSet.removeFD(fd); + reducedFDs.add(reduceLeft(fd, tempSet)); + } + + // Remove redundant functional dependencies + reducedFDs.removeIf(fd -> { + FunctionalDependencySet remainingFDs = new FunctionalDependencySet(reducedFDs); + remainingFDs.removeFD(fd); + return remainingFDs.implies(fd.getDeterminants(), fd.getDependents()); + }); + + return new FunctionalDependencySet(reducedFDs); + } + + /** + * Reduce left side by removing redundant columns from determinants. + */ + private static FunctionalDependency reduceLeft(FunctionalDependency fd, + FunctionalDependencySet fdSet) { + ImmutableBitSet determinants = fd.getDeterminants(); + ImmutableBitSet dependents = fd.getDependents(); + + // Try removing each attribute to find minimal determinant set + for (int attr : fd.getDeterminants()) { + ImmutableBitSet reduced = determinants.clear(attr); + if (fdSet.closure(reduced).contains(dependents)) { Review Comment: if the closure is expensive, this is expensive isn't there a cheaper algorithm? ########## core/src/main/java/org/apache/calcite/rel/metadata/FunctionalDependency.java: ########## @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import org.checkerframework.checker.nullness.qual.Nullable; + +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; + +import static java.util.Objects.requireNonNull; + +/** + * Represents a functional dependency X determines Y in relational database theory. + * + * <p>A functional dependency X determines Y holds in a relation R if and only if: + * for any two tuples t1 and t2 in R, if t1[X] = t2[X], then t1[Y] = t2[Y]. + * + * <p>In other words, the values of attributes X uniquely determine the values + * of attributes Y. X is called the determinant and Y is called the dependent. Review Comment: This implies that if X determines Y, X determines any superset of Y. This would be probably good to add here. ########## core/src/main/java/org/apache/calcite/rel/metadata/FunctionalDependencySet.java: ########## @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import com.google.common.collect.ImmutableSet; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** + * A set of functional dependencies with closure and minimal cover operations. + * This class implements standard algorithms for functional dependency reasoning. + */ +public class FunctionalDependencySet { + private final Set<FunctionalDependency> fdSet = new HashSet<>(); + + public FunctionalDependencySet() {} + + public FunctionalDependencySet(Set<FunctionalDependency> fds) { + this.fdSet.addAll(fds); + } + + public void addFD(FunctionalDependency fd) { + if (!fd.isTrivial()) { + fdSet.add(fd); + } + } + + public void addFD(ImmutableBitSet determinants, ImmutableBitSet dependents) { + addFD(new FunctionalDependency(determinants, dependents)); + } + + public void addFD(int determinant, int dependent) { + addFD(ImmutableBitSet.of(determinant), ImmutableBitSet.of(dependent)); + } + + public void removeFD(FunctionalDependency fd) { + fdSet.remove(fd); + } + + public Set<FunctionalDependency> getFDs() { + return Collections.unmodifiableSet(fdSet); + } + + public boolean isEmpty() { + return fdSet.isEmpty(); + } + + public int size() { + return fdSet.size(); + } + + /** + * Computes the closure of a set of attributes under this functional dependency set. + * The closure of X, denoted X+, is the set of all attributes that can be functionally + * determined by X using the functional dependencies in this set and Armstrong's axioms. + * + * @param attrs the input attribute set + * @return the closure of the input attributes + */ + public ImmutableBitSet closure(ImmutableBitSet attrs) { + ImmutableBitSet closure = attrs; + boolean changed; + do { + changed = false; + for (FunctionalDependency fd : fdSet) { + if (closure.contains(fd.getDeterminants())) { + ImmutableBitSet newClosure = closure.union(fd.getDependents()); + if (!newClosure.equals(closure)) { + closure = newClosure; + changed = true; + } + } + } + } while (changed); + return closure; + } + + /** + * Check if X determined Y is implied by this FD set. Review Comment: determines? ########## core/src/test/java/org/apache/calcite/rel/metadata/FunctionalDependencyTest.java: ########## @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + +/** + * Tests for {@link FunctionalDependency} and {@link FunctionalDependencySet}. + */ +public class FunctionalDependencyTest { + + @Test void testFunctionalDependencyBasic() { + // Test FD creation and basic properties + FunctionalDependency fd = FunctionalDependency.of(new int[]{0}, new int[]{1}); + assertThat(fd.getDeterminants(), equalTo(ImmutableBitSet.of(0))); + assertThat(fd.getDependents(), equalTo(ImmutableBitSet.of(1))); + assertThat(fd.isTrivial(), is(false)); + + // Test trivial FD + FunctionalDependency trivialFd = FunctionalDependency.of(new int[]{0, 1}, new int[]{0}); + assertThat(trivialFd.isTrivial(), is(true)); + } + + @Test void testFunctionalDependencySet() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Add some FDs: A -> B, B -> C Review Comment: more like 0 -> 1 than A -> B ########## core/src/test/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependencyIntegrationTest.java: ########## @@ -0,0 +1,527 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.rules.CoreRules; +import org.apache.calcite.test.SqlToRelTestBase; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.ImmutableIntList; + +import com.google.common.collect.ImmutableList; + +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +/** + * Tests for functional dependency metadata in RelMdFunctionalDependency. + */ +public class RelMdFunctionalDependencyIntegrationTest extends SqlToRelTestBase { + + @Test void testFunctionalDependencyProject() { Review Comment: I believe some of these tests were moved from another file. ########## core/src/test/java/org/apache/calcite/rel/metadata/FunctionalDependencyTest.java: ########## @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + +/** + * Tests for {@link FunctionalDependency} and {@link FunctionalDependencySet}. + */ +public class FunctionalDependencyTest { + + @Test void testFunctionalDependencyBasic() { + // Test FD creation and basic properties + FunctionalDependency fd = FunctionalDependency.of(new int[]{0}, new int[]{1}); + assertThat(fd.getDeterminants(), equalTo(ImmutableBitSet.of(0))); + assertThat(fd.getDependents(), equalTo(ImmutableBitSet.of(1))); + assertThat(fd.isTrivial(), is(false)); + + // Test trivial FD + FunctionalDependency trivialFd = FunctionalDependency.of(new int[]{0, 1}, new int[]{0}); + assertThat(trivialFd.isTrivial(), is(true)); + } + + @Test void testFunctionalDependencySet() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Add some FDs: A -> B, B -> C + fdSet.addFD(0, 1); // A -> B + fdSet.addFD(1, 2); // B -> C + + // Test closure: {A}+ should include A, B, C + ImmutableBitSet closure = fdSet.closure(ImmutableBitSet.of(0)); + assertThat(closure.get(0), is(true)); // A + assertThat(closure.get(1), is(true)); // B (A -> B) + assertThat(closure.get(2), is(true)); // C (A -> B, B -> C, so A -> C by transitivity) + + // Test determines + assertThat(fdSet.determines(0, 1), is(true)); // A -> B (direct) + assertThat(fdSet.determines(0, 2), is(true)); // A -> C (transitive) + assertThat(fdSet.determines(2, 0), is(false)); // C does not determine A + } + + @Test void testMinimalCover() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Add redundant FDs + fdSet.addFD(ImmutableBitSet.of(0), ImmutableBitSet.of(1)); // A -> B + fdSet.addFD(ImmutableBitSet.of(1), ImmutableBitSet.of(2)); // B -> C + fdSet.addFD(ImmutableBitSet.of(0), ImmutableBitSet.of(2)); // A -> C + + FunctionalDependencySet minimal = fdSet.minimalCover(); + + // The minimal cover should not contain A -> C since it's implied by A -> B and B -> C + assertThat( + minimal.implies(ImmutableBitSet.of(0), + ImmutableBitSet.of(1)), is(true)); // A -> B + assertThat( + minimal.implies(ImmutableBitSet.of(1), + ImmutableBitSet.of(2)), is(true)); // B -> C + assertThat( + minimal.implies(ImmutableBitSet.of(0), + ImmutableBitSet.of(2)), is(true)); // A -> C (derived) + + // Should be equivalent to original + assertThat(fdSet.equalTo(minimal), is(true)); + } + + @Test void testKeyFinding() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Schema: A, B, C, D with FDs: A -> B, BC -> D + fdSet.addFD(0, 1); // A -> B + fdSet.addFD(ImmutableBitSet.of(1, 2), ImmutableBitSet.of(3)); // BC -> D + + ImmutableBitSet allAttributes = ImmutableBitSet.of(0, 1, 2, 3); + Set<ImmutableBitSet> keys = fdSet.findKeys(allAttributes); + + // AC should be a key: A -> B, and BC -> D, so AC -> ABCD + assertThat(keys, containsInAnyOrder(ImmutableBitSet.of(0, 2))); + + // Verify it's actually a key + assertThat(fdSet.isKey(ImmutableBitSet.of(0, 2), allAttributes), is(true)); + + // Verify non-keys + assertThat(fdSet.isKey(ImmutableBitSet.of(0), allAttributes), + is(false)); // A alone is not a key + assertThat(fdSet.isKey(ImmutableBitSet.of(2), allAttributes), + is(false)); // C alone is not a key + } + + @Test void testSplit() { + FunctionalDependency fd = FunctionalDependency.of(new int[]{0}, new int[]{1, 2, 3}); + Set<FunctionalDependency> split = fd.split(); + + assertThat(split, hasSize(3)); + assertThat( + split, + containsInAnyOrder(FunctionalDependency.of(0, 1), + FunctionalDependency.of(0, 2), + FunctionalDependency.of(0, 3))); + } + + @Test void testEquivalence() { + FunctionalDependencySet fdSet1 = new FunctionalDependencySet(); + fdSet1.addFD(0, 1); // A -> B + fdSet1.addFD(1, 2); // B -> C + + FunctionalDependencySet fdSet2 = new FunctionalDependencySet(); + fdSet2.addFD(0, 1); // A -> B + fdSet2.addFD(1, 2); // B -> C + fdSet2.addFD(0, 2); // A -> C (redundant) + + // Should be equivalent despite fdSet2 having a redundant FD + assertThat(fdSet1.equalTo(fdSet2), is(true)); + assertThat(fdSet2.equalTo(fdSet1), is(true)); + } + + @Test void testUnion() { + FunctionalDependencySet fdSet1 = new FunctionalDependencySet(); + fdSet1.addFD(0, 1); // A -> B + + FunctionalDependencySet fdSet2 = new FunctionalDependencySet(); + fdSet2.addFD(1, 2); // B -> C + + FunctionalDependencySet union = fdSet1.union(fdSet2); + + assertThat(union.determines(0, 1), is(true)); // A -> B + assertThat(union.determines(1, 2), is(true)); // B -> C + assertThat(union.determines(0, 2), is(true)); // A -> C (transitive) + } + + @Test void testMultipleCandidateKeys() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Schema: StudentID(0), Email(1), CourseID(2), Grade(3) + // FDs: StudentID <-> Email (bidirectional unique mapping) + // {StudentID, CourseID} -> Grade + // {Email, CourseID} -> Grade + fdSet.addFD(0, 1); // StudentID -> Email + fdSet.addFD(1, 0); // Email -> StudentID + fdSet.addFD(ImmutableBitSet.of(0, 2), + ImmutableBitSet.of(3)); // {StudentID, CourseID} -> Grade + fdSet.addFD(ImmutableBitSet.of(1, 2), + ImmutableBitSet.of(3)); // {Email, CourseID} -> Grade + + ImmutableBitSet allAttributes = ImmutableBitSet.of(0, 1, 2, 3); + Set<ImmutableBitSet> keys = fdSet.findKeys(allAttributes); + + // Should have two candidate keys + assertThat(keys, hasSize(2)); + + // Both {StudentID, CourseID} and {Email, CourseID} should be candidate keys + assertThat( + keys, + containsInAnyOrder(ImmutableBitSet.of(0, 2), + ImmutableBitSet.of(1, 2))); + + // Verify both are actually keys + assertThat(fdSet.isKey(ImmutableBitSet.of(0, 2), allAttributes), is(true)); + assertThat(fdSet.isKey(ImmutableBitSet.of(1, 2), allAttributes), is(true)); + + // Verify that individual attributes are not keys + assertThat(fdSet.isKey(ImmutableBitSet.of(0), allAttributes), + is(false)); // StudentID alone + assertThat(fdSet.isKey(ImmutableBitSet.of(1), allAttributes), + is(false)); // Email alone + assertThat(fdSet.isKey(ImmutableBitSet.of(2), allAttributes), + is(false)); // CourseID alone + assertThat(fdSet.isKey(ImmutableBitSet.of(3), allAttributes), + is(false)); // Grade alone + + // Verify superkeys (should not be minimal keys) + assertThat(fdSet.isSuperkey(ImmutableBitSet.of(0, 1, 2), allAttributes), + is(true)); // Contains both candidate keys + assertThat(fdSet.isKey(ImmutableBitSet.of(0, 1, 2), allAttributes), + is(false)); // Not minimal + } + + @Test void testProjectFunctionalDependencies() { Review Comment: how about a test with 100 columns in a relation with a key, to see whether any of the functions blows up in cost for this case? ########## core/src/main/java/org/apache/calcite/rel/metadata/FunctionalDependency.java: ########## @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import org.checkerframework.checker.nullness.qual.Nullable; + +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; + +import static java.util.Objects.requireNonNull; + +/** + * Represents a functional dependency X determines Y in relational database theory. Review Comment: I would say here upfront that X and Y are sets of attributes (columns). ########## core/src/test/java/org/apache/calcite/rel/metadata/FunctionalDependencyTest.java: ########## @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.util.ImmutableBitSet; + +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + +/** + * Tests for {@link FunctionalDependency} and {@link FunctionalDependencySet}. + */ +public class FunctionalDependencyTest { + + @Test void testFunctionalDependencyBasic() { + // Test FD creation and basic properties + FunctionalDependency fd = FunctionalDependency.of(new int[]{0}, new int[]{1}); + assertThat(fd.getDeterminants(), equalTo(ImmutableBitSet.of(0))); + assertThat(fd.getDependents(), equalTo(ImmutableBitSet.of(1))); + assertThat(fd.isTrivial(), is(false)); + + // Test trivial FD + FunctionalDependency trivialFd = FunctionalDependency.of(new int[]{0, 1}, new int[]{0}); + assertThat(trivialFd.isTrivial(), is(true)); + } + + @Test void testFunctionalDependencySet() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Add some FDs: A -> B, B -> C + fdSet.addFD(0, 1); // A -> B + fdSet.addFD(1, 2); // B -> C + + // Test closure: {A}+ should include A, B, C + ImmutableBitSet closure = fdSet.closure(ImmutableBitSet.of(0)); + assertThat(closure.get(0), is(true)); // A + assertThat(closure.get(1), is(true)); // B (A -> B) + assertThat(closure.get(2), is(true)); // C (A -> B, B -> C, so A -> C by transitivity) + + // Test determines + assertThat(fdSet.determines(0, 1), is(true)); // A -> B (direct) + assertThat(fdSet.determines(0, 2), is(true)); // A -> C (transitive) + assertThat(fdSet.determines(2, 0), is(false)); // C does not determine A + } + + @Test void testMinimalCover() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Add redundant FDs + fdSet.addFD(ImmutableBitSet.of(0), ImmutableBitSet.of(1)); // A -> B + fdSet.addFD(ImmutableBitSet.of(1), ImmutableBitSet.of(2)); // B -> C + fdSet.addFD(ImmutableBitSet.of(0), ImmutableBitSet.of(2)); // A -> C + + FunctionalDependencySet minimal = fdSet.minimalCover(); + + // The minimal cover should not contain A -> C since it's implied by A -> B and B -> C + assertThat( + minimal.implies(ImmutableBitSet.of(0), + ImmutableBitSet.of(1)), is(true)); // A -> B + assertThat( + minimal.implies(ImmutableBitSet.of(1), + ImmutableBitSet.of(2)), is(true)); // B -> C + assertThat( + minimal.implies(ImmutableBitSet.of(0), + ImmutableBitSet.of(2)), is(true)); // A -> C (derived) + + // Should be equivalent to original + assertThat(fdSet.equalTo(minimal), is(true)); + } + + @Test void testKeyFinding() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Schema: A, B, C, D with FDs: A -> B, BC -> D + fdSet.addFD(0, 1); // A -> B + fdSet.addFD(ImmutableBitSet.of(1, 2), ImmutableBitSet.of(3)); // BC -> D + + ImmutableBitSet allAttributes = ImmutableBitSet.of(0, 1, 2, 3); + Set<ImmutableBitSet> keys = fdSet.findKeys(allAttributes); + + // AC should be a key: A -> B, and BC -> D, so AC -> ABCD + assertThat(keys, containsInAnyOrder(ImmutableBitSet.of(0, 2))); + + // Verify it's actually a key + assertThat(fdSet.isKey(ImmutableBitSet.of(0, 2), allAttributes), is(true)); + + // Verify non-keys + assertThat(fdSet.isKey(ImmutableBitSet.of(0), allAttributes), + is(false)); // A alone is not a key + assertThat(fdSet.isKey(ImmutableBitSet.of(2), allAttributes), + is(false)); // C alone is not a key + } + + @Test void testSplit() { + FunctionalDependency fd = FunctionalDependency.of(new int[]{0}, new int[]{1, 2, 3}); + Set<FunctionalDependency> split = fd.split(); + + assertThat(split, hasSize(3)); + assertThat( + split, + containsInAnyOrder(FunctionalDependency.of(0, 1), + FunctionalDependency.of(0, 2), + FunctionalDependency.of(0, 3))); + } + + @Test void testEquivalence() { + FunctionalDependencySet fdSet1 = new FunctionalDependencySet(); + fdSet1.addFD(0, 1); // A -> B + fdSet1.addFD(1, 2); // B -> C + + FunctionalDependencySet fdSet2 = new FunctionalDependencySet(); + fdSet2.addFD(0, 1); // A -> B + fdSet2.addFD(1, 2); // B -> C + fdSet2.addFD(0, 2); // A -> C (redundant) + + // Should be equivalent despite fdSet2 having a redundant FD + assertThat(fdSet1.equalTo(fdSet2), is(true)); + assertThat(fdSet2.equalTo(fdSet1), is(true)); + } + + @Test void testUnion() { + FunctionalDependencySet fdSet1 = new FunctionalDependencySet(); + fdSet1.addFD(0, 1); // A -> B + + FunctionalDependencySet fdSet2 = new FunctionalDependencySet(); + fdSet2.addFD(1, 2); // B -> C + + FunctionalDependencySet union = fdSet1.union(fdSet2); + + assertThat(union.determines(0, 1), is(true)); // A -> B + assertThat(union.determines(1, 2), is(true)); // B -> C + assertThat(union.determines(0, 2), is(true)); // A -> C (transitive) + } + + @Test void testMultipleCandidateKeys() { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + // Schema: StudentID(0), Email(1), CourseID(2), Grade(3) Review Comment: If you really want to write this nicely you can define constants StudentID = 0, Email = 1, etc. ########## core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java: ########## @@ -64,212 +76,445 @@ protected RelMdFunctionalDependency() {} return BuiltInMetadata.FunctionalDependency.DEF; } + /** + * Determines if column is functionally dependent on key for a given rel node. + */ public @Nullable Boolean determines(RelNode rel, RelMetadataQuery mq, int key, int column) { - return determinesImpl2(rel, mq, key, column); + return determinesSet(rel, mq, ImmutableBitSet.of(key), ImmutableBitSet.of(column)); } - public @Nullable Boolean determines(SetOp rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Determines if a set of columns functionally determines another set of columns. + */ + public Boolean determinesSet(RelNode rel, RelMetadataQuery mq, + ImmutableBitSet determinants, ImmutableBitSet dependents) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.implies(determinants, dependents); } - public @Nullable Boolean determines(Join rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Returns the closure of a set of columns under all functional dependencies. + */ + public ImmutableBitSet closure(RelNode rel, RelMetadataQuery mq, ImmutableBitSet attrs) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.closure(attrs); } - public @Nullable Boolean determines(Correlate rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Returns candidate keys for the relation within the specified set of attributes. + */ + public Set<ImmutableBitSet> candidateKeys( + RelNode rel, RelMetadataQuery mq, ImmutableBitSet attributes) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.findKeys(attributes); } - public @Nullable Boolean determines(Aggregate rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); + /** + * Main dispatch method for getFunctionalDependencies. + * Routes to appropriate handler based on RelNode type. + */ + public FunctionalDependencySet getFunctionalDependencies(RelNode rel, RelMetadataQuery mq) { + if (rel instanceof TableScan) { + return getTableScanFD((TableScan) rel); + } else if (rel instanceof Project) { + return getProjectFD((Project) rel, mq); + } else if (rel instanceof Aggregate) { + return getAggregateFD((Aggregate) rel, mq); + } else if (rel instanceof Join) { + return getJoinFD((Join) rel, mq); + } else if (rel instanceof Calc) { + return getCalcFD((Calc) rel, mq); + } else if (rel instanceof SetOp) { + // TODO: Handle UNION, INTERSECT, EXCEPT functional dependencies + return new FunctionalDependencySet(); + } else if (rel instanceof Correlate) { + // TODO: Handle CORRELATE functional dependencies + return new FunctionalDependencySet(); + } + return getFD(rel.getInputs(), mq); } - public @Nullable Boolean determines(Calc rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); + private static FunctionalDependencySet getFD(List<RelNode> inputs, RelMetadataQuery mq) { + FunctionalDependencySet result = new FunctionalDependencySet(); + for (RelNode input : inputs) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(input); + result = result.union(fdSet); Review Comment: This looks weird to me - what if the inputs have different schemas? ########## core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java: ########## @@ -64,212 +76,445 @@ protected RelMdFunctionalDependency() {} return BuiltInMetadata.FunctionalDependency.DEF; } + /** + * Determines if column is functionally dependent on key for a given rel node. + */ public @Nullable Boolean determines(RelNode rel, RelMetadataQuery mq, int key, int column) { - return determinesImpl2(rel, mq, key, column); + return determinesSet(rel, mq, ImmutableBitSet.of(key), ImmutableBitSet.of(column)); } - public @Nullable Boolean determines(SetOp rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Determines if a set of columns functionally determines another set of columns. + */ + public Boolean determinesSet(RelNode rel, RelMetadataQuery mq, + ImmutableBitSet determinants, ImmutableBitSet dependents) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.implies(determinants, dependents); } - public @Nullable Boolean determines(Join rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Returns the closure of a set of columns under all functional dependencies. + */ + public ImmutableBitSet closure(RelNode rel, RelMetadataQuery mq, ImmutableBitSet attrs) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.closure(attrs); } - public @Nullable Boolean determines(Correlate rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Returns candidate keys for the relation within the specified set of attributes. + */ + public Set<ImmutableBitSet> candidateKeys( + RelNode rel, RelMetadataQuery mq, ImmutableBitSet attributes) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.findKeys(attributes); } - public @Nullable Boolean determines(Aggregate rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); + /** + * Main dispatch method for getFunctionalDependencies. + * Routes to appropriate handler based on RelNode type. Review Comment: so an empty set is always a conservative approximation ########## core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java: ########## @@ -64,212 +76,445 @@ protected RelMdFunctionalDependency() {} return BuiltInMetadata.FunctionalDependency.DEF; } + /** + * Determines if column is functionally dependent on key for a given rel node. + */ public @Nullable Boolean determines(RelNode rel, RelMetadataQuery mq, int key, int column) { - return determinesImpl2(rel, mq, key, column); + return determinesSet(rel, mq, ImmutableBitSet.of(key), ImmutableBitSet.of(column)); } - public @Nullable Boolean determines(SetOp rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Determines if a set of columns functionally determines another set of columns. + */ + public Boolean determinesSet(RelNode rel, RelMetadataQuery mq, + ImmutableBitSet determinants, ImmutableBitSet dependents) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.implies(determinants, dependents); } - public @Nullable Boolean determines(Join rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Returns the closure of a set of columns under all functional dependencies. + */ + public ImmutableBitSet closure(RelNode rel, RelMetadataQuery mq, ImmutableBitSet attrs) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.closure(attrs); } - public @Nullable Boolean determines(Correlate rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Returns candidate keys for the relation within the specified set of attributes. + */ + public Set<ImmutableBitSet> candidateKeys( + RelNode rel, RelMetadataQuery mq, ImmutableBitSet attributes) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.findKeys(attributes); } - public @Nullable Boolean determines(Aggregate rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); + /** + * Main dispatch method for getFunctionalDependencies. + * Routes to appropriate handler based on RelNode type. + */ + public FunctionalDependencySet getFunctionalDependencies(RelNode rel, RelMetadataQuery mq) { + if (rel instanceof TableScan) { + return getTableScanFD((TableScan) rel); + } else if (rel instanceof Project) { + return getProjectFD((Project) rel, mq); + } else if (rel instanceof Aggregate) { + return getAggregateFD((Aggregate) rel, mq); + } else if (rel instanceof Join) { + return getJoinFD((Join) rel, mq); + } else if (rel instanceof Calc) { + return getCalcFD((Calc) rel, mq); + } else if (rel instanceof SetOp) { + // TODO: Handle UNION, INTERSECT, EXCEPT functional dependencies + return new FunctionalDependencySet(); + } else if (rel instanceof Correlate) { + // TODO: Handle CORRELATE functional dependencies + return new FunctionalDependencySet(); + } + return getFD(rel.getInputs(), mq); } - public @Nullable Boolean determines(Calc rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); + private static FunctionalDependencySet getFD(List<RelNode> inputs, RelMetadataQuery mq) { + FunctionalDependencySet result = new FunctionalDependencySet(); + for (RelNode input : inputs) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(input); + result = result.union(fdSet); + } + return result; } - public @Nullable Boolean determines(Project rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); - } + private static FunctionalDependencySet getTableScanFD(TableScan rel) { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); - /** - * Checks if a column is functionally determined by a key column through expression analysis. - * - * @param rel The input relation - * @param mq Metadata query instance - * @param key Index of the determinant expression - * @param column Index of the dependent expression - * @return TRUE if column is determined by key, - * FALSE if not determined, - * NULL if undetermined - */ - private static @Nullable Boolean determinesImpl(RelNode rel, RelMetadataQuery mq, - int key, int column) { - if (preCheck(rel, key, column)) { - return true; + RelOptTable table = rel.getTable(); + List<ImmutableBitSet> keys = table.getKeys(); + if (keys == null || keys.isEmpty()) { + return fdSet; } - ImmutableBitSet keyInputIndices = null; - ImmutableBitSet columnInputIndices = null; - if (rel instanceof Project || rel instanceof Calc) { - List<RexNode> exprs = null; - if (rel instanceof Project) { - Project project = (Project) rel; - exprs = project.getProjects(); - } else { - Calc calc = (Calc) rel; - final RexProgram program = calc.getProgram(); - exprs = program.expandList(program.getProjectList()); + for (ImmutableBitSet key : keys) { + ImmutableBitSet allColumns = ImmutableBitSet.range(rel.getRowType().getFieldCount()); + ImmutableBitSet dependents = allColumns.except(key); + if (!dependents.isEmpty()) { + fdSet.addFD(key, dependents); } + } - // TODO: Supports dependency analysis for all types of expressions - if (!(exprs.get(column) instanceof RexInputRef)) { - return false; - } + return fdSet; + } - RexNode keyExpr = exprs.get(key); - RexNode columnExpr = exprs.get(column); + private static FunctionalDependencySet getProjectFD(Project rel, RelMetadataQuery mq) { + return getProjectionFD(rel.getInput(), rel.getProjects(), mq); + } - // Identical expressions imply functional dependency - if (keyExpr.equals(columnExpr)) { - return true; + /** + * Common method to compute functional dependencies for projection operations. + * Used by both Project and Calc nodes. + * + * @param input the input relation + * @param projections the list of projection expressions + * @param mq the metadata query + * @return the functional dependency set for the projection + */ + private static FunctionalDependencySet getProjectionFD( + RelNode input, List<RexNode> projections, RelMetadataQuery mq) { + FunctionalDependencySet inputFdSet = mq.getFunctionalDependencies(input); + FunctionalDependencySet projectionFdSet = new FunctionalDependencySet(); + int fieldCount = projections.size(); + + // Create mapping from input column indices to project column indices + Mappings.TargetMapping inputToOutputMap = + RelOptUtil.permutation(projections, input.getRowType()); + + // Map input functional dependencies to project dependencies + mapInputFDs(inputFdSet, inputToOutputMap, projectionFdSet); + + // For each pair of output columns, determine if one determines the other + for (int i = 0; i < fieldCount; i++) { + for (int j = i + 1; j < fieldCount; j++) { + RexNode expr1 = projections.get(i); + RexNode expr2 = projections.get(j); + + // Handle identical expressions, they determine each other + if (expr1.equals(expr2) && RexUtil.isDeterministic(expr1)) { + projectionFdSet.addFD(i, j); + projectionFdSet.addFD(j, i); + continue; + } + + // Handle literal constants, all columns determine literals + if (expr1 instanceof RexLiteral) { + projectionFdSet.addFD(j, i); + } + if (expr2 instanceof RexLiteral) { + projectionFdSet.addFD(i, j); + } + + // For complex expressions, check if they have functional dependencies + if (!(expr1 instanceof RexLiteral) && !(expr2 instanceof RexLiteral) + && RexUtil.isDeterministic(expr1) && RexUtil.isDeterministic(expr2)) { + ImmutableBitSet inputs1 = RelOptUtil.InputFinder.bits(expr1); + ImmutableBitSet inputs2 = RelOptUtil.InputFinder.bits(expr2); + + if (!inputs1.isEmpty() && !inputs2.isEmpty()) { + if (inputFdSet.implies(inputs1, inputs2)) { + projectionFdSet.addFD(i, j); + } + if (inputFdSet.implies(inputs2, inputs1)) { + projectionFdSet.addFD(j, i); + } + } + } } + } - keyInputIndices = extractDeterministicRefs(keyExpr); - columnInputIndices = extractDeterministicRefs(columnExpr); - } else if (rel instanceof Aggregate) { - Aggregate aggregate = (Aggregate) rel; + return projectionFdSet; + } - int groupByCnt = aggregate.getGroupCount(); - if (key < groupByCnt && column >= groupByCnt) { - return false; + /** + * Maps input functional dependencies to output dependencies based on column mapping. + */ + private static void mapInputFDs(FunctionalDependencySet inputFdSet, + Mappings.TargetMapping mapping, FunctionalDependencySet outputFdSet) { + for (FunctionalDependency inputFd : inputFdSet.getFDs()) { + ImmutableBitSet determinants = inputFd.getDeterminants(); + ImmutableBitSet dependents = inputFd.getDependents(); + + // Skip this FD if any determinant column is unmappable + boolean allMappable = + determinants.stream().allMatch(col -> col >= 0 + && col < mapping.getSourceCount() + && mapping.getTargetOpt(col) >= 0); + if (!allMappable) { + continue; } - keyInputIndices = extractDeterministicRefs(aggregate, key); - columnInputIndices = extractDeterministicRefs(aggregate, column); - } else { - throw new UnsupportedOperationException("Unsupported RelNode type: " - + rel.getClass().getSimpleName()); - } + // Map all determinant columns + ImmutableBitSet mappedDeterminants = mapAllCols(determinants, mapping); + if (mappedDeterminants.isEmpty()) { + continue; + } - // Early return if invalid cases - if (keyInputIndices.isEmpty() - || columnInputIndices.isEmpty()) { - return false; + // Map only the dependent columns that can be mapped + ImmutableBitSet mappedDependents = mapAvailableCols(dependents, mapping); + if (!mappedDependents.isEmpty()) { + outputFdSet.addFD(mappedDeterminants, mappedDependents); + } } + } - // Currently only supports multiple (keyInputIndices) to one (columnInputIndices) - // dependency detection - for (Integer keyRef : keyInputIndices) { - if (Boolean.FALSE.equals( - mq.determines(rel.getInput(0), keyRef, - columnInputIndices.nextSetBit(0)))) { - return false; + /** + * Maps all columns in the set. Returns empty set if any column cannot be mapped. + */ + private static ImmutableBitSet mapAllCols( + ImmutableBitSet columns, Mappings.TargetMapping mapping) { + ImmutableBitSet.Builder builder = ImmutableBitSet.builder(); + for (int col : columns) { + if (col < 0 || col >= mapping.getSourceCount()) { + return ImmutableBitSet.of(); + } + int mappedCol = mapping.getTargetOpt(col); + if (mappedCol >= 0) { + builder.set(mappedCol); + } else { + return ImmutableBitSet.of(); } } - - return true; + return builder.build(); } /** - * determinesImpl2is similar to determinesImpl, but it doesn't need to handle the - * mapping between output and input columns. + * Maps only the columns that can be mapped, ignoring unmappable ones. */ - private static @Nullable Boolean determinesImpl2(RelNode rel, RelMetadataQuery mq, - int key, int column) { - if (preCheck(rel, key, column)) { - return true; + private static ImmutableBitSet mapAvailableCols( + ImmutableBitSet columns, Mappings.TargetMapping mapping) { + ImmutableBitSet.Builder builder = ImmutableBitSet.builder(); + for (int col : columns) { + if (col < 0 || col >= mapping.getSourceCount()) { + continue; + } + int mappedCol = mapping.getTargetOpt(col); + if (mappedCol >= 0) { + builder.set(mappedCol); + } } + return builder.build(); + } - if (rel instanceof TableScan) { - TableScan tableScan = (TableScan) rel; - RelOptTable table = tableScan.getTable(); - List<ImmutableBitSet> keys = table.getKeys(); - return keys != null - && keys.size() == 1 - && keys.get(0).equals(ImmutableBitSet.of(column)); - } else if (rel instanceof Join) { - Join join = (Join) rel; - // TODO Considering column mapping based on equality conditions in join - int leftFieldCnt = join.getLeft().getRowType().getFieldCount(); - if (key < leftFieldCnt && column < leftFieldCnt) { - return mq.determines(join.getLeft(), key, column); - } else if (key >= leftFieldCnt && column >= leftFieldCnt) { - return mq.determines(join.getRight(), key - leftFieldCnt, column - leftFieldCnt); + private static FunctionalDependencySet getAggregateFD(Aggregate rel, RelMetadataQuery mq) { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + FunctionalDependencySet inputFdSet = mq.getFunctionalDependencies(rel.getInput()); + + // Group set columns in the output + ImmutableBitSet groupSet = rel.getGroupSet(); + + // 1. Preserve input FDs that only involve group columns + for (FunctionalDependency inputFd : inputFdSet.getFDs()) { + ImmutableBitSet determinants = inputFd.getDeterminants(); + ImmutableBitSet dependents = inputFd.getDependents(); + + // Only preserve if both determinants and dependents are within group columns + if (groupSet.contains(determinants) && groupSet.contains(dependents)) { + fdSet.addFD(determinants, dependents); } - return false; - } else if (rel instanceof Correlate) { - // TODO Support Correlate. - return false; - } else if (rel instanceof SetOp) { - // TODO Support SetOp - return false; } - return mq.determines(rel.getInput(0), key, column); + // 2. Group keys determine all aggregate columns + if (!groupSet.isEmpty() && !rel.getAggCallList().isEmpty()) { + for (int i = rel.getGroupCount(); i < rel.getRowType().getFieldCount(); i++) { + fdSet.addFD(groupSet, ImmutableBitSet.of(i)); + } + } + + return fdSet; + } + + private static FunctionalDependencySet getJoinFD(Join rel, RelMetadataQuery mq) { + FunctionalDependencySet leftFdSet = mq.getFunctionalDependencies(rel.getLeft()); + FunctionalDependencySet rightFdSet = mq.getFunctionalDependencies(rel.getRight()); + + int leftFieldCount = rel.getLeft().getRowType().getFieldCount(); + JoinRelType joinType = rel.getJoinType(); + + switch (joinType) { + case INNER: + // Inner join: preserve all FDs and derive cross-table dependencies + FunctionalDependencySet innerJoinFdSet + = leftFdSet.union(shiftFdSet(rightFdSet, leftFieldCount)); + deriveTransitiveFDs(rel, innerJoinFdSet, leftFieldCount); + return innerJoinFdSet; + case LEFT: + // Left join: preserve left FDs, right FDs may be invalidated by NULLs + FunctionalDependencySet leftJoinFdSet = new FunctionalDependencySet(leftFdSet.getFDs()); + deriveTransitiveFDs(rel, leftJoinFdSet, leftFieldCount); + return leftJoinFdSet; + case RIGHT: { + // Right join: preserve right FDs, left FDs may be invalidated by NULLs + // 只保留右表字段决定右表字段的 FD,不做任何跨表 transitive 推导 Review Comment: I think the convention is to stick to English comments. ########## core/src/test/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependencyIntegrationTest.java: ########## @@ -0,0 +1,527 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.rules.CoreRules; +import org.apache.calcite.test.SqlToRelTestBase; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.ImmutableIntList; + +import com.google.common.collect.ImmutableList; + +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +/** + * Tests for functional dependency metadata in RelMdFunctionalDependency. + */ +public class RelMdFunctionalDependencyIntegrationTest extends SqlToRelTestBase { + + @Test void testFunctionalDependencyProject() { + final String sql = "select empno, deptno, deptno + 1 as deptNoPlus1, rand() as rand" + + " from emp where deptno < 20"; + + // Plan is: + // LogicalProject(EMPNO=[$0], DEPTNO=[$7], DEPTNOPLUS1=[+($7, 1)], RAND=[RAND()]) + // LogicalFilter(condition=[<($7, 20)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno - primary key + int deptNo = 1; // deptno + int deptNoPlus1 = 2; // deptno + 1 + int rand = 3; // rand() + + // deptno + 1 should determine deptno (can derive deptno from deptno+1) + assertThat(mq.determines(relNode, deptNoPlus1, deptNo), is(Boolean.TRUE)); + + // deptno should determine deptno + 1 (expression is deterministic) + assertThat(mq.determines(relNode, deptNo, deptNoPlus1), is(Boolean.TRUE)); + + // deptno should NOT determine empno (deptno is not unique) + assertThat(mq.determines(relNode, deptNo, empNo), is(Boolean.FALSE)); + + // empno should determine deptno + 1 (primary key determines everything) + assertThat(mq.determines(relNode, empNo, deptNoPlus1), is(Boolean.TRUE)); + + // deptno + 1 should NOT determine empno (deptno + 1 is not unique) + assertThat(mq.determines(relNode, deptNoPlus1, empNo), is(Boolean.FALSE)); + + // rand() should not determine anything (non-deterministic) + assertThat(mq.determines(relNode, rand, empNo), is(Boolean.FALSE)); + + // Nothing should determine rand() (non-deterministic) + assertThat(mq.determines(relNode, empNo, rand), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyAggregate() { + final String sql = "select deptno, count(*) as \"count\", sum(sal) as sumSal" + + " from emp group by deptno"; + + // Plan is: + // LogicalAggregate(group=[{0}], count=[COUNT()], SUMSAL=[SUM($1)]) + // LogicalProject(DEPTNO=[$7], SAL=[$5]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int deptNo = 0; // deptno (group by column) + int count = 1; // count(*) + int sumSal = 2; // sum(sal) + + // Group by column should determine aggregate columns + assertThat(mq.determines(relNode, deptNo, count), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, deptNo, sumSal), is(Boolean.TRUE)); + + // Aggregate columns should not determine group by column + assertThat(mq.determines(relNode, count, deptNo), is(Boolean.FALSE)); + assertThat(mq.determines(relNode, sumSal, deptNo), is(Boolean.FALSE)); + + // Aggregate columns should not determine each other + assertThat(mq.determines(relNode, count, sumSal), is(Boolean.FALSE)); + assertThat(mq.determines(relNode, sumSal, count), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyWithIdenticalExpressions() { + final String sql = "select deptno, deptno as deptno2, deptno + 1 as deptno3 from emp"; + + // Plan is: + // LogicalProject(DEPTNO=[$7], DEPTNO2=[$7], DEPTNO3=[+($7, 1)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int deptNo = 0; // deptno + int deptNo2 = 1; // deptno (identical) + int deptNo3 = 2; // deptno + 1 + + // Identical expressions should determine each other + assertThat(mq.determines(relNode, deptNo, deptNo2), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, deptNo2, deptNo), is(Boolean.TRUE)); + + // deptno should determine deptno + 1 (deterministic expression) + assertThat(mq.determines(relNode, deptNo, deptNo3), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, deptNo2, deptNo3), is(Boolean.TRUE)); + + // deptno + 1 should determine deptno (can derive original from increment) + assertThat(mq.determines(relNode, deptNo3, deptNo), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, deptNo3, deptNo2), is(Boolean.TRUE)); + } + + @Test void testFunctionalDependencyJoin() { + // Test inner join with emp and dept tables + final String sql = "select e.empno, e.ename, e.deptno, d.name" + + " from emp e join dept d on e.deptno = d.deptno"; + + // Plan is: + // LogicalProject(EMPNO=[$0], ENAME=[$1], DEPTNO=[$7], NAME=[$10]) + // LogicalJoin(condition=[=($7, $9)], joinType=[inner]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + // LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // e.empno + int ename = 1; // e.ename + int deptno = 2; // e.deptno + int dname = 3; // d.name + + // Left side functional dependencies should be preserved + // empno determines ename and deptno (primary key from emp table) + assertThat(mq.determines(relNode, empNo, ename), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + + // Right side functional dependencies should be preserved with shifted indices + // deptno determines dname (primary key from dept table) + assertThat(mq.determines(relNode, deptno, dname), is(Boolean.TRUE)); + + // Cross-table dependencies through join condition + // empno should determine dname (through deptno) + assertThat(mq.determines(relNode, empNo, dname), is(Boolean.TRUE)); + + // Reverse dependencies should not hold + assertThat(mq.determines(relNode, ename, empNo), is(Boolean.FALSE)); + assertThat(mq.determines(relNode, dname, empNo), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyFilter() { + final String sql = "select empno, ename, sal from emp where sal > 1000"; + + // Plan is: + // LogicalProject(EMPNO=[$0], ENAME=[$1], SAL=[$5]) + // LogicalFilter(condition=[>($5, 1000)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int ename = 1; // ename + int sal = 2; // sal + + // Filter should preserve functional dependencies from base table + // empno should still determine ename and sal + assertThat(mq.determines(relNode, empNo, ename), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, empNo, sal), is(Boolean.TRUE)); + + // sal should not determine empno + assertThat(mq.determines(relNode, sal, empNo), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyComplexExpressions() { + final String sql = "select empno, sal, sal * 1.1 as raised_sal, " + + "case when sal > 2000 then 'high' else 'low' end as sal_category" + + " from emp"; + + // Plan is: + // LogicalProject(EMPNO=[$0], SAL=[$5], RAISED_SAL=[*($5, 1.1:DECIMAL(2, 1))], + // SAL_CATEGORY=[CASE(>($5, 2000), 'high', 'low ')]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int sal = 1; // sal + int raisedSal = 2; // sal * 1.1 + int salCategory = 3; // case expression + + // empno determines all other columns (primary key) + assertThat(mq.determines(relNode, empNo, sal), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, empNo, raisedSal), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, empNo, salCategory), is(Boolean.TRUE)); + + // sal determines computed columns based on sal + assertThat(mq.determines(relNode, sal, raisedSal), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, sal, salCategory), is(Boolean.TRUE)); + + // Computed columns should also determine sal (deterministic computation) + assertThat(mq.determines(relNode, raisedSal, sal), is(Boolean.TRUE)); + } + + @Test void testFunctionalDependencyUnion() { + final String sql = "select empno, deptno from emp where deptno = 10" + + " union all " + + " select empno, deptno from emp where deptno = 20"; + + // Plan is: + // LogicalUnion(all=[true]) + // LogicalProject(EMPNO=[$0], DEPTNO=[$7]) + // LogicalFilter(condition=[=($7, 10)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + // LogicalProject(EMPNO=[$0], DEPTNO=[$7]) + // LogicalFilter(condition=[=($7, 20)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int deptno = 1; // deptno + + // Union handling is not yet fully implemented + // For now, expect this might not work + // assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + + // deptno should not determine empno + assertThat(mq.determines(relNode, deptno, empNo), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyMultipleKeys() { + final String sql = "select empno, deptno, empno * 100 + deptno as composite from emp"; + + // Plan is: + // LogicalProject(EMPNO=[$0], DEPTNO=[$7], COMPOSITE=[+(*($0, 100), $7)]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int deptno = 1; // deptno + int composite = 2; // empno * 100 + deptno + + // empno determines all + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + assertThat(mq.determines(relNode, empNo, composite), is(Boolean.TRUE)); + + // Multiple keys handling is not yet fully implemented + } + + @Test void testFunctionalDependencyConstants() { + final String sql = "select empno, 100 as constant_col, deptno" + + " from emp"; + + // Plan is: + // LogicalProject(EMPNO=[$0], CONSTANT_COL=[100], DEPTNO=[$7]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int constant = 1; // 100 (constant) + int deptno = 2; // deptno + + // Constants should be functionally dependent on everything + assertThat(mq.determines(relNode, empNo, constant), is(Boolean.TRUE)); + + // Original dependencies should be preserved + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + + // Everything determines constants + assertThat(mq.determines(relNode, deptno, constant), is(Boolean.TRUE)); + } + + @Test void testFunctionalDependencyNonDeterministicFunctions() { + final String sql = "select empno, rand() as random1, rand() as random2 from emp"; + + // Plan is: + // LogicalProject(EMPNO=[$0], RANDOM1=[RAND()], RANDOM2=[RAND()]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // empno + int random1 = 1; // random1 + int random2 = 2; // random2 + + // Basic functional dependencies should still work + assertThat(mq.determines(relNode, empNo, empNo), is(Boolean.TRUE)); + + // Non-deterministic functions should not determine anything + assertThat(mq.determines(relNode, random1, empNo), is(Boolean.FALSE)); + assertThat(mq.determines(relNode, random1, random2), is(Boolean.FALSE)); + assertThat(mq.determines(relNode, random2, empNo), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyLeftJoin() { + final String sql = "SELECT e.empno, e.deptno, d.name\n" + + "FROM emp e\n" + + "LEFT JOIN dept d ON e.deptno = d.deptno"; + + // Plan is: + // LogicalProject(EMPNO=[$0], DEPTNO=[$7], NAME=[$10]) + // LogicalJoin(condition=[=($7, $9)], joinType=[left]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + // LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // e.empno + int deptno = 1; // e.deptno + int dname = 2; // d.name + + // Left side dependencies should be preserved + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + + // Cross-join dependencies might not hold due to nulls from outer join + assertThat(mq.determines(relNode, empNo, dname), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyInnerJoin() { + final String sql = "SELECT e.empno, e.deptno, d.name\n" + + "FROM emp e\n" + + "JOIN dept d ON e.deptno = d.deptno"; + + // Plan is: + // LogicalProject(EMPNO=[$0], DEPTNO=[$7], NAME=[$10]) + // LogicalJoin(condition=[=($7, $9)], joinType=[inner]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + // LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // e.empno + int deptno = 1; // e.deptno + int dname = 2; // d.name + + // Left side dependencies should be preserved + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + + // Cross-join dependencies should be preserved in inner join + assertThat(mq.determines(relNode, empNo, dname), is(Boolean.TRUE)); + } + + @Test void testFunctionalDependencyRightJoin() { + final String sql = "SELECT e.empno, e.deptno, d.name\n" + + "FROM dept d\n" + + "RIGHT JOIN emp e ON e.deptno = d.deptno"; + + // Plan is: + // LogicalProject(EMPNO=[$2], DEPTNO=[$9], NAME=[$1]) + // LogicalJoin(condition=[=($9, $0)], joinType=[right]) + // LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + System.out.println(RelOptUtil.toString(relNode)); + + int empNo = 0; // e.empno + int deptno = 1; // e.deptno + int dname = 2; // d.name + + // Right side dependencies should be preserved + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.TRUE)); + + // Cross-join dependencies might not hold due to nulls from outer join + assertThat(mq.determines(relNode, empNo, dname), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencyFullOuterJoin() { + final String sql = "SELECT e.empno, e.deptno, d.name\n" + + "FROM emp e\n" + + "FULL JOIN dept d ON e.deptno = d.deptno"; + + // Plan is: + // LogicalProject(EMPNO=[$0], DEPTNO=[$7], NAME=[$10]) + // LogicalJoin(condition=[=($7, $9)], joinType=[full]) + // LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + // LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + + final RelNode relNode = sql(sql).toRel(); + final RelMetadataQuery mq = relNode.getCluster().getMetadataQuery(); + + int empNo = 0; // e.empno + int deptno = 1; // e.deptno + int dname = 2; // d.name + + // Left side dependencies should not be preserved + assertThat(mq.determines(relNode, empNo, deptno), is(Boolean.FALSE)); + + // Cross-join dependencies might not hold due to nulls from outer join + assertThat(mq.determines(relNode, empNo, dname), is(Boolean.FALSE)); + } + + @Test void testFunctionalDependencySemiJoin() { + final String sql = "select empno, deptno from emp\n" + + "intersect\n" + + "select deptno, deptno from dept\n"; + + // Plan is: Review Comment: I wonder whether this could be an assertion too. ########## core/src/main/java/org/apache/calcite/rel/metadata/RelMdFunctionalDependency.java: ########## @@ -64,212 +76,445 @@ protected RelMdFunctionalDependency() {} return BuiltInMetadata.FunctionalDependency.DEF; } + /** + * Determines if column is functionally dependent on key for a given rel node. + */ public @Nullable Boolean determines(RelNode rel, RelMetadataQuery mq, int key, int column) { - return determinesImpl2(rel, mq, key, column); + return determinesSet(rel, mq, ImmutableBitSet.of(key), ImmutableBitSet.of(column)); } - public @Nullable Boolean determines(SetOp rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Determines if a set of columns functionally determines another set of columns. + */ + public Boolean determinesSet(RelNode rel, RelMetadataQuery mq, + ImmutableBitSet determinants, ImmutableBitSet dependents) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.implies(determinants, dependents); } - public @Nullable Boolean determines(Join rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Returns the closure of a set of columns under all functional dependencies. + */ + public ImmutableBitSet closure(RelNode rel, RelMetadataQuery mq, ImmutableBitSet attrs) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.closure(attrs); } - public @Nullable Boolean determines(Correlate rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl2(rel, mq, key, column); + /** + * Returns candidate keys for the relation within the specified set of attributes. + */ + public Set<ImmutableBitSet> candidateKeys( + RelNode rel, RelMetadataQuery mq, ImmutableBitSet attributes) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(rel); + return fdSet.findKeys(attributes); } - public @Nullable Boolean determines(Aggregate rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); + /** + * Main dispatch method for getFunctionalDependencies. + * Routes to appropriate handler based on RelNode type. + */ + public FunctionalDependencySet getFunctionalDependencies(RelNode rel, RelMetadataQuery mq) { + if (rel instanceof TableScan) { + return getTableScanFD((TableScan) rel); + } else if (rel instanceof Project) { + return getProjectFD((Project) rel, mq); + } else if (rel instanceof Aggregate) { + return getAggregateFD((Aggregate) rel, mq); + } else if (rel instanceof Join) { + return getJoinFD((Join) rel, mq); + } else if (rel instanceof Calc) { + return getCalcFD((Calc) rel, mq); + } else if (rel instanceof SetOp) { + // TODO: Handle UNION, INTERSECT, EXCEPT functional dependencies + return new FunctionalDependencySet(); + } else if (rel instanceof Correlate) { + // TODO: Handle CORRELATE functional dependencies + return new FunctionalDependencySet(); + } + return getFD(rel.getInputs(), mq); } - public @Nullable Boolean determines(Calc rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); + private static FunctionalDependencySet getFD(List<RelNode> inputs, RelMetadataQuery mq) { + FunctionalDependencySet result = new FunctionalDependencySet(); + for (RelNode input : inputs) { + FunctionalDependencySet fdSet = mq.getFunctionalDependencies(input); + result = result.union(fdSet); + } + return result; } - public @Nullable Boolean determines(Project rel, RelMetadataQuery mq, - int key, int column) { - return determinesImpl(rel, mq, key, column); - } + private static FunctionalDependencySet getTableScanFD(TableScan rel) { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); - /** - * Checks if a column is functionally determined by a key column through expression analysis. - * - * @param rel The input relation - * @param mq Metadata query instance - * @param key Index of the determinant expression - * @param column Index of the dependent expression - * @return TRUE if column is determined by key, - * FALSE if not determined, - * NULL if undetermined - */ - private static @Nullable Boolean determinesImpl(RelNode rel, RelMetadataQuery mq, - int key, int column) { - if (preCheck(rel, key, column)) { - return true; + RelOptTable table = rel.getTable(); + List<ImmutableBitSet> keys = table.getKeys(); + if (keys == null || keys.isEmpty()) { + return fdSet; } - ImmutableBitSet keyInputIndices = null; - ImmutableBitSet columnInputIndices = null; - if (rel instanceof Project || rel instanceof Calc) { - List<RexNode> exprs = null; - if (rel instanceof Project) { - Project project = (Project) rel; - exprs = project.getProjects(); - } else { - Calc calc = (Calc) rel; - final RexProgram program = calc.getProgram(); - exprs = program.expandList(program.getProjectList()); + for (ImmutableBitSet key : keys) { + ImmutableBitSet allColumns = ImmutableBitSet.range(rel.getRowType().getFieldCount()); + ImmutableBitSet dependents = allColumns.except(key); + if (!dependents.isEmpty()) { + fdSet.addFD(key, dependents); } + } - // TODO: Supports dependency analysis for all types of expressions - if (!(exprs.get(column) instanceof RexInputRef)) { - return false; - } + return fdSet; + } - RexNode keyExpr = exprs.get(key); - RexNode columnExpr = exprs.get(column); + private static FunctionalDependencySet getProjectFD(Project rel, RelMetadataQuery mq) { + return getProjectionFD(rel.getInput(), rel.getProjects(), mq); + } - // Identical expressions imply functional dependency - if (keyExpr.equals(columnExpr)) { - return true; + /** + * Common method to compute functional dependencies for projection operations. + * Used by both Project and Calc nodes. + * + * @param input the input relation + * @param projections the list of projection expressions + * @param mq the metadata query + * @return the functional dependency set for the projection + */ + private static FunctionalDependencySet getProjectionFD( + RelNode input, List<RexNode> projections, RelMetadataQuery mq) { + FunctionalDependencySet inputFdSet = mq.getFunctionalDependencies(input); + FunctionalDependencySet projectionFdSet = new FunctionalDependencySet(); + int fieldCount = projections.size(); + + // Create mapping from input column indices to project column indices + Mappings.TargetMapping inputToOutputMap = + RelOptUtil.permutation(projections, input.getRowType()); + + // Map input functional dependencies to project dependencies + mapInputFDs(inputFdSet, inputToOutputMap, projectionFdSet); + + // For each pair of output columns, determine if one determines the other + for (int i = 0; i < fieldCount; i++) { + for (int j = i + 1; j < fieldCount; j++) { + RexNode expr1 = projections.get(i); + RexNode expr2 = projections.get(j); + + // Handle identical expressions, they determine each other + if (expr1.equals(expr2) && RexUtil.isDeterministic(expr1)) { + projectionFdSet.addFD(i, j); + projectionFdSet.addFD(j, i); + continue; + } + + // Handle literal constants, all columns determine literals + if (expr1 instanceof RexLiteral) { + projectionFdSet.addFD(j, i); + } + if (expr2 instanceof RexLiteral) { + projectionFdSet.addFD(i, j); + } + + // For complex expressions, check if they have functional dependencies + if (!(expr1 instanceof RexLiteral) && !(expr2 instanceof RexLiteral) + && RexUtil.isDeterministic(expr1) && RexUtil.isDeterministic(expr2)) { + ImmutableBitSet inputs1 = RelOptUtil.InputFinder.bits(expr1); + ImmutableBitSet inputs2 = RelOptUtil.InputFinder.bits(expr2); + + if (!inputs1.isEmpty() && !inputs2.isEmpty()) { + if (inputFdSet.implies(inputs1, inputs2)) { + projectionFdSet.addFD(i, j); + } + if (inputFdSet.implies(inputs2, inputs1)) { + projectionFdSet.addFD(j, i); + } + } + } } + } - keyInputIndices = extractDeterministicRefs(keyExpr); - columnInputIndices = extractDeterministicRefs(columnExpr); - } else if (rel instanceof Aggregate) { - Aggregate aggregate = (Aggregate) rel; + return projectionFdSet; + } - int groupByCnt = aggregate.getGroupCount(); - if (key < groupByCnt && column >= groupByCnt) { - return false; + /** + * Maps input functional dependencies to output dependencies based on column mapping. + */ + private static void mapInputFDs(FunctionalDependencySet inputFdSet, + Mappings.TargetMapping mapping, FunctionalDependencySet outputFdSet) { + for (FunctionalDependency inputFd : inputFdSet.getFDs()) { + ImmutableBitSet determinants = inputFd.getDeterminants(); + ImmutableBitSet dependents = inputFd.getDependents(); + + // Skip this FD if any determinant column is unmappable + boolean allMappable = + determinants.stream().allMatch(col -> col >= 0 + && col < mapping.getSourceCount() + && mapping.getTargetOpt(col) >= 0); + if (!allMappable) { + continue; } - keyInputIndices = extractDeterministicRefs(aggregate, key); - columnInputIndices = extractDeterministicRefs(aggregate, column); - } else { - throw new UnsupportedOperationException("Unsupported RelNode type: " - + rel.getClass().getSimpleName()); - } + // Map all determinant columns + ImmutableBitSet mappedDeterminants = mapAllCols(determinants, mapping); + if (mappedDeterminants.isEmpty()) { + continue; + } - // Early return if invalid cases - if (keyInputIndices.isEmpty() - || columnInputIndices.isEmpty()) { - return false; + // Map only the dependent columns that can be mapped + ImmutableBitSet mappedDependents = mapAvailableCols(dependents, mapping); + if (!mappedDependents.isEmpty()) { + outputFdSet.addFD(mappedDeterminants, mappedDependents); + } } + } - // Currently only supports multiple (keyInputIndices) to one (columnInputIndices) - // dependency detection - for (Integer keyRef : keyInputIndices) { - if (Boolean.FALSE.equals( - mq.determines(rel.getInput(0), keyRef, - columnInputIndices.nextSetBit(0)))) { - return false; + /** + * Maps all columns in the set. Returns empty set if any column cannot be mapped. + */ + private static ImmutableBitSet mapAllCols( + ImmutableBitSet columns, Mappings.TargetMapping mapping) { + ImmutableBitSet.Builder builder = ImmutableBitSet.builder(); + for (int col : columns) { + if (col < 0 || col >= mapping.getSourceCount()) { + return ImmutableBitSet.of(); + } + int mappedCol = mapping.getTargetOpt(col); + if (mappedCol >= 0) { + builder.set(mappedCol); + } else { + return ImmutableBitSet.of(); } } - - return true; + return builder.build(); } /** - * determinesImpl2is similar to determinesImpl, but it doesn't need to handle the - * mapping between output and input columns. + * Maps only the columns that can be mapped, ignoring unmappable ones. */ - private static @Nullable Boolean determinesImpl2(RelNode rel, RelMetadataQuery mq, - int key, int column) { - if (preCheck(rel, key, column)) { - return true; + private static ImmutableBitSet mapAvailableCols( + ImmutableBitSet columns, Mappings.TargetMapping mapping) { + ImmutableBitSet.Builder builder = ImmutableBitSet.builder(); + for (int col : columns) { + if (col < 0 || col >= mapping.getSourceCount()) { + continue; + } + int mappedCol = mapping.getTargetOpt(col); + if (mappedCol >= 0) { + builder.set(mappedCol); + } } + return builder.build(); + } - if (rel instanceof TableScan) { - TableScan tableScan = (TableScan) rel; - RelOptTable table = tableScan.getTable(); - List<ImmutableBitSet> keys = table.getKeys(); - return keys != null - && keys.size() == 1 - && keys.get(0).equals(ImmutableBitSet.of(column)); - } else if (rel instanceof Join) { - Join join = (Join) rel; - // TODO Considering column mapping based on equality conditions in join - int leftFieldCnt = join.getLeft().getRowType().getFieldCount(); - if (key < leftFieldCnt && column < leftFieldCnt) { - return mq.determines(join.getLeft(), key, column); - } else if (key >= leftFieldCnt && column >= leftFieldCnt) { - return mq.determines(join.getRight(), key - leftFieldCnt, column - leftFieldCnt); + private static FunctionalDependencySet getAggregateFD(Aggregate rel, RelMetadataQuery mq) { + FunctionalDependencySet fdSet = new FunctionalDependencySet(); + + FunctionalDependencySet inputFdSet = mq.getFunctionalDependencies(rel.getInput()); + + // Group set columns in the output + ImmutableBitSet groupSet = rel.getGroupSet(); + + // 1. Preserve input FDs that only involve group columns + for (FunctionalDependency inputFd : inputFdSet.getFDs()) { + ImmutableBitSet determinants = inputFd.getDeterminants(); + ImmutableBitSet dependents = inputFd.getDependents(); + + // Only preserve if both determinants and dependents are within group columns + if (groupSet.contains(determinants) && groupSet.contains(dependents)) { + fdSet.addFD(determinants, dependents); } - return false; - } else if (rel instanceof Correlate) { - // TODO Support Correlate. - return false; - } else if (rel instanceof SetOp) { - // TODO Support SetOp - return false; } - return mq.determines(rel.getInput(0), key, column); + // 2. Group keys determine all aggregate columns + if (!groupSet.isEmpty() && !rel.getAggCallList().isEmpty()) { + for (int i = rel.getGroupCount(); i < rel.getRowType().getFieldCount(); i++) { + fdSet.addFD(groupSet, ImmutableBitSet.of(i)); + } + } + + return fdSet; + } + + private static FunctionalDependencySet getJoinFD(Join rel, RelMetadataQuery mq) { + FunctionalDependencySet leftFdSet = mq.getFunctionalDependencies(rel.getLeft()); + FunctionalDependencySet rightFdSet = mq.getFunctionalDependencies(rel.getRight()); + + int leftFieldCount = rel.getLeft().getRowType().getFieldCount(); + JoinRelType joinType = rel.getJoinType(); + + switch (joinType) { + case INNER: + // Inner join: preserve all FDs and derive cross-table dependencies + FunctionalDependencySet innerJoinFdSet + = leftFdSet.union(shiftFdSet(rightFdSet, leftFieldCount)); + deriveTransitiveFDs(rel, innerJoinFdSet, leftFieldCount); + return innerJoinFdSet; + case LEFT: + // Left join: preserve left FDs, right FDs may be invalidated by NULLs + FunctionalDependencySet leftJoinFdSet = new FunctionalDependencySet(leftFdSet.getFDs()); + deriveTransitiveFDs(rel, leftJoinFdSet, leftFieldCount); + return leftJoinFdSet; + case RIGHT: { + // Right join: preserve right FDs, left FDs may be invalidated by NULLs + // 只保留右表字段决定右表字段的 FD,不做任何跨表 transitive 推导 + FunctionalDependencySet shiftedRightFdSet = shiftFdSet(rightFdSet, leftFieldCount); + FunctionalDependencySet filteredFdSet = new FunctionalDependencySet(); + int totalFieldCount = rel.getRowType().getFieldCount(); + for (FunctionalDependency fd : shiftedRightFdSet.getFunctionalDependencies()) { + boolean determinantsInRight = true; + boolean dependentsInRight = true; + for (int col : fd.getDeterminants()) { + if (col < leftFieldCount || col >= totalFieldCount) { + determinantsInRight = false; + break; + } + } + for (int col : fd.getDependents()) { + if (col < leftFieldCount || col >= totalFieldCount) { + dependentsInRight = false; + break; + } + } + if (determinantsInRight && dependentsInRight) { + filteredFdSet.addFD(fd.getDeterminants(), fd.getDependents()); + } + } + // 禁用 deriveTransitiveFDs,彻底禁止跨表 FD + return filteredFdSet; + } + case FULL: + // Full join: both sides may have NULLs, very conservative approach + return new FunctionalDependencySet(); + case SEMI: + // Semi join: only left table columns in result, preserve left FDs only + return leftFdSet; + case ANTI: + // Anti join: only left table columns in result, preserve left FDs only + return leftFdSet; + default: + // Conservative fallback for unknown join types + return new FunctionalDependencySet(); + } } - private static Boolean preCheck(RelNode rel, int key, int column) { - verifyIndex(rel, key, column); + private static FunctionalDependencySet getCalcFD(Calc rel, RelMetadataQuery mq) { + List<RexNode> projections = rel.getProgram().expandList(rel.getProgram().getProjectList()); + return getProjectionFD(rel.getInput(), projections, mq); + } - // Equal index values indicate the same expression reference - if (key == column) { - return true; + private static FunctionalDependencySet shiftFdSet(FunctionalDependencySet fdSet, int offset) { + FunctionalDependencySet shiftedFdSet = new FunctionalDependencySet(); + for (FunctionalDependency fd : fdSet.getFunctionalDependencies()) { + ImmutableBitSet shiftedDeterminants = fd.getDeterminants().shift(offset); + ImmutableBitSet shiftedDependents = fd.getDependents().shift(offset); + shiftedFdSet.addFD(shiftedDeterminants, shiftedDependents); } + return shiftedFdSet; + } - return false; + /** + * Derives cross-table functional dependencies through join conditions. + */ + private static void deriveTransitiveFDs( + Join rel, FunctionalDependencySet result, int leftFieldCount) { + deriveTransitiveFDsFromCondition(rel.getCondition(), result, leftFieldCount); + deriveAdditionalTransitiveFDs(result); } - private static void verifyIndex(RelNode rel, int... indices) { - for (int index : indices) { - if (index < 0 || index >= rel.getRowType().getFieldCount()) { - throw new IndexOutOfBoundsException( - "Column index " + index + " is out of bounds. " - + "Valid range is [0, " + rel.getRowType().getFieldCount() + ")"); + /** + * Iteratively derives additional transitive dependencies until closure. + */ + private static void deriveAdditionalTransitiveFDs(FunctionalDependencySet result) { + boolean changed = true; + // Limit iterations to avoid infinite loops + int iteration = 10; + + while (changed && iteration > 0) { + changed = false; + iteration--; + List<FunctionalDependency> currentFDs = new ArrayList<>(result.getFunctionalDependencies()); + + for (FunctionalDependency fd1 : currentFDs) { + for (FunctionalDependency fd2 : currentFDs) { + // Only apply transitivity when fd1's dependents completely contain fd2's determinants + // This ensures: if X → Y and Y → Z, then X → Z + if (fd1.getDependents().contains(fd2.getDeterminants())) { + ImmutableBitSet newDeterminants = fd1.getDeterminants(); + ImmutableBitSet newDependents = fd2.getDependents(); + result.addFD(newDeterminants, newDependents); + changed = true; + } + } } } } /** - * Extracts input indices referenced by an output column in an Aggregate. - * For group-by columns, returns the column index itself since they directly - * reference input columns. For aggregate function columns, returns the input - * column indices used by the aggregate call. - * - * @param aggregate The Aggregate relational expression to analyze - * @param index Index of the output column in the Aggregate (0-based) - * @return ImmutableBitSet of input column indices referenced by the output column. - * For group-by columns, returns a singleton set of the column index. - * For aggregate columns, returns the argument indices of the aggregate call. + * Processes join conditions to derive cross-table dependencies. */ - private static ImmutableBitSet extractDeterministicRefs(Aggregate aggregate, int index) { - int groupByCnt = aggregate.getGroupCount(); - if (index < groupByCnt) { - return ImmutableBitSet.of(index); + private static void deriveTransitiveFDsFromCondition( + RexNode condition, FunctionalDependencySet result, int leftFieldCount) { + if (!(condition instanceof RexCall)) { + return; } - List<AggregateCall> aggCalls = aggregate.getAggCallList(); - AggregateCall call = aggCalls.get(index - groupByCnt); - return ImmutableBitSet.of(call.getArgList()); + RexCall call = (RexCall) condition; + if (call.getOperator().getKind() == SqlKind.EQUALS) { Review Comment: how about IS_NOT_DISTINCT_FROM? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
