This is an automated email from the ASF dual-hosted git repository.
mbudiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new e5682547cb [CALCITE-7319] FILTER_INTO_JOIN rule loses correlation
variable context in HepPlanner
e5682547cb is described below
commit e5682547cb07320634f4b1f335b50abb13f30cac
Author: Mihai Budiu <[email protected]>
AuthorDate: Wed Dec 10 20:27:59 2025 -0800
[CALCITE-7319] FILTER_INTO_JOIN rule loses correlation variable context in
HepPlanner
Signed-off-by: Mihai Budiu <[email protected]>
---
.../apache/calcite/rel/rules/FilterJoinRule.java | 21 ++++--
.../org/apache/calcite/test/RelOptRulesTest.java | 17 +++++
.../org/apache/calcite/test/RelOptRulesTest.xml | 40 ++++++++++
.../org/apache/calcite/adapter/tpch/TpchTest.java | 85 ++++++++++++++++++++++
4 files changed, 157 insertions(+), 6 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
index e80ac4150d..1f80126aa8 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/FilterJoinRule.java
@@ -84,12 +84,7 @@ protected void perform(RelOptRuleCall call, @Nullable Filter
filter,
return;
}
- if (filter != null
- && RexUtil.containsCorrelation(filter.getCondition())) {
- return;
- }
-
- final List<RexNode> aboveFilters =
+ List<RexNode> aboveFilters =
filter != null
? getConjunctions(filter)
: new ArrayList<>();
@@ -107,6 +102,18 @@ protected void perform(RelOptRuleCall call, @Nullable
Filter filter,
final List<RexNode> leftFilters = new ArrayList<>();
final List<RexNode> rightFilters = new ArrayList<>();
+ // Do not consider moving predicates that contain correlation variables
+ final List<RexNode> ineligible = new ArrayList<>();
+ final List<RexNode> eligible = new ArrayList<>();
+ for (RexNode f : aboveFilters) {
+ if (RexUtil.containsCorrelation(f)) {
+ ineligible.add(f);
+ } else {
+ eligible.add(f);
+ }
+ }
+ aboveFilters = eligible;
+
// TODO - add logic to derive additional filters. E.g., from
// (t1.a = 1 AND t2.a = 2) OR (t1.b = 3 AND t2.b = 4), you can
// derive table filters:
@@ -126,6 +133,8 @@ protected void perform(RelOptRuleCall call, @Nullable
Filter filter,
leftFilters,
rightFilters);
+ // Add back the ineligible filters
+ aboveFilters.addAll(ineligible);
// Move join filters up if needed
validateJoinFilters(aboveFilters, joinFilters, join, joinType);
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index 33b72492f7..8d432256b4 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -8275,6 +8275,23 @@ private void checkSemiJoinRuleOnAntiJoin(RelOptRule
rule) {
.check();
}
+ /** Test case for <a
href="https://issues.apache.org/jira/browse/CALCITE-7319">[CALCITE-7319]
+ * FILTER_INTO_JOIN rule loses correlation variable context in
HepPlanner</a>. */
+ @Test void testFilterIntoJoinMissingVariableCor() {
+ final String sql = "SELECT E.EMPNO\n"
+ + "FROM EMP E\n"
+ + "JOIN DEPT D ON E.DEPTNO = D.DEPTNO\n"
+ + "WHERE E.EMPNO > 10 AND D.DEPTNO = (\n"
+ + " SELECT MIN(D_INNER.DEPTNO)\n"
+ + " FROM DEPT D_INNER\n"
+ + " WHERE D_INNER.DEPTNO = E.DEPTNO)";
+ sql(sql)
+ .withExpand(false)
+ .withDecorrelate(false)
+ .withRule(CoreRules.FILTER_INTO_JOIN)
+ .check();
+ }
+
/** Test case for
* <a
href="https://issues.apache.org/jira/browse/CALCITE-4616">[CALCITE-4616]
* AggregateUnionTransposeRule causes row type mismatch when some inputs have
diff --git
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index ab4ba1281b..8ed236780f 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -5594,6 +5594,46 @@ LogicalFilter(condition=[<($0, 20)])
})])
LogicalCalc(expr#0..8=[{inputs}], DEPTNO=[$t7])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testFilterIntoJoinMissingVariableCor">
+ <Resource name="sql">
+ <![CDATA[SELECT E.EMPNO
+FROM EMP E
+JOIN DEPT D ON E.DEPTNO = D.DEPTNO
+WHERE E.EMPNO > 10 AND D.DEPTNO = (
+ SELECT MIN(D_INNER.DEPTNO)
+ FROM DEPT D_INNER
+ WHERE D_INNER.DEPTNO = E.DEPTNO)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$0])
+ LogicalFilter(condition=[AND(>($0, 10), =($9, $SCALAR_QUERY({
+LogicalAggregate(group=[{}], EXPR$0=[MIN($0)])
+ LogicalProject(DEPTNO=[$0])
+ LogicalFilter(condition=[=($0, $cor0.DEPTNO)])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+})))], variablesSet=[[$cor0]])
+ LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$0])
+ LogicalFilter(condition=[=($9, $SCALAR_QUERY({
+LogicalAggregate(group=[{}], EXPR$0=[MIN($0)])
+ LogicalProject(DEPTNO=[$0])
+ LogicalFilter(condition=[=($0, $cor0.DEPTNO)])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+}))])
+ LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+ LogicalFilter(condition=[>($0, 10)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
diff --git a/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
b/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
index 9ea5cd0ab7..5f4e93d9d7 100644
--- a/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
+++ b/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
@@ -16,8 +16,23 @@
*/
package org.apache.calcite.adapter.tpch;
+import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepPlanner;
+import org.apache.calcite.plan.hep.HepProgram;
+import org.apache.calcite.plan.hep.HepProgramBuilder;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelRoot;
+import org.apache.calcite.rel.rules.CoreRules;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.parser.SqlParseException;
import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.tools.FrameworkConfig;
+import org.apache.calcite.tools.Frameworks;
+import org.apache.calcite.tools.Planner;
+import org.apache.calcite.tools.RelConversionException;
+import org.apache.calcite.tools.ValidationException;
import org.apache.calcite.util.TestUtil;
import com.google.common.collect.ImmutableList;
@@ -29,6 +44,8 @@
import java.util.List;
import java.util.concurrent.TimeUnit;
+import static org.apache.calcite.test.Matchers.containsStringLinux;
+
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.MatcherAssert.assertThat;
@@ -833,6 +850,74 @@ private CalciteAssert.AssertThat with() {
});
}
+ /** Test case for <a
href="https://issues.apache.org/jira/browse/CALCITE-7319">[CALCITE-7319]
+ * FILTER_INTO_JOIN rule loses correlation variable context in
HepPlanner</a>. */
+ @Test public void optimizeQuery2()
+ throws SqlParseException, ValidationException, RelConversionException {
+ SchemaPlus rootSchema = Frameworks.createRootSchema(true);
+ TpchSchema tpchSchema = new TpchSchema(1.0, 0, 1, false);
+ rootSchema.add("TPCH", tpchSchema);
+ FrameworkConfig config = Frameworks.newConfigBuilder()
+ .defaultSchema(rootSchema)
+ .build();
+
+ Planner planner = Frameworks.getPlanner(config);
+
+ SqlNode parsed = planner.parse(QUERY_ARRAY[1]);
+ SqlNode validated = planner.validate(parsed);
+ RelRoot root = planner.rel(validated);
+
+ final HepProgramBuilder builder = HepProgram.builder();
+ builder.addRuleInstance(CoreRules.FILTER_SUB_QUERY_TO_CORRELATE);
+ builder.addRuleInstance(CoreRules.PROJECT_SUB_QUERY_TO_CORRELATE);
+ builder.addRuleInstance(CoreRules.JOIN_SUB_QUERY_TO_CORRELATE);
+ builder.addRuleInstance(CoreRules.FILTER_CORRELATE);
+ // We are checking that this rule can push some predicates into joins
+ // Prior to fixing [CALCITE-7319] (second improvement) many joins below
+ // had a condition=[true].
+ builder.addRuleInstance(CoreRules.FILTER_INTO_JOIN);
+
+ RelOptPlanner optPlanner = new HepPlanner(builder.build());
+ optPlanner.setRoot(root.rel);
+ RelNode rel = optPlanner.findBestExp();
+ final String expected = "LogicalSort(sort0=[$0], sort1=[$2], sort2=[$1],
sort3=[$3], "
+ + "dir0=[DESC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100])\n"
+ + " LogicalProject(S_ACCTBAL=[$14], S_NAME=[$10], N_NAME=[$22],
P_PARTKEY=[$0], "
+ + "P_MFGR=[$2], S_ADDRESS=[$11], S_PHONE=[$13], S_COMMENT=[$15])\n"
+ + " LogicalProject(P_PARTKEY=[$0], P_NAME=[$1], P_MFGR=[$2],
P_BRAND=[$3], P_TYPE=[$4], "
+ + "P_SIZE=[$5], P_CONTAINER=[$6], P_RETAILPRICE=[$7], P_COMMENT=[$8],
S_SUPPKEY=[$9], "
+ + "S_NAME=[$10], S_ADDRESS=[$11], S_NATIONKEY=[$12], S_PHONE=[$13],
S_ACCTBAL=[$14], "
+ + "S_COMMENT=[$15], PS_PARTKEY=[$16], PS_SUPPKEY=[$17],
PS_AVAILQTY=[$18], "
+ + "PS_SUPPLYCOST=[$19], PS_COMMENT=[$20], N_NATIONKEY=[$21],
N_NAME=[$22], "
+ + "N_REGIONKEY=[$23], N_COMMENT=[$24], R_REGIONKEY=[$25],
R_NAME=[$26], R_COMMENT=[$27])\n"
+ + " LogicalFilter(condition=[=($19, $28)])\n"
+ + " LogicalCorrelate(correlation=[$cor0], joinType=[left],
requiredColumns=[{0}])\n"
+ + " LogicalJoin(condition=[=($23, $25)], joinType=[inner])\n"
+ + " LogicalJoin(condition=[=($12, $21)],
joinType=[inner])\n"
+ + " LogicalJoin(condition=[AND(=($0, $16), =($9, $17))],
joinType=[inner])\n"
+ + " LogicalJoin(condition=[true], joinType=[inner])\n"
+ + " LogicalFilter(condition=[AND(=(CAST($5):INTEGER,
41), "
+ + "LIKE($4, '%NICKEL'))])\n"
+ + " LogicalTableScan(table=[[TPCH, PART]])\n"
+ + " LogicalTableScan(table=[[TPCH, SUPPLIER]])\n"
+ + " LogicalTableScan(table=[[TPCH, PARTSUPP]])\n"
+ + " LogicalTableScan(table=[[TPCH, NATION]])\n"
+ + " LogicalFilter(condition=[=(CAST($1):VARCHAR,
'EUROPE')])\n"
+ + " LogicalTableScan(table=[[TPCH, REGION]])\n"
+ + " LogicalAggregate(group=[{}], EXPR$0=[MIN($0)])\n"
+ + " LogicalProject(PS_SUPPLYCOST=[$3])\n"
+ + " LogicalFilter(condition=[=($cor0.P_PARTKEY, $0)])\n"
+ + " LogicalJoin(condition=[=($14, $16)],
joinType=[inner])\n"
+ + " LogicalJoin(condition=[=($8, $12)],
joinType=[inner])\n"
+ + " LogicalJoin(condition=[=($5, $1)],
joinType=[inner])\n"
+ + " LogicalTableScan(table=[[TPCH, PARTSUPP]])\n"
+ + " LogicalTableScan(table=[[TPCH, SUPPLIER]])\n"
+ + " LogicalTableScan(table=[[TPCH, NATION]])\n"
+ + " LogicalFilter(condition=[=(CAST($1):VARCHAR,
'EUROPE')])\n"
+ + " LogicalTableScan(table=[[TPCH, REGION]])";
+ assertThat(rel.explain(), containsStringLinux(expected));
+ }
+
@Test void testQuery03() {
checkQuery(3);
}