This is an automated email from the ASF dual-hosted git repository.
mbudiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 2a6ce48a97 [CALCITE-7196] Create an optimization pass which can
convert some cases of Correlate + Unnest to Unnest
2a6ce48a97 is described below
commit 2a6ce48a97465f37f7fdf97b8d6fab56919f2064
Author: Mihai Budiu <[email protected]>
AuthorDate: Fri Dec 26 16:45:13 2025 -0800
[CALCITE-7196] Create an optimization pass which can convert some cases of
Correlate + Unnest to Unnest
Signed-off-by: Mihai Budiu <[email protected]>
---
.../org/apache/calcite/rel/rules/CoreRules.java | 12 ++
.../calcite/rel/rules/UnnestDecorrelateRule.java | 182 +++++++++++++++++++++
.../org/apache/calcite/test/RelOptRulesTest.java | 68 ++++++++
.../org/apache/calcite/test/RelOptRulesTest.xml | 127 ++++++++++++++
4 files changed, 389 insertions(+)
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
index c604f71358..fbca06e760 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
@@ -17,6 +17,7 @@
package org.apache.calcite.rel.rules;
import org.apache.calcite.linq4j.function.Experimental;
+import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptUtil.Exists;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
@@ -32,6 +33,7 @@
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.core.TableFunctionScan;
import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.core.Uncollect;
import org.apache.calcite.rel.core.Union;
import org.apache.calcite.rel.core.Values;
import org.apache.calcite.rel.logical.LogicalAggregate;
@@ -959,4 +961,14 @@ private CoreRules() {}
* into equivalent {@link Union} ALL of GROUP BY operations. */
public static final AggregateGroupingSetsToUnionRule
AGGREGATE_GROUPING_SETS_TO_UNION =
AggregateGroupingSetsToUnionRule.Config.DEFAULT.toRule();
+
+ /** Rule that converts a {@link Project} on top of a {@link Correlate} whose
+ * right input is an {@link Uncollect} into a simple Uncollect over the
+ * correlate's left input, if possible. */
+ public static final RelOptRule UNNEST_DECORRELATE =
+ UnnestDecorrelateRule.Config.DEFAULT.toRule();
+
+ /** Rule that converts a {@link Project} on top of a {@link Correlate} whose
+ * right input is a {@link Project} of an {@link Uncollect} into a simple
+ * Uncollect over the correlate's left input, if possible. */
+ public static final RelOptRule UNNEST_PROJECT_DECORRELATE =
+ UnnestDecorrelateRule.Config.WITH_PROJECT.toRule();
}
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/UnnestDecorrelateRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/UnnestDecorrelateRule.java
new file mode 100644
index 0000000000..71005b52fd
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/rel/rules/UnnestDecorrelateRule.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.rules;
+
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Correlate;
+import org.apache.calcite.rel.core.CorrelationId;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Uncollect;
+import org.apache.calcite.rel.logical.LogicalValues;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexCorrelVariable;
+import org.apache.calcite.rex.RexFieldAccess;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.ImmutableBitSet;
+
+import org.immutables.value.Value;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import static java.util.Objects.requireNonNull;
+
+/** Convert representations of a projected Unnest that use LogicalCorrelate
into
+ * simple Unnest representations.
+ *
+ * <p>Original plan:
+ * LogicalProject // only uses rightmost columns of correlate, outerProject
+ * LogicalCorrelate(correlation=[$cor0], joinType=[inner],
requiredColumns=[{...}])
+ * LeftSubquery
+ * LogicalProject (optional; innerProject)
+ * Uncollect
+ * LogicalProject(COL=[$cor0.ARRAY])
+ * LogicalValues(tuples=[[{ 0 }]])
+ *
+ * <p>is converted to
+ *
+ * <p>Resulting plan:
+ * LogicalProject
+ * LogicalProject (optional)
+ * Uncollect
+ * LogicalProject
+ * LeftSubquery
+ */
[email protected]
+public class UnnestDecorrelateRule extends
RelRule<UnnestDecorrelateRule.Config>
+ implements TransformationRule {
+
+ protected UnnestDecorrelateRule(UnnestDecorrelateRule.Config config) {
+ super(config);
+ }
+
+ /** Given an expression and a correlationId, find whether the expression is a
+ * sequence of field accesses that starts in the correlationId, i.e., it
+ * has the form corId.field1.field2.
+ *
+ * @param expr Expression to analyze
+ * @param corId Correlation id to search for
+ * @param fieldsAccessed On successful return, contains the list of fields
accessed
+ * in reverse order, e.g., (field2, field1)
+ * @return True if {@code expr} has the expected shape, false otherwise.
+ */
+ private boolean extractFieldReferences(
+ RexNode expr, CorrelationId corId, List<RelDataTypeField>
fieldsAccessed) {
+ if (expr instanceof RexCorrelVariable) {
+ RexCorrelVariable cv = (RexCorrelVariable) expr;
+ return cv.id == corId;
+ } else if (expr instanceof RexFieldAccess) {
+ RexFieldAccess fieldAccess = (RexFieldAccess) expr;
+ fieldsAccessed.add(fieldAccess.getField());
+ return extractFieldReferences(fieldAccess.getReferenceExpr(), corId,
fieldsAccessed);
+ } else {
+ return false;
+ }
+ }
+
+ @Override public void onMatch(RelOptRuleCall call) {
+ Project outerProject = call.rel(0);
+ Correlate cor = call.rel(1);
+ CorrelationId corId = cor.getCorrelationId();
+
+ RelNode left = call.rel(2);
+ int leftCount = left.getRowType().getFieldCount();
+ ImmutableBitSet used =
RelOptUtil.InputFinder.bits(outerProject.getProjects(), null);
+ int firstUsed = used.nextSetBit(0);
+ if (firstUsed != -1 && firstUsed < leftCount) {
+ return;
+ }
+
+ int uncollectIndex = 3;
+ Project innerProject = null;
+ if (call.rel(uncollectIndex) instanceof Project) {
+ innerProject = call.rel(3);
+ uncollectIndex = 4;
+ }
+
+ Uncollect uncollect = call.rel(uncollectIndex);
+ Project project = call.rel(uncollectIndex + 1);
+
+ List<RexNode> projects = project.getProjects();
+ if (projects.size() != 1) {
+ return;
+ }
+
+ final RexNode projected = projects.get(0);
+ final ArrayList<RelDataTypeField> fieldsAccessed = new ArrayList<>();
+ if (!extractFieldReferences(projected, corId, fieldsAccessed)) {
+ return;
+ }
+
+ final RelBuilder builder = call.builder();
+ builder.push(left);
+
+ // Last field constructed by builder
+ RexNode field = null;
+ // Fields are in reverse order
+ Collections.reverse(fieldsAccessed);
+ for (RelDataTypeField index : fieldsAccessed) {
+ if (field != null) {
+ field = builder.field(field, index.getName());
+ } else {
+ field = builder.field(index.getName());
+ }
+ }
+ builder.project(requireNonNull(field, "field"))
+ .uncollect(uncollect.getItemAliases(), uncollect.withOrdinality);
+ if (innerProject != null) {
+ builder.project(innerProject.getProjects());
+ }
+ final List<RexNode> shifted = RexUtil.shift(outerProject.getProjects(),
-leftCount);
+ builder.project(shifted);
+ RelNode result = builder.build();
+ call.transformTo(result);
+ }
+
+ /** Rule configuration. */
+ @Value.Immutable
+ public interface Config extends RelRule.Config {
+ UnnestDecorrelateRule.Config BASE =
ImmutableUnnestDecorrelateRule.Config.of();
+
+ RelRule.Config DEFAULT = BASE
+ .withOperandSupplier(b0 -> b0.operand(Project.class)
+ .oneInput(b1 -> b1.operand(Correlate.class)
+ .inputs(b2 -> b2.operand(RelNode.class).anyInputs(),
+ b3 -> b3.operand(Uncollect.class)
+ .oneInput(b4 -> b4.operand(Project.class)
+ .oneInput(b5 ->
b5.operand(LogicalValues.class).anyInputs())))));
+
+ RelRule.Config WITH_PROJECT = BASE
+ .withOperandSupplier(b0 -> b0.operand(Project.class)
+ .oneInput(b1 -> b1.operand(Correlate.class)
+ .inputs(b2 -> b2.operand(RelNode.class).anyInputs(),
+ b3 -> b3.operand(Project.class)
+ .oneInput(b4 -> b4.operand(Uncollect.class)
+ .oneInput(b5 -> b5.operand(Project.class)
+ .oneInput(b6 ->
b6.operand(LogicalValues.class).anyInputs()))))));
+
+ @Override default UnnestDecorrelateRule toRule() {
+ return new UnnestDecorrelateRule(this);
+ }
+ }
+}
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index 1afbae4519..a839054349 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -673,6 +673,74 @@ private HepProgram createHypergraphProgram() {
.checkUnchanged();
}
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-7196">[CALCITE-7196]
+   * Create an optimization pass which can convert some cases of Correlate + Unnest
+   * to Unnest</a>; applies {@link CoreRules#UNNEST_PROJECT_DECORRELATE} to an
+   * UNNEST of an array column defined in a CTE. */
+  @Test void testUnnestDecorrelate() {
+    final String query =
+        "WITH t1 AS (SELECT ARRAY[1, 2, 3] as arr)\n"
+            + "SELECT array_element.id\n"
+            + "FROM t1, UNNEST(t1.arr) AS array_element(id)";
+    sql(query).withRule(CoreRules.UNNEST_PROJECT_DECORRELATE).check();
+  }
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-7196">[CALCITE-7196]
+   * Create an optimization pass which can convert some cases of Correlate + Unnest
+   * to Unnest</a>; same as {@link #testUnnestDecorrelate()} but the UNNEST is
+   * WITH ORDINALITY. */
+  @Test void testUnnestDecorrelate2() {
+    final String query =
+        "WITH t1 AS (SELECT ARRAY[1, 2, 3] as arr)\n"
+            + "SELECT array_element.id, array_element.ord\n"
+            + "FROM t1, UNNEST(t1.arr) WITH ORDINALITY AS array_element(id, ord)";
+    sql(query).withRule(CoreRules.UNNEST_PROJECT_DECORRELATE).check();
+  }
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-7196">[CALCITE-7196]
+   * Create an optimization pass which can convert some cases of Correlate + Unnest
+   * to Unnest</a>; runs {@link CoreRules#PROJECT_REMOVE} first and then applies
+   * {@link CoreRules#UNNEST_DECORRELATE}. */
+  @Test void testUnnestDecorrelate3() {
+    final String query =
+        "WITH t1 AS (SELECT ARRAY[1, 2, 3] as arr)\n"
+            + "SELECT array_element.id\n"
+            + "FROM t1, UNNEST(t1.arr) AS array_element(id)";
+    sql(query)
+        .withPreRule(CoreRules.PROJECT_REMOVE)
+        .withRule(CoreRules.UNNEST_DECORRELATE)
+        .check();
+  }
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-7196">[CALCITE-7196]
+   * Create an optimization pass which can convert some cases of Correlate + Unnest
+   * to Unnest</a>; unnests the {@code employees} column of the
+   * {@code DEPT_NESTED} table. */
+  @Test void testUnnestDecorrelate4() {
+    final String query =
+        "select t2.ename\n"
+            + "from DEPT_NESTED as t1,\n"
+            + "unnest(t1.employees) as t2";
+    sql(query)
+        .withPreRule(CoreRules.PROJECT_REMOVE)
+        .withRule(CoreRules.UNNEST_DECORRELATE)
+        .check();
+  }
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-7196">[CALCITE-7196]
+   * Create an optimization pass which can convert some cases of Correlate + Unnest
+   * to Unnest</a>; the unnested array is a field nested inside a ROW-typed
+   * column. */
+  @Test void testUnnestDecorrelate5() {
+    final String query =
+        "WITH t1 AS (SELECT ROW(ARRAY[1, 2, 3]) as struct_with_array_field)\n"
+            + "SELECT array_element.id\n"
+            + "FROM t1, UNNEST(t1.struct_with_array_field[1]) AS array_element(id)";
+    sql(query)
+        .withPreRule(CoreRules.PROJECT_REMOVE)
+        .withRule(CoreRules.UNNEST_DECORRELATE)
+        .check();
+  }
+
@Test void testFilterProjectTransposeRule3() {
final String sql = "select * from (select deptno from emp) as d\n"
+ "where NOT EXISTS (\n"
diff --git
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 3887c4fe50..465ce0e3e1 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -21563,6 +21563,133 @@ LogicalProject(DEPTNO=[$7])
<![CDATA[
LogicalProject(DEPTNO=[$0], NAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testUnnestDecorrelate">
+ <Resource name="sql">
+ <![CDATA[WITH t1 AS (SELECT ARRAY[1, 2, 3] as arr)
+SELECT array_element.id
+FROM t1, UNNEST(t1.arr) AS array_element(id)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(ID=[$1])
+ LogicalCorrelate(correlation=[$cor0], joinType=[inner],
requiredColumns=[{0}])
+ LogicalProject(ARR=[ARRAY(1, 2, 3)])
+ LogicalValues(tuples=[[{ 0 }]])
+ LogicalProject(ID=[$0])
+ Uncollect
+ LogicalProject(ARR=[$cor0.ARR])
+ LogicalValues(tuples=[[{ 0 }]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+Uncollect
+ LogicalProject(ARR=[ARRAY(1, 2, 3)])
+ LogicalValues(tuples=[[{ 0 }]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testUnnestDecorrelate2">
+ <Resource name="sql">
+ <![CDATA[WITH t1 AS (SELECT ARRAY[1, 2, 3] as arr)
+SELECT array_element.id, array_element.ord
+FROM t1, UNNEST(t1.arr) WITH ORDINALITY AS array_element(id, ord)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(ID=[$1], ORD=[$2])
+ LogicalCorrelate(correlation=[$cor0], joinType=[inner],
requiredColumns=[{0}])
+ LogicalProject(ARR=[ARRAY(1, 2, 3)])
+ LogicalValues(tuples=[[{ 0 }]])
+ LogicalProject(ID=[$0], ORD=[$1])
+ Uncollect(withOrdinality=[true])
+ LogicalProject(ARR=[$cor0.ARR])
+ LogicalValues(tuples=[[{ 0 }]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+Uncollect(withOrdinality=[true])
+ LogicalProject(ARR=[ARRAY(1, 2, 3)])
+ LogicalValues(tuples=[[{ 0 }]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testUnnestDecorrelate3">
+ <Resource name="sql">
+ <![CDATA[WITH t1 AS (SELECT ARRAY[1, 2, 3] as arr)
+SELECT array_element.id
+FROM t1, UNNEST(t1.arr) AS array_element(id)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(ID=[$1])
+ LogicalCorrelate(correlation=[$cor0], joinType=[inner],
requiredColumns=[{0}])
+ LogicalProject(ARR=[ARRAY(1, 2, 3)])
+ LogicalValues(tuples=[[{ 0 }]])
+ Uncollect
+ LogicalProject(ARR=[$cor0.ARR])
+ LogicalValues(tuples=[[{ 0 }]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+Uncollect
+ LogicalProject(ARR=[ARRAY(1, 2, 3)])
+ LogicalValues(tuples=[[{ 0 }]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testUnnestDecorrelate4">
+ <Resource name="sql">
+ <![CDATA[select t2.ename
+from DEPT_NESTED as t1,
+unnest(t1.employees) as t2]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(ENAME=[$5])
+ LogicalCorrelate(correlation=[$cor0], joinType=[inner],
requiredColumns=[{3}])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
+ Uncollect
+ LogicalProject(EMPLOYEES=[$cor0.EMPLOYEES])
+ LogicalValues(tuples=[[{ 0 }]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(ENAME=[$1])
+ Uncollect
+ LogicalProject(EMPLOYEES=[$3])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT_NESTED]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testUnnestDecorrelate5">
+ <Resource name="sql">
+ <![CDATA[WITH t1 AS (SELECT ROW(ARRAY[1, 2, 3]) as
struct_with_array_field)
+SELECT array_element.id
+FROM t1, UNNEST(t1.struct_with_array_field[1]) AS array_element(id)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(ID=[$1])
+ LogicalCorrelate(correlation=[$cor0], joinType=[inner],
requiredColumns=[{0}])
+ LogicalProject(STRUCT_WITH_ARRAY_FIELD=[ROW(ARRAY(1, 2, 3))])
+ LogicalValues(tuples=[[{ 0 }]])
+ Uncollect
+ LogicalProject(EXPR$0=[$cor0.STRUCT_WITH_ARRAY_FIELD.EXPR$0])
+ LogicalValues(tuples=[[{ 0 }]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+Uncollect
+ LogicalProject($f0=[ROW(ARRAY(1, 2, 3)).EXPR$0])
+ LogicalValues(tuples=[[{ 0 }]])
]]>
</Resource>
</TestCase>