Ian Bertolacci created CALCITE-7324:
---------------------------------------

             Summary: NullPointerException from RelDecorrelator with multiple 
subqueries where one has a complex correlated condition
                 Key: CALCITE-7324
                 URL: https://issues.apache.org/jira/browse/CALCITE-7324
             Project: Calcite
          Issue Type: Bug
            Reporter: Ian Bertolacci


Example SQL (using TCP-H schema):
{code:Sql}
select
    (select count(*) from PART where p_partkey = PARTSUPP.ps_partkey),
    (select count(*) from SUPPLIER where s_suppkey = case when s_acctbal > 0 
then PARTSUPP.ps_partkey + 1 else 1234 end)
from PARTSUPP
{code}

Exception:
{code}
Exception in thread "main" java.lang.NullPointerException
        at java.base/java.util.Objects.requireNonNull(Objects.java:209)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.createValueGenerator(RelDecorrelator.java:1272)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateInputWithValueGenerator(RelDecorrelator.java:1466)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.maybeAddValueGenerator(RelDecorrelator.java:1385)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1595)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1560)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
        at 
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base/java.lang.reflect.Method.invoke(Method.java:569)
        at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:614)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:596)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
        at 
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base/java.lang.reflect.Method.invoke(Method.java:569)
        at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1640)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1615)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
        at 
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base/java.lang.reflect.Method.invoke(Method.java:569)
        at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1192)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1175)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
        at 
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base/java.lang.reflect.Method.invoke(Method.java:569)
        at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1192)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1175)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
        at 
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base/java.lang.reflect.Method.invoke(Method.java:569)
        at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1192)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1175)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
        at 
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base/java.lang.reflect.Method.invoke(Method.java:569)
        at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelate(RelDecorrelator.java:342)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateQuery(RelDecorrelator.java:261)
        at 
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateQuery(RelDecorrelator.java:221)
        at 
com.github.zabetak.calcite.tutorial.RecursiveQueryIssueDemo.main(RecursiveQueryIssueDemo.java:170)
{code}

I demonstrated this in several calcite versions using the below (which I built 
in the LuceneQueryProcessor project):
{code:java}
public static void main(String[] args) throws Exception {
String sqlQuery = 
  "SELECT (SELECT count(*) from PART where p_partkey = PARTSUPP.ps_partkey), " +
  "(select count(*) from SUPPLIER where s_suppkey = case when s_acctbal > 0 
then PARTSUPP.ps_partkey + 1 else 1234 end) " + 
  "FROM PARTSUPP";

System.out.println("[Input query]");
System.out.println(sqlQuery);

// Create the schema and table data types
CalciteSchema schema = CalciteSchema.createRootSchema(true);
RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl();
for (TpchTable table : TpchTable.values()) {
  RelDataTypeFactory.Builder builder = new 
RelDataTypeFactory.Builder(typeFactory);
  for (TpchTable.Column column : table.columns) {
    RelDataType type = typeFactory.createJavaType(column.type);
    builder.add(column.name, type.getSqlTypeName()).nullable(true);
  }
  String indexPath = DatasetIndexer.INDEX_LOCATION + "/tpch/" + table.name();
  LuceneTable lt = new LuceneTable(indexPath, builder.build());
  schema.add(table.name(), new LuceneTable(indexPath, builder.build()));
}

// Configure and instantiate validator
Properties props = new Properties();
props.setProperty(CalciteConnectionProperty.CASE_SENSITIVE.camelName(), 
"false");
CalciteConnectionConfig config = new CalciteConnectionConfigImpl(props);
CalciteCatalogReader catalogReader = new CalciteCatalogReader(schema,
    Collections.singletonList("bs"),
    typeFactory, config);

SqlValidator validator = 
SqlValidatorUtil.newValidator(SqlStdOperatorTable.instance(),
    catalogReader, typeFactory,
    SqlValidator.Config.DEFAULT);

// Configure and instantiate the converter of the AST to Logical plan (requires 
opt cluster)
RelOptCluster cluster = newCluster(typeFactory);
SqlToRelConverter relConverter = new SqlToRelConverter(
    NOOP_EXPANDER,
    validator,
    catalogReader,
    cluster,
    StandardConvertletTable.INSTANCE,
    SqlToRelConverter.config().withDecorrelationEnabled(false));

// Create an SQL parser
SqlParser parser = SqlParser.create(sqlQuery);
// Parse the query into an AST
SqlNode sqlNode = parser.parseQuery();
System.out.println("[Parsed query]");
System.out.println(sqlNode.toString());

// Validate the initial AST
SqlNode validNode = validator.validate(sqlNode);
// Convert the valid AST into a logical plan
RelNode logPlan = relConverter.convertQuery(validNode, false, true).rel;

// Display the logical plan
System.out.println(
    RelOptUtil.dumpPlan("[Logical plan]", logPlan, SqlExplainFormat.TEXT,
        SqlExplainLevel.NON_COST_ATTRIBUTES));
Program decomp = 
  Programs.hep(
    ImmutableList.of(
      CoreRules.FILTER_SUB_QUERY_TO_CORRELATE,
      CoreRules.PROJECT_SUB_QUERY_TO_CORRELATE,
      CoreRules.JOIN_SUB_QUERY_TO_CORRELATE
    ),
    false,
    DefaultRelMetadataProvider.INSTANCE
  );

RelNode afterToCorrelate = decomp.run(cluster.getPlanner(), logPlan, 
logPlan.getTraitSet(), ImmutableList.of(), ImmutableList.of());
System.out.println(
    RelOptUtil.dumpPlan("[Post to Correlate plan]", afterToCorrelate, 
SqlExplainFormat.TEXT,
        SqlExplainLevel.NON_COST_ATTRIBUTES));

RelNode decorrelated = RelDecorrelator.decorrelateQuery(
  afterToCorrelate, 
  RelFactories.LOGICAL_BUILDER.create(cluster, catalogReader));

System.out.println(
    RelOptUtil.dumpPlan("[Decorrelated plan]", decorrelated, 
SqlExplainFormat.TEXT,
        SqlExplainLevel.NON_COST_ATTRIBUTES));
{code}

Plan from sql->rel conversion:
{code}
[Logical plan]
LogicalProject(variablesSet=[[$cor0]], EXPR$0=[$SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
  LogicalFilter(condition=[=($0, $cor0.ps_partkey)])
    LogicalTableScan(table=[[PART]])
})], EXPR$1=[$SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
  LogicalFilter(condition=[=($0, CASE(>($5, CAST(0):DOUBLE NOT NULL), 
+($cor0.ps_partkey, 1), 1234))])
    LogicalTableScan(table=[[SUPPLIER]])
})]), id = 17
  LogicalTableScan(table=[[PARTSUPP]]), id = 1
{code}

Plan after applying the sub_query_to_correlate rules:
{code}

[Post to Correlate plan]
LogicalProject(EXPR$0=[$5], EXPR$1=[$6]), id = 41
  LogicalCorrelate(correlation=[$cor0], joinType=[left], 
requiredColumns=[{0}]), id = 39
    LogicalCorrelate(correlation=[$cor0], joinType=[left], 
requiredColumns=[{0}]), id = 28
      LogicalTableScan(table=[[PARTSUPP]]), id = 1
      LogicalAggregate(group=[{}], EXPR$0=[COUNT()]), id = 26
        LogicalFilter(condition=[=($0, $cor0.ps_partkey)]), id = 24
          LogicalTableScan(table=[[PART]]), id = 3
    LogicalAggregate(group=[{}], EXPR$0=[COUNT()]), id = 37
      LogicalFilter(condition=[=($0, CASE(>($5, CAST(0):DOUBLE NOT NULL), 
+($cor0.ps_partkey, 1), 1234))]), id = 35
        LogicalTableScan(table=[[SUPPLIER]]), id = 8
{code}


Whats very strange is that if I use a different CorrelateFactory in the 
RelBuilder used in the decorrelation, no exception is produced, and 
decorrelation is performed (though I have not checked the correctness:
{code:java}
RelNode decorrelated = RelDecorrelator.decorrelateQuery(
      afterToCorrelate, 
      // Special builder with CorrelateFactory override
      RelBuilder.proto(
        Contexts.of(
          new CorrelateFactory() {
            // MyCorrelate which is a mirror of LogicalCorrelate
            class MyCorrelate extends Correlate{
              public MyCorrelate(
                RelOptCluster cluster,
                RelTraitSet traitSet,
                List<RelHint> hints,
                RelNode left,
                RelNode right,
                CorrelationId correlationId,
                ImmutableBitSet requiredColumns,
                JoinRelType joinType) {
              super(
                  cluster,
                  traitSet,
                  hints,
                  left,
                  right,
                  correlationId,
                  requiredColumns,
                  joinType);
              }

              @Override public MyCorrelate copy(
                  RelTraitSet traitSet,
                  RelNode left, RelNode right, CorrelationId correlationId,
                  ImmutableBitSet requiredColumns, JoinRelType joinType
                ) {
                return new MyCorrelate(getCluster(), traitSet, hints, left, 
right, correlationId, requiredColumns, joinType);
              }

                @Override public RelNode accept(RelShuttle shuttle) {
                  return shuttle.visit(this);
                }
            }

            public RelNode createCorrelate(RelNode left, RelNode right, 
List<RelHint> hints,
                CorrelationId correlationId, ImmutableBitSet requiredColumns, 
JoinRelType joinType) {
              // Mirror LogicalCorrelate.create
              final RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE);
              return new MyCorrelate(cluster, traitSet, hints, left, right, 
correlationId,requiredColumns, joinType);
            }
          }
        )
      ).create(cluster, catalogReader)
    );
{code}

Producing this plan:
{code}
[Decorrelated plan]
LogicalProject(EXPR$0=[$5], EXPR$1=[CASE(IS NULL($7), 0:BIGINT, $7)]), id = 182
  LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $6)], joinType=[left]), id = 
180
    LogicalProject(ps_partkey=[$0], ps_suppkey=[$1], ps_availqty=[$2], 
ps_supplycost=[$3], ps_comment=[$4], EXPR$0=[CASE(IS NULL($6), 0:BIGINT, $6)]), 
id = 139
      LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $5)], joinType=[left]), 
id = 137
        LogicalTableScan(table=[[PARTSUPP]]), id = 1
        LogicalProject(p_partkey=[$0], EXPR$0=[CASE(IS NOT NULL($2), $2, 0)]), 
id = 135
          LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], 
joinType=[left]), id = 133
            LogicalAggregate(group=[{0}]), id = 124
              LogicalTableScan(table=[[PARTSUPP]]), id = 1
            LogicalAggregate(group=[{0}], EXPR$0=[COUNT()]), id = 131
              LogicalProject(p_partkey=[$0]), id = 129
                LogicalFilter(condition=[IS NOT NULL($0)]), id = 127
                  LogicalTableScan(table=[[PART]]), id = 3
    LogicalProject(ps_partkey=[$0], EXPR$0=[CASE(IS NOT NULL($2), $2, 0)]), id 
= 178
      LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], joinType=[left]), 
id = 176
        LogicalAggregate(group=[{0}]), id = 160
          LogicalProject(ps_partkey=[$0]), id = 158
            LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $5)], 
joinType=[left]), id = 156
              LogicalTableScan(table=[[PARTSUPP]]), id = 1
              LogicalProject(p_partkey=[$0], EXPR$0=[CASE(IS NOT NULL($2), $2, 
0)]), id = 154
                LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], 
joinType=[left]), id = 152
                  LogicalAggregate(group=[{0}]), id = 143
                    LogicalTableScan(table=[[PARTSUPP]]), id = 1
                  LogicalAggregate(group=[{0}], EXPR$0=[COUNT()]), id = 150
                    LogicalProject(p_partkey=[$0]), id = 148
                      LogicalFilter(condition=[IS NOT NULL($0)]), id = 146
                        LogicalTableScan(table=[[PART]]), id = 3
        LogicalAggregate(group=[{0}], EXPR$0=[COUNT()]), id = 174
          LogicalProject(ps_partkey=[$7]), id = 172
            LogicalJoin(condition=[=($0, CASE(>($5, 0.0E0), +($7, 1), 1234))], 
joinType=[inner]), id = 184
              LogicalTableScan(table=[[SUPPLIER]]), id = 8
              LogicalAggregate(group=[{0}]), id = 166
                LogicalProject(ps_partkey=[$0]), id = 164
                  LogicalTableScan(table=[[PARTSUPP]]), id = 1
{code}





--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to