This is an automated email from the ASF dual-hosted git repository.

rubenql pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new a8802c721a [CALCITE-6593] NPE when outer joining tables with many 
fields and unmatching rows
a8802c721a is described below

commit a8802c721a2805159d166e45044c00364d1cb6c3
Author: rrueda <[email protected]>
AuthorDate: Mon Sep 23 21:56:38 2024 -0300

    [CALCITE-6593] NPE when outer joining tables with many fields and 
unmatching rows
    
    EnumUtils:
    - Split the code paths of compact code and normal code;
    - Keep the normal code path as is and change the compact code path:
      - Generate a null check if the row might be null;
      - Add the early break in case of semi/anti joins;
      - Remove the "optimization" of generating an array of a specific
      type if all output fields are of the same type;
    
    JavaRowType:
    - Make the copy method abstract and implement a specific copy for
    every JavaRowType;
    
    Also, add a new system property to determine the threshold that triggers
    compact code generation.
---
 .../calcite/adapter/enumerable/EnumUtils.java      | 143 +++++++------
 .../calcite/adapter/enumerable/JavaRowFormat.java  | 109 ++++++----
 .../calcite/config/CalciteSystemProperty.java      |  17 ++
 .../org/apache/calcite/util/BuiltInMethod.java     |   1 +
 .../calcite/test/LargeGeneratedJoinTest.java       | 234 +++++++++++++++++----
 5 files changed, 354 insertions(+), 150 deletions(-)

diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
index 3bf60922ee..671b2d6bb4 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
@@ -41,6 +41,7 @@ import org.apache.calcite.linq4j.tree.MethodDeclaration;
 import org.apache.calcite.linq4j.tree.NewArrayExpression;
 import org.apache.calcite.linq4j.tree.ParameterExpression;
 import org.apache.calcite.linq4j.tree.Primitive;
+import org.apache.calcite.linq4j.tree.Statement;
 import org.apache.calcite.linq4j.tree.Types;
 import org.apache.calcite.linq4j.tree.UnaryExpression;
 import org.apache.calcite.rel.RelNode;
@@ -64,7 +65,6 @@ import com.google.common.collect.ImmutableMap;
 
 import org.checkerframework.checker.nullness.qual.Nullable;
 
-import java.lang.reflect.Array;
 import java.lang.reflect.Method;
 import java.lang.reflect.Modifier;
 import java.lang.reflect.Type;
@@ -86,6 +86,8 @@ import java.util.Map;
 import java.util.TimeZone;
 import java.util.function.Function;
 
+import static 
org.apache.calcite.config.CalciteSystemProperty.JOIN_SELECTOR_COMPACT_CODE_THRESHOLD;
+
 import static java.util.Objects.requireNonNull;
 
 /**
@@ -159,40 +161,18 @@ public class EnumUtils {
 
   static Expression joinSelector(JoinRelType joinType, PhysType physType,
       List<PhysType> inputPhysTypes) {
-    // A parameter for each input.
-    final List<ParameterExpression> parameters = new ArrayList<>();
-
-    // Generate all fields.
-    final List<Expression> expressions = new ArrayList<>();
     final int outputFieldCount = physType.getRowType().getFieldCount();
-
     // If there are many output fields, create the output dynamically so that 
the code size stays
     // below the limit. See CALCITE-3094.
-    final boolean generateCompactCode = outputFieldCount >= 100;
-    final ParameterExpression compactOutputVar;
-    final BlockBuilder compactCode = new BlockBuilder();
-    if (generateCompactCode) {
-      Class<?> fieldClass = physType.fieldClass(0);
-      // If all fields have the same type, use the specific type. Otherwise 
just use Object.
-      for (int fieldIndex = 1; fieldIndex < outputFieldCount; ++fieldIndex) {
-        if (fieldClass != physType.fieldClass(fieldIndex)) {
-          fieldClass = Object.class;
-          break;
-        }
-      }
-
-      final Class<?> arrayClass = Array.newInstance(fieldClass, 0).getClass();
-      compactOutputVar = Expressions.variable(arrayClass, "outputArray");
-      final DeclarationStatement exp =
-          Expressions.declare(
-              0, compactOutputVar, new NewArrayExpression(fieldClass, 1,
-              Expressions.constant(outputFieldCount), null));
-      compactCode.add(exp);
-    } else {
-      compactOutputVar = null;
+    if (shouldGenerateCompactCode(outputFieldCount)) {
+      return joinSelectorCompact(joinType, physType, inputPhysTypes);
     }
 
-    int outputField = 0;
+    // A parameter for each input.
+    final List<ParameterExpression> parameters = new ArrayList<>();
+
+    // Generate all fields.
+    final List<Expression> expressions = new ArrayList<>();
     for (Ord<PhysType> ord : Ord.zip(inputPhysTypes)) {
       final PhysType inputPhysType =
           ord.e.makeNullable(joinType.generatesNullsOn(ord.i));
@@ -208,18 +188,6 @@ public class EnumUtils {
         break;
       }
       final int fieldCount = inputPhysType.getRowType().getFieldCount();
-      if (generateCompactCode) {
-        // use an array copy if possible
-        final Expression copyExpr =
-            Nullness.castNonNull(
-                inputPhysType.getFormat().copy(parameter, 
Nullness.castNonNull(compactOutputVar),
-                outputField, fieldCount));
-        compactCode.add(Expressions.statement(copyExpr));
-        outputField += fieldCount;
-        continue;
-      }
-
-      // otherwise access the fields individually
       for (int i = 0; i < fieldCount; i++) {
         Expression expression =
             inputPhysType.fieldReference(parameter, i,
@@ -234,37 +202,76 @@ public class EnumUtils {
         expressions.add(expression);
       }
     }
+    return Expressions.lambda(
+        Function2.class,
+        physType.record(expressions),
+        parameters);
+  }
 
-    if (generateCompactCode) {
-      compactCode.add(Nullness.castNonNull(compactOutputVar));
-
-      // This expression generates code of the form:
-      // new org.apache.calcite.linq4j.function.Function2() {
-      //   public String[] apply(org.apache.calcite.interpreter.Row left,
-      //       org.apache.calcite.interpreter.Row right) {
-      //     String[] outputArray = new String[left.length + right.length];
-      //     System.arraycopy(left.copyValues(), 0, outputArray, 0, 
left.length);
-      //     System.arraycopy(right.copyValues(), 0, outputArray, left.length, 
right.length);
-      //     return outputArray;
-      //   }
-      //   public String[] apply(Object left, Object right) {
-      //     return apply(
-      //         (org.apache.calcite.interpreter.Row) left,
-      //         (org.apache.calcite.interpreter.Row) right);
-      //   }
-      // }
-      // That is, it converts the left and right Row objects to Object[] using 
Row#copyValues()
-      // then writes each to an output Object[] using System.arraycopy()
-
-      return Expressions.lambda(
-          Function2.class,
-          compactCode.toBlock(),
-          parameters);
+  static boolean shouldGenerateCompactCode(int outputFieldCount) {
+    int compactCodeThreshold = JOIN_SELECTOR_COMPACT_CODE_THRESHOLD.value();
+    return compactCodeThreshold >= 0 && outputFieldCount >= 
compactCodeThreshold;
+  }
+
+  static Expression joinSelectorCompact(JoinRelType joinType, PhysType 
physType,
+      List<PhysType> inputPhysTypes) {
+    // A parameter for each input.
+    final List<ParameterExpression> parameters = new ArrayList<>();
+
+    // Generate all fields.
+    final int outputFieldCount = physType.getRowType().getFieldCount();
+
+    final BlockBuilder compactCode = new BlockBuilder();
+    // Even if the fields are all of the same type, they are always boxed,
+    // so we use an Object[] that is easier to match with the input arrays.
+    final ParameterExpression compactOutputVar =
+        Expressions.variable(Object[].class, "outputArray");
+    final DeclarationStatement exp =
+        Expressions.declare(
+            0, compactOutputVar, new NewArrayExpression(Object.class, 1,
+                Expressions.constant(outputFieldCount), null));
+    compactCode.add(exp);
+
+    int outputField = 0;
+    for (Ord<PhysType> ord : Ord.zip(inputPhysTypes)) {
+      final PhysType inputPhysType =
+          ord.e.makeNullable(joinType.generatesNullsOn(ord.i));
+      // If the parameter is an array we declare as Object[] because it
+      // needs to match the type of the array that will be returned
+      final Type parameterType = Types.isArray(inputPhysType.getJavaRowType())
+          ? Object[].class
+          : Primitive.box(inputPhysType.getJavaRowType());
+
+      final ParameterExpression parameter =
+          Expressions.parameter(parameterType, 
EnumUtils.LEFT_RIGHT.get(ord.i));
+      parameters.add(parameter);
+      if (outputField == outputFieldCount) {
+        // For instance, if semi-join needs to return just the left inputs
+        break;
+      }
+      final int fieldCount = inputPhysType.getRowType().getFieldCount();
+      // Delegate copying the row values to JavaRowFormat
+      final List<Statement> copyStatements =
+          Nullness.castNonNull(
+              inputPhysType.getFormat().copy(parameter, 
Nullness.castNonNull(compactOutputVar),
+                  outputField, fieldCount));
+      if (joinType.generatesNullsOn(ord.i)) {
+        // [CALCITE-6593] NPE when outer joining tables with many fields and 
unmatching rows
+        compactCode.add(
+            Expressions.ifThen(Expressions.notEqual(parameter, 
Expressions.constant(null)),
+                Expressions.block(copyStatements)));
+      } else {
+        for (Statement copyStatement : copyStatements) {
+          compactCode.add(copyStatement);
+        }
+      }
+      outputField += fieldCount;
     }
 
+    compactCode.add(Nullness.castNonNull(compactOutputVar));
     return Expressions.lambda(
         Function2.class,
-        physType.record(expressions),
+        compactCode.toBlock(),
         parameters);
   }
 
diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java
index a5c86b939c..cfbb2f7e4f 100644
--- 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java
+++ 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java
@@ -24,6 +24,7 @@ import org.apache.calcite.linq4j.tree.IndexExpression;
 import org.apache.calcite.linq4j.tree.MemberExpression;
 import org.apache.calcite.linq4j.tree.MethodCallExpression;
 import org.apache.calcite.linq4j.tree.ParameterExpression;
+import org.apache.calcite.linq4j.tree.Statement;
 import org.apache.calcite.linq4j.tree.Types;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.runtime.FlatLists;
@@ -34,9 +35,11 @@ import org.apache.calcite.util.BuiltInMethod;
 import org.checkerframework.checker.nullness.qual.Nullable;
 
 import java.lang.reflect.Type;
+import java.util.ArrayList;
 import java.util.List;
 
 import static org.apache.calcite.util.BuiltInMethod.ARRAY_COPY;
+import static org.apache.calcite.util.BuiltInMethod.LIST_TO_ARRAY;
 import static org.apache.calcite.util.BuiltInMethod.ROW_COPY_VALUES;
 
 /**
@@ -80,6 +83,24 @@ public enum JavaRowFormat {
         return Expressions.field(expression, Types.nthField(field, type));
       }
     }
+
+    @Override public List<Statement> copy(ParameterExpression parameter,
+        ParameterExpression outputArray, int outputStartIndex, int length) {
+      // Parameter holds an expression representing a POJO Object
+      // Results in:
+      // outputArray[outputStartIndex] = parameter.field{1};
+      // ...
+      // outputArray[outputStartIndex + length - 1] = parameter.field{length - 
1};
+      final List<Statement> statements = new ArrayList<>(length);
+      for (int i = 0; i < length; i++) {
+        statements.add(
+            Expressions.statement(
+                Expressions.assign(
+            Expressions.arrayIndex(outputArray, 
Expressions.constant(outputStartIndex + i)),
+            field(parameter, i, null, Object.class))));
+      }
+      return statements;
+    }
   },
 
   SCALAR {
@@ -111,6 +132,19 @@ public enum JavaRowFormat {
       assert field == 0;
       return expression;
     }
+
+    @Override public List<Statement> copy(ParameterExpression parameter,
+        ParameterExpression outputArray, int outputStartIndex, int length) {
+      // Parameter holds an expression representing a scalar Object
+      // Results in:
+      // outputArray[outputStartIndex] = parameter;
+      assert length == 1;
+      return FlatLists.of(
+          Expressions.statement(
+              Expressions.assign(
+              Expressions.arrayIndex(outputArray,
+                  Expressions.constant(outputStartIndex)), parameter)));
+    }
   },
 
   /** A list that is comparable and immutable. Useful for records with 0 fields
@@ -199,6 +233,18 @@ public enum JavaRowFormat {
       }
       return EnumUtils.convert(e, fromType, fieldType);
     }
+
+    @Override public List<Statement> copy(ParameterExpression parameter,
+        ParameterExpression outputArray, int outputStartIndex, int length) {
+      // Parameter holds an expression representing a List
+      // Results in:
+      // System.arraycopy(parameter.toArray(), 0, outputArray, 
outputStartIndex, length);
+      return FlatLists.of(
+          Expressions.statement(
+              Expressions.call(ARRAY_COPY.method, Expressions.call(parameter, 
LIST_TO_ARRAY.method),
+              Expressions.constant(0), outputArray, 
Expressions.constant(outputStartIndex),
+              Expressions.constant(length))));
+    }
   },
 
   /**
@@ -230,9 +276,17 @@ public enum JavaRowFormat {
       return EnumUtils.convert(e, fromType, fieldType);
     }
 
-    @Override public Expression fieldDynamic(Expression expression, Expression 
field) {
-      return Expressions.call(expression,
-          BuiltInMethod.ROW_VALUE.method, Expressions.constant(field));
+    @Override public List<Statement> copy(ParameterExpression parameter,
+      ParameterExpression outputArray, int outputStartIndex, int length) {
+      // Parameter holds an expression representing a 
org.apache.calcite.interpreter.Row
+      // Results in:
+      // System.arraycopy(parameter.copyValues(), 0, outputArray, 
outputStartIndex, length);
+      return FlatLists.of(
+          Expressions.statement(
+          Expressions.call(ARRAY_COPY.method,
+              Expressions.call(Object[].class, parameter, 
ROW_COPY_VALUES.method),
+              Expressions.constant(0), outputArray, 
Expressions.constant(outputStartIndex),
+              Expressions.constant(length))));
     }
   },
 
@@ -266,21 +320,15 @@ public enum JavaRowFormat {
       return EnumUtils.convert(e, fromType, fieldType);
     }
 
-    @Override public Expression fieldDynamic(Expression expression, Expression 
field) {
-      return Expressions.arrayIndex(expression, field);
-    }
-
-    @Override public Expression setFieldDynamic(Expression expression, 
Expression field,
-        Expression value) {
-      final IndexExpression e =
-          Expressions.arrayIndex(expression, Expressions.constant(field));
-      return Expressions.assign(e, value);
-    }
-
-    @Override public Expression copy(ParameterExpression parameter,
+    @Override public List<Statement> copy(ParameterExpression parameter,
         ParameterExpression outputArray, int outputStartIndex, int length) {
-      return Expressions.call(ARRAY_COPY.method, parameter, 
Expressions.constant(0),
-          outputArray, Expressions.constant(outputStartIndex), 
Expressions.constant(length));
+      // Parameter holds an expression representing an Object[]
+      // Results in:
+      // System.arraycopy(parameter, 0, outputArray, outputStartIndex, length);
+      return FlatLists.of(
+          Expressions.statement(
+          Expressions.call(ARRAY_COPY.method, parameter, 
Expressions.constant(0),
+              outputArray, Expressions.constant(outputStartIndex), 
Expressions.constant(length))));
     }
   };
 
@@ -328,32 +376,9 @@ public enum JavaRowFormat {
   public abstract Expression field(Expression expression, int field,
       @Nullable Type fromType, Type fieldType);
 
-  /**
-   * Similar to {@link #field(Expression, int, Type, Type)}, where the field 
index is determined
-   * dynamically at runtime.
-   */
-  public Expression fieldDynamic(Expression expression, Expression field) {
-    throw new UnsupportedOperationException(this.toString());
-  }
-
-  public Expression setFieldDynamic(Expression expression, Expression field, 
Expression value) {
-    throw new UnsupportedOperationException(this.toString());
-  }
-
   /**
    * Returns an expression that copies the fields of a row of this type to the 
array.
    */
-  public @Nullable Expression copy(ParameterExpression parameter,
-      ParameterExpression outputArray, int outputStartIndex, int length) {
-    // Note: parameter holds an expression representing a 
org.apache.calcite.interpreter.Row.
-
-    // Copy the Row as an Object[].
-    final Expression rowParameterAsArrayExpression =
-        Expressions.call(Object[].class, parameter, ROW_COPY_VALUES.method);
-
-    // Use System.arraycopy() with the contents of the Row as the source.
-    return Expressions.call(ARRAY_COPY.method, rowParameterAsArrayExpression,
-        Expressions.constant(0), outputArray, 
Expressions.constant(outputStartIndex),
-        Expressions.constant(length));
-  }
+  public abstract List<Statement> copy(ParameterExpression parameter,
+      ParameterExpression outputArray, int outputStartIndex, int length);
 }
diff --git 
a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java 
b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
index 1cd9eef069..4fdd7c79fd 100644
--- a/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
+++ b/core/src/main/java/org/apache/calcite/config/CalciteSystemProperty.java
@@ -420,6 +420,23 @@ public final class CalciteSystemProperty<T> {
   public static final CalciteSystemProperty<Integer> 
FUNCTION_LEVEL_CACHE_MAX_SIZE =
       intProperty("calcite.function.cache.maxSize", 1_000, v -> v >= 0);
 
+  /**
+   * Minimum numbers of fields in a Join result that will trigger the "compact 
code generation".
+   * This feature reduces the risk of running into a compilation error due to 
the code of a
+   * dynamically generated method growing beyond the 64KB limit.
+   *
+   * <p>Note that the compact code makes use of arraycopy operations when 
possible,
+   * instead of using a static array initialization. For joins with a large 
number of fields
+   * the resulting code should be faster, but it can be slower for joins with 
a very small number
+   * of fields.
+   *
+   * <p>The default value is 100, a negative value disables completely the 
"compact code" feature.
+   *
+   * @see org.apache.calcite.adapter.enumerable.EnumUtils
+   */
+  public static final CalciteSystemProperty<Integer> 
JOIN_SELECTOR_COMPACT_CODE_THRESHOLD =
+      intProperty("calcite.join.selector.compact.code.threshold", 100);
+
   private static CalciteSystemProperty<Boolean> booleanProperty(String key,
       boolean defaultValue) {
     // Note that "" -> true (convenient for command-lines flags like '-Dflag')
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java 
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index a923fcd842..6284846d97 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -369,6 +369,7 @@ public enum BuiltInMethod {
   COLLECTION_RETAIN_ALL(Collection.class, "retainAll", Collection.class),
   LIST_CONTAINS(List.class, "contains", Object.class),
   LIST_GET(List.class, "get", int.class),
+  LIST_TO_ARRAY(List.class, "toArray"),
   ITERATOR_HAS_NEXT(Iterator.class, "hasNext"),
   ITERATOR_NEXT(Iterator.class, "next"),
   MATH_MAX(Math.class, "max", int.class, int.class),
diff --git 
a/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java 
b/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java
index b0a2f03d67..8a4d19f4e5 100644
--- a/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java
+++ b/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java
@@ -16,7 +16,6 @@
  */
 package org.apache.calcite.test;
 
-import org.apache.calcite.adapter.enumerable.EnumerableRules;
 import org.apache.calcite.adapter.java.AbstractQueryableTable;
 import org.apache.calcite.config.CalciteConnectionProperty;
 import org.apache.calcite.config.Lex;
@@ -26,10 +25,8 @@ import org.apache.calcite.linq4j.Enumerable;
 import org.apache.calcite.linq4j.Linq4j;
 import org.apache.calcite.linq4j.QueryProvider;
 import org.apache.calcite.linq4j.Queryable;
-import org.apache.calcite.plan.RelOptPlanner;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeFactory;
-import org.apache.calcite.runtime.Hook;
 import org.apache.calcite.schema.QueryableTable;
 import org.apache.calcite.schema.Schema;
 import org.apache.calcite.schema.SchemaPlus;
@@ -46,10 +43,12 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.function.BiConsumer;
-import java.util.function.Consumer;
 import java.util.function.Function;
 
+import static 
org.apache.calcite.config.CalciteSystemProperty.JOIN_SELECTOR_COMPACT_CODE_THRESHOLD;
+
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
@@ -85,7 +84,7 @@ public class LargeGeneratedJoinTest {
     };
   }
 
-  private static QueryableTable tab(int fieldCount) {
+  private static QueryableTable tab(String table, int fieldCount) {
     List<Row> lRow = new ArrayList<>();
     for (int r = 0; r < 2; r++) {
       Object[] current = new Object[fieldCount];
@@ -97,7 +96,7 @@ public class LargeGeneratedJoinTest {
 
     List<FieldT> fields = new ArrayList<>();
     for (int i = 0; i < fieldCount; i++) {
-      fields.add(field("F_" + i));
+      fields.add(field(table + "_F_" + i));
     }
 
     final Enumerable<?> enumerable = Linq4j.asEnumerable(lRow);
@@ -114,65 +113,220 @@ public class LargeGeneratedJoinTest {
     };
   }
 
-  @Test public void test() throws SQLException {
+  private static int getBaseTableSize() {
+    // If compact code generation is turned off, we generate tables that
+    // will cause the issue. Otherwise, to avoid impacting the test duration,
+    // we only generate tables wide enough to enable the compact code 
generation.
+    int compactCodeThreshold = JOIN_SELECTOR_COMPACT_CODE_THRESHOLD.value();
+    return compactCodeThreshold < 0 ? 3000 : Math.max(100, 
compactCodeThreshold);
+  }
+
+  private static int getT0Size() {
+    return getBaseTableSize();
+  }
+  private static int getT1Size() {
+    return getBaseTableSize() + 1;
+  }
+
+  private static CalciteAssert.AssertQuery assertQuery(String sql) {
     Schema rootSchema = new AbstractSchema() {
       @Override protected Map<String, Table> getTableMap() {
-        return ImmutableMap.of("T0", tab(100),
-            "T1", tab(101));
+        return ImmutableMap.of("T0", tab("T0", getT0Size()),
+            "T1", tab("T1", getT1Size()));
       }
     };
 
     final CalciteSchema sp = CalciteSchema.createRootSchema(false, true);
     sp.add("ROOT", rootSchema);
 
+    final CalciteAssert.AssertThat ca = CalciteAssert.that()
+        .with(CalciteConnectionProperty.LEX, Lex.JAVA)
+        .withSchema("ROOT", rootSchema)
+        .withDefaultSchema("ROOT");
+
+    return ca.query(sql);
+  }
+
+  @Test public void test() {
     String sql = "SELECT * \n"
         + "FROM ROOT.T0 \n"
         + "JOIN ROOT.T1 \n"
         + "ON TRUE";
 
-    sql = "select F_0||F_1, * from (" + sql + ")";
+    sql = "select T0_F_0||T0_F_1, * from (" + sql + ")";
+
+    final CalciteAssert.AssertQuery query = assertQuery(sql);
+    query.returns(rs -> {
+      try {
+        assertTrue(rs.next());
+        assertEquals(1 + getT0Size() + getT1Size(), 
rs.getMetaData().getColumnCount());
+        long row = 0;
+        do {
+          ++row;
+          for (int i = 1; i <= rs.getMetaData().getColumnCount(); ++i) {
+            // Rows have the format: v0v1, v0, v1, v2, ..., v99, v0, v1, v2, 
..., v99, v100
+            if (i == 1) {
+              assertEquals("v0v1", rs.getString(i),
+                  "Error at row: " + row + ", column: " + i);
+            } else if (i <= getT0Size() + 1) {
+              assertEquals("v" + (i - 2), rs.getString(i),
+                  "Error at row: " + row + ", column: " + i);
+            } else {
+              assertEquals("v" + ((i - 2) - getT0Size()), rs.getString(i),
+                  "Error at row: " + row + ", column: " + i);
+            }
+          }
+        } while (rs.next());
+        assertEquals(4, row);
+      } catch (SQLException e) {
+        throw new RuntimeException(e);
+      }
+    });
+  }
 
+  /**
+   * Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-6593";>[CALCITE-6593]
+   * NPE when outer joining tables with many fields and unmatching rows</a>.
+   */
+  @Test public void testLeftJoinWithEmptyRightSide() {
+    String sql = "SELECT * \n"
+        + "FROM ROOT.T0 \n"
+        + "LEFT JOIN (SELECT * FROM ROOT.T1 WHERE T1_F_0 = 'xyz') \n"
+        + "ON TRUE";
 
-    final CalciteAssert.AssertThat ca = CalciteAssert.that()
-        .with(CalciteConnectionProperty.LEX, Lex.JAVA)
-        .withSchema("ROOT", rootSchema)
-        .withDefaultSchema("ROOT");
+    sql = "select T0_F_0||T0_F_1, * from (" + sql + ")";
 
-    final CalciteAssert.AssertQuery query = ca.query(sql);
-    query.withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) pl -> {
-      pl.removeRule(EnumerableRules.ENUMERABLE_CORRELATE_RULE);
-      pl.addRule(EnumerableRules.ENUMERABLE_BATCH_NESTED_LOOP_JOIN_RULE);
+    final CalciteAssert.AssertQuery query = assertQuery(sql);
+    query.returns(rs -> {
+      try {
+        assertTrue(rs.next());
+        assertEquals(1 + getT0Size() + getT1Size(), 
rs.getMetaData().getColumnCount());
+        long row = 0;
+        do {
+          ++row;
+          for (int i = 1; i <= rs.getMetaData().getColumnCount(); ++i) {
+            // Rows have the format: v0v1, v0, v1, v2, ..., v99, null, ..., 
null
+            if (i == 1) {
+              assertEquals("v0v1", rs.getString(i),
+                  "Error at row: " + row + ", column: " + i);
+            } else if (i <= getT0Size() + 1) {
+              assertEquals("v" + (i - 2), rs.getString(i),
+                  "Error at row: " + row + ", column: " + i);
+            } else {
+              assertNull(rs.getString(i),
+                  "Error at row: " + row + ", column: " + i);
+            }
+          }
+        } while (rs.next());
+        assertEquals(2, row);
+      } catch (SQLException e) {
+        throw new RuntimeException(e);
+      }
     });
+  }
 
-    try {
-      query.returns(rs -> {
-        try {
-          assertTrue(rs.next());
-          assertEquals(101 + 100 + 1, rs.getMetaData().getColumnCount());
-          long row = 0;
-          do {
-            ++row;
-            for (int i = 1; i <= rs.getMetaData().getColumnCount(); ++i) {
-              // Rows have the format: v0v1, v0, v1, v2, ..., v99, v0, v1, v2, 
..., v99, v100
+  /**
+   * Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-6593";>[CALCITE-6593]
+   * NPE when outer joining tables with many fields and unmatching rows</a>.
+   */
+  @Test public void testRightJoinWithEmptyLeftSide() {
+    String sql = "SELECT * \n"
+        + "FROM (SELECT * FROM ROOT.T0 WHERE T0_F_0 = 'xyz') \n"
+        + "RIGHT JOIN ROOT.T1 \n"
+        + "ON TRUE";
+
+    sql = "select T1_F_0||T1_F_1, * from (" + sql + ")";
+
+    final CalciteAssert.AssertQuery query = assertQuery(sql);
+    query.returns(rs -> {
+      try {
+        assertTrue(rs.next());
+        assertEquals(1 + getT0Size() + getT1Size(), 
rs.getMetaData().getColumnCount());
+        long row = 0;
+        do {
+          ++row;
+          for (int i = 1; i <= rs.getMetaData().getColumnCount(); ++i) {
+            // Rows have the format: v0v1, null, ..., null, v0, v1, v2, ..., 
v100
+            if (i == 1) {
+              assertEquals("v0v1", rs.getString(i),
+                  "Error at row: " + row + ", column: " + i);
+            } else if (i <= getT0Size() + 1) {
+              assertNull(rs.getString(i),
+                  "Error at row: " + row + ", column: " + i);
+            } else {
+              assertEquals("v" + (i - 2 - getT0Size()), rs.getString(i),
+                  "Error at row: " + row + ", column: " + i);
+            }
+          }
+        } while (rs.next());
+        assertEquals(2, row);
+      } catch (SQLException e) {
+        throw new RuntimeException(e);
+      }
+    });
+  }
+
+  /**
+   * Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-6593";>[CALCITE-6593]
+   * NPE when outer joining tables with many fields and unmatching rows</a>.
+   */
+  @Test public void testFullJoinWithUnmatchedRows() {
+    String sql = "SELECT * \n"
+        + "FROM ROOT.T0 \n"
+        + "FULL JOIN ROOT.T1 \n"
+        + "ON T0_F_0 <> T1_F_0";
+
+    sql = "select T0_F_0||T0_F_1, T1_F_0||T1_F_1, * from (" + sql + ")";
+
+    final CalciteAssert.AssertQuery query = assertQuery(sql);
+    query.returns(rs -> {
+      try {
+        assertTrue(rs.next());
+        assertEquals(1 + 1 + getT0Size() + getT1Size(), 
rs.getMetaData().getColumnCount());
+        long row = 0;
+        do {
+          ++row;
+          for (int i = 1; i <= rs.getMetaData().getColumnCount(); ++i) {
+            if (row <= 2) {
+              // First 2 rows have the format: v0v1, null, v0, v1, v2, ..., 
v99, null, ..., null
+              if (i == 1) {
+                assertEquals("v0v1", rs.getString(i),
+                    "Error at row: " + row + ", column: " + i);
+              } else if (i == 2) {
+                assertNull(rs.getString(i),
+                    "Error at row: " + row + ", column: " + i);
+              } else if (i <= getT0Size() + 2) {
+                assertEquals("v" + (i - 3), rs.getString(i),
+                    "Error at row: " + row + ", column: " + i);
+              } else {
+                assertNull(rs.getString(i),
+                    "Error at row: " + row + ", column: " + i);
+              }
+            } else {
+              // Last 2 rows have the format: null, v0v1, null, ..., null, v0, 
v1, v2, ..., v100
               if (i == 1) {
+                assertNull(rs.getString(i),
+                    "Error at row: " + row + ", column: " + i);
+              } else if (i == 2) {
                 assertEquals("v0v1", rs.getString(i),
                     "Error at row: " + row + ", column: " + i);
-              } else if (i == rs.getMetaData().getColumnCount()) {
-                assertEquals("v100", rs.getString(i),
+              } else if (i <= getT0Size() + 2) {
+                assertNull(rs.getString(i),
                     "Error at row: " + row + ", column: " + i);
               } else {
-                assertEquals("v" + ((i - 2) % 100), rs.getString(i),
+                assertEquals("v" + (i - 3 - getT0Size()), rs.getString(i),
                     "Error at row: " + row + ", column: " + i);
               }
             }
-          } while (rs.next());
-          assertEquals(4, row);
-        } catch (SQLException e) {
-          throw new RuntimeException(e);
-        }
-      });
-    } catch (RuntimeException ex) {
-      throw (SQLException) ex.getCause();
-    }
+          }
+        } while (rs.next());
+        assertEquals(4, row);
+      } catch (SQLException e) {
+        throw new RuntimeException(e);
+      }
+    });
   }
 }

Reply via email to