This is an automated email from the ASF dual-hosted git repository.
rubenql pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 1e424a0d4e [CALCITE-3094] Code of method grows beyond 64 KB when
joining two tables with many fields
1e424a0d4e is described below
commit 1e424a0d4eeec69e4da0a504cf080cf7a847704d
Author: James Duong <[email protected]>
AuthorDate: Fri Jun 14 16:25:16 2024 -0700
[CALCITE-3094] Code of method grows beyond 64 KB when joining two tables
with many fields
Change code generation such that when implementing joins across many fields
up to a certain
threshold, populate the output array using System.arraycopy() instead of
explicitly
instantiating an array with a large number of elements.
---
.../calcite/adapter/enumerable/EnumUtils.java | 72 ++++++++
.../calcite/adapter/enumerable/JavaRowFormat.java | 55 +++++++
.../java/org/apache/calcite/interpreter/Row.java | 2 +
.../org/apache/calcite/util/BuiltInMethod.java | 3 +
.../calcite/test/LargeGeneratedJoinTest.java | 182 +++++++++++++++++++++
5 files changed, 314 insertions(+)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
index 7382beeeec..06043a567c 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumUtils.java
@@ -22,6 +22,7 @@ import org.apache.calcite.linq4j.AbstractEnumerable;
import org.apache.calcite.linq4j.Enumerable;
import org.apache.calcite.linq4j.Enumerator;
import org.apache.calcite.linq4j.JoinType;
+import org.apache.calcite.linq4j.Nullness;
import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.linq4j.function.Function1;
import org.apache.calcite.linq4j.function.Function2;
@@ -30,12 +31,14 @@ import org.apache.calcite.linq4j.tree.BlockBuilder;
import org.apache.calcite.linq4j.tree.BlockStatement;
import org.apache.calcite.linq4j.tree.ConstantExpression;
import org.apache.calcite.linq4j.tree.ConstantUntypedNull;
+import org.apache.calcite.linq4j.tree.DeclarationStatement;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.ExpressionType;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.linq4j.tree.FunctionExpression;
import org.apache.calcite.linq4j.tree.MethodCallExpression;
import org.apache.calcite.linq4j.tree.MethodDeclaration;
+import org.apache.calcite.linq4j.tree.NewArrayExpression;
import org.apache.calcite.linq4j.tree.ParameterExpression;
import org.apache.calcite.linq4j.tree.Primitive;
import org.apache.calcite.linq4j.tree.Types;
@@ -61,6 +64,7 @@ import com.google.common.collect.ImmutableMap;
import org.checkerframework.checker.nullness.qual.Nullable;
+import java.lang.reflect.Array;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.lang.reflect.Type;
@@ -160,6 +164,34 @@ public class EnumUtils {
// Generate all fields.
final List<Expression> expressions = new ArrayList<>();
final int outputFieldCount = physType.getRowType().getFieldCount();
+
+ // If there are many output fields, create the output dynamically so that
the code size stays
+ // below the limit. See CALCITE-3094.
+ final boolean generateCompactCode = outputFieldCount >= 100;
+ final ParameterExpression compactOutputVar;
+ final BlockBuilder compactCode = new BlockBuilder();
+ if (generateCompactCode) {
+ Class<?> fieldClass = physType.fieldClass(0);
+ // If all fields have the same type, use the specific type. Otherwise
just use Object.
+ for (int fieldIndex = 1; fieldIndex < outputFieldCount; ++fieldIndex) {
+ if (fieldClass != physType.fieldClass(fieldIndex)) {
+ fieldClass = Object.class;
+ break;
+ }
+ }
+
+ final Class<?> arrayClass = Array.newInstance(fieldClass, 0).getClass();
+ compactOutputVar = Expressions.variable(arrayClass, "outputArray");
+ final DeclarationStatement exp =
+ Expressions.declare(
+ 0, compactOutputVar, new NewArrayExpression(fieldClass, 1,
+ Expressions.constant(outputFieldCount), null));
+ compactCode.add(exp);
+ } else {
+ compactOutputVar = null;
+ }
+
+ int outputField = 0;
for (Ord<PhysType> ord : Ord.zip(inputPhysTypes)) {
final PhysType inputPhysType =
ord.e.makeNullable(joinType.generatesNullsOn(ord.i));
@@ -175,6 +207,18 @@ public class EnumUtils {
break;
}
final int fieldCount = inputPhysType.getRowType().getFieldCount();
+ if (generateCompactCode) {
+ // use an array copy if possible
+ final Expression copyExpr =
+ Nullness.castNonNull(
+ inputPhysType.getFormat().copy(parameter,
Nullness.castNonNull(compactOutputVar),
+ outputField, fieldCount));
+ compactCode.add(Expressions.statement(copyExpr));
+ outputField += fieldCount;
+ continue;
+ }
+
+ // otherwise access the fields individually
for (int i = 0; i < fieldCount; i++) {
Expression expression =
inputPhysType.fieldReference(parameter, i,
@@ -189,6 +233,34 @@ public class EnumUtils {
expressions.add(expression);
}
}
+
+ if (generateCompactCode) {
+ compactCode.add(Nullness.castNonNull(compactOutputVar));
+
+ // This expression generates code of the form:
+ // new org.apache.calcite.linq4j.function.Function2() {
+ // public String[] apply(org.apache.calcite.interpreter.Row left,
+ // org.apache.calcite.interpreter.Row right) {
+ // String[] outputArray = new String[left.length + right.length];
+ // System.arraycopy(left.copyValues(), 0, outputArray, 0,
left.length);
+ // System.arraycopy(right.copyValues(), 0, outputArray, left.length,
right.length);
+ // return outputArray;
+ // }
+ // public String[] apply(Object left, Object right) {
+ // return apply(
+ // (org.apache.calcite.interpreter.Row) left,
+ // (org.apache.calcite.interpreter.Row) right);
+ // }
+ // }
+ // That is, it converts the left and right Row objects to Object[] using
Row#copyValues()
+ // then writes each to an output Object[] using System.arraycopy()
+
+ return Expressions.lambda(
+ Function2.class,
+ compactCode.toBlock(),
+ parameters);
+ }
+
return Expressions.lambda(
Function2.class,
physType.record(expressions),
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java
index 4810b04dea..40c09cbc86 100644
---
a/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/JavaRowFormat.java
@@ -23,6 +23,7 @@ import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.linq4j.tree.IndexExpression;
import org.apache.calcite.linq4j.tree.MemberExpression;
import org.apache.calcite.linq4j.tree.MethodCallExpression;
+import org.apache.calcite.linq4j.tree.ParameterExpression;
import org.apache.calcite.linq4j.tree.Types;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.runtime.FlatLists;
@@ -35,6 +36,9 @@ import org.checkerframework.checker.nullness.qual.Nullable;
import java.lang.reflect.Type;
import java.util.List;
+import static org.apache.calcite.util.BuiltInMethod.ARRAY_COPY;
+import static org.apache.calcite.util.BuiltInMethod.ROW_COPY_VALUES;
+
/**
* How a row is represented as a Java value.
*/
@@ -225,6 +229,11 @@ public enum JavaRowFormat {
}
return EnumUtils.convert(e, fromType, fieldType);
}
+
+ @Override public Expression fieldDynamic(Expression expression, Expression
field) {
+ return Expressions.call(expression,
+ BuiltInMethod.ROW_VALUE.method, Expressions.constant(field));
+ }
},
ARRAY {
@@ -256,6 +265,23 @@ public enum JavaRowFormat {
}
return EnumUtils.convert(e, fromType, fieldType);
}
+
+ @Override public Expression fieldDynamic(Expression expression, Expression
field) {
+ return Expressions.arrayIndex(expression, field);
+ }
+
+ @Override public Expression setFieldDynamic(Expression expression,
Expression field,
+ Expression value) {
+ final IndexExpression e =
+ Expressions.arrayIndex(expression, Expressions.constant(field));
+ return Expressions.assign(e, value);
+ }
+
+ @Override public @Nullable Expression copy(ParameterExpression parameter,
+ ParameterExpression outputArray, int outputStartIndex, int length) {
+ return Expressions.call(ARRAY_COPY.method, parameter,
Expressions.constant(0),
+ outputArray, Expressions.constant(outputStartIndex),
Expressions.constant(length));
+ }
};
public JavaRowFormat optimize(RelDataType rowType) {
@@ -301,4 +327,33 @@ public enum JavaRowFormat {
*/
public abstract Expression field(Expression expression, int field,
@Nullable Type fromType, Type fieldType);
+
+ /**
+ * Similar to {@link #field(Expression, int, Type, Type)}, where the field
index is determined
+ * dynamically at runtime.
+ */
+ public Expression fieldDynamic(Expression expression, Expression field) {
+ throw new UnsupportedOperationException(this.toString());
+ }
+
+ public Expression setFieldDynamic(Expression expression, Expression field,
Expression value) {
+ throw new UnsupportedOperationException(this.toString());
+ }
+
+ /**
+ * Returns an expression that copies the fields of a row of this type to the
array.
+ */
+ public @Nullable Expression copy(ParameterExpression parameter,
+ ParameterExpression outputArray, int outputStartIndex, int length) {
+ // Note: parameter holds an expression representing a
org.apache.calcite.interpreter.Row.
+
+ // Copy the Row as an Object[].
+ final Expression rowParameterAsArrayExpression =
+ Expressions.call(Object[].class, parameter, ROW_COPY_VALUES.method);
+
+ // Use System.arraycopy() with the contents of the Row as the source.
+ return Expressions.call(ARRAY_COPY.method, rowParameterAsArrayExpression,
+ Expressions.constant(0), outputArray,
Expressions.constant(outputStartIndex),
+ Expressions.constant(length));
+ }
}
diff --git a/core/src/main/java/org/apache/calcite/interpreter/Row.java
b/core/src/main/java/org/apache/calcite/interpreter/Row.java
index 1d35308c7b..073851d3b7 100644
--- a/core/src/main/java/org/apache/calcite/interpreter/Row.java
+++ b/core/src/main/java/org/apache/calcite/interpreter/Row.java
@@ -86,6 +86,8 @@ public class Row {
}
/** Returns a copy of the values. */
+ // Note: This implements BuiltInMethod.ROW_COPY_VALUES.
+ @SuppressWarnings("unused")
public @Nullable Object[] copyValues() {
return values.clone();
}
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index 7b144167da..0e0a3e5390 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -183,6 +183,7 @@ public enum BuiltInMethod {
JDBC_SCHEMA_DATA_SOURCE(JdbcSchema.class, "getDataSource"),
ROW_VALUE(Row.class, "getObject", int.class),
ROW_AS_COPY(Row.class, "asCopy", Object[].class),
+ ROW_COPY_VALUES(Row.class, "copyValues"), // This is an instance method that
returns an Object[].
RESULT_SET_ENUMERABLE_SET_TIMEOUT(ResultSetEnumerable.class, "setTimeout",
DataContext.class),
RESULT_SET_ENUMERABLE_OF(ResultSetEnumerable.class, "of", DataSource.class,
@@ -272,6 +273,8 @@ public enum BuiltInMethod {
FUNCTION1_APPLY(Function1.class, "apply", Object.class),
ARRAYS_AS_LIST(Arrays.class, "asList", Object[].class),
ARRAY(SqlFunctions.class, "array", Object[].class),
+ ARRAY_COPY(System.class, "arraycopy", Object.class, int.class, Object.class,
int.class,
+ int.class),
// class PairList.Helper is deprecated to discourage code from calling its
// methods directly, but use via Janino code generation is just fine.
@SuppressWarnings("deprecation")
diff --git
a/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java
b/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java
new file mode 100644
index 0000000000..b0365d1aff
--- /dev/null
+++ b/core/src/test/java/org/apache/calcite/test/LargeGeneratedJoinTest.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.test;
+
+import org.apache.calcite.adapter.enumerable.EnumerableRules;
+import org.apache.calcite.adapter.java.AbstractQueryableTable;
+import org.apache.calcite.config.CalciteConnectionProperty;
+import org.apache.calcite.config.Lex;
+import org.apache.calcite.interpreter.Row;
+import org.apache.calcite.jdbc.CalciteSchema;
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.Linq4j;
+import org.apache.calcite.linq4j.QueryProvider;
+import org.apache.calcite.linq4j.Queryable;
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.runtime.Hook;
+import org.apache.calcite.schema.QueryableTable;
+import org.apache.calcite.schema.Schema;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.schema.Table;
+import org.apache.calcite.schema.impl.AbstractSchema;
+import org.apache.calcite.sql.parser.SqlParseException;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.tools.RelConversionException;
+import org.apache.calcite.tools.ValidationException;
+
+import com.google.common.collect.ImmutableMap;
+
+import org.junit.jupiter.api.Test;
+
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
+import java.util.function.Function;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-3094">[CALCITE-3094]
+ * Code of method grows beyond 64 KB when joining two tables with many
fields</a>.
+ */
+public class LargeGeneratedJoinTest {
+
+ /**
+ * Marker interface for Field.
+ */
+ interface FieldT extends BiConsumer<RelDataTypeFactory,
RelDataTypeFactory.Builder> {
+ }
+
+ /**
+ * Marker interface for Row.
+ */
+ interface RowT extends Function<RelDataTypeFactory, RelDataType> {
+ }
+
+ static FieldT field(String name) {
+ return (tf, b) -> b.add(name, SqlTypeName.VARCHAR);
+ }
+
+ static RowT row(FieldT... fields) {
+ return tf -> {
+ RelDataTypeFactory.Builder builder = tf.builder();
+ for (FieldT f : fields) {
+ f.accept(tf, builder);
+ }
+ return builder.build();
+ };
+ }
+
+ private static QueryableTable tab(int fieldCount) {
+ List<Row> lRow = new ArrayList<>();
+ for (int r = 0; r < 2; r++) {
+ Object[] current = new Object[fieldCount];
+ for (int i = 0; i < fieldCount; i++) {
+ current[i] = "v" + i;
+ }
+ lRow.add(Row.of(current));
+ }
+
+ List<FieldT> fields = new ArrayList<>();
+ for (int i = 0; i < fieldCount; i++) {
+ fields.add(field("F_" + i));
+ }
+
+ final Enumerable<?> enumerable = Linq4j.asEnumerable(lRow);
+ return new AbstractQueryableTable(Row.class) {
+
+ @Override public RelDataType getRowType(RelDataTypeFactory typeFactory) {
+ return row(fields.toArray(new FieldT[fieldCount])).apply(typeFactory);
+ }
+
+ @Override public <T> Queryable<T> asQueryable(QueryProvider
queryProvider, SchemaPlus schema,
+ String tableName) {
+ return (Queryable<T>) enumerable.asQueryable();
+ }
+ };
+ }
+
+ @Test public void test() throws SqlParseException, RelConversionException,
ValidationException,
+ SQLException {
+ Schema rootSchema = new AbstractSchema() {
+ @Override protected Map<String, Table> getTableMap() {
+ return ImmutableMap.of("T0", tab(100),
+ "T1", tab(101));
+ }
+ };
+
+ final CalciteSchema sp = CalciteSchema.createRootSchema(false, true);
+ sp.add("ROOT", rootSchema);
+
+ String sql = "SELECT * \n"
+ + "FROM ROOT.T0 \n"
+ + "JOIN ROOT.T1 \n"
+ + "ON TRUE";
+
+ sql = "select F_0||F_1, * from (" + sql + ")";
+
+
+ final CalciteAssert.AssertThat ca = CalciteAssert.that()
+ .with(CalciteConnectionProperty.LEX, Lex.JAVA)
+ .withSchema("ROOT", rootSchema)
+ .withDefaultSchema("ROOT");
+
+ final CalciteAssert.AssertQuery query = ca.query(sql);
+ query.withHook(Hook.PLANNER, (Consumer<RelOptPlanner>) pl -> {
+ pl.removeRule(EnumerableRules.ENUMERABLE_CORRELATE_RULE);
+ pl.addRule(EnumerableRules.ENUMERABLE_BATCH_NESTED_LOOP_JOIN_RULE);
+ });
+
+ try {
+ query.returns(rs -> {
+ try {
+ assertTrue(rs.next());
+ assertEquals(101 + 100 + 1, rs.getMetaData().getColumnCount());
+ long row = 0;
+ do {
+ ++row;
+ for (int i = 1; i <= rs.getMetaData().getColumnCount(); ++i) {
+ // Rows have the format: v0v1, v0, v1, v2, ..., v99, v0, v1, v2,
..., v99, v100
+ if (i == 1) {
+ assertEquals("v0v1", rs.getString(i),
+ "Error at row: " + row + ", column: " + i);
+ } else if (i == rs.getMetaData().getColumnCount()) {
+ assertEquals("v100", rs.getString(i),
+ "Error at row: " + row + ", column: " + i);
+ } else {
+ assertEquals("v" + ((i - 2) % 100), rs.getString(i),
+ "Error at row: " + row + ", column: " + i);
+ }
+ }
+ } while (rs.next());
+ assertEquals(4, row);
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ } catch (RuntimeException ex) {
+ throw (SQLException) ex.getCause();
+ }
+ }
+}