This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 8d74bfd18 IMPALA-13520: Support in clause coercing
8d74bfd18 is described below
commit 8d74bfd18cbdd63f2497fc007ffaa4ee0fadc270
Author: Steve Carlin <[email protected]>
AuthorDate: Wed Nov 6 08:44:25 2024 -0800
IMPALA-13520: Support in clause coercing
Calcite has special processing for any in clause. It has a callback
function that allows all the parameters to be coerced into their proper
types. While a mechanism to do coercion already exists in the CoerceNodes
class, it only handles functions, and the in clause is handled in a
special way in Calcite.
So we use the Calcite mechanism to derive a common Impala type and
coerce all the parameters.
The CombineValuesNodesRule is also needed for this change. There is a
test case in test_exprs.test where an in clause contains 10,000
params inside the IN clause (e.g. int_col IN (1, 2, 3, ..., 10000)).
In this case, Calcite creates 10,000 Values RelNodes, which take way
too long to process on the execution side. The rule combines all
the Values RelNodes into one Values RelNode with many tuples, which
Impala handles quickly when converted into the physical Impala
PlanNode.
Change-Id: I492845d623766b9182bca5eeca22eb3352ef2f3d
Reviewed-on: http://gerrit.cloudera.org:8080/22408
Reviewed-by: Joe McDonnell <[email protected]>
Reviewed-by: Aman Sinha <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
.../calcite/rules/CombineValuesNodesRule.java | 85 +++++++++++
.../impala/calcite/service/CalciteOptimizer.java | 4 +-
.../impala/calcite/service/CalciteValidator.java | 3 +
.../calcite/type/ImpalaTypeCoercionFactory.java | 29 ++++
.../calcite/type/ImpalaTypeCoercionImpl.java | 169 +++++++++++++++++++++
.../impala/calcite/type/ImpalaTypeConverter.java | 16 +-
6 files changed, 298 insertions(+), 8 deletions(-)
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/CombineValuesNodesRule.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/CombineValuesNodesRule.java
new file mode 100644
index 000000000..e1d3be178
--- /dev/null
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/CombineValuesNodesRule.java
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.calcite.rules;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.logical.LogicalUnion;
+import org.apache.calcite.rel.logical.LogicalValues;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+
+/**
+ * CombineValuesNodesRule is a rule to combine multiple Values RelNodes
+ * into a single Values RelNode with multiple tuples.
+ *
+ * This is needed when there are many literals in an IN clause. Calcite creates
+ * a RelNode for each literal, which becomes incredibly slow at execution time.
+ *
+ * This rule only kicks in if there is a Union RelNode on top of multiple Values
+ * RelNodes.
+ */
+public class CombineValuesNodesRule extends RelOptRule {
+
+  public CombineValuesNodesRule() {
+    super(operand(LogicalUnion.class, none()));
+  }
+
+  /**
+   * Gathers the tuples of every LogicalValues input of the matched Union into one
+   * LogicalValues RelNode. Inputs that are not LogicalValues are kept as they are,
+   * and the combined LogicalValues is added after them as the last input of the
+   * new Union. Nothing is transformed unless at least two LogicalValues inputs
+   * were found.
+   */
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final LogicalUnion union = call.rel(0);
+
+    // Inputs that are carried over to the new Union untouched.
+    List<RelNode> newInputs = new ArrayList<>();
+    ImmutableList.Builder<ImmutableList<RexLiteral>> combinedTuples =
+        new ImmutableList.Builder<>();
+    // Counts the Values RelNodes that were found (not the tuples within them).
+    int numValuesNodes = 0;
+    for (RelNode input : union.getInputs()) {
+      // Calcite creates the HepRelVertex as an intermediary when doing
+      // optimizations, so the Values RelNode needs to be retrieved off of this.
+      RelNode realInput = input;
+      if (input instanceof HepRelVertex) {
+        realInput = ((HepRelVertex) input).getCurrentRel();
+      }
+      if (realInput instanceof LogicalValues) {
+        combinedTuples.addAll(((LogicalValues) realInput).getTuples());
+        numValuesNodes++;
+      } else {
+        // If it's something other than a Values RelNode, the input will not be
+        // combined with the Values RelNode and will be kept as is.
+        newInputs.add(input);
+      }
+    }
+
+    // With zero or one Values RelNode there is nothing to combine.
+    if (numValuesNodes <= 1) {
+      return;
+    }
+
+    LogicalValues combinedValues = LogicalValues.create(union.getCluster(),
+        union.getRowType(), combinedTuples.build());
+    newInputs.add(combinedValues);
+    call.transformTo(union.copy(union.getTraitSet(), newInputs, union.all));
+  }
+}
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
index d00182cdb..62e96745a 100644
---
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
@@ -33,6 +33,7 @@ import org.apache.calcite.tools.RelBuilder;
import org.apache.impala.calcite.coercenodes.CoerceNodes;
import org.apache.impala.calcite.rel.node.ConvertToImpalaRelRules;
import org.apache.impala.calcite.rel.node.ImpalaPlanRel;
+import org.apache.impala.calcite.rules.CombineValuesNodesRule;
import org.apache.impala.calcite.rules.ConvertToCNFRules;
import org.apache.impala.calcite.rules.ExtractLiteralAgg;
import org.apache.impala.calcite.rules.ImpalaMinusToDistinctRule;
@@ -94,7 +95,8 @@ public class CalciteOptimizer implements CompilerStep {
new ConvertToCNFRules.FilterConvertToCNFRule(),
new ConvertToCNFRules.JoinConvertToCNFRule(),
new ConvertToCNFRules.ProjectConvertToCNFRule(),
- ImpalaMinusToDistinctRule.Config.DEFAULT.toRule()
+ ImpalaMinusToDistinctRule.Config.DEFAULT.toRule(),
+ new CombineValuesNodesRule()
));
builder.addMatchOrder(HepMatchOrder.BOTTOM_UP);
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteValidator.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteValidator.java
index 028ee0460..43fd63913 100644
---
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteValidator.java
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteValidator.java
@@ -26,6 +26,7 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.validate.SqlValidator;
import org.apache.calcite.sql.validate.SqlValidatorUtil;
import org.apache.impala.calcite.operators.ImpalaOperatorTable;
+import org.apache.impala.calcite.type.ImpalaTypeCoercionFactory;
import org.apache.impala.calcite.type.ImpalaTypeSystemImpl;
import org.apache.impala.calcite.validate.ImpalaConformance;
import org.apache.impala.common.AnalysisException;
@@ -62,6 +63,8 @@ public class CalciteValidator implements CompilerStep {
// without this)
.withIdentifierExpansion(true)
.withConformance(ImpalaConformance.INSTANCE)
+ .withTypeCoercionEnabled(true)
+ .withTypeCoercionFactory(new ImpalaTypeCoercionFactory())
);
}
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeCoercionFactory.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeCoercionFactory.java
new file mode 100644
index 000000000..7f2bc8b65
--- /dev/null
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeCoercionFactory.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.impala.calcite.type;
+
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.sql.validate.SqlValidator;
+import org.apache.calcite.sql.validate.implicit.TypeCoercion;
+import org.apache.calcite.sql.validate.implicit.TypeCoercionFactory;
+
+/**
+ * TypeCoercionFactory that makes the Calcite validator use ImpalaTypeCoercionImpl
+ * for implicit type coercion.
+ */
+public class ImpalaTypeCoercionFactory implements TypeCoercionFactory {
+  /**
+   * Creates a new ImpalaTypeCoercionImpl from the given type factory and validator.
+   */
+  @Override
+  public TypeCoercion create(RelDataTypeFactory typeFactory, SqlValidator validator) {
+    return new ImpalaTypeCoercionImpl(typeFactory, validator);
+  }
+}
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeCoercionImpl.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeCoercionImpl.java
new file mode 100644
index 000000000..2428a96cf
--- /dev/null
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeCoercionImpl.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.impala.calcite.type;
+
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlCallBinding;
+import org.apache.calcite.sql.SqlCharStringLiteral;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlNodeList;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.sql.type.SqlTypeUtil;
+import org.apache.calcite.sql.validate.SqlValidator;
+import org.apache.calcite.sql.validate.SqlValidatorScope;
+import org.apache.calcite.sql.validate.implicit.TypeCoercionImpl;
+import org.apache.impala.calcite.type.ImpalaTypeConverter;
+import org.apache.impala.catalog.Type;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Impala-specific implicit type coercion used at Calcite validation time.
+ *
+ * Common types are derived through ImpalaTypeConverter.getCompatibleType(), and
+ * the operands of an IN clause are cast to that common type.
+ */
+public class ImpalaTypeCoercionImpl extends TypeCoercionImpl {
+
+  public ImpalaTypeCoercionImpl(RelDataTypeFactory typeFactory, SqlValidator validator) {
+    super(typeFactory, validator);
+  }
+
+  /**
+   * Returns the type that ImpalaTypeConverter.getCompatibleType() derives for the
+   * types in 'typeList', after every CHAR type has been replaced with STRING.
+   * The 'stringPromotion' parameter is not consulted.
+   */
+  @Override
+  public RelDataType getWiderTypeFor(List<RelDataType> typeList,
+      boolean stringPromotion) {
+    // A little hack. At type coercion (validation) time, we can't tell if the type
+    // is a char column or a char literal. The problem is that Calcite, as is,
+    // treats char literals as char type instead of STRING. Let's treat all char
+    // types as STRING right now. If it does turn out to be a real CHAR column,
+    // this will be caught when resolving functions.
+    List<RelDataType> newTypeList = new ArrayList<>();
+    for (RelDataType type : typeList) {
+      boolean isChar = type.getSqlTypeName() == SqlTypeName.CHAR;
+      newTypeList.add(isChar ? ImpalaTypeConverter.getRelDataType(Type.STRING) : type);
+    }
+
+    return ImpalaTypeConverter.getCompatibleType(newTypeList, factory);
+  }
+
+  /**
+   * Do type coercion for the In clause. Calcite allows numerics of different
+   * types to match (e.g. smallint to decimal). Most of these issues are taken
+   * care of in Coerce*Rules, but the problem with "In" is that Calcite converts
+   * the "in" clause to a bunch of "or" clauses in its RelNodeConverter step. At
+   * this point, some valuable information is lost. For instance, in the clause
+   * "10.2 IN (int_col)", the clause is converted to
+   * =(10.2, cast(int_col as DECIMAL(3,1))). Of course, an int_col will never be
+   * equal to 10.2, so this would kinda be ok...but this causes an overflow error
+   * on the Impala backend. So we will set the right casting at validation time
+   * for the "In" clause.
+   *
+   * Returns true if any operand of the call was replaced with a cast, false
+   * otherwise (including when the second operand is not a list).
+   */
+  @Override
+  public boolean inOperationCoercion(SqlCallBinding binding) {
+    // Only handle IN (or NOT IN) clause with a list as its second param
+    if (!(binding.operand(1) instanceof SqlNodeList)) {
+      return false;
+    }
+
+    SqlCall call = binding.getCall();
+    SqlValidatorScope scope = binding.getScope();
+
+    SqlNode leftOperand = call.operand(0);
+    RelDataType leftOperandType = deriveType(validator, scope, leftOperand);
+
+    // The per-element types are kept for casting individual elements; the set of
+    // unique types keeps the common type derivation small for very long lists.
+    SqlNodeList inList = (SqlNodeList) call.operand(1);
+    List<RelDataType> rightOperandTypes = new ArrayList<>();
+    Set<RelDataType> uniqueRightOperandTypes = new HashSet<>();
+    for (SqlNode node : inList) {
+      RelDataType derivedType = deriveType(validator, scope, node);
+      rightOperandTypes.add(derivedType);
+      uniqueRightOperandTypes.add(derivedType);
+    }
+
+    // commonType will contain a compatible type for both the left side of the
+    // IN operator and all the types within the IN clause.
+    RelDataType rightCommonType =
+        ImpalaTypeConverter.getCompatibleType(uniqueRightOperandTypes, factory);
+    RelDataType commonType =
+        ImpalaTypeConverter.getCompatibleType(rightCommonType, leftOperandType, factory);
+
+    // Both calls mutate the call if a cast is needed, so both must always run.
+    // The result tells the caller whether something was mutated.
+    boolean leftCoerced = coerceInOperand(call, 0, leftOperandType, commonType);
+    boolean listCoerced = coerceInList(inList, rightOperandTypes, commonType);
+    return leftCoerced || listCoerced;
+  }
+
+  /**
+   * Replaces operand 'index' of 'call' with a cast to 'toType' if 'fromType'
+   * needs casting. Returns true if the operand was replaced.
+   */
+  private boolean coerceInOperand(SqlCall call, int index, RelDataType fromType,
+      RelDataType toType) {
+    if (!needsCasting(fromType, toType)) {
+      return false;
+    }
+    SqlNode castNode = castTo(call.operand(index), toType);
+    call.setOperand(index, castNode);
+    updateInferredType(castNode, toType);
+    return true;
+  }
+
+  /**
+   * Replaces every element of 'inList' whose type needs casting with a cast to
+   * 'toType'. 'fromTypes' holds the derived type of each element, in list order.
+   * Returns true if at least one element was replaced.
+   */
+  private boolean coerceInList(SqlNodeList inList, List<RelDataType> fromTypes,
+      RelDataType toType) {
+    boolean coerced = false;
+    for (int i = 0; i < inList.size(); ++i) {
+      if (!needsCasting(fromTypes.get(i), toType)) {
+        continue;
+      }
+      SqlNode castNode = castTo(inList.get(i), toType);
+      inList.set(i, castNode);
+      updateInferredType(castNode, toType);
+      coerced = true;
+    }
+    return coerced;
+  }
+
+  /**
+   * Returns true if a value of 'fromType' has to be cast to 'toType'. A NULL
+   * type is never cast. Only the SqlTypeName of the two types is compared.
+   */
+  private static boolean needsCasting(RelDataType fromType, RelDataType toType) {
+    SqlTypeName fromName = fromType.getSqlTypeName();
+    return fromName != SqlTypeName.NULL && fromName != toType.getSqlTypeName();
+  }
+
+  /**
+   * Derives the type of 'node' through the validator, except that a char string
+   * literal is always given the STRING type.
+   */
+  private RelDataType deriveType(SqlValidator validator, SqlValidatorScope scope,
+      SqlNode node) {
+    if (node instanceof SqlCharStringLiteral) {
+      return ImpalaTypeConverter.getRelDataType(Type.STRING);
+    }
+    return validator.deriveType(scope, node);
+  }
+
+  /**
+   * Wraps 'node' in a CAST call to 'type', keeping the nullability of 'type'.
+   */
+  private static SqlNode castTo(SqlNode node, RelDataType type) {
+    return SqlStdOperatorTable.CAST.createCall(SqlParserPos.ZERO, node,
+        SqlTypeUtil.convertTypeToSpec(type).withNullable(type.isNullable()));
+  }
+}
diff --git
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeConverter.java
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeConverter.java
index d26e10698..ed4b19bc7 100644
---
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeConverter.java
+++
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeConverter.java
@@ -33,6 +33,7 @@ import org.apache.impala.catalog.TypeCompatibility;
import org.apache.impala.thrift.TPrimitiveType;
import java.math.BigDecimal;
+import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -376,15 +377,16 @@ public class ImpalaTypeConverter {
return factory.createTypeWithNullability(rdt, false);
}
- public static RelDataType getCompatibleType(List<RelDataType> dataTypes,
+ public static RelDataType getCompatibleType(Collection<RelDataType>
dataTypes,
RelDataTypeFactory factory) {
Preconditions.checkState(dataTypes.size() > 0);
- if (dataTypes.size() == 1) {
- return dataTypes.get(0);
- }
- RelDataType commonType = dataTypes.get(0);
- for (int i = 1; i < dataTypes.size(); ++i) {
- commonType = getCompatibleType(commonType, dataTypes.get(i), factory);
+ RelDataType commonType = null;
+ for (RelDataType dataType : dataTypes) {
+ if (commonType == null) {
+ commonType = dataType;
+ continue;
+ }
+ commonType = getCompatibleType(commonType, dataType, factory);
}
return commonType;
}