This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 7b109ca45fc6a177dfe4796f0808400c32149715 Author: Steve Carlin <[email protected]> AuthorDate: Wed Oct 16 13:49:00 2024 -0700 IMPALA-13457: Calcite planner: fix datetime/interval issues for tpcds queries Added support for datetime interval operators. A dummy IntervalExpr was added to support logical to physical representation. The IntervalExpr is removed once the actual "+" operation for datetime is created, but the translation framework is simplified by working on the Calcite interval type in a separate temporary Expr. Some logic was inserted into CoerceOperandShuttle. This object is used to add casts when there is limitation in Calcite for number types. When we have an interval, we always want to represent it as a special type so we do not want to introduce a cast. Also, in ImpalaOperatorTable, the ability to use Impala functions over Calcite functions are needed because Calcite translates the year function into "extract" which causes some issues. Just using the Impala signature works for us here. Change-Id: I2b4afc3ab1d17ba1f168904a6ded052e1d62b3fe Reviewed-on: http://gerrit.cloudera.org:8080/21946 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../calcite/coercenodes/CoerceOperandShuttle.java | 31 +++++- .../impala/calcite/functions/IntervalExpr.java | 119 +++++++++++++++++++++ .../impala/calcite/functions/RexCallConverter.java | 62 +++++++++++ .../calcite/operators/ImpalaOperatorTable.java | 35 ++++-- 4 files changed, 238 insertions(+), 9 deletions(-) diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java index 2de3550de..40e583ba8 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java @@ -30,7 +30,10 @@ import org.apache.calcite.rex.RexOver; import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlDatetimePlusOperator; +import org.apache.calcite.sql.fun.SqlDatetimeSubtractionOperator; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rel.type.RelDataTypeFactory; @@ -70,7 +73,7 @@ public class CoerceOperandShuttle extends RexShuttle { private final RexBuilder rexBuilder; private final List<RelNode> inputs; - public static Set<SqlKind> NO_CASTING_NEEDED = + public static Set<SqlKind> NO_CAST_OPERATORS = ImmutableSet.<SqlKind> builder() // Cast doesn't need any operand casting because it is already a cast. .add(SqlKind.CAST) @@ -102,7 +105,7 @@ public class CoerceOperandShuttle extends RexShuttle { RexCall castedOperandsCall = (RexCall) super.visitCall(call); // Certain operators will never need casting for their operands. - if (NO_CASTING_NEEDED.contains(castedOperandsCall.getOperator().getKind())) { + if (!isCastingNeeded(castedOperandsCall)) { return castedOperandsCall; } @@ -217,6 +220,30 @@ public class CoerceOperandShuttle extends RexShuttle { return retType; } + private boolean isCastingNeeded(RexCall rexCall) { + if (NO_CAST_OPERATORS.contains(rexCall.getOperator().getKind())) { + return false; + } + + // Time stamp operators don't require casting. When they are converted + // into Expr objects, they undergo special processing, so we leave them + // as/is. + if (isTimestampArithExpr(rexCall)) { + return false; + } + + return true; + } + + private static boolean isTimestampArithExpr(RexCall rexCall) { + return rexCall.getOperator() instanceof SqlDatetimePlusOperator + || rexCall.getOperator() instanceof SqlDatetimeSubtractionOperator + || SqlTypeName.INTERVAL_TYPES.contains(rexCall.getType().getSqlTypeName()) + || ((rexCall.getOperator().equals("+") || rexCall.getOperator().equals("-")) && + (SqlTypeUtil.isDatetime(rexCall.getOperands().get(0).getType()) || + SqlTypeUtil.isDatetime(rexCall.getOperands().get(1).getType()))); + } + /** * Handle getting the type of index. If there is only one input, then we * just use the index value to get the type. If there are two inputs, diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/IntervalExpr.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/IntervalExpr.java new file mode 100644 index 000000000..e2e8ebe5b --- /dev/null +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/IntervalExpr.java @@ -0,0 +1,119 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.calcite.functions; + +import com.google.common.base.Preconditions; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.impala.analysis.Analyzer; +import org.apache.impala.analysis.Expr; +import org.apache.impala.analysis.ToSqlOptions; +import org.apache.impala.common.AnalysisException; +import org.apache.impala.thrift.TExprNode; + +/** + * IntervalExpr is a temporary expression class. It is needed because of + * the way Calcite handles Interval expressions and how the Calcite RexNode to + * Impala translation works. + * A calcite timestamp add expression might look like this: + * TIMESTAMP +( TIMESTAMP, INTERVAL YEAR *( INT, INTERVAL_YEAR) + * The main "plus" operation returns a timestamp. + * The two parameters are a timestamp and the interval to add. + * The inner "multiply" is an interval expression. The first parameter + * of the interval expression in this case is the number of years to + * multiply. The second parameter is a default "1" of INTERVAL_YEAR. + * + * To be concrete about this, if we were adding 12 years to our timestamp + * expression, the inner interval expression looks like this: + * mult(12, 1: INTERVAL_YEAR) + * + * This inner interval expression doesn't have any Impala equivalent, so that's + * why this class is necessary. This object will only exist as a placeholder. + * When the + expression is created as a ArithTimestampExpr, the information of + * the IntervalExpr will be used and then the object will be discarded. + */ +public class IntervalExpr extends Expr { + + private final RexCall intervalExpr_; + + private final Expr literal_; + + private final String timeUnit_; + + public IntervalExpr(RexCall intervalExpr, Expr literalExpr) { + Preconditions.checkState(intervalExpr.getOperator().getKind().equals(SqlKind.TIMES)); + Preconditions.checkState(SqlTypeName.INTERVAL_TYPES.contains( + intervalExpr.getOperands().get(1).getType().getSqlTypeName())); + intervalExpr_ = intervalExpr; + literal_ = literalExpr; + timeUnit_ = calcTimeIntervalUnit(intervalExpr); + + } + + public Expr getLiteral() { + return literal_; + } + + public String getTimeUnit() { + return timeUnit_; + } + + private String calcTimeIntervalUnit(RexCall call) { + // hack. Not sure the best way to get week out. Also need to figure out + // fracitional seconds + if (call.getOperands().get(1).getType().toString().contains("WEEK")) { + return "WEEK"; + } + if (call.getOperands().get(1).getType().toString().contains("MILLISECOND")) { + return "MILLISECOND"; + } + if (call.getOperands().get(1).getType().toString().contains("MICROSECOND")) { + return "MICROSECOND"; + } + if (call.getOperands().get(1).getType().toString().contains("NANOSECOND")) { + return "NANOSECOND"; + } + return call.getOperands().get(1).getType().getSqlTypeName().getStartUnit().toString(); + } + + @Override + protected void analyzeImpl(Analyzer analyzer) throws AnalysisException { + throw new RuntimeException("not implemented"); + } + + @Override + protected float computeEvalCost() { + throw new RuntimeException("not implemented"); + } + + @Override + protected String toSqlImpl(ToSqlOptions options) { + throw new RuntimeException("not implemented"); + } + + @Override + protected void toThrift(TExprNode msg) { + throw new RuntimeException("not implemented"); + } + + @Override + public Expr clone() { + throw new RuntimeException("not implemented"); + } +} diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java index 7bbf01a02..5ef0859d6 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java @@ -25,11 +25,15 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.impala.analysis.Analyzer; +import org.apache.impala.analysis.ArithmeticExpr; import org.apache.impala.analysis.BinaryPredicate; import org.apache.impala.analysis.CaseWhenClause; import org.apache.impala.analysis.CompoundPredicate; import org.apache.impala.analysis.Expr; +import org.apache.impala.analysis.TimestampArithmeticExpr; import org.apache.impala.calcite.type.ImpalaTypeConverter; import org.apache.impala.catalog.Function; import org.apache.impala.catalog.Type; @@ -77,6 +81,12 @@ public class RexCallConverter { String funcName = rexCall.getOperator().getName().toLowerCase(); + // Date addition expressions have special handling. + Expr dateExpr = handleDateExprs(funcName, rexCall, params, rexBuilder); + if (dateExpr != null) { + return dateExpr; + } + Function fn = getFunction(rexCall); if (fn == null) { @@ -104,6 +114,19 @@ public class RexCallConverter { } } + public static Expr handleDateExprs(String funcName, RexCall rexCall, + List<Expr> params, RexBuilder rexBuilder) { + + if (SqlTypeName.INTERVAL_TYPES.contains(rexCall.getType().getSqlTypeName())) { + return new IntervalExpr(rexCall, params.get(0)); + } + + if (isTimestampArithExpr(params)) { + return createTimestampExpr(rexCall, params); + } + return null; + } + private static Function getFunction(RexCall call) { List<RelDataType> argTypes = Lists.transform(call.getOperands(), RexNode::getType); String name = call.getOperator().getName(); @@ -167,4 +190,43 @@ public class RexCallConverter { Preconditions.checkNotNull(op, "Unknown Calcite op: " + sqlKind); return new AnalyzedBinaryCompExpr(fn, op, params.get(0), params.get(1)); } + + private static Expr createTimestampExpr(RexCall rexCall, List<Expr> params) { + // one of the timestamp expressions should be a date time. The other should + // be some number to add or subtract. timestampParamIndex contains the + // parameter that is the timestamp. + int timestampParamIndex = + SqlTypeUtil.isDatetime(rexCall.getOperands().get(0).getType()) + ? 0 + : 1; + // and intervalIndex contains the non-timestampParamIndex parameter + int intervalIndex = timestampParamIndex == 0 ? 1 : 0; + + IntervalExpr intervalExpr = (IntervalExpr) params.get(intervalIndex); + ArithmeticExpr.Operator op = getImpalaOp(rexCall); + return new TimestampArithmeticExpr(op, params.get(timestampParamIndex), + intervalExpr.getLiteral(), intervalExpr.getTimeUnit(), intervalIndex == 0 + ); + } + + private static ArithmeticExpr.Operator getImpalaOp(RexCall rexCall) { + if (rexCall.getOperator().getName().equals("DATE_ADD") || + rexCall.getOperator().getKind().equals(SqlKind.PLUS)) { + return ArithmeticExpr.Operator.ADD; + } + if (rexCall.getOperator().getName().equals("DATE_SUB") || + rexCall.getOperator().getKind().equals(SqlKind.MINUS)) { + return ArithmeticExpr.Operator.SUBTRACT; + } + throw new RuntimeException("Unknown Operator found in arith expr: " + + rexCall.getOperator().getName()); + } + + public static boolean isTimestampArithExpr(List<Expr> params) { + if (params.size() != 2) { + return false; + } + return params.get(0) instanceof IntervalExpr || + params.get(1) instanceof IntervalExpr; + } } diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaOperatorTable.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaOperatorTable.java index 7aee0ab37..323198440 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaOperatorTable.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaOperatorTable.java @@ -18,6 +18,7 @@ package org.apache.impala.calcite.operators; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; import org.apache.calcite.sql.SqlFunctionCategory; import org.apache.calcite.sql.SqlIdentifier; import org.apache.calcite.sql.validate.SqlNameMatcher; @@ -29,6 +30,7 @@ import org.apache.impala.catalog.BuiltinsDb; import org.apache.impala.catalog.Db; import java.util.List; +import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,12 +49,28 @@ import org.slf4j.LoggerFactory; * the function name is known by Impala. If so, an ImpalaOperator class is generated * on the fly. * + * There are some special functions where we would prefer to use the Impala operator + * over the Calcite operator due to some incompatibility. These are specified in + * USE_IMPALA_OPERATOR. + * * TODO: IMPALA-13095: Handle UDFs */ public class ImpalaOperatorTable extends ReflectiveSqlOperatorTable { protected static final Logger LOG = LoggerFactory.getLogger(ImpalaOperatorTable.class.getName()); + public static Set<String> USE_IMPALA_OPERATOR = + ImmutableSet.<String> builder() + .add("year") + .add("month") + .add("week") + .add("day") + .add("hour") + .add("minute") + .add("second") + .add("millisecond") + .build(); + private static ImpalaOperatorTable INSTANCE; /** @@ -70,12 +88,15 @@ public class ImpalaOperatorTable extends ReflectiveSqlOperatorTable { return; } - // Check Calcite operator table for existence. - SqlStdOperatorTable.instance().lookupOperatorOverloads(opName, category, syntax, - operatorList, nameMatcher); - Preconditions.checkState(operatorList.size() <= 1); - if (operatorList.size() == 1) { - return; + String lowercaseOpName = opName.getSimple().toLowerCase(); + if (!USE_IMPALA_OPERATOR.contains(lowercaseOpName)) { + // Check Calcite operator table for existence. + SqlStdOperatorTable.instance().lookupOperatorOverloads(opName, category, syntax, + operatorList, nameMatcher); + Preconditions.checkState(operatorList.size() <= 1); + if (operatorList.size() == 1) { + return; + } } // There shouldn't be more than one opName with our usage, so throw an exception @@ -86,7 +107,7 @@ public class ImpalaOperatorTable extends ReflectiveSqlOperatorTable { } // Check Impala Builtins for existence: TODO: IMPALA-13095: handle UDFs - if (!BuiltinsDb.getInstance().containsFunction(opName.getSimple())) { + if (!BuiltinsDb.getInstance().containsFunction(lowercaseOpName)) { return; }
