This is an automated email from the ASF dual-hosted git repository. csringhofer pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 52334ba4261b1a0a097a390d8aa2ffb45edc8bd1 Author: Steve Carlin <[email protected]> AuthorDate: Fri Sep 5 13:05:18 2025 -0700 IMPALA-14421: Calcite planner: case statement returning wrong types for char, varchar The 'case' function resolver in the original Impala planner has a quirk in it which caused issues in the Calcite planner. The function resolver for the original planner resolves all case statements with the "boolean" version. Later on, in the analysis of the CaseExpr, the proper types are assessed and the necessary casting is added. The Calcite planner follows a similar path. The resolver always returns boolean as well and the coerce nodes module determines the proper return type for the case statement. Two other related issues are also fixed here: Literal strings should be treated as type STRING instead of CHAR(X), but a null should literal should not be changed from a CHAR(x) to a STRING. This broke a 'case' test in the test framework where the columns were non-literals with type char(x), and the return value was a "null" which should not have forced a cast to string. A cast from a varchar to a varchar should be ignored. Testing: Added a test to calcite.test. Ensured the existing cast test in test_chars.py passed. Ran through the Jenkins Calcite testing framework. Change-Id: I82d657f4bfce432c458ee8198188dadf9f23f2ef Reviewed-on: http://gerrit.cloudera.org:8080/23560 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../calcite/coercenodes/CoerceOperandShuttle.java | 25 ++++++++++--- .../impala/calcite/functions/FunctionResolver.java | 21 +++++++++++ .../impala/calcite/functions/RexCallConverter.java | 19 ++++++++-- .../impala/calcite/type/ImpalaTypeConverter.java | 16 +++++++++ .../queries/QueryTest/calcite.test | 41 ++++++++++++++++++++++ 5 files changed, 116 insertions(+), 6 deletions(-) diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java index de6330fc7..a241eaf95 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/coercenodes/CoerceOperandShuttle.java @@ -193,7 +193,8 @@ public class CoerceOperandShuttle extends RexShuttle { @Override public RexNode visitLiteral(RexLiteral literal) { // Coerce CHAR literal types into STRING - if (literal.getType().getSqlTypeName().equals(SqlTypeName.CHAR)) { + if (!literal.isNull() && + (literal.getType().getSqlTypeName().equals(SqlTypeName.CHAR))) { return rexBuilder.makeLiteral(RexLiteral.stringValue(literal), ImpalaTypeConverter.getRelDataType(Type.STRING), true, true); } @@ -218,7 +219,15 @@ public class CoerceOperandShuttle extends RexShuttle { } - private RelDataType getReturnType(RexNode rexNode, Type impalaReturnType) { + private RelDataType getReturnType(RexCall rexCall, Type impalaReturnType) { + // Case is a special case. Currently, there is a quirk in the Impala function + // resolver where it always returns the BOOLEAN signature. So the return type + // is evaluated here by finding the compatible type amongst the "then" clauses. + if (rexCall.getKind() == SqlKind.CASE) { + List<RelDataType> argTypes = + Lists.transform(rexCall.getOperands(), RexNode::getType); + return ImpalaTypeConverter.getCompatibleTypeForCase(argTypes, factory); + } RelDataType retType = ImpalaTypeConverter.getRelDataType(impalaReturnType); @@ -228,12 +237,12 @@ public class CoerceOperandShuttle extends RexShuttle { // have to calculate the precision and scale based on operand types. If // necessary, this code should be added later. Preconditions.checkState(!SqlTypeUtil.isDecimal(retType) || - SqlTypeUtil.isDecimal(rexNode.getType())); + SqlTypeUtil.isDecimal(rexCall.getType())); // So if the original return type is Decimal and the function resolves to // decimal, the precision and scale are saved from the original function. if (SqlTypeUtil.isDecimal(retType)) { - retType = rexNode.getType(); + retType = rexCall.getType(); } return retType; @@ -374,6 +383,14 @@ public class CoerceOperandShuttle extends RexShuttle { return fromType; } + // If both are varchar, return STRING type which + // covers the wildcard varchar and all varchar cases. + if (toImpalaType.equals(Type.VARCHAR)) { + if (fromType.getSqlTypeName().equals(SqlTypeName.VARCHAR)) { + return ImpalaTypeConverter.getRelDataType(Type.STRING); + } + } + if (!toImpalaType.isDecimal() || SqlTypeUtil.isNull(fromType)) { return ImpalaTypeConverter.getRelDataType(toImpalaType); } diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java index b3e4e0baa..54f48a986 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlKind; @@ -100,6 +101,7 @@ public class FunctionResolver { ImmutableSet.<String> builder() .add("grouping_id") .add("count") + .add("case") .build(); public static Function getSupertypeFunction(RexCall call) { @@ -185,6 +187,15 @@ public class FunctionResolver { : getImpalaFunction(lowercaseName, impalaArgTypes, exactMatch); } + /** + * getSpecialProcessingFunction handles functions that do not fit within the general + * getImpalaFunction retrieval of the function. Such functions include: + * + * - grouping_id() which is grabbed from AggregateFunction + * - count() which can have multiple arguments, but only one argument is resolved + * in the function + * - case() which always returns the "boolean" flavor and then gets resolved later + */ private static Function getSpecialProcessingFunction(String lowercaseName, List<Type> impalaArgTypes, boolean exactMatch) { if (lowercaseName.equals("grouping_id")) { @@ -206,6 +217,16 @@ public class FunctionResolver { return getImpalaFunction(lowercaseName, impalaArgTypes, exactMatch); } + if (lowercaseName.equals("case")) { + // hack: Impala has a quirk in it that the function resolver always returns + // the boolean case signature. The CaseExpr object resolves the actual + // signature that should be used. We need to follow the lead of the regular + // planner because the varchar case does not return the right value if + // we try to resolve it here. It will need to be resolved by the caller. + impalaArgTypes = Lists.newArrayList(Type.BOOLEAN); + return getImpalaFunction(lowercaseName, impalaArgTypes, exactMatch); + } + throw new RuntimeException("Special function not found: " + lowercaseName); } diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java index fe0ac694e..5cf50002e 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/RexCallConverter.java @@ -122,7 +122,7 @@ public class RexCallConverter { switch (rexCall.getOperator().getKind()) { case CASE: - return createCaseExpr(fn, params, impalaRetType); + return createCaseExpr(fn, params, analyzer); case POSIX_REGEX_CASE_SENSITIVE: case POSIX_REGEX_CASE_INSENSITIVE: return createRegexExpr(fn, params, impalaRetType, rexCall); @@ -192,14 +192,29 @@ public class RexCallConverter { return new AnalyzedCaseExpr(fn, impalaRetType, decodeExpr); } - private static Expr createCaseExpr(Function fn, List<Expr> params, Type retType) { + private static Expr createCaseExpr(Function fn, List<Expr> params, + Analyzer analyzer) throws ImpalaException { List<CaseWhenClause> caseWhenClauses = new ArrayList<>(); Expr whenParam = null; // params alternate between "when" and the action expr + Type retType = null; for (Expr param : params) { if (whenParam == null) { whenParam = param; } else { + // The params are not always analyzed at this phase. + if (!param.isAnalyzed()) { + param.analyze(analyzer); + } + // The return type needs to be evaluated here since the case function + // resolver always returns boolean. At this point, the coercenodes module + // sets all the parameters to be the compatible type, and this is the type + // we use for the return value. + if (retType == null) { + retType = param.getType(); + } else { + Preconditions.checkState(retType.equals(param.getType())); + } caseWhenClauses.add(new CaseWhenClause(whenParam, param)); whenParam = null; } diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeConverter.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeConverter.java index ed4b19bc7..c4139930c 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeConverter.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeConverter.java @@ -27,12 +27,14 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.impala.analysis.NumericLiteral; +import org.apache.impala.calcite.functions.FunctionResolver; import org.apache.impala.catalog.ScalarType; import org.apache.impala.catalog.Type; import org.apache.impala.catalog.TypeCompatibility; import org.apache.impala.thrift.TPrimitiveType; import java.math.BigDecimal; +import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; @@ -377,6 +379,20 @@ public class ImpalaTypeConverter { return factory.createTypeWithNullability(rdt, false); } + public static RelDataType getCompatibleTypeForCase(List<RelDataType> dataTypes, + RelDataTypeFactory factory) { + List<RelDataType> compatibleTypes = new ArrayList<>(); + for (int i = 0; i < dataTypes.size(); ++i) { + // skip the "when" clauses which are always boolean and only evaluate the + // "then" and "else" clauses. + if (!FunctionResolver.shouldSkipOperandForCase(dataTypes.size(), i)) { + compatibleTypes.add(dataTypes.get(i)); + } + } + Preconditions.checkState(compatibleTypes.size() > 0); + return getCompatibleType(compatibleTypes, factory); + } + public static RelDataType getCompatibleType(Collection<RelDataType> dataTypes, RelDataTypeFactory factory) { Preconditions.checkState(dataTypes.size() > 0); diff --git a/testdata/workloads/functional-query/queries/QueryTest/calcite.test b/testdata/workloads/functional-query/queries/QueryTest/calcite.test index 5cccf98b8..685b2c363 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/calcite.test +++ b/testdata/workloads/functional-query/queries/QueryTest/calcite.test @@ -1138,3 +1138,44 @@ select * from (values(0)); ---- RUNTIME_PROFILE row_regex: .*SPOOL_QUERY_RESULTS=0.* ==== +---- QUERY +# IMPALA-14421: Repeat of test in chars.test since Calcite +# was returning the wrong types +WITH numbered AS ( + SELECT *, row_number() over (order by cs) as rn + FROM functional.chars_tiny) +SELECT * +FROM ( + SELECT CASE WHEN rn % 2 = 0 THEN cs END cs, + CASE WHEN rn % 2 = 1 THEN cl END cl, + CASE WHEN rn % 3 = 0 THEN vc END vc + FROM numbered + UNION ALL + SELECT CASE WHEN rn % 2 = 1 THEN cs END cs, + CASE WHEN rn % 2 = 0 THEN cl END cl, + CASE WHEN rn % 3 = 1 THEN vc END vc + FROM numbered) v +---- TYPES +char, char, string +---- HS2_TYPES +char, char, varchar +---- RESULTS +'NULL','1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb','NULL' +'2aaaa','NULL','NULL' +'NULL','3bbbbb ','3ccc' +'4aa ','NULL','NULL' +'NULL','5bbb ','NULL' +'6a ','NULL','6c' +'NULL','6b ','NULL' +'a ','NULL','NULL' +'NULL','NULL','NULL' +'1aaaa','NULL','1cccc' +'NULL','2bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb','NULL' +'3aaa ','NULL','NULL' +'NULL','4bbbb ','4cc' +'5a ','NULL','NULL' +'NULL','6b ','NULL' +'6a ','NULL','6c' +'NULL','b ','NULL' +'NULL','NULL','NULL' +====
