HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fa36381f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fa36381f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fa36381f Branch: refs/heads/master Commit: fa36381faad40576f62e2ac925ef2976efecd8b6 Parents: e2142b2 Author: Matt McCline <[email protected]> Authored: Sat Aug 25 09:21:25 2018 -0700 Committer: Matt McCline <[email protected]> Committed: Sat Aug 25 09:21:25 2018 -0700 ---------------------------------------------------------------------- .../exec/vector/VectorSMBMapJoinOperator.java | 2 +- .../ql/exec/vector/VectorizationContext.java | 46 +- .../exec/vector/VectorizationContext.java.orig | 3771 ------------------ .../expressions/CastStringGroupToString.java | 40 - .../ql/exec/vector/expressions/VectorElt.java | 168 +- .../VectorExpressionWriterFactory.java | 26 + .../ql/exec/vector/TestVectorRowObject.java | 3 +- .../hive/ql/exec/vector/TestVectorSerDeRow.java | 137 +- .../ql/exec/vector/VectorRandomRowSource.java | 67 +- .../hive/ql/exec/vector/VectorVerifyFast.java | 6 +- .../aggregation/TestVectorAggregation.java | 9 +- .../expressions/TestVectorArithmetic.java | 14 +- .../vector/expressions/TestVectorBetweenIn.java | 38 +- .../expressions/TestVectorCastStatement.java | 11 +- .../expressions/TestVectorCoalesceElt.java | 87 +- .../expressions/TestVectorDateAddSub.java | 10 +- .../vector/expressions/TestVectorDateDiff.java | 9 +- .../expressions/TestVectorFilterCompare.java | 12 +- .../expressions/TestVectorIfStatement.java | 3 +- .../vector/expressions/TestVectorIndex.java | 5 +- .../vector/expressions/TestVectorNegative.java | 21 +- .../exec/vector/expressions/TestVectorNull.java | 14 +- .../expressions/TestVectorStringConcat.java | 3 +- .../expressions/TestVectorStringUnary.java | 3 +- .../expressions/TestVectorStructField.java | 370 ++ .../vector/expressions/TestVectorSubStr.java | 3 +- 
.../expressions/TestVectorTimestampExtract.java | 3 +- .../fast/TestVectorMapJoinFastRowHashMap.java | 101 +- .../clientpositive/query_result_fileformat.q | 4 +- .../llap/vector_case_when_1.q.out | 8 +- .../llap/vector_char_mapjoin1.q.out | 1 - .../clientpositive/llap/vector_udf1.q.out | 18 +- .../clientpositive/llap/vectorized_casts.q.out | 6 +- .../query_result_fileformat.q.out | 76 +- .../clientpositive/vector_case_when_1.q.out | 8 +- .../clientpositive/vector_char_mapjoin1.q.out | 2 +- .../clientpositive/vectorized_casts.q.out | 6 +- .../hadoop/hive/serde2/RandomTypeUtil.java | 29 + 38 files changed, 1059 insertions(+), 4081 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index c13510e..07a6e9d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -131,7 +131,7 @@ public class VectorSMBMapJoinOperator extends SMBMapJoinOperator List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable); keyExpressions = vContext.getVectorExpressions(keyDesc); - keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc); + keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyExpressions); Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs(); bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java 
---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index b7feb1c..57f7c01 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1806,6 +1806,25 @@ public class VectorizationContext { return vectorExpression; } + public void wrapWithDecimal64ToDecimalConversions(VectorExpression[] vecExprs) + throws HiveException{ + if (vecExprs == null) { + return; + } + final int size = vecExprs.length; + for (int i = 0; i < size; i++) { + VectorExpression vecExpr = vecExprs[i]; + if (vecExpr.getOutputTypeInfo() instanceof DecimalTypeInfo) { + DataTypePhysicalVariation outputDataTypePhysicalVariation = + vecExpr.getOutputDataTypePhysicalVariation(); + if (outputDataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { + vecExprs[i] = + wrapWithDecimal64ToDecimalConversion(vecExpr); + } + } + } + } + public VectorExpression wrapWithDecimal64ToDecimalConversion(VectorExpression inputExpression) throws HiveException { @@ -2903,7 +2922,11 @@ public class VectorizationContext { } else if (isTimestampFamily(inputType)) { return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } else if (isStringFamily(inputType)) { - return createVectorExpression(CastStringGroupToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + + // STRING and VARCHAR types require no conversion, so use a no-op. 
+ // Also, CHAR is stored in BytesColumnVector with trimmed blank padding, so it also + // requires no conversion. + return getIdentityExpression(childExpr); } return null; } @@ -3123,8 +3146,27 @@ public class VectorizationContext { List<ExprNodeDesc> castChildren = new ArrayList<ExprNodeDesc>(); boolean wereCastUdfs = false; + Category commonTypeCategory = commonType.getCategory(); for (ExprNodeDesc desc: childExpr.subList(1, 4)) { - if (commonType.equals(desc.getTypeInfo())) { + TypeInfo childTypeInfo = desc.getTypeInfo(); + Category childCategory = childTypeInfo.getCategory(); + + if (childCategory != commonTypeCategory) { + return null; + } + final boolean isNeedsCast; + if (commonTypeCategory == Category.PRIMITIVE) { + + // Do not do strict TypeInfo comparisons for DECIMAL -- just compare the primitive category. + // Otherwise, we generate unnecessary casts. + isNeedsCast = + ((PrimitiveTypeInfo) commonType).getPrimitiveCategory() != + ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory(); + } else { + isNeedsCast = !commonType.equals(desc.getTypeInfo()); + } + + if (!isNeedsCast) { castChildren.add(desc); } else { GenericUDF castUdf = getGenericUDFForCast(commonType);
