HIVE-19037: Vectorization: Miscellaneous cleanup (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9d0f9c07 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9d0f9c07 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9d0f9c07 Branch: refs/heads/master Commit: 9d0f9c07e50224416e5bc4c5961f032d83d446d9 Parents: fc48d72 Author: Matt McCline <[email protected]> Authored: Sat Mar 31 11:01:21 2018 -0500 Committer: Matt McCline <[email protected]> Committed: Sat Mar 31 11:01:21 2018 -0500 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 5 + .../test/resources/testconfiguration.properties | 1 + .../exec/vector/VectorExpressionDescriptor.java | 4 +- .../ql/exec/vector/VectorizationContext.java | 301 ++- .../ql/exec/vector/VectorizedBatchUtil.java | 70 +- .../expressions/ConstantVectorExpression.java | 12 + .../vector/expressions/VectorExpression.java | 12 +- .../VectorReduceSinkCommonOperator.java | 2 - .../hive/ql/optimizer/physical/Vectorizer.java | 91 +- .../clientpositive/annotate_stats_select.q | 1 + .../vector_annotate_stats_select.q | 146 ++ .../clientpositive/vector_null_projection.q | 4 +- .../llap/vector_annotate_stats_select.q.out | 2111 ++++++++++++++++++ .../llap/vector_case_when_1.q.out | 2 +- .../llap/vector_case_when_2.q.out | 2 +- .../llap/vector_null_projection.q.out | 30 +- .../clientpositive/llap/vector_nvl.q.out | 22 +- .../llap/vector_orc_merge_incompat_schema.q.out | 10 +- .../llap/vector_orc_nested_column_pruning.q.out | 75 +- .../llap/vector_udf_adaptor_1.q.out | 4 +- .../clientpositive/vector_case_when_1.q.out | 2 +- .../clientpositive/vector_case_when_2.q.out | 2 +- .../clientpositive/vector_null_projection.q.out | 29 +- .../results/clientpositive/vector_nvl.q.out | 21 +- .../vectorized_bucketmapjoin1.q.out | 72 +- .../hive/ql/exec/vector/ColumnVector.java | 3 +- .../hive/ql/exec/vector/VoidColumnVector.java | 65 + 27 files changed, 2855 insertions(+), 244 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 697b194..02367eb 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3013,6 +3013,11 @@ public class HiveConf extends Configuration { "internal use only, used to suppress \"Execution mode: vectorized\" EXPLAIN display.\n" + "The default is false, of course", true), + HIVE_TEST_VECTORIZER_SUPPRESS_FATAL_EXCEPTIONS( + "hive.test.vectorizer.suppress.fatal.exceptions", true, + "internal use only. When false, don't suppress fatal exceptions like\n" + + "NullPointerException, etc so the query will fail and assure it will be noticed", + true), HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " + "whether to check, convert, and normalize partition value to conform to its column type in " http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 669d0ea..516f804 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -727,6 +727,7 @@ minillaplocal.query.files=\ union_fast_stats.q,\ union_remove_26.q,\ union_top_level.q,\ + vector_annotate_stats_select.q,\ vector_auto_smb_mapjoin_14.q,\ vector_char_varchar_1.q,\ vector_complex_all.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java index 3167e9e..2d73ab4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java @@ -80,6 +80,7 @@ public class VectorExpressionDescriptor { DECIMAL_64 (0x1000), LIST (0x2000), MAP (0x4000), + VOID (0x8000), INT_DECIMAL_64_FAMILY (INT_FAMILY.value | DECIMAL_64.value), DATETIME_FAMILY (DATE.value | TIMESTAMP.value), INTERVAL_FAMILY (INTERVAL_YEAR_MONTH.value | INTERVAL_DAY_TIME.value), @@ -134,8 +135,7 @@ public class VectorExpressionDescriptor { } else if (VectorizationContext.mapTypePattern.matcher(lower).matches()) { return MAP; } else if (lower.equals("void")) { - // The old code let void through... - return INT_FAMILY; + return VOID; } else { return NONE; } http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index e7f2b54..abbbe9a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -647,60 +647,74 @@ public class VectorizationContext { return ocm.currentScratchColumns(); } - private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc - exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException { - int columnNum = getInputColumnIndex(exprDesc.getColumn()); + private VectorExpression getFilterOnBooleanColumnExpression(ExprNodeColumnDesc exprDesc, + int columnNum) throws HiveException { VectorExpression expr = null; - switch (mode) { - case FILTER: - // Evaluate the column as a boolean, converting if necessary. - TypeInfo typeInfo = exprDesc.getTypeInfo(); - if (typeInfo.getCategory() == Category.PRIMITIVE && - ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) { - expr = new SelectColumnIsTrue(columnNum); - } else { - // Ok, we need to convert. - ArrayList<ExprNodeDesc> exprAsList = new ArrayList<ExprNodeDesc>(1); - exprAsList.add(exprDesc); - - // First try our cast method that will handle a few special cases. - VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList); - if (castToBooleanExpr == null) { - - // Ok, try the UDF. - castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList, - VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo); - if (castToBooleanExpr == null) { - throw new HiveException("Cannot vectorize converting expression " + - exprDesc.getExprString() + " to boolean"); - } - } - - final int outputColumnNum = castToBooleanExpr.getOutputColumnNum(); - expr = new SelectColumnIsTrue(outputColumnNum); + // Evaluate the column as a boolean, converting if necessary. + TypeInfo typeInfo = exprDesc.getTypeInfo(); + if (typeInfo.getCategory() == Category.PRIMITIVE && + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) { + expr = new SelectColumnIsTrue(columnNum); - expr.setChildExpressions(new VectorExpression[] {castToBooleanExpr}); + expr.setInputTypeInfos(typeInfo); + expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); - expr.setInputTypeInfos(castToBooleanExpr.getOutputTypeInfo()); - expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); + } else { + // Ok, we need to convert. + ArrayList<ExprNodeDesc> exprAsList = new ArrayList<ExprNodeDesc>(1); + exprAsList.add(exprDesc); + + // First try our cast method that will handle a few special cases. + VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList); + if (castToBooleanExpr == null) { + + // Ok, try the UDF. + castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList, + VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo); + if (castToBooleanExpr == null) { + throw new HiveException("Cannot vectorize converting expression " + + exprDesc.getExprString() + " to boolean"); } - break; - case PROJECTION: - { - expr = new IdentityExpression(columnNum); + } - TypeInfo identityTypeInfo = exprDesc.getTypeInfo(); - DataTypePhysicalVariation identityDataTypePhysicalVariation = - getDataTypePhysicalVariation(columnNum); + final int outputColumnNum = castToBooleanExpr.getOutputColumnNum(); - expr.setInputTypeInfos(identityTypeInfo); - expr.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation); + expr = new SelectColumnIsTrue(outputColumnNum); - expr.setOutputTypeInfo(identityTypeInfo); - expr.setOutputDataTypePhysicalVariation(identityDataTypePhysicalVariation); - } - break; + expr.setChildExpressions(new VectorExpression[] {castToBooleanExpr}); + + expr.setInputTypeInfos(castToBooleanExpr.getOutputTypeInfo()); + expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); + } + return expr; + } + + private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, + VectorExpressionDescriptor.Mode mode) throws HiveException { + int columnNum = getInputColumnIndex(exprDesc.getColumn()); + VectorExpression expr = null; + switch (mode) { + case FILTER: + expr = getFilterOnBooleanColumnExpression(exprDesc, columnNum); + break; + case PROJECTION: + { + expr = new IdentityExpression(columnNum); + + TypeInfo identityTypeInfo = exprDesc.getTypeInfo(); + DataTypePhysicalVariation identityDataTypePhysicalVariation = + getDataTypePhysicalVariation(columnNum); + + expr.setInputTypeInfos(identityTypeInfo); + expr.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation); + + expr.setOutputTypeInfo(identityTypeInfo); + expr.setOutputDataTypePhysicalVariation(identityDataTypePhysicalVariation); + } + break; + default: + throw new RuntimeException("Unexpected mode " + mode); } return expr; } @@ -1550,6 +1564,11 @@ public class VectorizationContext { isColumnScaleEstablished = true; columnScale = scale1; } else if (child1 instanceof ExprNodeConstantDesc) { + if (isNullConst(child1)) { + + // Cannot handle NULL scalar parameter. + return null; + } hasScalar = true; builder.setInputExpressionType(0, InputExpressionType.SCALAR); } else { @@ -1575,6 +1594,11 @@ public class VectorizationContext { if (!isColumnScaleEstablished) { return null; } + if (isNullConst(child2)) { + + // Cannot handle NULL scalar parameter. + return null; + } hasScalar = true; builder.setInputExpressionType(1, InputExpressionType.SCALAR); } else { @@ -1609,8 +1633,13 @@ public class VectorizationContext { int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) { - // In filter mode, the column must be a boolean - children.add(new SelectColumnIsTrue(colIndex)); + VectorExpression filterExpr = + getFilterOnBooleanColumnExpression((ExprNodeColumnDesc) child, colIndex); + if (filterExpr == null) { + return null; + } + + children.add(filterExpr); } arguments[i] = colIndex; } else { @@ -1720,6 +1749,10 @@ public class VectorizationContext { || (child instanceof ExprNodeFieldDesc)) { builder.setInputExpressionType(i, InputExpressionType.COLUMN); } else if (child instanceof ExprNodeConstantDesc) { + if (isNullConst(child)) { + // Cannot handle NULL scalar parameter. + return null; + } builder.setInputExpressionType(i, InputExpressionType.SCALAR); } else if (child instanceof ExprNodeDynamicValueDesc) { builder.setInputExpressionType(i, InputExpressionType.DYNAMICVALUE); @@ -2408,10 +2441,26 @@ public class VectorizationContext { private byte[] getStringScalarAsByteArray(ExprNodeConstantDesc exprNodeConstantDesc) throws HiveException { Object o = getScalarValue(exprNodeConstantDesc); - if (!(o instanceof byte[])) { - throw new HiveException("Expected constant argument of type string"); + if (o instanceof byte[]) { + return (byte[]) o; + } else if (o instanceof HiveChar) { + HiveChar hiveChar = (HiveChar) o; + try { + return hiveChar.getStrippedValue().getBytes("UTF-8"); + } catch (Exception ex) { + throw new HiveException(ex); + } + } else if (o instanceof HiveVarchar) { + HiveVarchar hiveVarchar = (HiveVarchar) o; + try { + return hiveVarchar.getValue().getBytes("UTF-8"); + } catch (Exception ex) { + throw new HiveException(ex); + } + } else { + throw new HiveException("Expected constant argument of string family but found " + + o.getClass().getSimpleName()); } - return (byte[]) o; } private PrimitiveCategory getAnyIntegerPrimitiveCategoryFromUdfClass(Class<? extends UDF> udfClass) { @@ -2461,7 +2510,8 @@ public class VectorizationContext { PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; String typename = type.getTypeName(); HiveDecimal rawDecimal; - switch (ptinfo.getPrimitiveCategory()) { + PrimitiveCategory primitiveCategory = ptinfo.getPrimitiveCategory(); + switch (primitiveCategory) { case FLOAT: rawDecimal = HiveDecimal.create(String.valueOf(scalar)); break; @@ -2480,11 +2530,20 @@ public class VectorizationContext { case LONG: rawDecimal = HiveDecimal.create((Long) scalar); break; + case STRING: + rawDecimal = HiveDecimal.create((String) scalar); + break; + case CHAR: + rawDecimal = HiveDecimal.create(((HiveChar) scalar).getStrippedValue()); + break; + case VARCHAR: + rawDecimal = HiveDecimal.create(((HiveVarchar) scalar).getValue()); + break; case DECIMAL: rawDecimal = (HiveDecimal) scalar; break; default: - throw new HiveException("Unsupported type " + typename + " for cast to HiveDecimal"); + throw new HiveException("Unsupported primitive category " + primitiveCategory + " for cast to HiveDecimal"); } if (rawDecimal == null) { if (LOG.isDebugEnabled()) { @@ -2523,7 +2582,8 @@ public class VectorizationContext { } PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; String typename = type.getTypeName(); - switch (ptinfo.getPrimitiveCategory()) { + PrimitiveCategory primitiveCategory = ptinfo.getPrimitiveCategory(); + switch (primitiveCategory) { case FLOAT: case DOUBLE: case BYTE: @@ -2531,11 +2591,17 @@ public class VectorizationContext { case INT: case LONG: return ((Number) scalar).doubleValue(); + case STRING: + return Double.valueOf((String) scalar); + case CHAR: + return Double.valueOf(((HiveChar) scalar).getStrippedValue()); + case VARCHAR: + return Double.valueOf(((HiveVarchar) scalar).getValue()); case DECIMAL: HiveDecimal decimalVal = (HiveDecimal) scalar; return decimalVal.doubleValue(); default: - throw new HiveException("Unsupported type "+typename+" for cast to Double"); + throw new HiveException("Unsupported primitive category " + primitiveCategory + " for cast to DOUBLE"); } } @@ -2546,7 +2612,8 @@ public class VectorizationContext { } PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; String typename = type.getTypeName(); - switch (ptinfo.getPrimitiveCategory()) { + PrimitiveCategory primitiveCategory = ptinfo.getPrimitiveCategory(); + switch (primitiveCategory) { case FLOAT: case DOUBLE: case BYTE: @@ -2554,6 +2621,45 @@ public class VectorizationContext { case INT: case LONG: return ((Number) scalar).longValue(); + case STRING: + case CHAR: + case VARCHAR: + { + final long longValue; + if (primitiveCategory == PrimitiveCategory.STRING) { + longValue = Long.valueOf((String) scalar); + } else if (primitiveCategory == PrimitiveCategory.CHAR) { + longValue = Long.valueOf(((HiveChar) scalar).getStrippedValue()); + } else { + longValue = Long.valueOf(((HiveVarchar) scalar).getValue()); + } + switch (integerPrimitiveCategory) { + case BYTE: + if (longValue != ((byte) longValue)) { + // Accurate byte value cannot be obtained. + return null; + } + break; + case SHORT: + if (longValue != ((short) longValue)) { + // Accurate short value cannot be obtained. + return null; + } + break; + case INT: + if (longValue != ((int) longValue)) { + // Accurate int value cannot be obtained. + return null; + } + break; + case LONG: + // No range check needed. + break; + default: + throw new RuntimeException("Unexpected integer primitive type " + integerPrimitiveCategory); + } + return longValue; + } case DECIMAL: HiveDecimal decimalVal = (HiveDecimal) scalar; switch (integerPrimitiveCategory) { @@ -2587,7 +2693,7 @@ public class VectorizationContext { // We only store longs in our LongColumnVector. return decimalVal.longValue(); default: - throw new HiveException("Unsupported type "+typename+" for cast to Long"); + throw new HiveException("Unsupported primitive category " + primitiveCategory + " for cast to LONG"); } } @@ -3231,6 +3337,16 @@ public class VectorizationContext { variableArgPositions.add(i); exprResultColumnNums.add(e.getOutputColumnNum()); argDescs[i].setVariable(e.getOutputColumnNum()); + } else if (child instanceof ExprNodeFieldDesc) { + // Get the GenericUDFStructField to process the field of Struct type + VectorExpression e = + getGenericUDFStructField( + (ExprNodeFieldDesc) child, VectorExpressionDescriptor.Mode.PROJECTION, + child.getTypeInfo()); + vectorExprs.add(e); + variableArgPositions.add(i); + exprResultColumnNums.add(e.getOutputColumnNum()); + argDescs[i].setVariable(e.getOutputColumnNum()); } else { throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : " + child); @@ -3535,41 +3651,44 @@ public class VectorizationContext { PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); switch (primitiveCategory) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case DATE: - case INTERVAL_YEAR_MONTH: - return ColumnVector.Type.LONG; - - case TIMESTAMP: - return ColumnVector.Type.TIMESTAMP; - - case INTERVAL_DAY_TIME: - return ColumnVector.Type.INTERVAL_DAY_TIME; - - case FLOAT: - case DOUBLE: - return ColumnVector.Type.DOUBLE; - - case STRING: - case CHAR: - case VARCHAR: - case BINARY: - return ColumnVector.Type.BYTES; - - case DECIMAL: - if (dataTypePhysicalVariation != null && - dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { - return ColumnVector.Type.DECIMAL_64; - } else { - return ColumnVector.Type.DECIMAL; - } + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case DATE: + case INTERVAL_YEAR_MONTH: + return ColumnVector.Type.LONG; + + case TIMESTAMP: + return ColumnVector.Type.TIMESTAMP; + + case INTERVAL_DAY_TIME: + return ColumnVector.Type.INTERVAL_DAY_TIME; + + case FLOAT: + case DOUBLE: + return ColumnVector.Type.DOUBLE; + + case STRING: + case CHAR: + case VARCHAR: + case BINARY: + return ColumnVector.Type.BYTES; + + case DECIMAL: + if (dataTypePhysicalVariation != null && + dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { + return ColumnVector.Type.DECIMAL_64; + } else { + return ColumnVector.Type.DECIMAL; + } - default: - throw new HiveException("Unexpected primitive type category " + primitiveCategory); + case VOID: + return ColumnVector.Type.VOID; + + default: + throw new HiveException("Unexpected primitive type category " + primitiveCategory); } } default: http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 44b7c95..d92ec32 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -143,38 +143,40 @@ public class VectorizedBatchUtil { { PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; switch(primitiveTypeInfo.getPrimitiveCategory()) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case DATE: - case INTERVAL_YEAR_MONTH: - return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - case TIMESTAMP: - return new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - case INTERVAL_DAY_TIME: - return new IntervalDayTimeColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - case FLOAT: - case DOUBLE: - return new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - case BINARY: - case STRING: - case CHAR: - case VARCHAR: - return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - case DECIMAL: - DecimalTypeInfo tInfo = (DecimalTypeInfo) primitiveTypeInfo; - if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { - return new Decimal64ColumnVector(VectorizedRowBatch.DEFAULT_SIZE, - tInfo.precision(), tInfo.scale()); - } else { - return new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE, - tInfo.precision(), tInfo.scale()); - } - default: - throw new RuntimeException("Vectorizaton is not supported for datatype:" - + primitiveTypeInfo.getPrimitiveCategory()); + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case DATE: + case INTERVAL_YEAR_MONTH: + return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case TIMESTAMP: + return new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case INTERVAL_DAY_TIME: + return new IntervalDayTimeColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case FLOAT: + case DOUBLE: + return new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case BINARY: + case STRING: + case CHAR: + case VARCHAR: + return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case DECIMAL: + DecimalTypeInfo tInfo = (DecimalTypeInfo) primitiveTypeInfo; + if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { + return new Decimal64ColumnVector(VectorizedRowBatch.DEFAULT_SIZE, + tInfo.precision(), tInfo.scale()); + } else { + return new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE, + tInfo.precision(), tInfo.scale()); + } + case VOID: + return new VoidColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + default: + throw new RuntimeException("Vectorizaton is not supported for datatype:" + + primitiveTypeInfo.getPrimitiveCategory()); } } case STRUCT: @@ -791,7 +793,9 @@ public class VectorizedBatchUtil { case LIST: case MAP: case UNION: - // No complex type support for now. + throw new RuntimeException("No complex type support: " + sourceColVector.type); + case VOID: + break; default: throw new RuntimeException("Unexpected column vector type " + sourceColVector.type); } http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index 5b89131..4cc1be5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -202,6 +202,13 @@ public class ConstantVectorExpression extends VectorExpression { } } + private void evaluateVoid(VectorizedRowBatch vrg) { + VoidColumnVector voidColVector = (VoidColumnVector) vrg.cols[outputColumnNum]; + voidColVector.isRepeating = true; + voidColVector.isNull[0] = true; + voidColVector.noNulls = false; + } + @Override public void evaluate(VectorizedRowBatch vrg) { switch (type) { @@ -223,6 +230,11 @@ public class ConstantVectorExpression extends VectorExpression { case INTERVAL_DAY_TIME: evaluateIntervalDayTime(vrg); break; + case VOID: + evaluateVoid(vrg); + break; + default: + throw new RuntimeException("Unexpected column vector type " + type); } } http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index 4407961..3d0ee50 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -285,20 +285,18 @@ public abstract class VectorExpression implements Serializable { } protected String getParamTypeString(int typeNum) { - if (inputTypeInfos == null || inputDataTypePhysicalVariations == null) { - fake++; + if (inputTypeInfos == null) { + return "<input types is null>"; } - if (typeNum >= inputTypeInfos.length || typeNum >= inputDataTypePhysicalVariations.length) { - fake++; + if (inputDataTypePhysicalVariations == null) { + return "<input data type physical variations is null>"; } return getTypeName(inputTypeInfos[typeNum], inputDataTypePhysicalVariations[typeNum]); } - static int fake; - public static String getTypeName(TypeInfo typeInfo, DataTypePhysicalVariation dataTypePhysicalVariation) { if (typeInfo == null) { - fake++; + return "<input type is null>"; } if (dataTypePhysicalVariation != null && dataTypePhysicalVariation != DataTypePhysicalVariation.NONE) { return typeInfo.toString() + "/" + dataTypePhysicalVariation; http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java index cc876d5..4077552 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java @@ -154,8 +154,6 @@ public abstract class VectorReduceSinkCommonOperator extends TerminalOperator<Re VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { this(ctx); - LOG.info("VectorReduceSinkCommonOperator constructor"); - ReduceSinkDesc desc = (ReduceSinkDesc) conf; this.conf = desc; this.vContext = vContext; http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index a822a4b..d3fbf07 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -320,6 +320,7 @@ public class Vectorizer implements PhysicalPlanResolver { private Collection<Class<?>> rowDeserializeInputFormatExcludes; private int vectorizedPTFMaxMemoryBufferingBatchCount; private int vectorizedTestingReducerBatchSize; + private boolean isTestVectorizerSuppressFatalExceptions; private boolean isSchemaEvolution; @@ -1847,6 +1848,25 @@ public class Vectorizer implements PhysicalPlanResolver { // The "not vectorized" information has been stored in the MapWork vertex. return false; + } catch (NullPointerException e) { + if (!isTestVectorizerSuppressFatalExceptions) { + // Re-throw without losing original stack trace. + throw e; + } + setNodeIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); + return false; + } catch (ClassCastException e) { + if (!isTestVectorizerSuppressFatalExceptions) { + throw e; + } + setNodeIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); + return false; + } catch (RuntimeException e) { + if (!isTestVectorizerSuppressFatalExceptions) { + throw e; + } + setNodeIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); + return false; } vectorTaskColumnInfo.setNeededVirtualColumnList( @@ -2020,6 +2040,25 @@ public class Vectorizer implements PhysicalPlanResolver { // The "not vectorized" information has been stored in the MapWork vertex. return false; + } catch (NullPointerException e) { + if (!isTestVectorizerSuppressFatalExceptions) { + // Re-throw without losing original stack trace. + throw e; + } + setNodeIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); + return false; + } catch (ClassCastException e) { + if (!isTestVectorizerSuppressFatalExceptions) { + throw e; + } + setNodeIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); + return false; + } catch (RuntimeException e) { + if (!isTestVectorizerSuppressFatalExceptions) { + throw e; + } + setNodeIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); + return false; } /* @@ -2226,6 +2265,10 @@ public class Vectorizer implements PhysicalPlanResolver { HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_TESTING_REDUCER_BATCH_SIZE); + isTestVectorizerSuppressFatalExceptions = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_VECTORIZER_SUPPRESS_FATAL_EXCEPTIONS); + vectorizedInputFormatSupportEnabled = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED); @@ -2362,7 +2405,11 @@ public class Vectorizer implements PhysicalPlanResolver { private boolean validateSelectOperator(SelectOperator op) { List<ExprNodeDesc> descList = op.getConf().getColList(); for (ExprNodeDesc desc : descList) { - boolean ret = validateExprNodeDesc(desc, "Select"); + boolean ret = + validateExprNodeDesc( + desc, "Select", + VectorExpressionDescriptor.Mode.PROJECTION, + /* allowComplex */ true, /* allowVoidProjection */ true); if (!ret) { return false; } @@ -2737,6 +2784,12 @@ public class Vectorizer implements PhysicalPlanResolver { private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressionTitle, VectorExpressionDescriptor.Mode mode, boolean allowComplex) { + return validateExprNodeDescRecursive(desc, expressionTitle, mode, + allowComplex, /* allowVoidProjection */ false); + } + + private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressionTitle, + VectorExpressionDescriptor.Mode mode, boolean allowComplex, boolean allowVoidProjection) { if (desc instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; String columnName = c.getColumn(); @@ -2760,7 +2813,8 @@ public class Vectorizer implements PhysicalPlanResolver { } } String typeName = desc.getTypeInfo().getTypeName(); - boolean ret = validateDataType(typeName, mode, allowComplex && isVectorizationComplexTypesEnabled); + boolean ret = validateDataType( + typeName, mode, allowComplex && isVectorizationComplexTypesEnabled, allowVoidProjection); if (!ret) { setExpressionIssue(expressionTitle, getValidateDataTypeErrorMsg( @@ -2783,7 +2837,8 @@ public class Vectorizer implements PhysicalPlanResolver { && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) { // Don't restrict child expressions for projection. // Always use loose FILTER mode. - if (!validateStructInExpression(desc, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) { + if (!validateStructInExpression( + desc, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) { return false; } } else { @@ -2791,7 +2846,8 @@ public class Vectorizer implements PhysicalPlanResolver { // Don't restrict child expressions for projection. // Always use loose FILTER mode. if (!validateExprNodeDescRecursive( - d, expressionTitle, VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true)) { + d, expressionTitle, VectorExpressionDescriptor.Mode.FILTER, + /* allowComplex */ true, allowVoidProjection)) { return false; } } @@ -2843,7 +2899,8 @@ public class Vectorizer implements PhysicalPlanResolver { private boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle) { return validateExprNodeDesc( - desc, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */ true); + desc, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION, + /* allowComplex */ true, /* allowVoidProjection */ false); } boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle, @@ -2851,6 +2908,12 @@ public class Vectorizer implements PhysicalPlanResolver { return validateExprNodeDescRecursive(desc, expressionTitle, mode, allowComplex); } + boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle, + VectorExpressionDescriptor.Mode mode, boolean allowComplex, boolean allowVoidProjection) { + return validateExprNodeDescRecursive( + desc, expressionTitle, mode, allowComplex, allowVoidProjection); + } + private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) { if (VectorizationContext.isCustomUDF(genericUDFExpr)) { return true; @@ -2895,10 +2958,16 @@ public class Vectorizer implements PhysicalPlanResolver { public static boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode, boolean allowComplex) { + return validateDataType(type, mode, allowComplex, /* allowVoidProjection */ false); + } + + public static boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode, + boolean allowComplex, boolean allowVoidProjection) { type = type.toLowerCase(); boolean result = supportedDataTypesPattern.matcher(type).matches(); - if (result && mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) { + if (result && !allowVoidProjection && + mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) { return false; } @@ -2915,10 +2984,18 @@ public class Vectorizer implements PhysicalPlanResolver { public static String getValidateDataTypeErrorMsg(String type, VectorExpressionDescriptor.Mode mode, boolean allowComplex, boolean isVectorizationComplexTypesEnabled) { + return getValidateDataTypeErrorMsg( + type, mode, allowComplex, isVectorizationComplexTypesEnabled, false); + } + + public static String getValidateDataTypeErrorMsg(String type, VectorExpressionDescriptor.Mode mode, + boolean allowComplex, boolean isVectorizationComplexTypesEnabled, + boolean allowVoidProjection) { type = type.toLowerCase(); boolean result = supportedDataTypesPattern.matcher(type).matches(); - if (result && mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) { + if (result && !allowVoidProjection && + mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) { return "Vectorizing data type void not supported when mode = PROJECTION"; } http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/ql/src/test/queries/clientpositive/annotate_stats_select.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/annotate_stats_select.q b/ql/src/test/queries/clientpositive/annotate_stats_select.q index 5aaff11..3921f90 100644 --- a/ql/src/test/queries/clientpositive/annotate_stats_select.q +++ b/ql/src/test/queries/clientpositive/annotate_stats_select.q @@ -1,3 +1,4 @@ +SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; set hive.stats.fetch.column.stats=true; http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/ql/src/test/queries/clientpositive/vector_annotate_stats_select.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_annotate_stats_select.q b/ql/src/test/queries/clientpositive/vector_annotate_stats_select.q new file mode 100644 index 0000000..e45dff6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_annotate_stats_select.q @@ -0,0 +1,146 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +set hive.mapred.mode=nonstrict; +set hive.stats.fetch.column.stats=true; + +create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + vc1 varchar(5), + m1 map<string, string>, + l1 array<int>, + st1 struct<c1:int, c2:string> +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile; + +create table alltypes_orc like alltypes; +alter table alltypes_orc set fileformat orc; + +load data local inpath '../../data/files/alltypes.txt' overwrite into table alltypes; + +insert overwrite table alltypes_orc select * from alltypes; + +-- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514 +explain select * from alltypes_orc; + +-- statistics for complex types are not supported yet +analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1; + +-- numRows: 2 rawDataSize: 1514 +explain select * from alltypes_orc; + +-- numRows: 2 rawDataSize: 8 +explain select bo1 from alltypes_orc; + +-- col alias renaming +-- numRows: 2 rawDataSize: 8 +explain select i1 as int1 from alltypes_orc; + +-- numRows: 2 rawDataSize: 174 +explain select s1 from alltypes_orc; + +-- column statistics for complex types unsupported and so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain select m1 from alltypes_orc; + +-- numRows: 2 rawDataSize: 246 +explain select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc; + +-- numRows: 2 rawDataSize: 0 +explain vectorization expression select null from alltypes_orc; + +-- numRows: 2 rawDataSize: 8 +explain vectorization expression select 11 from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain vectorization expression select 11L from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain vectorization expression select 11.0 from alltypes_orc; + +-- numRows: 2 rawDataSize: 178 +explain vectorization expression select "hello" from alltypes_orc; +explain vectorization expression select cast("hello" as char(5)) from alltypes_orc; +explain vectorization expression select cast("hello" as varchar(5)) from alltypes_orc; + +-- numRows: 2 rawDataSize: 96 +explain vectorization expression select unbase64("0xe23") from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain vectorization expression select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc; + +-- numRows: 2 rawDataSize: 80 +explain vectorization expression select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain vectorization expression select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc; + +-- numRows: 2 rawDataSize: 224 +explain vectorization expression select cast("58.174" as DECIMAL) from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain vectorization expression select array(1,2,3) from alltypes_orc; + +-- numRows: 2 rawDataSize: 1508 +explain vectorization expression select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain vectorization expression select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc; + +-- numRows: 2 rawDataSize: 250 +explain vectorization expression select CREATE_UNION(0, "hello") from alltypes_orc; + +-- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 8 +explain vectorization expression select count(*) from alltypes_orc; + +-- COUNT(1) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 8 +explain vectorization expression select count(1) from alltypes_orc; + +-- column statistics for complex column types will be missing. data size will be calculated from available column statistics +-- numRows: 2 rawDataSize: 254 +explain vectorization expression select *,11 from alltypes_orc; + +-- subquery selects +-- inner select - numRows: 2 rawDataSize: 8 +-- outer select - numRows: 2 rawDataSize: 8 +explain vectorization expression select i1 from (select i1 from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 8 +explain vectorization expression select i1 from (select i1,11 from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 186 +explain vectorization expression select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 24 +-- outer select - numRows: 2 rawDataSize: 16 +explain vectorization expression select x from (select i1,11.0 as x from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 104 +-- outer select - numRows: 2 rawDataSize: 186 +explain vectorization expression select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 186 +-- middle select - numRows: 2 rawDataSize: 178 +-- outer select - numRows: 2 rawDataSize: 194 +explain vectorization expression select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2; + +-- This test is for FILTER operator where filter expression is a boolean column +-- numRows: 2 rawDataSize: 8 +explain vectorization expression select bo1 from alltypes_orc where bo1; + +-- numRows: 0 rawDataSize: 0 +explain vectorization expression select bo1 from alltypes_orc where !bo1; http://git-wip-us.apache.org/repos/asf/hive/blob/9d0f9c07/ql/src/test/queries/clientpositive/vector_null_projection.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_null_projection.q b/ql/src/test/queries/clientpositive/vector_null_projection.q index 711b8e7..6cea1e6 100644 --- a/ql/src/test/queries/clientpositive/vector_null_projection.q +++ b/ql/src/test/queries/clientpositive/vector_null_projection.q @@ -9,8 +9,8 @@ create table b(s string) stored as orc; insert into table a values('aaa'); insert into table b values('aaa'); --- We expect no vectorization due to NULL (void) projection type. -explain vectorization expression +-- We expect some vectorization due to NULL (void) projection type. +explain vectorization detail select NULL from a; select NULL from a;
