Repository: hive Updated Branches: refs/heads/master bf9568885 -> 98c2df152
Revert "HIVE-19564: Vectorization: Fix NULL / Wrong Results issues in Arithmetic (Matt McCline, reviewed by Teddy Choi)" This reverts commit bf95688851bd8aa2e70b4d49e965294050242fec. Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/98c2df15 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/98c2df15 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/98c2df15 Branch: refs/heads/master Commit: 98c2df1526cb11dec9db764c943a4d0dacabc2a0 Parents: bf95688 Author: Prasanth Jayachandran <[email protected]> Authored: Sat Jun 23 14:43:04 2018 -0700 Committer: Prasanth Jayachandran <[email protected]> Committed: Sat Jun 23 14:43:04 2018 -0700 ---------------------------------------------------------------------- .../ExpressionTemplates/ColumnDivideColumn.txt | 56 +- .../ExpressionTemplates/ScalarDivideColumn.txt | 65 +- .../expressions/LongColModuloLongColumn.java | 179 ++++++ .../LongColModuloLongColumnChecked.java | 52 ++ .../hive/ql/udf/generic/GenericUDFOPMod.java | 2 + .../exec/vector/TestVectorizationContext.java | 2 +- .../ql/exec/vector/VectorRandomRowSource.java | 8 +- .../expressions/TestVectorArithmetic.java | 602 ------------------- .../TestVectorArithmeticExpressions.java | 1 - .../vector/expressions/TestVectorNegative.java | 444 -------------- 10 files changed, 256 insertions(+), 1155 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/98c2df15/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt index 954b90e..c4a76ae 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt @@ 
-86,40 +86,19 @@ public class <ClassName> extends VectorExpression { */ boolean hasDivBy0 = false; if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - final <OperandType2> denom = vector2[0]; - hasDivBy0 = hasDivBy0 || (denom == 0); -#IF MANUAL_DIVIDE_BY_ZERO_CHECK - if (denom != 0) { - outputVector[0] = vector1[0] <OperatorSymbol> denom; - } -#ELSE + <OperandType2> denom = vector2[0]; outputVector[0] = vector1[0] <OperatorSymbol> denom; -#ENDIF MANUAL_DIVIDE_BY_ZERO_CHECK + hasDivBy0 = hasDivBy0 || (denom == 0); } else if (inputColVector1.isRepeating) { final <OperandType1> vector1Value = vector1[0]; if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - final <OperandType2> denom = vector2[i]; - hasDivBy0 = hasDivBy0 || (denom == 0); -#IF MANUAL_DIVIDE_BY_ZERO_CHECK - if (denom != 0) { - outputVector[i] = vector1Value <OperatorSymbol> denom; - } -#ELSE + <OperandType2> denom = vector2[i]; outputVector[i] = vector1Value <OperatorSymbol> denom; -#ENDIF MANUAL_DIVIDE_BY_ZERO_CHECK - } - } else { -#IF MANUAL_DIVIDE_BY_ZERO_CHECK - for(int i = 0; i != n; i++) { - final <OperandType2> denom = vector2[i]; hasDivBy0 = hasDivBy0 || (denom == 0); - if (denom != 0) { - outputVector[i] = vector1Value <OperatorSymbol> denom; - } } -#ELSE + } else { for(int i = 0; i != n; i++) { outputVector[i] = vector1Value <OperatorSymbol> vector2[i]; } @@ -127,7 +106,6 @@ public class <ClassName> extends VectorExpression { for(int i = 0; i != n; i++) { hasDivBy0 = hasDivBy0 || (vector2[i] == 0); } -#ENDIF MANUAL_DIVIDE_BY_ZERO_CHECK } } else if (inputColVector2.isRepeating) { final <OperandType2> vector2Value = vector2[0]; @@ -150,26 +128,11 @@ public class <ClassName> extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - final <OperandType2> denom = vector2[i]; - hasDivBy0 = hasDivBy0 || (denom == 0); -#IF MANUAL_DIVIDE_BY_ZERO_CHECK - if (denom != 0) { - outputVector[i] = vector1[i] <OperatorSymbol> 
denom; - } -#ELSE + <OperandType2> denom = vector2[i]; outputVector[i] = vector1[i] <OperatorSymbol> denom; -#ENDIF MANUAL_DIVIDE_BY_ZERO_CHECK - } - } else { -#IF MANUAL_DIVIDE_BY_ZERO_CHECK - for(int i = 0; i != n; i++) { - final <OperandType2> denom = vector2[i]; hasDivBy0 = hasDivBy0 || (denom == 0); - if (denom != 0) { - outputVector[i] = vector1[i] <OperatorSymbol> denom; - } } -#ELSE + } else { for(int i = 0; i != n; i++) { outputVector[i] = vector1[i] <OperatorSymbol> vector2[i]; } @@ -177,14 +140,13 @@ public class <ClassName> extends VectorExpression { for(int i = 0; i != n; i++) { hasDivBy0 = hasDivBy0 || (vector2[i] == 0); } -#ENDIF MANUAL_DIVIDE_BY_ZERO_CHECK } } #IF CHECKED - //when operating in checked mode make sure we handle overflows similar to non-vectorized expression - OverflowUtils.accountForOverflow<CamelReturnType>(getOutputTypeInfo(), outputColVector, - batch.selectedInUse, sel, n); + //when operating in checked mode make sure we handle overflows similar to non-vectorized expression + OverflowUtils.accountForOverflow<CamelReturnType>(getOutputTypeInfo(), outputColVector, + batch.selectedInUse, sel, n); #ELSE #ENDIF CHECKED /* For the case when the output can have null values, follow http://git-wip-us.apache.org/repos/asf/hive/blob/98c2df15/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt index 3cb7aaa..95e4ce1 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt @@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import java.util.Arrays; -import org.apache.hadoop.hive.ql.exec.vector.expressions.OverflowUtils; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -95,15 +94,9 @@ public class <ClassName> extends VectorExpression { if (inputColVector.isRepeating) { if (inputColVector.noNulls || !inputIsNull[0]) { outputIsNull[0] = false; - final <OperandType2> denom = vector[0]; - hasDivBy0 = hasDivBy0 || (denom == 0); -#IF MANUAL_DIVIDE_BY_ZERO_CHECK - if (denom != 0) { - outputVector[0] = value <OperatorSymbol> denom; - } -#ELSE + <OperandType2> denom = vector[0]; outputVector[0] = value <OperatorSymbol> denom; -#ENDIF MANUAL_DIVIDE_BY_ZERO_CHECK + hasDivBy0 = hasDivBy0 || (denom == 0); } else { outputIsNull[0] = true; outputColVector.noNulls = false; @@ -119,27 +112,15 @@ public class <ClassName> extends VectorExpression { final int i = sel[j]; outputIsNull[i] = false; <OperandType2> denom = vector[i]; - hasDivBy0 = hasDivBy0 || (denom == 0); -#IF MANUAL_DIVIDE_BY_ZERO_CHECK - if (denom != 0) { - outputVector[i] = value <OperatorSymbol> denom; - } -#ELSE outputVector[i] = value <OperatorSymbol> denom; -#ENDIF MANUAL_DIVIDE_BY_ZERO_CHECK + hasDivBy0 = hasDivBy0 || (denom == 0); } } else { for(int j = 0; j != n; j++) { final int i = sel[j]; - final <OperandType2> denom = vector[i]; - hasDivBy0 = hasDivBy0 || (denom == 0); -#IF MANUAL_DIVIDE_BY_ZERO_CHECK - if (denom != 0) { - outputVector[i] = value <OperatorSymbol> denom; - } -#ELSE + <OperandType2> denom = vector[i]; outputVector[i] = value <OperatorSymbol> denom; -#ENDIF MANUAL_DIVIDE_BY_ZERO_CHECK + hasDivBy0 = hasDivBy0 || (denom == 0); } } } else { @@ -151,15 +132,9 @@ public class <ClassName> extends VectorExpression { outputColVector.noNulls = true; } for(int i = 0; i != n; i++) { - final <OperandType2> denom = vector[i]; - hasDivBy0 = hasDivBy0 || (denom == 0); -#IF MANUAL_DIVIDE_BY_ZERO_CHECK - if (denom != 0) { - outputVector[i] = value <OperatorSymbol> denom; - } -#ELSE + <OperandType2> denom = vector[i]; outputVector[i] 
= value <OperatorSymbol> denom; -#ENDIF MANUAL_DIVIDE_BY_ZERO_CHECK + hasDivBy0 = hasDivBy0 || (denom == 0); } } } else /* there are NULLs in the inputColVector */ { @@ -171,38 +146,20 @@ public class <ClassName> extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; outputIsNull[i] = inputIsNull[i]; - final <OperandType2> denom = vector[i]; - hasDivBy0 = hasDivBy0 || (denom == 0); -#IF MANUAL_DIVIDE_BY_ZERO_CHECK - if (denom != 0) { - outputVector[i] = value <OperatorSymbol> denom; - } -#ELSE + <OperandType2> denom = vector[i]; outputVector[i] = value <OperatorSymbol> denom; -#ENDIF MANUAL_DIVIDE_BY_ZERO_CHECK + hasDivBy0 = hasDivBy0 || (denom == 0); } } else { System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - final <OperandType2> denom = vector[i]; - hasDivBy0 = hasDivBy0 || (denom == 0); -#IF MANUAL_DIVIDE_BY_ZERO_CHECK - if (denom != 0) { - outputVector[i] = value <OperatorSymbol> denom; - } -#ELSE + <OperandType2> denom = vector[i]; outputVector[i] = value <OperatorSymbol> denom; -#ENDIF MANUAL_DIVIDE_BY_ZERO_CHECK + hasDivBy0 = hasDivBy0 || (denom == 0); } } } -#IF CHECKED - //when operating in checked mode make sure we handle overflows similar to non-vectorized expression - OverflowUtils.accountForOverflow<CamelReturnType>(getOutputTypeInfo(), outputColVector, - batch.selectedInUse, sel, n); -#ELSE -#ENDIF CHECKED if (!hasDivBy0) { NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } else { http://git-wip-us.apache.org/repos/asf/hive/blob/98c2df15/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java new file mode 100644 index 0000000..60faebb --- /dev/null +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumn.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * This operation is handled as a special case because the Hive + * long % long modulo operation needs special handling to avoid + * a divide-by-zero exception. + */ +public class LongColModuloLongColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private final int colNum1; + private final int colNum2; + + public LongColModuloLongColumn(int colNum1, int colNum2, int outputColumnNum) { + super(outputColumnNum); + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + public LongColModuloLongColumn() { + super(); + + // Dummy final assignments. 
+ colNum1 = -1; + colNum2 = -1; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1]; + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + long[] vector2 = inputColVector2.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + /* + * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately. + */ + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. 
+ */ + boolean hasDivBy0 = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + long denom = vector2[0]; + hasDivBy0 = hasDivBy0 || (denom == 0); + if (denom != 0) { + outputVector[0] = vector1[0] % denom; + } + } else if (inputColVector1.isRepeating) { + final long vector1Value = vector1[0]; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + long denom = vector2[i]; + hasDivBy0 = hasDivBy0 || (denom == 0); + if (denom != 0) { + outputVector[i] = vector1Value % denom; + } + } + } else { + for(int i = 0; i != n; i++) { + hasDivBy0 = hasDivBy0 || (vector2[i] == 0); + if (vector2[i] != 0) { + outputVector[i] = vector1Value % vector2[i]; + } + } + } + } else if (inputColVector2.isRepeating) { + final long vector2Value = vector2[0]; + if (vector2Value == 0) { + // Denominator is zero, convert the batch to nulls + outputColVector.noNulls = false; + outputColVector.isRepeating = true; + outputColVector.isNull[0] = true; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector1[i] % vector2Value; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = vector1[i] % vector2Value; + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + long denom = vector2[i]; + hasDivBy0 = hasDivBy0 || (denom == 0); + if (denom != 0) { + outputVector[i] = vector1[i] % denom; + } + } + } else { + for(int i = 0; i != n; i++) { + hasDivBy0 = hasDivBy0 || (vector2[i] == 0); + if (vector2[i] != 0) { + outputVector[i] = vector1[i] % vector2[i]; + } + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 % (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + if (!hasDivBy0) { + NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n); + } else { + NullUtil.setNullAndDivBy0DataEntriesLong( + outputColVector, batch.selectedInUse, sel, n, inputColVector2); + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, colNum1) + ", " + getColumnParamString(1, colNum2); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("long")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/98c2df15/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumnChecked.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumnChecked.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumnChecked.java new file mode 100644 index 0000000..24a860a --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColModuloLongColumnChecked.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * This vector expression implements a Checked variant of LongColModuloLongColumn + * If the outputTypeInfo is not long it casts the result column vector values to + * the set outputType so as to have similar result when compared to non-vectorized UDF + * execution. + */ +public class LongColModuloLongColumnChecked extends LongColModuloLongColumn { + public LongColModuloLongColumnChecked(int colNum1, int colNum2, int outputColumnNum) { + super(colNum1, colNum2, outputColumnNum); + } + + public LongColModuloLongColumnChecked() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + super.evaluate(batch); + //checked for overflow based on the outputTypeInfo + OverflowUtils + .accountForOverflowLong(outputTypeInfo, (LongColumnVector) batch.cols[outputColumnNum], batch.selectedInUse, + batch.selected, batch.size); + } + + @Override + public boolean supportsCheckedExecution() { + return true; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/98c2df15/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java 
index bef32b4..044fb06 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMod.java @@ -21,6 +21,8 @@ package org.apache.hadoop.hive.ql.udf.generic; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColModuloLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColModuloLongColumnChecked; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; http://git-wip-us.apache.org/repos/asf/hive/blob/98c2df15/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index f51b8bb..791ac82 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -54,7 +54,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScal import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColModuloLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColModuloLongColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.LongColEqualLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColGreaterLongScalar; http://git-wip-us.apache.org/repos/asf/hive/blob/98c2df15/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java index ae91b73..0e4dcfd 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -243,10 +243,6 @@ public class VectorRandomRowSource { return rowStructObjectInspector; } - public List<ObjectInspector> objectInspectorList() { - return objectInspectorList; - } - public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) { ArrayList<ObjectInspector> partialObjectInspectorList = new ArrayList<ObjectInspector>(partialFieldCount); @@ -449,11 +445,11 @@ public class VectorRandomRowSource { return getDecoratedTypeName(r, typeName, supportedTypes, allowedTypeNameSet, depth, maxDepth); } - public static ObjectInspector getObjectInspector(TypeInfo typeInfo) { + private ObjectInspector getObjectInspector(TypeInfo typeInfo) { return getObjectInspector(typeInfo, DataTypePhysicalVariation.NONE); } - public static ObjectInspector getObjectInspector(TypeInfo typeInfo, + private ObjectInspector getObjectInspector(TypeInfo typeInfo, DataTypePhysicalVariation dataTypePhysicalVariation) { final ObjectInspector objectInspector; http://git-wip-us.apache.org/repos/asf/hive/blob/98c2df15/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java ---------------------------------------------------------------------- diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java deleted file mode 100644 index 36a8652..0000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java +++ /dev/null @@ -1,602 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.lang.reflect.Constructor; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; - -import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; -import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; -import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateDiff; -import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateSub; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPDivide; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMinus; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMod; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMultiply; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; - -import junit.framework.Assert; - -import org.junit.Ignore; -import org.junit.Test; - -public class TestVectorArithmetic { - - public TestVectorArithmetic() { - // Arithmetic operations rely on getting conf from SessionState, need to initialize here. 
- SessionState ss = new SessionState(new HiveConf()); - ss.getConf().setVar(HiveConf.ConfVars.HIVE_COMPAT, "latest"); - SessionState.setCurrentSessionState(ss); - } - - @Test - public void testIntegers() throws Exception { - Random random = new Random(7743); - - doIntegerTests(random); - } - - @Test - public void testIntegerFloating() throws Exception { - Random random = new Random(7743); - - doIntegerFloatingTests(random); - } - - @Test - public void testFloating() throws Exception { - Random random = new Random(7743); - - doFloatingTests(random); - } - - @Test - public void testDecimals() throws Exception { - Random random = new Random(7743); - - doDecimalTests(random); - } - - public enum ArithmeticTestMode { - ROW_MODE, - ADAPTOR, - VECTOR_EXPRESSION; - - static final int count = values().length; - } - - public enum ColumnScalarMode { - COLUMN_COLUMN, - COLUMN_SCALAR, - SCALAR_COLUMN; - - static final int count = values().length; - } - - private static TypeInfo[] integerTypeInfos = new TypeInfo[] { - TypeInfoFactory.byteTypeInfo, - TypeInfoFactory.shortTypeInfo, - TypeInfoFactory.intTypeInfo, - TypeInfoFactory.longTypeInfo - }; - - // We have test failures with FLOAT. Ignoring this issue for now. 
- private static TypeInfo[] floatingTypeInfos = new TypeInfo[] { - // TypeInfoFactory.floatTypeInfo, - TypeInfoFactory.doubleTypeInfo - }; - - private void doIntegerTests(Random random) - throws Exception { - for (TypeInfo typeInfo : integerTypeInfos) { - for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) { - doTestsWithDiffColumnScalar( - random, typeInfo, typeInfo, columnScalarMode); - } - } - } - - private void doIntegerFloatingTests(Random random) - throws Exception { - for (TypeInfo typeInfo1 : integerTypeInfos) { - for (TypeInfo typeInfo2 : floatingTypeInfos) { - for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) { - doTestsWithDiffColumnScalar( - random, typeInfo1, typeInfo2, columnScalarMode); - } - } - } - for (TypeInfo typeInfo1 : floatingTypeInfos) { - for (TypeInfo typeInfo2 : integerTypeInfos) { - for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) { - doTestsWithDiffColumnScalar( - random, typeInfo1, typeInfo2, columnScalarMode); - } - } - } - } - - private void doFloatingTests(Random random) - throws Exception { - for (TypeInfo typeInfo1 : floatingTypeInfos) { - for (TypeInfo typeInfo2 : floatingTypeInfos) { - for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) { - doTestsWithDiffColumnScalar( - random, typeInfo1, typeInfo2, columnScalarMode); - } - } - } - } - - private static TypeInfo[] decimalTypeInfos = new TypeInfo[] { - new DecimalTypeInfo(38, 18), - new DecimalTypeInfo(25, 2), - new DecimalTypeInfo(19, 4), - new DecimalTypeInfo(18, 10), - new DecimalTypeInfo(17, 3), - new DecimalTypeInfo(12, 2), - new DecimalTypeInfo(7, 1) - }; - - private void doDecimalTests(Random random) - throws Exception { - for (TypeInfo typeInfo : decimalTypeInfos) { - for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) { - doTestsWithDiffColumnScalar( - random, typeInfo, typeInfo, columnScalarMode); - } - } - } - - private TypeInfo getOutputTypeInfo(GenericUDF genericUdfClone, - 
List<ObjectInspector> objectInspectorList) - throws HiveException { - - ObjectInspector[] array = - objectInspectorList.toArray(new ObjectInspector[objectInspectorList.size()]); - ObjectInspector outputObjectInspector = genericUdfClone.initialize(array); - return TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector); - } - - public enum Arithmetic { - ADD, - SUBTRACT, - MULTIPLY, - DIVIDE, - MODULUS; - } - - private TypeInfo getDecimalScalarTypeInfo(Object scalarObject) { - HiveDecimal dec = (HiveDecimal) scalarObject; - int precision = dec.precision(); - int scale = dec.scale(); - return new DecimalTypeInfo(precision, scale); - } - - private void doTestsWithDiffColumnScalar(Random random, TypeInfo typeInfo1, TypeInfo typeInfo2, - ColumnScalarMode columnScalarMode) - throws Exception { - for (Arithmetic arithmetic : Arithmetic.values()) { - doTestsWithDiffColumnScalar(random, typeInfo1, typeInfo2, columnScalarMode, arithmetic); - } - } - - private void doTestsWithDiffColumnScalar(Random random, TypeInfo typeInfo1, TypeInfo typeInfo2, - ColumnScalarMode columnScalarMode, Arithmetic arithmetic) - throws Exception { - - String typeName1 = typeInfo1.getTypeName(); - PrimitiveCategory primitiveCategory1 = - ((PrimitiveTypeInfo) typeInfo1).getPrimitiveCategory(); - - String typeName2 = typeInfo2.getTypeName(); - PrimitiveCategory primitiveCategory2 = - ((PrimitiveTypeInfo) typeInfo2).getPrimitiveCategory(); - - List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>(); - List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = - new ArrayList<DataTypePhysicalVariation>(); - - List<String> columns = new ArrayList<String>(); - int columnNum = 0; - - ExprNodeDesc col1Expr; - Object scalar1Object = null; - if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || - columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { - generationSpecList.add( - GenerationSpec.createSameType(typeInfo1)); - 
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); - - String columnName = "col" + (columnNum++); - col1Expr = new ExprNodeColumnDesc(typeInfo1, columnName, "table", false); - columns.add(columnName); - } else { - scalar1Object = - VectorRandomRowSource.randomPrimitiveObject( - random, (PrimitiveTypeInfo) typeInfo1); - - // Adjust the decimal type to the scalar's type... - if (typeInfo1 instanceof DecimalTypeInfo) { - typeInfo1 = getDecimalScalarTypeInfo(scalar1Object); - } - - col1Expr = new ExprNodeConstantDesc(typeInfo1, scalar1Object); - } - ExprNodeDesc col2Expr; - Object scalar2Object = null; - if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || - columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { - generationSpecList.add( - GenerationSpec.createSameType(typeInfo2)); - - explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); - - String columnName = "col" + (columnNum++); - col2Expr = new ExprNodeColumnDesc(typeInfo2, columnName, "table", false); - columns.add(columnName); - } else { - scalar2Object = - VectorRandomRowSource.randomPrimitiveObject( - random, (PrimitiveTypeInfo) typeInfo2); - - // Adjust the decimal type to the scalar's type... 
- if (typeInfo2 instanceof DecimalTypeInfo) { - typeInfo2 = getDecimalScalarTypeInfo(scalar2Object); - } - - col2Expr = new ExprNodeConstantDesc(typeInfo2, scalar2Object); - } - - List<ObjectInspector> objectInspectorList = new ArrayList<ObjectInspector>(); - objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo1)); - objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo2)); - - List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); - children.add(col1Expr); - children.add(col2Expr); - - //---------------------------------------------------------------------------------------------- - - String[] columnNames = columns.toArray(new String[0]); - - VectorRandomRowSource rowSource = new VectorRandomRowSource(); - - rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, - explicitDataTypePhysicalVariationList); - - Object[][] randomRows = rowSource.randomRows(100000); - - VectorRandomBatchSource batchSource = - VectorRandomBatchSource.createInterestingBatches( - random, - rowSource, - randomRows, - null); - - GenericUDF genericUdf; - switch (arithmetic) { - case ADD: - genericUdf = new GenericUDFOPPlus(); - break; - case SUBTRACT: - genericUdf = new GenericUDFOPMinus(); - break; - case MULTIPLY: - genericUdf = new GenericUDFOPMultiply(); - break; - case DIVIDE: - genericUdf = new GenericUDFOPDivide(); - break; - case MODULUS: - genericUdf = new GenericUDFOPMod(); - break; - default: - throw new RuntimeException("Unexpected arithmetic " + arithmetic); - } - - ObjectInspector[] objectInspectors = - objectInspectorList.toArray(new ObjectInspector[objectInspectorList.size()]); - ObjectInspector outputObjectInspector = null; - try { - outputObjectInspector = genericUdf.initialize(objectInspectors); - } catch (Exception e) { - Assert.fail(e.toString()); - } - - TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector); - - 
ExprNodeGenericFuncDesc exprDesc = - new ExprNodeGenericFuncDesc(outputTypeInfo, genericUdf, children); - - final int rowCount = randomRows.length; - Object[][] resultObjectsArray = new Object[ArithmeticTestMode.count][]; - for (int i = 0; i < ArithmeticTestMode.count; i++) { - - Object[] resultObjects = new Object[rowCount]; - resultObjectsArray[i] = resultObjects; - - ArithmeticTestMode arithmeticTestMode = ArithmeticTestMode.values()[i]; - switch (arithmeticTestMode) { - case ROW_MODE: - doRowArithmeticTest( - typeInfo1, - typeInfo2, - columns, - children, - exprDesc, - arithmetic, - randomRows, - columnScalarMode, - rowSource.rowStructObjectInspector(), - outputTypeInfo, - resultObjects); - break; - case ADAPTOR: - case VECTOR_EXPRESSION: - doVectorArithmeticTest( - typeInfo1, - typeInfo2, - columns, - columnNames, - rowSource.typeInfos(), - rowSource.dataTypePhysicalVariations(), - children, - exprDesc, - arithmetic, - arithmeticTestMode, - columnScalarMode, - batchSource, - exprDesc.getWritableObjectInspector(), - outputTypeInfo, - resultObjects); - break; - default: - throw new RuntimeException("Unexpected IF statement test mode " + arithmeticTestMode); - } - } - - for (int i = 0; i < rowCount; i++) { - // Row-mode is the expected value. - Object expectedResult = resultObjectsArray[0][i]; - - for (int v = 1; v < ArithmeticTestMode.count; v++) { - Object vectorResult = resultObjectsArray[v][i]; - if (expectedResult == null || vectorResult == null) { - if (expectedResult != null || vectorResult != null) { - Assert.fail( - "Row " + i + - " typeName " + typeName1 + - " outputTypeName " + outputTypeInfo.getTypeName() + - " " + arithmetic + - " " + ArithmeticTestMode.values()[v] + - " " + columnScalarMode + - " result is NULL " + (vectorResult == null) + - " does not match row-mode expected result is NULL " + (expectedResult == null) + - (columnScalarMode == ColumnScalarMode.SCALAR_COLUMN ? 
- " scalar1 " + scalar1Object.toString() : "") + - " row values " + Arrays.toString(randomRows[i]) + - (columnScalarMode == ColumnScalarMode.COLUMN_SCALAR ? - " scalar2 " + scalar2Object.toString() : "")); - } - } else { - - if (!expectedResult.equals(vectorResult)) { - Assert.fail( - "Row " + i + - " typeName " + typeName1 + - " outputTypeName " + outputTypeInfo.getTypeName() + - " " + arithmetic + - " " + ArithmeticTestMode.values()[v] + - " " + columnScalarMode + - " result " + vectorResult.toString() + - " (" + vectorResult.getClass().getSimpleName() + ")" + - " does not match row-mode expected result " + expectedResult.toString() + - " (" + expectedResult.getClass().getSimpleName() + ")" + - (columnScalarMode == ColumnScalarMode.SCALAR_COLUMN ? - " scalar1 " + scalar1Object.toString() : "") + - " row values " + Arrays.toString(randomRows[i]) + - (columnScalarMode == ColumnScalarMode.COLUMN_SCALAR ? - " scalar2 " + scalar2Object.toString() : "")); - } - } - } - } - } - - private void doRowArithmeticTest(TypeInfo typeInfo1, - TypeInfo typeInfo2, - List<String> columns, List<ExprNodeDesc> children, - ExprNodeGenericFuncDesc exprDesc, - Arithmetic arithmetic, - Object[][] randomRows, ColumnScalarMode columnScalarMode, - ObjectInspector rowInspector, - TypeInfo outputTypeInfo, Object[] resultObjects) throws Exception { - - /* - System.out.println( - "*DEBUG* typeInfo " + typeInfo1.toString() + - " typeInfo2 " + typeInfo2 + - " arithmeticTestMode ROW_MODE" + - " columnScalarMode " + columnScalarMode + - " exprDesc " + exprDesc.toString()); - */ - - HiveConf hiveConf = new HiveConf(); - ExprNodeEvaluator evaluator = - ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); - evaluator.initialize(rowInspector); - - ObjectInspector objectInspector = - TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( - outputTypeInfo); - - final int rowCount = randomRows.length; - for (int i = 0; i < rowCount; i++) { - Object[] row = randomRows[i]; - Object result = 
evaluator.evaluate(row); - Object copyResult = null; - try { - copyResult = - ObjectInspectorUtils.copyToStandardObject( - result, objectInspector, ObjectInspectorCopyOption.WRITABLE); - } catch (Exception e) { - Assert.fail(e.toString()); - } - resultObjects[i] = copyResult; - } - } - - private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, - VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, - ObjectInspector objectInspector, Object[] resultObjects) { - - boolean selectedInUse = batch.selectedInUse; - int[] selected = batch.selected; - for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { - final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex); - resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); - - Object copyResult = - ObjectInspectorUtils.copyToStandardObject( - scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE); - resultObjects[rowIndex++] = copyResult; - } - } - - private void doVectorArithmeticTest(TypeInfo typeInfo1, - TypeInfo typeInfo2, - List<String> columns, - String[] columnNames, - TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, - List<ExprNodeDesc> children, - ExprNodeGenericFuncDesc exprDesc, - Arithmetic arithmetic, - ArithmeticTestMode arithmeticTestMode, ColumnScalarMode columnScalarMode, - VectorRandomBatchSource batchSource, - ObjectInspector objectInspector, - TypeInfo outputTypeInfo, Object[] resultObjects) - throws Exception { - - HiveConf hiveConf = new HiveConf(); - if (arithmeticTestMode == ArithmeticTestMode.ADAPTOR) { - hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true); - } - - VectorizationContext vectorizationContext = - new VectorizationContext( - "name", - columns, - Arrays.asList(typeInfos), - Arrays.asList(dataTypePhysicalVariations), - hiveConf); - VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc); - 
vectorExpression.transientInit(); - - String[] outputScratchTypeNames= vectorizationContext.getScratchColumnTypeNames(); - - VectorizedRowBatchCtx batchContext = - new VectorizedRowBatchCtx( - columnNames, - typeInfos, - dataTypePhysicalVariations, - /* dataColumnNums */ null, - /* partitionColumnCount */ 0, - /* virtualColumnCount */ 0, - /* neededVirtualColumns */ null, - outputScratchTypeNames, - null); - - VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); - - VectorExtractRow resultVectorExtractRow = new VectorExtractRow(); - resultVectorExtractRow.init( - new TypeInfo[] { outputTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() }); - Object[] scrqtchRow = new Object[1]; - - /* - System.out.println( - "*DEBUG* typeInfo1 " + typeInfo1.toString() + - " typeInfo2 " + typeInfo2.toString() + - " arithmeticTestMode " + arithmeticTestMode + - " columnScalarMode " + columnScalarMode + - " vectorExpression " + vectorExpression.toString()); - */ - - batchSource.resetBatchIteration(); - int rowIndex = 0; - while (true) { - if (!batchSource.fillNextBatch(batch)) { - break; - } - vectorExpression.evaluate(batch); - extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, - objectInspector, resultObjects); - rowIndex += batch.size; - } - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/98c2df15/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java index a716224..f5491af 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java @@ -54,7 +54,6 @@ 
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalScalarAddDec import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalScalarSubtractDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalScalarMultiplyDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongScalarChecked; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColModuloLongColumn; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; http://git-wip-us.apache.org/repos/asf/hive/blob/98c2df15/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java deleted file mode 100644 index ce20f28..0000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java +++ /dev/null @@ -1,444 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.lang.reflect.Constructor; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; - -import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; -import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; -import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd; -import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateDiff; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateSub; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPDivide; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMinus; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMod; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMultiply; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNegative; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; - -import junit.framework.Assert; - -import org.junit.Ignore; -import org.junit.Test; - -public class TestVectorNegative { - - public TestVectorNegative() { - // Arithmetic operations rely on getting conf from SessionState, 
need to initialize here. - SessionState ss = new SessionState(new HiveConf()); - ss.getConf().setVar(HiveConf.ConfVars.HIVE_COMPAT, "latest"); - SessionState.setCurrentSessionState(ss); - } - - @Test - public void testInteger() throws Exception { - Random random = new Random(7743); - - doIntegerTests(random); - } - - @Test - public void testFloating() throws Exception { - Random random = new Random(7743); - - doFloatingTests(random); - } - - @Test - public void testDecimal() throws Exception { - Random random = new Random(7743); - - doDecimalTests(random); - } - - public enum NegativeTestMode { - ROW_MODE, - ADAPTOR, - VECTOR_EXPRESSION; - - static final int count = values().length; - } - - private static TypeInfo[] integerTypeInfos = new TypeInfo[] { - TypeInfoFactory.byteTypeInfo, - TypeInfoFactory.shortTypeInfo, - TypeInfoFactory.intTypeInfo, - TypeInfoFactory.longTypeInfo - }; - - private static TypeInfo[] floatingTypeInfos = new TypeInfo[] { - TypeInfoFactory.floatTypeInfo, - TypeInfoFactory.doubleTypeInfo - }; - - private void doIntegerTests(Random random) - throws Exception { - for (TypeInfo typeInfo : integerTypeInfos) { - doTests(random, typeInfo); - } - } - - private void doIntegerFloatingTests(Random random) - throws Exception { - for (TypeInfo typeInfo : integerTypeInfos) { - doTests(random, typeInfo); - } - } - - private void doFloatingTests(Random random) - throws Exception { - for (TypeInfo typeInfo : floatingTypeInfos) { - doTests(random, typeInfo); - } - } - - private static TypeInfo[] decimalTypeInfos = new TypeInfo[] { - new DecimalTypeInfo(38, 18), - new DecimalTypeInfo(25, 2), - new DecimalTypeInfo(19, 4), - new DecimalTypeInfo(18, 10), - new DecimalTypeInfo(17, 3), - new DecimalTypeInfo(12, 2), - new DecimalTypeInfo(7, 1) - }; - - private void doDecimalTests(Random random) - throws Exception { - for (TypeInfo typeInfo : decimalTypeInfos) { - doTests(random, typeInfo); - } - } - - private TypeInfo getOutputTypeInfo(GenericUDF genericUdfClone, - 
List<ObjectInspector> objectInspectorList) - throws HiveException { - - ObjectInspector[] array = - objectInspectorList.toArray(new ObjectInspector[objectInspectorList.size()]); - ObjectInspector outputObjectInspector = genericUdfClone.initialize(array); - return TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector); - } - - private void doTests(Random random, TypeInfo typeInfo) - throws Exception { - - String typeName = typeInfo.getTypeName(); - PrimitiveCategory primitiveCategory1 = - ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); - - List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>(); - List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = - new ArrayList<DataTypePhysicalVariation>(); - - List<String> columns = new ArrayList<String>(); - int columnNum = 0; - - generationSpecList.add( - GenerationSpec.createSameType(typeInfo)); - explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); - - ExprNodeDesc col1Expr; - String columnName = "col" + (columnNum++); - col1Expr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false); - columns.add(columnName); - - List<ObjectInspector> objectInspectorList = new ArrayList<ObjectInspector>(); - objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo)); - - List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); - children.add(col1Expr); - - //---------------------------------------------------------------------------------------------- - - String[] columnNames = columns.toArray(new String[0]); - - VectorRandomRowSource rowSource = new VectorRandomRowSource(); - - rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, - explicitDataTypePhysicalVariationList); - - Object[][] randomRows = rowSource.randomRows(100000); - - VectorRandomBatchSource batchSource = - VectorRandomBatchSource.createInterestingBatches( - random, - rowSource, - randomRows, - null); - - 
GenericUDF genericUdf = new GenericUDFOPNegative(); - - ObjectInspector[] objectInspectors = - objectInspectorList.toArray(new ObjectInspector[objectInspectorList.size()]); - ObjectInspector outputObjectInspector = null; - try { - outputObjectInspector = genericUdf.initialize(objectInspectors); - } catch (Exception e) { - Assert.fail(e.toString()); - } - - TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector); - - ExprNodeGenericFuncDesc exprDesc = - new ExprNodeGenericFuncDesc(outputTypeInfo, genericUdf, children); - - final int rowCount = randomRows.length; - Object[][] resultObjectsArray = new Object[NegativeTestMode.count][]; - for (int i = 0; i < NegativeTestMode.count; i++) { - - Object[] resultObjects = new Object[rowCount]; - resultObjectsArray[i] = resultObjects; - - NegativeTestMode negativeTestMode = NegativeTestMode.values()[i]; - switch (negativeTestMode) { - case ROW_MODE: - doRowArithmeticTest( - typeInfo, - columns, - children, - exprDesc, - randomRows, - rowSource.rowStructObjectInspector(), - outputTypeInfo, - resultObjects); - break; - case ADAPTOR: - case VECTOR_EXPRESSION: - doVectorArithmeticTest( - typeInfo, - columns, - columnNames, - rowSource.typeInfos(), - rowSource.dataTypePhysicalVariations(), - children, - exprDesc, - negativeTestMode, - batchSource, - exprDesc.getWritableObjectInspector(), - outputTypeInfo, - resultObjects); - break; - default: - throw new RuntimeException("Unexpected Negative operator test mode " + negativeTestMode); - } - } - - for (int i = 0; i < rowCount; i++) { - // Row-mode is the expected value. 
- Object expectedResult = resultObjectsArray[0][i]; - - for (int v = 1; v < NegativeTestMode.count; v++) { - Object vectorResult = resultObjectsArray[v][i]; - if (expectedResult == null || vectorResult == null) { - if (expectedResult != null || vectorResult != null) { - Assert.fail( - "Row " + i + - " typeName " + typeName + - " outputTypeName " + outputTypeInfo.getTypeName() + - " " + NegativeTestMode.values()[v] + - " result is NULL " + (vectorResult == null) + - " does not match row-mode expected result is NULL " + (expectedResult == null) + - " row values " + Arrays.toString(randomRows[i])); - } - } else { - - if (!expectedResult.equals(vectorResult)) { - Assert.fail( - "Row " + i + - " typeName " + typeName + - " outputTypeName " + outputTypeInfo.getTypeName() + - " " + NegativeTestMode.values()[v] + - " result " + vectorResult.toString() + - " (" + vectorResult.getClass().getSimpleName() + ")" + - " does not match row-mode expected result " + expectedResult.toString() + - " (" + expectedResult.getClass().getSimpleName() + ")" + - " row values " + Arrays.toString(randomRows[i])); - } - } - } - } - } - - private void doRowArithmeticTest(TypeInfo typeInfo, - List<String> columns, List<ExprNodeDesc> children, - ExprNodeGenericFuncDesc exprDesc, - Object[][] randomRows, - ObjectInspector rowInspector, - TypeInfo outputTypeInfo, Object[] resultObjects) throws Exception { - - System.out.println( - "*DEBUG* typeInfo " + typeInfo.toString() + - " negativeTestMode ROW_MODE" + - " exprDesc " + exprDesc.toString()); - - HiveConf hiveConf = new HiveConf(); - ExprNodeEvaluator evaluator = - ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); - evaluator.initialize(rowInspector); - - ObjectInspector objectInspector = - TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( - outputTypeInfo); - - final int rowCount = randomRows.length; - for (int i = 0; i < rowCount; i++) { - Object[] row = randomRows[i]; - Object result = evaluator.evaluate(row); - Object copyResult = 
null; - try { - copyResult = - ObjectInspectorUtils.copyToStandardObject( - result, objectInspector, ObjectInspectorCopyOption.WRITABLE); - } catch (Exception e) { - System.out.println("here"); - } - resultObjects[i] = copyResult; - } - } - - private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, - VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, - ObjectInspector objectInspector, Object[] resultObjects) { - - boolean selectedInUse = batch.selectedInUse; - int[] selected = batch.selected; - for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { - final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex); - resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); - - Object copyResult = - ObjectInspectorUtils.copyToStandardObject( - scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE); - resultObjects[rowIndex++] = copyResult; - } - } - - private void doVectorArithmeticTest(TypeInfo typeInfo, - List<String> columns, - String[] columnNames, - TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, - List<ExprNodeDesc> children, - ExprNodeGenericFuncDesc exprDesc, - NegativeTestMode negativeTestMode, - VectorRandomBatchSource batchSource, - ObjectInspector objectInspector, - TypeInfo outputTypeInfo, Object[] resultObjects) - throws Exception { - - HiveConf hiveConf = new HiveConf(); - if (negativeTestMode == NegativeTestMode.ADAPTOR) { - hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true); - } - - VectorizationContext vectorizationContext = - new VectorizationContext( - "name", - columns, - Arrays.asList(typeInfos), - Arrays.asList(dataTypePhysicalVariations), - hiveConf); - VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc); - vectorExpression.transientInit(); - - String[] outputScratchTypeNames= vectorizationContext.getScratchColumnTypeNames(); - - VectorizedRowBatchCtx batchContext = - new 
VectorizedRowBatchCtx( - columnNames, - typeInfos, - dataTypePhysicalVariations, - /* dataColumnNums */ null, - /* partitionColumnCount */ 0, - /* virtualColumnCount */ 0, - /* neededVirtualColumns */ null, - outputScratchTypeNames, - null); - - VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); - - VectorExtractRow resultVectorExtractRow = new VectorExtractRow(); - resultVectorExtractRow.init( - new TypeInfo[] { outputTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() }); - Object[] scrqtchRow = new Object[1]; - - System.out.println( - "*DEBUG* typeInfo " + typeInfo.toString() + - " negativeTestMode " + negativeTestMode + - " vectorExpression " + vectorExpression.toString()); - batchSource.resetBatchIteration(); - int rowIndex = 0; - while (true) { - if (!batchSource.fillNextBatch(batch)) { - break; - } - vectorExpression.evaluate(batch); - extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, - objectInspector, resultObjects); - rowIndex += batch.size; - } - } -}
