Author: hashutosh
Date: Fri Dec 20 03:31:26 2013
New Revision: 1552487
URL: http://svn.apache.org/r1552487
Log:
HIVE-6034 : vectorized % doesn't handle zeroes the same way as non-vectorized
(Sergey Shelukhin via Eric Hanson)
Modified:
hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
hive/trunk/ql/src/test/queries/clientpositive/vectorization_12.q
hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q
hive/trunk/ql/src/test/results/clientpositive/vectorization_12.q.out
hive/trunk/ql/src/test/results/clientpositive/vectorization_14.q.out
Modified: hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java?rev=1552487&r1=1552486&r2=1552487&view=diff
==============================================================================
--- hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java (original)
+++ hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java Fri Dec 20
03:31:26 2013
@@ -38,62 +38,51 @@ public class GenVectorCode extends Task
{"ColumnArithmeticScalar", "Add", "long", "long", "+"},
{"ColumnArithmeticScalar", "Subtract", "long", "long", "-"},
{"ColumnArithmeticScalar", "Multiply", "long", "long", "*"},
- {"ColumnArithmeticScalar", "Modulo", "long", "long", "%"},
{"ColumnArithmeticScalar", "Add", "long", "double", "+"},
{"ColumnArithmeticScalar", "Subtract", "long", "double", "-"},
{"ColumnArithmeticScalar", "Multiply", "long", "double", "*"},
- {"ColumnArithmeticScalar", "Modulo", "long", "double", "%"},
{"ColumnArithmeticScalar", "Add", "double", "long", "+"},
{"ColumnArithmeticScalar", "Subtract", "double", "long", "-"},
{"ColumnArithmeticScalar", "Multiply", "double", "long", "*"},
- {"ColumnArithmeticScalar", "Modulo", "double", "long", "%"},
{"ColumnArithmeticScalar", "Add", "double", "double", "+"},
{"ColumnArithmeticScalar", "Subtract", "double", "double", "-"},
{"ColumnArithmeticScalar", "Multiply", "double", "double", "*"},
- {"ColumnArithmeticScalar", "Modulo", "double", "double", "%"},
{"ScalarArithmeticColumn", "Add", "long", "long", "+"},
{"ScalarArithmeticColumn", "Subtract", "long", "long", "-"},
{"ScalarArithmeticColumn", "Multiply", "long", "long", "*"},
- {"ScalarArithmeticColumn", "Modulo", "long", "long", "%"},
{"ScalarArithmeticColumn", "Add", "long", "double", "+"},
{"ScalarArithmeticColumn", "Subtract", "long", "double", "-"},
{"ScalarArithmeticColumn", "Multiply", "long", "double", "*"},
- {"ScalarArithmeticColumn", "Modulo", "long", "double", "%"},
{"ScalarArithmeticColumn", "Add", "double", "long", "+"},
{"ScalarArithmeticColumn", "Subtract", "double", "long", "-"},
{"ScalarArithmeticColumn", "Multiply", "double", "long", "*"},
- {"ScalarArithmeticColumn", "Modulo", "double", "long", "%"},
{"ScalarArithmeticColumn", "Add", "double", "double", "+"},
{"ScalarArithmeticColumn", "Subtract", "double", "double", "-"},
{"ScalarArithmeticColumn", "Multiply", "double", "double", "*"},
- {"ScalarArithmeticColumn", "Modulo", "double", "double", "%"},
{"ColumnArithmeticColumn", "Add", "long", "long", "+"},
{"ColumnArithmeticColumn", "Subtract", "long", "long", "-"},
{"ColumnArithmeticColumn", "Multiply", "long", "long", "*"},
- {"ColumnArithmeticColumn", "Modulo", "long", "long", "%"},
{"ColumnArithmeticColumn", "Add", "long", "double", "+"},
{"ColumnArithmeticColumn", "Subtract", "long", "double", "-"},
{"ColumnArithmeticColumn", "Multiply", "long", "double", "*"},
- {"ColumnArithmeticColumn", "Modulo", "long", "double", "%"},
{"ColumnArithmeticColumn", "Add", "double", "long", "+"},
{"ColumnArithmeticColumn", "Subtract", "double", "long", "-"},
{"ColumnArithmeticColumn", "Multiply", "double", "long", "*"},
- {"ColumnArithmeticColumn", "Modulo", "double", "long", "%"},
{"ColumnArithmeticColumn", "Add", "double", "double", "+"},
{"ColumnArithmeticColumn", "Subtract", "double", "double", "-"},
{"ColumnArithmeticColumn", "Multiply", "double", "double", "*"},
- {"ColumnArithmeticColumn", "Modulo", "double", "double", "%"},
+
{"ColumnDivideScalar", "Divide", "long", "double", "/"},
{"ColumnDivideScalar", "Divide", "double", "long", "/"},
@@ -105,6 +94,19 @@ public class GenVectorCode extends Task
{"ColumnDivideColumn", "Divide", "double", "long", "/"},
{"ColumnDivideColumn", "Divide", "double", "double", "/"},
+ {"ColumnDivideScalar", "Modulo", "long", "long", "%"},
+ {"ColumnDivideScalar", "Modulo", "long", "double", "%"},
+ {"ColumnDivideScalar", "Modulo", "double", "long", "%"},
+ {"ColumnDivideScalar", "Modulo", "double", "double", "%"},
+ {"ScalarDivideColumn", "Modulo", "long", "long", "%"},
+ {"ScalarDivideColumn", "Modulo", "long", "double", "%"},
+ {"ScalarDivideColumn", "Modulo", "double", "long", "%"},
+ {"ScalarDivideColumn", "Modulo", "double", "double", "%"},
+ {"ColumnDivideColumn", "Modulo", "long", "long", "%"},
+ {"ColumnDivideColumn", "Modulo", "long", "double", "%"},
+ {"ColumnDivideColumn", "Modulo", "double", "long", "%"},
+ {"ColumnDivideColumn", "Modulo", "double", "double", "%"},
+
{"ColumnCompareScalar", "Equal", "long", "double", "=="},
{"ColumnCompareScalar", "Equal", "double", "double", "=="},
{"ColumnCompareScalar", "NotEqual", "long", "double", "!="},
@@ -1082,6 +1084,7 @@ public class GenVectorCode extends Task
templateString = templateString.replaceAll("<OperandType1>",
operandType1);
templateString = templateString.replaceAll("<OperandType2>",
operandType2);
templateString = templateString.replaceAll("<ReturnType>", returnType);
+ templateString = templateString.replaceAll("<CamelReturnType>",
getCamelCaseType(returnType));
writeFile(templateFile.lastModified(), expressionOutputDirectory,
expressionClassesDirectory,
className, templateString);
Modified:
hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt?rev=1552487&r1=1552486&r2=1552487&view=diff
==============================================================================
---
hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
(original)
+++
hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
Fri Dec 20 03:31:26 2013
@@ -83,20 +83,20 @@ public class <ClassName> extends VectorE
boolean hasDivBy0 = false;
if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
<OperandType2> denom = vector2[0];
- outputVector[0] = vector1[0] <OperatorSymbol> (double) denom;
+ outputVector[0] = vector1[0] <OperatorSymbol> denom;
hasDivBy0 = hasDivBy0 || (denom == 0);
} else if (inputColVector1.isRepeating) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
<OperandType2> denom = vector2[i];
- outputVector[i] = vector1[0] <OperatorSymbol> (double) denom;
+ outputVector[i] = vector1[0] <OperatorSymbol> denom;
hasDivBy0 = hasDivBy0 || (denom == 0);
}
} else {
for(int i = 0; i != n; i++) {
<OperandType2> denom = vector2[i];
- outputVector[i] = vector1[0] <OperatorSymbol> (double) denom;
+ outputVector[i] = vector1[0] <OperatorSymbol> denom;
hasDivBy0 = hasDivBy0 || (denom == 0);
}
}
@@ -142,8 +142,7 @@ public class <ClassName> extends VectorE
if (!hasDivBy0) {
NullUtil.setNullDataEntries<CamelReturnType>(outputColVector,
batch.selectedInUse, sel, n);
} else {
- // Currently, the output from division is always double.
- NullUtil.setNullAndDivBy0DataEntriesDouble(
+ NullUtil.setNullAndDivBy0DataEntries<CamelReturnType>(
outputColVector, batch.selectedInUse, sel, n, inputColVector2);
}
}
Modified:
hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt?rev=1552487&r1=1552486&r2=1552487&view=diff
==============================================================================
---
hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt
(original)
+++
hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt
Fri Dec 20 03:31:26 2013
@@ -126,8 +126,7 @@ public class <ClassName> extends VectorE
if (!hasDivBy0) {
NullUtil.setNullOutputEntriesColScalar(outputColVector,
batch.selectedInUse, sel, n);
} else {
- // Currently, the output from division is always double.
- NullUtil.setNullAndDivBy0DataEntriesDouble(
+ NullUtil.setNullAndDivBy0DataEntries<CamelReturnType>(
outputColVector, batch.selectedInUse, sel, n, inputColVector);
}
}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java?rev=1552487&r1=1552486&r2=1552487&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
Fri Dec 20 03:31:26 2013
@@ -27,7 +27,7 @@ import org.apache.hadoop.hive.ql.exec.ve
* Utility functions to handle null propagation.
*/
public class NullUtil {
- /*
+ /**
* Set the data value for all NULL entries to the designated NULL_VALUE.
*/
public static void setNullDataEntriesLong(
@@ -51,20 +51,19 @@ public class NullUtil {
}
}
}
-
+
// for use by Column-Scalar and Scalar-Column arithmetic for null propagation
public static void setNullOutputEntriesColScalar(
ColumnVector v, boolean selectedInUse, int[] sel, int n) {
if (v instanceof DoubleColumnVector) {
-
// No need to set null data entries because the input NaN values
// will automatically propagate to the output.
return;
}
setNullDataEntriesLong((LongColumnVector) v, selectedInUse, sel, n);
}
-
- /*
+
+ /**
* Set the data value for all NULL entries to NaN
*/
public static void setNullDataEntriesDouble(
@@ -89,10 +88,9 @@ public class NullUtil {
}
}
- /*
- * Set the data value for all NULL entries, as well as those coming from
division by zero,
- * to NaN. Assumes there are entries coming from division by zero.
- * We assume that infinities do not appear legally in the result, so we can
replace all of them.
+ /**
+ * Set all the entries for which denoms array contains zeroes to NULL; sets
all the data
+ * values for NULL entries to DoubleColumnVector.NULL_VALUE.
*/
public static void setNullAndDivBy0DataEntriesDouble(
DoubleColumnVector v, boolean selectedInUse, int[] sel, int n,
LongColumnVector denoms) {
@@ -117,10 +115,9 @@ public class NullUtil {
}
}
- /*
- * Set the data value for all NULL entries, as well as those coming from
division by zero,
- * to NaN. Assumes there are entries coming from division by zero.
- * We assume that infinities do not appear legally in the result, so we can
replace all of them.
+ /**
+ * Set all the entries for which denoms array contains zeroes to NULL; sets
all the data
+ * values for NULL entries to DoubleColumnVector.NULL_VALUE.
*/
public static void setNullAndDivBy0DataEntriesDouble(
DoubleColumnVector v, boolean selectedInUse, int[] sel, int n,
DoubleColumnVector denoms) {
@@ -145,6 +142,60 @@ public class NullUtil {
}
}
+ /**
+ * Set all the entries for which denoms array contains zeroes to NULL; sets
all the data
+ * values for NULL entries to LongColumnVector.NULL_VALUE.
+ */
+ public static void setNullAndDivBy0DataEntriesLong(
+ LongColumnVector v, boolean selectedInUse, int[] sel, int n,
LongColumnVector denoms) {
+ assert v.isRepeating || !denoms.isRepeating;
+ v.noNulls = false;
+ long[] vector = denoms.vector;
+ if (v.isRepeating && (v.isNull[0] = (v.isNull[0] || vector[0] == 0))) {
+ v.vector[0] = LongColumnVector.NULL_VALUE;
+ } else if (selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (v.isNull[i] = (v.isNull[i] || vector[i] == 0)) {
+ v.vector[i] = LongColumnVector.NULL_VALUE;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (v.isNull[i] = (v.isNull[i] || vector[i] == 0)) {
+ v.vector[i] = LongColumnVector.NULL_VALUE;
+ }
+ }
+ }
+ }
+
+ /**
+ * Set all the entries for which denoms array contains zeroes to NULL; sets
all the data
+ * values for NULL entries to LongColumnVector.NULL_VALUE.
+ */
+ public static void setNullAndDivBy0DataEntriesLong(
+ LongColumnVector v, boolean selectedInUse, int[] sel, int n,
DoubleColumnVector denoms) {
+ assert v.isRepeating || !denoms.isRepeating;
+ v.noNulls = false;
+ double[] vector = denoms.vector;
+ if (v.isRepeating && (v.isNull[0] = (v.isNull[0] || vector[0] == 0))) {
+ v.vector[0] = LongColumnVector.NULL_VALUE;
+ } else if (selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (v.isNull[i] = (v.isNull[i] || vector[i] == 0)) {
+ v.vector[i] = LongColumnVector.NULL_VALUE;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (v.isNull[i] = (v.isNull[i] || vector[i] == 0)) {
+ v.vector[i] = LongColumnVector.NULL_VALUE;
+ }
+ }
+ }
+ }
+
/*
* Propagate null values for a two-input operator.
*/
Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_12.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_12.q?rev=1552487&r1=1552486&r2=1552487&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_12.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_12.q Fri Dec 20
03:31:26 2013
@@ -28,5 +28,5 @@ WHERE (((ctimestamp1 IS NULL)
OR ((cboolean2 <= 1)
AND (cbigint >= csmallint))))
GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble
-ORDER BY ctimestamp1, cdouble;
+ORDER BY ctimestamp1, cdouble, cbigint, cstring1;
Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q?rev=1552487&r1=1552486&r2=1552487&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q Fri Dec 20
03:31:26 2013
@@ -29,5 +29,5 @@ WHERE (((ctinyint <= cbigint)
AND ((cbigint > -257)
OR (cfloat < cint))))
GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble
-ORDER BY cstring1, cfloat, cdouble;
+ORDER BY cstring1, cfloat, cdouble, ctimestamp1;
Modified: hive/trunk/ql/src/test/results/clientpositive/vectorization_12.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorization_12.q.out?rev=1552487&r1=1552486&r2=1552487&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorization_12.q.out
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorization_12.q.out Fri
Dec 20 03:31:26 2013
@@ -27,7 +27,7 @@ WHERE (((ctimestamp1 IS NULL)
OR ((cboolean2 <= 1)
AND (cbigint >= csmallint))))
GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble
-ORDER BY ctimestamp1, cdouble
+ORDER BY ctimestamp1, cdouble, cbigint, cstring1
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
@@ -60,7 +60,7 @@ WHERE (((ctimestamp1 IS NULL)
OR ((cboolean2 <= 1)
AND (cbigint >= csmallint))))
GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble
-ORDER BY ctimestamp1, cdouble
+ORDER BY ctimestamp1, cdouble, cbigint, cstring1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
@@ -247,8 +247,8 @@ POSTHOOK: Input: default@alltypesorc
1864027286 true 0W67K0mT27r22f817281Ocq NULL -5818.0 3.7421376E7
-1864027286 1 1864027286 0.0 -5818.0 5818.0 -5818.0
-3.7421376E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5818.0
-5818.0 -3.7427194E7 0.0
1864027286 true FpsIohh60Bho67Fb7f NULL -5732.0 3.6868224E7
-1864027286 1 1864027286 0.0 -5732.0 5732.0 -5732.0
-3.6868224E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5732.0
-5732.0 -3.6873956E7 0.0
1864027286 true MGsGfU7253gN2Hnt2W NULL -5679.0 3.6527328E7
-1864027286 1 1864027286 0.0 -5679.0 5679.0 -5679.0
-3.6527328E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5679.0
-5679.0 -3.6533007E7 0.0
-1864027286 true lxQp116 NULL -5638.15 3.62645808E7
-1864027286 1 1864027286 0.0 -5638.15 5638.15
-5638.15 -3.62645808E7 1.86402164785E9 1864027286
-1.554726368159204E-4 -5638.15 -5638.15 -3.6270218949999996E7
0.0
-1887561756 false w62rRn0DnCSWJ1ht6qWa NULL -5638.15
3.62645808E7 1887561756 1 -1887561756 0.0 -5638.15
5638.15 -5638.15 -3.62645808E7 -1.88756739415E9 -1887561756
-1.554726368159204E-4 -5638.15 -5638.15 -3.6270218949999996E7
0.0
+1864027286 true lxQp116 NULL -5638.15 3.62645808E7
-1864027286 1 1864027286 0.0 -5638.15 5638.15
-5638.15 -3.62645808E7 1.86402164785E9 1864027286
-1.554726368159204E-4 -5638.15 -5638.15 -3.6270218949999996E7
0.0
1864027286 true wEe2THv60F6 NULL -5589.0 3.5948448E7
-1864027286 1 1864027286 0.0 -5589.0 5589.0 -5589.0
-3.5948448E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5589.0
-5589.0 -3.5954037E7 0.0
1864027286 true 7MHXQ0V71I NULL -5564.0 3.5787648E7
-1864027286 1 1864027286 0.0 -5564.0 5564.0 -5564.0
-3.5787648E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5564.0
-5564.0 -3.5793212E7 0.0
1864027286 true N7L608vFx24p0uNVwJr2o6G NULL -5536.0 3.5607552E7
-1864027286 1 1864027286 0.0 -5536.0 5536.0 -5536.0
-3.5607552E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5536.0
-5536.0 -3.5613088E7 0.0