Author: jitendra
Date: Mon Feb 17 08:42:12 2014
New Revision: 1568904
URL: http://svn.apache.org/r1568904
Log:
HIVE-5759. Implement vectorized support for COALESCE conditional expression.
(jitendra)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
hive/trunk/ql/src/test/queries/clientpositive/vector_coalesce.q
hive/trunk/ql/src/test/results/clientpositive/vector_coalesce.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java?rev=1568904&r1=1568903&r2=1568904&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
Mon Feb 17 08:42:12 2014
@@ -310,4 +310,15 @@ public class BytesColumnVector extends C
isRepeating = true;
setRef(0, value, 0, value.length);
}
+
+ @Override
+ public void setElement(int outElementNum, int inputElementNum, ColumnVector
inputVector) {
+ BytesColumnVector in = (BytesColumnVector) inputVector;
+ setVal(outElementNum, in.vector[inputElementNum],
in.start[inputElementNum], in.length[inputElementNum]);
+ }
+
+ @Override
+ public void init() {
+ initBuffer(0);
+ }
}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java?rev=1568904&r1=1568903&r2=1568904&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
Mon Feb 17 08:42:12 2014
@@ -142,5 +142,19 @@ public abstract class ColumnVector {
preFlattenIsRepeating = isRepeating;
preFlattenNoNulls = noNulls;
}
+
+ /**
+ * Set the element in this column vector from the given input vector.
+ */
+ public abstract void setElement(int outElementNum, int inputElementNum,
ColumnVector inputVector);
+
+ /**
+ * Initialize the column vector. This method can be overridden by specific
column vector types.
+ * Use this method only if the individual type of the column vector is not
known, otherwise it is
+ * preferable to call specific initialization methods.
+ */
+ public void init() {
+ // Do nothing by default
+ }
}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java?rev=1568904&r1=1568903&r2=1568904&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
Mon Feb 17 08:42:12 2014
@@ -76,6 +76,12 @@ public class DecimalColumnVector extends
// TODO Auto-generated method stub
}
+ @Override
+ public void setElement(int outElementNum, int inputElementNum, ColumnVector
inputVector) {
+ vector[outElementNum].update(((DecimalColumnVector)
inputVector).vector[inputElementNum]);
+ vector[outElementNum].changeScaleDestructive(scale);
+ }
+
/**
* Check if the value at position i fits in the available precision,
* and convert the value to NULL if it does not.
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java?rev=1568904&r1=1568903&r2=1568904&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
Mon Feb 17 08:42:12 2014
@@ -141,4 +141,9 @@ public class DoubleColumnVector extends
}
flattenNoNulls(selectedInUse, sel, size);
}
+
+ @Override
+ public void setElement(int outElementNum, int inputElementNum, ColumnVector
inputVector) {
+ vector[outElementNum] = ((DoubleColumnVector)
inputVector).vector[inputElementNum];
+ }
}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java?rev=1568904&r1=1568903&r2=1568904&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
Mon Feb 17 08:42:12 2014
@@ -185,4 +185,9 @@ public class LongColumnVector extends Co
}
flattenNoNulls(selectedInUse, sel, size);
}
+
+ @Override
+ public void setElement(int outElementNum, int inputElementNum, ColumnVector
inputVector) {
+ vector[outElementNum] = ((LongColumnVector)
inputVector).vector[inputElementNum];
+ }
}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1568904&r1=1568903&r2=1568904&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
Mon Feb 17 08:42:12 2014
@@ -93,10 +93,8 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.UDFToString;
import org.apache.hadoop.hive.ql.udf.generic.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
import org.apache.hadoop.hive.serde2.typeinfo.*;
/**
@@ -115,7 +113,6 @@ public class VectorizationContext {
//columnName to column position map
private final Map<String, Integer> columnMap;
private final int firstOutputColumnIndex;
- private final Mode operatorMode = Mode.PROJECTION;
public static final Pattern decimalTypePattern =
Pattern.compile("decimal.*");
@@ -134,6 +131,7 @@ public class VectorizationContext {
castExpressionUdfs.add(GenericUDFToUtcTimestamp.class);
castExpressionUdfs.add(GenericUDFToChar.class);
castExpressionUdfs.add(GenericUDFToVarchar.class);
+ castExpressionUdfs.add(GenericUDFTimestamp.class);
castExpressionUdfs.add(UDFToByte.class);
castExpressionUdfs.add(UDFToBoolean.class);
castExpressionUdfs.add(UDFToDouble.class);
@@ -354,7 +352,7 @@ public class VectorizationContext {
List<ExprNodeDesc> childrenWithCasts = new ArrayList<ExprNodeDesc>();
boolean atleastOneCastNeeded = false;
for (ExprNodeDesc child : children) {
- ExprNodeDesc castExpression = getImplicitCastExpression(child,
commonType);
+ ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF,
child, commonType);
if (castExpression != null) {
atleastOneCastNeeded = true;
childrenWithCasts.add(castExpression);
@@ -393,10 +391,19 @@ public class VectorizationContext {
return new DecimalTypeInfo(precision, scale);
}
- private ExprNodeDesc getImplicitCastExpression(ExprNodeDesc child, TypeInfo
castType) {
+ /**
+ * The GenericUDFs might need their children output to be cast to the given
castType.
+ * This method returns a cast expression that would achieve the required
casting.
+ */
+ private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc
child, TypeInfo castType) {
TypeInfo inputTypeInfo = child.getTypeInfo();
String inputTypeString = inputTypeInfo.getTypeName();
String castTypeString = castType.getTypeName();
+
+ if (inputTypeString.equals(castTypeString)) {
+ // Nothing to be done
+ return null;
+ }
boolean inputTypeDecimal = false;
boolean castTypeDecimal = false;
if (decimalTypePattern.matcher(inputTypeString).matches()) {
@@ -406,72 +413,82 @@ public class VectorizationContext {
castTypeDecimal = true;
}
- // If castType is decimal, try not to lose precision for numeric types.
- if (castTypeDecimal) {
- castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
- }
-
if (castTypeDecimal && !inputTypeDecimal) {
+
// Cast the input to decimal
+ // If castType is decimal, try not to lose precision for numeric types.
+ castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal();
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(child);
ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType,
castToDecimalUDF, children);
return desc;
} else if (!castTypeDecimal && inputTypeDecimal) {
+
// Cast decimal input to returnType
- UDF udfClass = null;
- GenericUDF genericUdf = null;
- PrimitiveObjectInspector.PrimitiveCategory primitiveCategory =
- ((PrimitiveTypeInfo) castType).getPrimitiveCategory();
- switch (((PrimitiveTypeInfo) castType).getPrimitiveCategory()) {
- case BYTE:
- udfClass = new UDFToByte();
- break;
- case SHORT:
- udfClass = new UDFToShort();
- break;
- case INT:
- udfClass = new UDFToInteger();
- break;
- case LONG:
- udfClass = new UDFToLong();
- break;
- case FLOAT:
- udfClass = new UDFToFloat();
- break;
- case DOUBLE:
- udfClass = new UDFToDouble();
- break;
- case STRING:
- udfClass = new UDFToString();
- break;
- case BOOLEAN:
- udfClass = new UDFToBoolean();
- break;
- case DATE:
- genericUdf = new GenericUDFToDate();
- break;
- case TIMESTAMP:
- genericUdf = new GenericUDFToUnixTimeStamp();
- break;
- case BINARY:
- genericUdf = new GenericUDFToBinary();
- break;
- }
- if (genericUdf == null) {
- genericUdf = new GenericUDFBridge();
- ((GenericUDFBridge)
genericUdf).setUdfClassName(udfClass.getClass().getName());
- }
+ GenericUDF genericUdf = getGenericUDFForCast(castType);
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(child);
ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf,
children);
return desc;
+ } else {
+
+ // Casts to exact types including long to double etc. are needed in some
special cases.
+ if (udf instanceof GenericUDFCoalesce) {
+ GenericUDF genericUdf = getGenericUDFForCast(castType);
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ children.add(child);
+ ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf,
children);
+ return desc;
+ }
}
- // No cast needed
return null;
}
+ private GenericUDF getGenericUDFForCast(TypeInfo castType) {
+ UDF udfClass = null;
+ GenericUDF genericUdf = null;
+ switch (((PrimitiveTypeInfo) castType).getPrimitiveCategory()) {
+ case BYTE:
+ udfClass = new UDFToByte();
+ break;
+ case SHORT:
+ udfClass = new UDFToShort();
+ break;
+ case INT:
+ udfClass = new UDFToInteger();
+ break;
+ case LONG:
+ udfClass = new UDFToLong();
+ break;
+ case FLOAT:
+ udfClass = new UDFToFloat();
+ break;
+ case DOUBLE:
+ udfClass = new UDFToDouble();
+ break;
+ case STRING:
+ udfClass = new UDFToString();
+ break;
+ case BOOLEAN:
+ udfClass = new UDFToBoolean();
+ break;
+ case DATE:
+ genericUdf = new GenericUDFToDate();
+ break;
+ case TIMESTAMP:
+ genericUdf = new GenericUDFToUnixTimeStamp();
+ break;
+ case BINARY:
+ genericUdf = new GenericUDFToBinary();
+ break;
+ }
+ if (genericUdf == null) {
+ genericUdf = new GenericUDFBridge();
+ ((GenericUDFBridge)
genericUdf).setUdfClassName(udfClass.getClass().getName());
+ }
+ return genericUdf;
+ }
/* Return true if this is one of a small set of functions for which
@@ -568,7 +585,10 @@ public class VectorizationContext {
}
GenericUDF gudf = ((ExprNodeGenericFuncDesc) exprDesc).getGenericUDF();
- if (gudf instanceof GenericUDFOPNegative || gudf instanceof
GenericUDFOPPositive) {
+ if (gudf instanceof GenericUDFOPNegative || gudf instanceof
GenericUDFOPPositive
+ || castExpressionUdfs.contains(gudf)
+ || ((gudf instanceof GenericUDFBridge)
+ && castExpressionUdfs.contains(((GenericUDFBridge)
gudf).getUdfClass()))) {
ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprDesc);
ObjectInspector output = evaluator.initialize(null);
Object constant = evaluator.evaluate(null);
@@ -775,6 +795,9 @@ public class VectorizationContext {
private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws
HiveException {
+
+ List<ExprNodeDesc> constantFoldedChildren =
foldConstantsForUnaryExprs(childExpr);
+ childExpr = constantFoldedChildren;
//First handle special cases
if (udf instanceof GenericUDFBetween) {
return getBetweenFilterExpression(childExpr, mode);
@@ -782,6 +805,10 @@ public class VectorizationContext {
return getInExpression(childExpr, mode);
} else if (udf instanceof GenericUDFOPPositive) {
return getIdentityExpression(childExpr);
+ } else if (udf instanceof GenericUDFCoalesce) {
+
+ // Coalesce is a special case because it can take a variable number of
arguments.
+ return getCoalesceExpression(childExpr, returnType);
} else if (udf instanceof GenericUDFBridge) {
VectorExpression v =
getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode,
returnType);
@@ -798,7 +825,6 @@ public class VectorizationContext {
udfClass = ((GenericUDFBridge) udf).getUdfClass();
}
- List<ExprNodeDesc> constantFoldedChildren =
foldConstantsForUnaryExprs(childExpr);
VectorExpression ve = getVectorExpressionForUdf(udfClass,
constantFoldedChildren, mode, returnType);
if (ve == null) {
@@ -808,6 +834,33 @@ public class VectorizationContext {
return ve;
}
+ private VectorExpression getCoalesceExpression(List<ExprNodeDesc> childExpr,
TypeInfo returnType)
+ throws HiveException {
+ int[] inputColumns = new int[childExpr.size()];
+ VectorExpression[] vectorChildren = null;
+ try {
+ vectorChildren = getVectorExpressions(childExpr, Mode.PROJECTION);
+
+ int i = 0;
+ for (VectorExpression ve : vectorChildren) {
+ inputColumns[i++] = ve.getOutputColumn();
+ }
+
+ int outColumn =
ocm.allocateOutputColumn(getNormalizedTypeName(returnType.getTypeName()));
+ VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns,
outColumn);
+ vectorCoalesce.setOutputType(returnType.getTypeName());
+ vectorCoalesce.setChildExpressions(vectorChildren);
+ return vectorCoalesce;
+ } finally {
+ // Free the output columns of the child expressions.
+ if (vectorChildren != null) {
+ for (VectorExpression v : vectorChildren) {
+ ocm.freeOutputColumn(v.getOutputColumn());
+ }
+ }
+ }
+ }
+
/**
* Create a filter or boolean-valued expression for column IN (
<list-of-constants> )
*/
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java?rev=1568904&view=auto
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
(added)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
Mon Feb 17 08:42:12 2014
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import java.util.Arrays;
+
+/**
+ * This expression returns the value of the first non-null expression
+ * in the given set of input expressions.
+ */
+public class VectorCoalesce extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+ private int [] inputColumns;
+ private int outputColumn;
+
+ public VectorCoalesce(int [] inputColumns, int outputColumn) {
+ this();
+ this.inputColumns = inputColumns;
+ this.outputColumn = outputColumn;
+ }
+
+ public VectorCoalesce() {
+ super();
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ int[] sel = batch.selected;
+ int n = batch.size;
+ ColumnVector outputVector = batch.cols[outputColumn];
+ if (n <= 0) {
+ // Nothing to do
+ return;
+ }
+
+ outputVector.init();
+
+ outputVector.noNulls = false;
+ outputVector.isRepeating = false;
+ if (batch.selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector.isNull[i] = true;
+ for (int k = 0; k < inputColumns.length; k++) {
+ ColumnVector cv = batch.cols[inputColumns[k]];
+ if ( (cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) {
+ outputVector.isNull[i] = false;
+ outputVector.setElement(i, 0, cv);
+ break;
+ } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) {
+ outputVector.isNull[i] = false;
+ outputVector.setElement(i, i, cv);
+ break;
+ }
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ outputVector.isNull[i] = true;
+ for (int k = 0; k < inputColumns.length; k++) {
+ ColumnVector cv = batch.cols[inputColumns[k]];
+ if ((cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) {
+ outputVector.isNull[i] = false;
+ outputVector.setElement(i, 0, cv);
+ break;
+ } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) {
+ outputVector.isNull[i] = false;
+ outputVector.setElement(i, i, cv);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return outputType;
+ }
+
+ public int [] getInputColumns() {
+ return inputColumns;
+ }
+
+ public void setInputColumns(int [] inputColumns) {
+ this.inputColumns = inputColumns;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+ // Descriptor is not defined because it takes variable number of arguments
with different
+ // data types.
+ throw new UnsupportedOperationException("Undefined descriptor");
+ }
+}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1568904&r1=1568903&r2=1568904&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
Mon Feb 17 08:42:12 2014
@@ -230,6 +230,7 @@ public class Vectorizer implements Physi
supportedGenericUDFs.add(GenericUDFIn.class);
supportedGenericUDFs.add(GenericUDFCase.class);
supportedGenericUDFs.add(GenericUDFWhen.class);
+ supportedGenericUDFs.add(GenericUDFCoalesce.class);
// For type casts
supportedGenericUDFs.add(UDFToLong.class);
Added: hive/trunk/ql/src/test/queries/clientpositive/vector_coalesce.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vector_coalesce.q?rev=1568904&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vector_coalesce.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vector_coalesce.q Mon Feb 17
08:42:12 2014
@@ -0,0 +1,32 @@
+SET hive.vectorized.execution.enabled=true;
+EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble,
cstring1, cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10;
+
+SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1,
cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10;
+
+EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10,
(cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10;
+
+SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10;
+
+EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10;
+
+SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10;
+
+EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10;
+
+SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10;
Added: hive/trunk/ql/src/test/results/clientpositive/vector_coalesce.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vector_coalesce.q.out?rev=1568904&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vector_coalesce.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vector_coalesce.q.out Mon Feb
17 08:42:12 2014
@@ -0,0 +1,256 @@
+PREHOOK: query: EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint,
coalesce(cdouble, cstring1, cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint,
coalesce(cdouble, cstring1, cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 3143 Data size: 377237 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: cdouble is null (type: boolean)
+ Statistics: Num rows: 1571 Data size: 188558 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cdouble (type: double), cstring1 (type: string),
cint (type: int), cfloat (type: float), csmallint (type: smallint),
COALESCE(cdouble,cstring1,cint,cfloat,csmallint) (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1571 Data size: 188558 Basic stats:
COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 1200 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 1200 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+
+PREHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint,
coalesce(cdouble, cstring1, cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint,
coalesce(cdouble, cstring1, cint, cfloat, csmallint)
+FROM alltypesorc
+WHERE (cdouble IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+NULL LFgU5WT87C2yJ4W4YU0r8Pp -285355633 -51.0 NULL
LFgU5WT87C2yJ4W4YU0r8Pp
+NULL 75bFXC7TqGo1SEaYAx4C58m NULL -51.0 NULL 75bFXC7TqGo1SEaYAx4C58m
+NULL v3p153e2bSkGS70v04G 354670578 -51.0 NULL
v3p153e2bSkGS70v04G
+NULL 0pOH7A4O8aQ37NuBqn 951003458 -51.0 NULL
0pOH7A4O8aQ37NuBqn
+NULL 8ShAFcD734S8Q26WjMwpq0Q 164554497 -51.0 NULL
8ShAFcD734S8Q26WjMwpq0Q
+NULL nOF31ehjY7ULCHMf 455419170 -51.0 NULL nOF31ehjY7ULCHMf
+NULL t32s57Cjt4a250qQgVNAB5T -109813638 -51.0 NULL
t32s57Cjt4a250qQgVNAB5T
+NULL nvO822k30OaH37Il 665801232 -51.0 NULL nvO822k30OaH37Il
+NULL M152O -601502867 -51.0 NULL M152O
+NULL FgJ7Hft6845s1766oyt82q 199879534 -51.0 NULL
FgJ7Hft6845s1766oyt82q
+PREHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10,
(cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10,
(cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 23577 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctinyint is null (type: boolean)
+ Statistics: Num rows: 11788 Data size: 188610 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cdouble (type: double),
cint (type: int), COALESCE((ctinyint + 10),(cdouble + log2(cint)),0) (type:
double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 11788 Data size: 188610 Basic stats:
COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 160 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 160 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+
+PREHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10,
(cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10,
(cdouble+log2(cint)), 0)
+FROM alltypesorc
+WHERE (ctinyint IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+NULL -4213.0 528534767 -4184.022576865738
+NULL -3012.0 528534767 -2983.0225768657383
+NULL -4016.0 528534767 -3987.0225768657383
+NULL -11534.0 528534767 -11505.022576865738
+NULL -6147.0 528534767 -6118.022576865738
+NULL -7680.0 528534767 -7651.022576865738
+NULL -7314.0 528534767 -7285.022576865738
+NULL 11254.0 528534767 11282.977423134262
+NULL 13889.0 528534767 13917.977423134262
+NULL 3321.0 528534767 3349.9774231342617
+PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 31436 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cfloat is null and cbigint is null) (type: boolean)
+ Statistics: Num rows: 7859 Data size: 94309 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cfloat (type: float), cbigint (type: bigint),
COALESCE(cfloat,cbigint,0) (type: float)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 7859 Data size: 94309 Basic stats:
COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 120 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 120 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+
+PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0)
+FROM alltypesorc
+WHERE (cfloat IS NULL AND cbigint IS NULL) LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+NULL NULL 0.0
+NULL NULL 0.0
+NULL NULL 0.0
+NULL NULL 0.0
+NULL NULL 0.0
+NULL NULL 0.0
+NULL NULL 0.0
+NULL NULL 0.0
+NULL NULL 0.0
+NULL NULL 0.0
+PREHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1,
ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2,
coalesce(ctimestamp1, ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 4715 Data size: 377237 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: (ctimestamp1 is not null or ctimestamp2 is not null)
(type: boolean)
+ Statistics: Num rows: 4715 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type:
timestamp), COALESCE(ctimestamp1,ctimestamp2) (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4715 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 800 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 800 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+
+PREHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1,
ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1,
ctimestamp2)
+FROM alltypesorc
+WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+1969-12-31 15:59:46.674 1969-12-31 16:00:08.875 1969-12-31 15:59:46.674
+NULL 1969-12-31 16:00:13.589 1969-12-31 16:00:13.589
+1969-12-31 15:59:55.787 1969-12-31 16:00:01.546 1969-12-31 15:59:55.787
+1969-12-31 15:59:44.187 1969-12-31 16:00:06.961 1969-12-31 15:59:44.187
+1969-12-31 15:59:50.434 1969-12-31 16:00:13.352 1969-12-31 15:59:50.434
+1969-12-31 16:00:15.007 1969-12-31 16:00:15.148 1969-12-31 16:00:15.007
+1969-12-31 16:00:07.021 1969-12-31 16:00:02.997 1969-12-31 16:00:07.021
+1969-12-31 16:00:04.963 1969-12-31 15:59:56.474 1969-12-31 16:00:04.963
+1969-12-31 15:59:52.176 1969-12-31 16:00:07.787 1969-12-31 15:59:52.176
+1969-12-31 15:59:44.569 1969-12-31 15:59:51.665 1969-12-31 15:59:44.569