Repository: hive Updated Branches: refs/heads/branch-1 54ba148d6 -> 6c6583274
HIVE-13562: Enable vector bridge for all non-vectorized udfs (Matt McCline, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6c658327 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6c658327 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6c658327 Branch: refs/heads/branch-1 Commit: 6c65832744fddbda665e83f8897b90aee18bba29 Parents: 54ba148 Author: Matt McCline <[email protected]> Authored: Thu May 19 11:52:18 2016 -0700 Committer: Matt McCline <[email protected]> Committed: Thu May 19 11:52:18 2016 -0700 ---------------------------------------------------------------------- .../ql/exec/vector/VectorizationContext.java | 167 ++++++------------- .../hive/ql/optimizer/physical/Vectorizer.java | 16 +- .../hive/ql/plan/ExprNodeGenericFuncDesc.java | 6 + .../clientpositive/vector_between_columns.q | 13 +- .../tez/vector_between_columns.q.out | 40 ++--- .../tez/vector_char_mapjoin1.q.out | 1 + .../clientpositive/tez/vector_decimal_udf.q.out | 1 + .../tez/vector_decimal_udf2.q.out | 1 + .../tez/vector_varchar_mapjoin1.q.out | 1 + .../clientpositive/vector_between_columns.q.out | 40 ++--- .../clientpositive/vector_char_mapjoin1.q.out | 1 + .../clientpositive/vector_decimal_udf.q.out | 1 + .../clientpositive/vector_decimal_udf2.q.out | 1 + .../results/clientpositive/vector_udf1.q.out | 4 + .../vector_varchar_mapjoin1.q.out | 1 + 15 files changed, 132 insertions(+), 162 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 94b1c9d..f09bfd6 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -462,8 +462,8 @@ public class VectorizationContext { ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode); } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc; - if (isCustomUDF(expr) || isNonVectorizedPathUDF(expr)) { - ve = getCustomUDFExpression(expr); + if (isCustomUDF(expr)) { + ve = getCustomUDFExpression(expr, mode); } else { // Add cast expression if needed. Child expressions of a udf may return different data types @@ -474,16 +474,23 @@ public class VectorizationContext { exprDesc.getChildren(), exprDesc.getTypeInfo()); ve = getGenericUdfVectorExpression(expr.getGenericUDF(), childExpressions, mode, exprDesc.getTypeInfo()); + if (ve == null) { + /* + * Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor. + */ + ve = getCustomUDFExpression(expr, mode); + } } } else if (exprDesc instanceof ExprNodeConstantDesc) { ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(), mode); } if (ve == null) { - throw new HiveException("Could not vectorize expression: "+exprDesc.getName()); + throw new HiveException( + "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()); } if (LOG.isDebugEnabled()) { - LOG.debug("Input Expression = " + exprDesc.getTypeInfo() + LOG.debug("Input Expression = " + exprDesc.toString() + ", Vectorized Expression = " + ve.toString()); } return ve; @@ -742,60 +749,6 @@ public class VectorizationContext { return genericUdf; } - - /* Return true if this is one of a small set of functions for which - * it is significantly easier to use the old code path in vectorized - * mode instead of implementing a new, optimized VectorExpression. 
- * - * Depending on performance requirements and frequency of use, these - * may be implemented in the future with an optimized VectorExpression. - */ - public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) { - GenericUDF gudf = expr.getGenericUDF(); - if (gudf instanceof GenericUDFBridge) { - GenericUDFBridge bridge = (GenericUDFBridge) gudf; - Class<? extends UDF> udfClass = bridge.getUdfClass(); - if (udfClass.equals(UDFHex.class) - || udfClass.equals(UDFRegExpExtract.class) - || udfClass.equals(UDFRegExpReplace.class) - || udfClass.equals(UDFConv.class) - || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr)) - || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr)) - || udfClass.equals(UDFToString.class) && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } - } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr))) - - /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because - * of their complexity and generality. In the future, variations of these - * can be optimized to run faster for the vectorized code path. For example, - * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END - * is an example of a GenericUDFCase that has all constant arguments - * except for the first argument. This is probably a common case and a - * good candidate for a fast, special-purpose VectorExpression. Then - * the UDF Adaptor code path could be used as a catch-all for - * non-optimized general cases. 
- */ - || gudf instanceof GenericUDFCase - || gudf instanceof GenericUDFWhen) { - return true; - } else if (gudf instanceof GenericUDFToChar && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } else if (gudf instanceof GenericUDFToVarchar && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } - return false; - } - public static boolean isCastToIntFamily(Class<? extends UDF> udfClass) { return udfClass.equals(UDFToByte.class) || udfClass.equals(UDFToShort.class) @@ -1193,35 +1146,37 @@ public class VectorizationContext { List<ExprNodeDesc> castedChildren = evaluateCastOnConstants(childExpr); childExpr = castedChildren; - //First handle special cases - if (udf instanceof GenericUDFBetween) { - return getBetweenFilterExpression(childExpr, mode, returnType); + //First handle special cases. If one of the special case methods cannot handle it, + // it returns null. + VectorExpression ve = null; + if (udf instanceof GenericUDFBetween && mode == Mode.FILTER) { + ve = getBetweenFilterExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { - return getInExpression(childExpr, mode, returnType); + ve = getInExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFOPPositive) { - return getIdentityExpression(childExpr); + ve = getIdentityExpression(childExpr); } else if (udf instanceof GenericUDFCoalesce) { // Coalesce is a special case because it can take variable number of arguments. - return getCoalesceExpression(childExpr, returnType); + ve = getCoalesceExpression(childExpr, returnType); } else if (udf instanceof GenericUDFElt) { - // Coalesce is a special case because it can take variable number of arguments. - return getEltExpression(childExpr, returnType); + // Elt is a special case because it can take variable number of arguments. 
+ ve = getEltExpression(childExpr, returnType); } else if (udf instanceof GenericUDFBridge) { - VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, + ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, returnType); - if (v != null) { - return v; - } } else if (udf instanceof GenericUDFToDecimal) { - return getCastToDecimal(childExpr, returnType); + ve = getCastToDecimal(childExpr, returnType); } else if (udf instanceof GenericUDFToChar) { - return getCastToChar(childExpr, returnType); + ve = getCastToChar(childExpr, returnType); } else if (udf instanceof GenericUDFToVarchar) { - return getCastToVarChar(childExpr, returnType); + ve = getCastToVarChar(childExpr, returnType); } else if (udf instanceof GenericUDFTimestamp) { - return getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType); + ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType); + } + if (ve != null) { + return ve; } // Now do a general lookup Class<?> udfClass = udf.getClass(); @@ -1231,13 +1186,9 @@ public class VectorizationContext { isSubstituted = true; } - VectorExpression ve = getVectorExpressionForUdf((!isSubstituted ? udf : null), + ve = getVectorExpressionForUdf((!isSubstituted ? 
udf : null), udfClass, castedChildren, mode, returnType); - if (ve == null) { - throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported"); - } - return ve; } @@ -1326,7 +1277,7 @@ public class VectorizationContext { case INT: case LONG: return InConstantType.INT_FAMILY; - + case DATE: return InConstantType.TIMESTAMP; @@ -1336,16 +1287,16 @@ public class VectorizationContext { case FLOAT: case DOUBLE: return InConstantType.FLOAT_FAMILY; - + case STRING: case CHAR: case VARCHAR: case BINARY: return InConstantType.STRING_FAMILY; - + case DECIMAL: return InConstantType.DECIMAL; - + case INTERVAL_YEAR_MONTH: case INTERVAL_DAY_TIME: @@ -1602,16 +1553,20 @@ public class VectorizationContext { private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge udf, List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException { Class<? extends UDF> cl = udf.getUdfClass(); + VectorExpression ve = null; if (isCastToIntFamily(cl)) { - return getCastToLongExpression(childExpr); + ve = getCastToLongExpression(childExpr); } else if (cl.equals(UDFToBoolean.class)) { - return getCastToBoolean(childExpr); + ve = getCastToBoolean(childExpr); } else if (isCastToFloatFamily(cl)) { - return getCastToDoubleExpression(cl, childExpr, returnType); + ve = getCastToDoubleExpression(cl, childExpr, returnType); } else if (cl.equals(UDFToString.class)) { - return getCastToString(childExpr, returnType); + ve = getCastToString(childExpr, returnType); } - return null; + if (ve == null && childExpr instanceof ExprNodeGenericFuncDesc) { + ve = getCustomUDFExpression((ExprNodeGenericFuncDesc) childExpr, mode); + } + return ve; } private HiveDecimal castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException { @@ -1741,10 +1696,10 @@ public class VectorizationContext { returnType); } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringToDecimal.class, childExpr, Mode.PROJECTION, returnType); - } else if 
(isDatetimeFamily(inputType)) { + } else if (inputType.equals("timestamp")) { return createVectorExpression(CastTimestampToDecimal.class, childExpr, Mode.PROJECTION, returnType); } - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo returnType) @@ -1767,11 +1722,7 @@ } else if (isDateFamily(inputType)) { return createVectorExpression(CastDateToString.class, childExpr, Mode.PROJECTION, returnType); } - /* The string type is deliberately omitted -- the planner removes string to string casts. * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToChar(List<ExprNodeDesc> childExpr, TypeInfo returnType) @@ -1795,12 +1746,7 @@ } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToChar.class, childExpr, Mode.PROJECTION, returnType); } - - /* - * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. - */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToVarChar(List<ExprNodeDesc> childExpr, TypeInfo returnType) @@ -1824,12 +1770,7 @@ } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToVarChar.class, childExpr, Mode.PROJECTION, returnType); } - - /* - * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. 
- */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToDoubleExpression(Class<?> udf, List<ExprNodeDesc> childExpr, @@ -1852,8 +1793,6 @@ public class VectorizationContext { // float types require no conversion, so use a no-op return getIdentityExpression(childExpr); } - // The string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF. - return null; } @@ -1882,8 +1821,6 @@ public class VectorizationContext { ocm.freeOutputColumn(lenExpr.getOutputColumn()); return lenToBoolExpr; } - // cast(booleanExpr as boolean) case is omitted because planner removes it as a no-op - return null; } @@ -1903,8 +1840,6 @@ public class VectorizationContext { // integer and boolean types require no conversion, so use a no-op return getIdentityExpression(childExpr); } - // string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF. - return null; } @@ -2008,9 +1943,13 @@ public class VectorizationContext { /* * Return vector expression for a custom (i.e. not built-in) UDF. 
*/ - private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr) + private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Mode mode) throws HiveException { + if (mode != Mode.PROJECTION) { + return null; + } + //GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF(); List<ExprNodeDesc> childExprList = expr.getChildren(); http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 2b7335a..98f20b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1477,14 +1477,14 @@ public class Vectorizer implements PhysicalPlanResolver { if (desc.getChildren() != null) { if (isInExpression && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) { - // Don't restrict child expressions for projection. + // Don't restrict child expressions for projection. // Always use loose FILTER mode. if (!validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER)) { return false; } } else { for (ExprNodeDesc d : desc.getChildren()) { - // Don't restrict child expressions for projection. + // Don't restrict child expressions for projection. // Always use loose FILTER mode. if (!validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER)) { return false; @@ -1550,10 +1550,16 @@ public class Vectorizer implements PhysicalPlanResolver { return false; } } catch (Exception e) { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to vectorize", e); + if (e instanceof HiveException) { + LOG.info(e.getMessage()); + } else { + if (LOG.isDebugEnabled()) { + // Show stack trace. 
+ LOG.debug("Failed to vectorize", e); + } else { + LOG.info("Failed to vectorize\n" + e.getMessage()); + } } return false; } return true; http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java index 3a22218..055a77a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java @@ -136,6 +136,12 @@ public class ExprNodeGenericFuncDesc extends ExprNodeDesc implements public String toString() { StringBuilder sb = new StringBuilder(); sb.append(genericUDF.getClass().getSimpleName()); + if (genericUDF instanceof GenericUDFBridge) { + GenericUDFBridge genericUDFBridge = (GenericUDFBridge) genericUDF; + sb.append(" ==> "); + sb.append(genericUDFBridge.getUdfName()); + sb.append(" "); + } sb.append("("); for (int i = 0; i < chidren.size(); i++) { if (i > 0) { http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/queries/clientpositive/vector_between_columns.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_between_columns.q b/ql/src/test/queries/clientpositive/vector_between_columns.q index 4c83d0a..ba38445 100644 --- a/ql/src/test/queries/clientpositive/vector_between_columns.q +++ b/ql/src/test/queries/clientpositive/vector_between_columns.q @@ -6,7 +6,10 @@ set hive.fetch.task.conversion=none; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. 
+-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; @@ -21,9 +24,9 @@ create table TSINT stored as orc AS SELECT * FROM TSINT_txt; create table TINT stored as orc AS SELECT * FROM TINT_txt; --- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. + + explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint; +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint; -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint; +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint; http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out index 0086b1f..1361d2d 100644 --- a/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out @@ -1,12 +1,18 @@ PREHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. 
+-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@TSINT_txt POSTHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. +-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' POSTHOOK: type: CREATETABLE @@ -61,15 +67,11 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@TINT tint_txt.rnum tint_txt.cint Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. -explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY -POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. 
-explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY Explain STAGE DEPENDENCIES: @@ -106,8 +108,8 @@ STAGE PLANS: predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -138,18 +140,18 @@ STAGE PLANS: ListSink Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY PREHOOK: Input: default@tint PREHOOK: Input: default@tsint #### A masked pattern was here #### -POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY POSTHOOK: Input: default@tint POSTHOOK: Input: default@tsint #### A masked pattern was here #### -tint.rnum tsint.rnum -1 1 -2 2 -3 3 -4 4 +tint.rnum tsint.rnum tint.cint tsint.csint +1 1 -1 -1 +2 2 0 0 +3 3 1 1 +4 4 10 10 http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out b/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out index f495f95..cdb24cc 100644 --- a/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out @@ -366,6 +366,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 2 Data size: 215 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string) + Execution mode: vectorized Map 3 Map Operator Tree: TableScan http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out index b24ebcc..7c41b07 100644 --- a/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out @@ -2155,6 +2155,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out b/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out index 1496a5f..7aef012 100644 --- a/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out @@ -145,6 +145,7 @@ STAGE PLANS: 
input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out b/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out index ae5be44..b453b0e 100644 --- a/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out @@ -354,6 +354,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: string) + Execution mode: vectorized Map 3 Map Operator Tree: TableScan http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_between_columns.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_between_columns.q.out b/ql/src/test/results/clientpositive/vector_between_columns.q.out index 23a6366..b5692c8 100644 --- a/ql/src/test/results/clientpositive/vector_between_columns.q.out +++ b/ql/src/test/results/clientpositive/vector_between_columns.q.out @@ -1,12 +1,18 @@ PREHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. 
+-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@TSINT_txt POSTHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. +-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' POSTHOOK: type: CREATETABLE @@ -61,15 +67,11 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@TINT tint_txt.rnum tint_txt.cint Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. -explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY -POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. 
-explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY Explain STAGE DEPENDENCIES: @@ -120,8 +122,8 @@ STAGE PLANS: predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -140,18 +142,18 @@ STAGE PLANS: ListSink Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY PREHOOK: Input: default@tint PREHOOK: Input: default@tsint #### A masked pattern was here #### -POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY POSTHOOK: Input: default@tint POSTHOOK: Input: default@tsint #### A masked pattern was here #### -tint.rnum tsint.rnum -1 1 -2 2 -3 3 -4 4 +tint.rnum tsint.rnum tint.cint tsint.csint +1 1 -1 -1 +2 2 0 0 +3 3 1 1 +4 4 10 10 http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out b/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out index 744bfb3..af5f0de 100644 --- a/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out @@ -371,6 +371,7 @@ STAGE PLANS: value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string) Local Work: Map Reduce Local Work + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_decimal_udf.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/vector_decimal_udf.q.out index e381757..e8b7bf7 100644 --- a/ql/src/test/results/clientpositive/vector_decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/vector_decimal_udf.q.out @@ -2085,6 +2085,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out b/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out index 4f6d7f1..d024f83 100644 --- a/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out +++ b/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out @@ -139,6 +139,7 @@ 
STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_udf1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_udf1.q.out b/ql/src/test/results/clientpositive/vector_udf1.q.out index 748276f..10c2da6 100644 --- a/ql/src/test/results/clientpositive/vector_udf1.q.out +++ b/ql/src/test/results/clientpositive/vector_udf1.q.out @@ -65,6 +65,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -128,6 +129,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -191,6 +193,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -763,6 +766,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out b/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out index 2994bff..936ef78 100644 --- a/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out @@ -359,6 +359,7 @@ STAGE PLANS: value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: string) Local Work: Map Reduce Local Work + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: string)
