Repository: hive Updated Branches: refs/heads/branch-1 54ba148d6 -> 6c6583274
HIVE-13562: Enable vector bridge for all non-vectorized udfs (Matt McCline, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6c658327 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6c658327 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6c658327 Branch: refs/heads/branch-1 Commit: 6c65832744fddbda665e83f8897b90aee18bba29 Parents: 54ba148 Author: Matt McCline <[email protected]> Authored: Thu May 19 11:52:18 2016 -0700 Committer: Matt McCline <[email protected]> Committed: Thu May 19 11:52:18 2016 -0700 ---------------------------------------------------------------------- .../ql/exec/vector/VectorizationContext.java | 167 ++++++------------- .../hive/ql/optimizer/physical/Vectorizer.java | 16 +- .../hive/ql/plan/ExprNodeGenericFuncDesc.java | 6 + .../clientpositive/vector_between_columns.q | 13 +- .../tez/vector_between_columns.q.out | 40 ++--- .../tez/vector_char_mapjoin1.q.out | 1 + .../clientpositive/tez/vector_decimal_udf.q.out | 1 + .../tez/vector_decimal_udf2.q.out | 1 + .../tez/vector_varchar_mapjoin1.q.out | 1 + .../clientpositive/vector_between_columns.q.out | 40 ++--- .../clientpositive/vector_char_mapjoin1.q.out | 1 + .../clientpositive/vector_decimal_udf.q.out | 1 + .../clientpositive/vector_decimal_udf2.q.out | 1 + .../results/clientpositive/vector_udf1.q.out | 4 + .../vector_varchar_mapjoin1.q.out | 1 + 15 files changed, 132 insertions(+), 162 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 94b1c9d..f09bfd6 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -462,8 +462,8 @@ public class VectorizationContext { ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode); } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc; - if (isCustomUDF(expr) || isNonVectorizedPathUDF(expr)) { - ve = getCustomUDFExpression(expr); + if (isCustomUDF(expr)) { + ve = getCustomUDFExpression(expr, mode); } else { // Add cast expression if needed. Child expressions of a udf may return different data types @@ -474,16 +474,23 @@ public class VectorizationContext { exprDesc.getChildren(), exprDesc.getTypeInfo()); ve = getGenericUdfVectorExpression(expr.getGenericUDF(), childExpressions, mode, exprDesc.getTypeInfo()); + if (ve == null) { + /* + * Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor. + */ + ve = getCustomUDFExpression(expr, mode); + } } } else if (exprDesc instanceof ExprNodeConstantDesc) { ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(), mode); } if (ve == null) { - throw new HiveException("Could not vectorize expression: "+exprDesc.getName()); + throw new HiveException( + "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()); } if (LOG.isDebugEnabled()) { - LOG.debug("Input Expression = " + exprDesc.getTypeInfo() + LOG.debug("Input Expression = " + exprDesc.toString() + ", Vectorized Expression = " + ve.toString()); } return ve; @@ -742,60 +749,6 @@ public class VectorizationContext { return genericUdf; } - - /* Return true if this is one of a small set of functions for which - * it is significantly easier to use the old code path in vectorized - * mode instead of implementing a new, optimized VectorExpression. 
- * - * Depending on performance requirements and frequency of use, these - * may be implemented in the future with an optimized VectorExpression. - */ - public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) { - GenericUDF gudf = expr.getGenericUDF(); - if (gudf instanceof GenericUDFBridge) { - GenericUDFBridge bridge = (GenericUDFBridge) gudf; - Class<? extends UDF> udfClass = bridge.getUdfClass(); - if (udfClass.equals(UDFHex.class) - || udfClass.equals(UDFRegExpExtract.class) - || udfClass.equals(UDFRegExpReplace.class) - || udfClass.equals(UDFConv.class) - || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr)) - || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr)) - || udfClass.equals(UDFToString.class) && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } - } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr))) - - /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because - * of their complexity and generality. In the future, variations of these - * can be optimized to run faster for the vectorized code path. For example, - * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END - * is an example of a GenericUDFCase that has all constant arguments - * except for the first argument. This is probably a common case and a - * good candidate for a fast, special-purpose VectorExpression. Then - * the UDF Adaptor code path could be used as a catch-all for - * non-optimized general cases. 
- */ - || gudf instanceof GenericUDFCase - || gudf instanceof GenericUDFWhen) { - return true; - } else if (gudf instanceof GenericUDFToChar && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } else if (gudf instanceof GenericUDFToVarchar && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } - return false; - } - public static boolean isCastToIntFamily(Class<? extends UDF> udfClass) { return udfClass.equals(UDFToByte.class) || udfClass.equals(UDFToShort.class) @@ -1193,35 +1146,37 @@ public class VectorizationContext { List<ExprNodeDesc> castedChildren = evaluateCastOnConstants(childExpr); childExpr = castedChildren; - //First handle special cases - if (udf instanceof GenericUDFBetween) { - return getBetweenFilterExpression(childExpr, mode, returnType); + //First handle special cases. If one of the special case methods cannot handle it, + // it returns null. + VectorExpression ve = null; + if (udf instanceof GenericUDFBetween && mode == Mode.FILTER) { + ve = getBetweenFilterExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { - return getInExpression(childExpr, mode, returnType); + ve = getInExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFOPPositive) { - return getIdentityExpression(childExpr); + ve = getIdentityExpression(childExpr); } else if (udf instanceof GenericUDFCoalesce) { // Coalesce is a special case because it can take variable number of arguments. - return getCoalesceExpression(childExpr, returnType); + ve = getCoalesceExpression(childExpr, returnType); } else if (udf instanceof GenericUDFElt) { - // Coalesce is a special case because it can take variable number of arguments. - return getEltExpression(childExpr, returnType); + // Elt is a special case because it can take variable number of arguments. 
+ ve = getEltExpression(childExpr, returnType); } else if (udf instanceof GenericUDFBridge) { - VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, + ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, returnType); - if (v != null) { - return v; - } } else if (udf instanceof GenericUDFToDecimal) { - return getCastToDecimal(childExpr, returnType); + ve = getCastToDecimal(childExpr, returnType); } else if (udf instanceof GenericUDFToChar) { - return getCastToChar(childExpr, returnType); + ve = getCastToChar(childExpr, returnType); } else if (udf instanceof GenericUDFToVarchar) { - return getCastToVarChar(childExpr, returnType); + ve = getCastToVarChar(childExpr, returnType); } else if (udf instanceof GenericUDFTimestamp) { - return getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType); + ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType); + } + if (ve != null) { + return ve; } // Now do a general lookup Class<?> udfClass = udf.getClass(); @@ -1231,13 +1186,9 @@ public class VectorizationContext { isSubstituted = true; } - VectorExpression ve = getVectorExpressionForUdf((!isSubstituted ? udf : null), + ve = getVectorExpressionForUdf((!isSubstituted ? 
udf : null), udfClass, castedChildren, mode, returnType); - if (ve == null) { - throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported"); - } - return ve; } @@ -1326,7 +1277,7 @@ public class VectorizationContext { case INT: case LONG: return InConstantType.INT_FAMILY; - + case DATE: return InConstantType.TIMESTAMP; @@ -1336,16 +1287,16 @@ public class VectorizationContext { case FLOAT: case DOUBLE: return InConstantType.FLOAT_FAMILY; - + case STRING: case CHAR: case VARCHAR: case BINARY: return InConstantType.STRING_FAMILY; - + case DECIMAL: return InConstantType.DECIMAL; - + case INTERVAL_YEAR_MONTH: case INTERVAL_DAY_TIME: @@ -1602,16 +1553,20 @@ public class VectorizationContext { private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge udf, List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException { Class<? extends UDF> cl = udf.getUdfClass(); + VectorExpression ve = null; if (isCastToIntFamily(cl)) { - return getCastToLongExpression(childExpr); + ve = getCastToLongExpression(childExpr); } else if (cl.equals(UDFToBoolean.class)) { - return getCastToBoolean(childExpr); + ve = getCastToBoolean(childExpr); } else if (isCastToFloatFamily(cl)) { - return getCastToDoubleExpression(cl, childExpr, returnType); + ve = getCastToDoubleExpression(cl, childExpr, returnType); } else if (cl.equals(UDFToString.class)) { - return getCastToString(childExpr, returnType); + ve = getCastToString(childExpr, returnType); } - return null; + if (ve == null && childExpr instanceof ExprNodeGenericFuncDesc) { + ve = getCustomUDFExpression((ExprNodeGenericFuncDesc) childExpr, mode); + } + return ve; } private HiveDecimal castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException { @@ -1741,10 +1696,10 @@ public class VectorizationContext { returnType); } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringToDecimal.class, childExpr, Mode.PROJECTION, returnType); - } else if 
(isDatetimeFamily(inputType)) { + } else if (inputType.equals("timestamp")) { return createVectorExpression(CastTimestampToDecimal.class, childExpr, Mode.PROJECTION, returnType); } - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo returnType) @@ -1767,11 +1722,7 @@ } else if (isDateFamily(inputType)) { return createVectorExpression(CastDateToString.class, childExpr, Mode.PROJECTION, returnType); } - /* The string type is deliberately omitted -- the planner removes string to string casts. * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToChar(List<ExprNodeDesc> childExpr, TypeInfo returnType) @@ -1795,12 +1746,7 @@ } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToChar.class, childExpr, Mode.PROJECTION, returnType); } - - /* - * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. - */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToVarChar(List<ExprNodeDesc> childExpr, TypeInfo returnType) @@ -1824,12 +1770,7 @@ } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToVarChar.class, childExpr, Mode.PROJECTION, returnType); } - - /* - * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. 
- */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToDoubleExpression(Class<?> udf, List<ExprNodeDesc> childExpr, @@ -1852,8 +1793,6 @@ public class VectorizationContext { // float types require no conversion, so use a no-op return getIdentityExpression(childExpr); } - // The string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF. - return null; } @@ -1882,8 +1821,6 @@ public class VectorizationContext { ocm.freeOutputColumn(lenExpr.getOutputColumn()); return lenToBoolExpr; } - // cast(booleanExpr as boolean) case is omitted because planner removes it as a no-op - return null; } @@ -1903,8 +1840,6 @@ public class VectorizationContext { // integer and boolean types require no conversion, so use a no-op return getIdentityExpression(childExpr); } - // string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF. - return null; } @@ -2008,9 +1943,13 @@ public class VectorizationContext { /* * Return vector expression for a custom (i.e. not built-in) UDF. 
*/ - private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr) + private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Mode mode) throws HiveException { + if (mode != Mode.PROJECTION) { + return null; + } + //GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF(); List<ExprNodeDesc> childExprList = expr.getChildren(); http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 2b7335a..98f20b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1477,14 +1477,14 @@ public class Vectorizer implements PhysicalPlanResolver { if (desc.getChildren() != null) { if (isInExpression && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) { - // Don't restrict child expressions for projection. + // Don't restrict child expressions for projection. // Always use loose FILTER mode. if (!validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER)) { return false; } } else { for (ExprNodeDesc d : desc.getChildren()) { - // Don't restrict child expressions for projection. + // Don't restrict child expressions for projection. // Always use loose FILTER mode. if (!validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER)) { return false; @@ -1550,10 +1550,16 @@ public class Vectorizer implements PhysicalPlanResolver { return false; } } catch (Exception e) { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to vectorize", e); + if (e instanceof HiveException) { + LOG.info(e.getMessage()); + } else { + if (LOG.isDebugEnabled()) { + // Show stack trace. 
+ LOG.debug("Failed to vectorize", e); + } else { + LOG.info("Failed to vectorize\n" + e.getMessage()); + } } return false; } return true; http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java index 3a22218..055a77a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java @@ -136,6 +136,12 @@ public class ExprNodeGenericFuncDesc extends ExprNodeDesc implements public String toString() { StringBuilder sb = new StringBuilder(); sb.append(genericUDF.getClass().getSimpleName()); + if (genericUDF instanceof GenericUDFBridge) { + GenericUDFBridge genericUDFBridge = (GenericUDFBridge) genericUDF; + sb.append(" ==> "); + sb.append(genericUDFBridge.getUdfName()); + sb.append(" "); + } sb.append("("); for (int i = 0; i < chidren.size(); i++) { if (i > 0) { http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/queries/clientpositive/vector_between_columns.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_between_columns.q b/ql/src/test/queries/clientpositive/vector_between_columns.q index 4c83d0a..ba38445 100644 --- a/ql/src/test/queries/clientpositive/vector_between_columns.q +++ b/ql/src/test/queries/clientpositive/vector_between_columns.q @@ -6,7 +6,10 @@ set hive.fetch.task.conversion=none; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. 
+-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; @@ -21,9 +24,9 @@ create table TSINT stored as orc AS SELECT * FROM TSINT_txt; create table TINT stored as orc AS SELECT * FROM TINT_txt; --- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. + + explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint; +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint; -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint; +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint; http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out index 0086b1f..1361d2d 100644 --- a/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out @@ -1,12 +1,18 @@ PREHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. 
+-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@TSINT_txt POSTHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. +-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' POSTHOOK: type: CREATETABLE @@ -61,15 +67,11 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@TINT tint_txt.rnum tint_txt.cint Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. -explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY -POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. 
-explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY Explain STAGE DEPENDENCIES: @@ -106,8 +108,8 @@ STAGE PLANS: predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -138,18 +140,18 @@ STAGE PLANS: ListSink Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY PREHOOK: Input: default@tint PREHOOK: Input: default@tsint #### A masked pattern was here #### -POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY POSTHOOK: Input: default@tint POSTHOOK: Input: default@tsint #### A masked pattern was here #### -tint.rnum tsint.rnum -1 1 -2 2 -3 3 -4 4 +tint.rnum tsint.rnum tint.cint tsint.csint +1 1 -1 -1 +2 2 0 0 +3 3 1 1 +4 4 10 10 http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out b/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out index f495f95..cdb24cc 100644 --- a/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_char_mapjoin1.q.out @@ -366,6 +366,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 2 Data size: 215 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string) + Execution mode: vectorized Map 3 Map Operator Tree: TableScan http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out index b24ebcc..7c41b07 100644 --- a/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out @@ -2155,6 +2155,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out b/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out index 1496a5f..7aef012 100644 --- a/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out @@ -145,6 +145,7 @@ STAGE PLANS: 
input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out b/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out index ae5be44..b453b0e 100644 --- a/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_varchar_mapjoin1.q.out @@ -354,6 +354,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 2 Data size: 200 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: string) + Execution mode: vectorized Map 3 Map Operator Tree: TableScan http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_between_columns.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_between_columns.q.out b/ql/src/test/results/clientpositive/vector_between_columns.q.out index 23a6366..b5692c8 100644 --- a/ql/src/test/results/clientpositive/vector_between_columns.q.out +++ b/ql/src/test/results/clientpositive/vector_between_columns.q.out @@ -1,12 +1,18 @@ PREHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. 
+-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@TSINT_txt POSTHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. +-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' POSTHOOK: type: CREATETABLE @@ -61,15 +67,11 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@TINT tint_txt.rnum tint_txt.cint Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. -explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY -POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. 
-explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY Explain STAGE DEPENDENCIES: @@ -120,8 +122,8 @@ STAGE PLANS: predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -140,18 +142,18 @@ STAGE PLANS: ListSink Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY PREHOOK: Input: default@tint PREHOOK: Input: default@tsint #### A masked pattern was here #### -POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY POSTHOOK: Input: default@tint POSTHOOK: Input: default@tsint #### A masked pattern was here #### -tint.rnum tsint.rnum -1 1 -2 2 -3 3 -4 4 +tint.rnum tsint.rnum tint.cint tsint.csint +1 1 -1 -1 +2 2 0 0 +3 3 1 1 +4 4 10 10 http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out b/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out index 744bfb3..af5f0de 100644 --- a/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out @@ -371,6 +371,7 @@ STAGE PLANS: value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string) Local Work: Map Reduce Local Work + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_decimal_udf.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/vector_decimal_udf.q.out index e381757..e8b7bf7 100644 --- a/ql/src/test/results/clientpositive/vector_decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/vector_decimal_udf.q.out @@ -2085,6 +2085,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out b/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out index 4f6d7f1..d024f83 100644 --- a/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out +++ b/ql/src/test/results/clientpositive/vector_decimal_udf2.q.out @@ -139,6 +139,7 @@ 
STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_udf1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_udf1.q.out b/ql/src/test/results/clientpositive/vector_udf1.q.out index 748276f..10c2da6 100644 --- a/ql/src/test/results/clientpositive/vector_udf1.q.out +++ b/ql/src/test/results/clientpositive/vector_udf1.q.out @@ -65,6 +65,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -128,6 +129,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -191,6 +193,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -763,6 +766,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/6c658327/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out b/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out index 2994bff..936ef78 100644 --- a/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/vector_varchar_mapjoin1.q.out @@ -359,6 +359,7 @@ STAGE PLANS: value expressions: _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: string) Local Work: Map Reduce Local Work + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: varchar(10)), VALUE._col1 (type: int), VALUE._col2 (type: string)
