Repository: hive
Updated Branches:
  refs/heads/master 093341624 -> d35ad0677


HIVE-14155: Vectorization: Custom UDF Vectorization annotations are ignored (Gopal V, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d35ad067
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d35ad067
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d35ad067

Branch: refs/heads/master
Commit: d35ad06779650c8b5f6c259413bf03e9909ba72f
Parents: 0933416
Author: Gopal V <gop...@apache.org>
Authored: Sat Aug 27 01:25:28 2016 -0700
Committer: Gopal V <gop...@apache.org>
Committed: Sat Aug 27 01:25:36 2016 -0700

----------------------------------------------------------------------
 itests/custom-udfs/pom.xml                      |   1 +
 .../udf-vectorized-badexample/pom.xml           |  43 ++++++++
 .../hive/it/custom/udfs/GenericUDFRot13.java    |  32 ++++++
 .../custom/udfs/vector/VectorStringRot13.java   |  46 ++++++++
 .../ql/exec/vector/VectorizationContext.java    | 106 ++++++++++---------
 .../test/queries/clientpositive/vector_udf3.q   |  13 +++
 .../results/clientpositive/vector_udf3.q.out    |  76 +++++++++++++
 7 files changed, 266 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d35ad067/itests/custom-udfs/pom.xml
----------------------------------------------------------------------
diff --git a/itests/custom-udfs/pom.xml b/itests/custom-udfs/pom.xml
index 3e7443c..b230b41 100644
--- a/itests/custom-udfs/pom.xml
+++ b/itests/custom-udfs/pom.xml
@@ -42,6 +42,7 @@ limitations under the License.
     <module>udf-classloader-util</module>
     <module>udf-classloader-udf1</module>
     <module>udf-classloader-udf2</module>
+    <module>udf-vectorized-badexample</module>
   </modules>
 
   <dependencies>

http://git-wip-us.apache.org/repos/asf/hive/blob/d35ad067/itests/custom-udfs/udf-vectorized-badexample/pom.xml
----------------------------------------------------------------------
diff --git a/itests/custom-udfs/udf-vectorized-badexample/pom.xml b/itests/custom-udfs/udf-vectorized-badexample/pom.xml
new file mode 100644
index 0000000..35c1a2f
--- /dev/null
+++ b/itests/custom-udfs/udf-vectorized-badexample/pom.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.hive</groupId>
+    <artifactId>hive-it-custom-udfs</artifactId>
+    <version>2.2.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.hive.hive-it-custom-udfs</groupId>
+  <artifactId>udf-vectorized-badexample</artifactId>
+  <packaging>jar</packaging>
+  <name>Hive Integration - Custom UDFs - udf-vectorized-badexample</name>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hive.hive-it-custom-udfs</groupId>
+      <artifactId>udf-classloader-util</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+  </dependencies>
+
+  <properties>
+    <hive.path.to.root>../../..</hive.path.to.root>
+  </properties>
+
+</project>

http://git-wip-us.apache.org/repos/asf/hive/blob/d35ad067/itests/custom-udfs/udf-vectorized-badexample/src/main/java/hive/it/custom/udfs/GenericUDFRot13.java
----------------------------------------------------------------------
diff --git a/itests/custom-udfs/udf-vectorized-badexample/src/main/java/hive/it/custom/udfs/GenericUDFRot13.java b/itests/custom-udfs/udf-vectorized-badexample/src/main/java/hive/it/custom/udfs/GenericUDFRot13.java
new file mode 100644
index 0000000..8941175
--- /dev/null
+++ b/itests/custom-udfs/udf-vectorized-badexample/src/main/java/hive/it/custom/udfs/GenericUDFRot13.java
@@ -0,0 +1,32 @@
+package hive.it.custom.udfs; 
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.Text;
+import hive.it.custom.udfs.vector.VectorStringRot13;
+
+@VectorizedExpressions(value = { VectorStringRot13.class })
+public class GenericUDFRot13 extends GenericUDF {
+
+  @Override
+  public Object evaluate(DeferredObject[] arg0) throws HiveException {
+    /* this is the bad part - the vectorized UDF returns the right result */
+    return new Text("Unvectorized");
+  }
+
+  @Override
+  public String getDisplayString(String[] arg0) {
+    return String.format("Rot13(%s)", arg0[0]);
+  }
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arg0)
+      throws UDFArgumentException {
+    return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/d35ad067/itests/custom-udfs/udf-vectorized-badexample/src/main/java/hive/it/custom/udfs/vector/VectorStringRot13.java
----------------------------------------------------------------------
diff --git a/itests/custom-udfs/udf-vectorized-badexample/src/main/java/hive/it/custom/udfs/vector/VectorStringRot13.java b/itests/custom-udfs/udf-vectorized-badexample/src/main/java/hive/it/custom/udfs/vector/VectorStringRot13.java
new file mode 100644
index 0000000..7fbfe32
--- /dev/null
+++ b/itests/custom-udfs/udf-vectorized-badexample/src/main/java/hive/it/custom/udfs/vector/VectorStringRot13.java
@@ -0,0 +1,46 @@
+package hive.it.custom.udfs.vector;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUnaryUDF;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUnaryUDFDirect;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.orc.impl.TreeReaderFactory.BytesColumnVectorUtil;
+
+public class VectorStringRot13 extends StringUnaryUDFDirect {
+
+  public VectorStringRot13(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+  }
+  
+  public VectorStringRot13() {
+    super();
+  }
+
+  @Override
+  protected void func(BytesColumnVector outV, byte[][] vector, int[] start,
+      int[] length, int i) {
+    int off = start[i];
+    int len = length[i];
+    byte[] src = vector[i];
+    byte[] dst = new byte[len];
+    for (int j = 0; j < len ; j++) {
+      dst[j] = rot13(src[off+j]);
+    }
+    outV.setVal(i, dst, 0, length[i]);
+  }
+
+  private byte rot13(byte b) {
+    if (b >= 'a' && b <= 'm' || b >= 'A' && b <= 'M' ) {
+      return (byte) (b+13);
+    }
+    if (b >= 'n' && b <= 'z' || b >= 'N' && b <= 'Z') {
+      return (byte) (b-13);
+    }
+    return b;
+    }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/d35ad067/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 3f71fa8..f088941 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -531,58 +531,54 @@ public class VectorizationContext {
       ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode);
     } else if (exprDesc instanceof ExprNodeGenericFuncDesc) {
       ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
-      if (isCustomUDF(expr)) {
-        ve = getCustomUDFExpression(expr, mode);
-      } else {
-
-        // Add cast expression if needed. Child expressions of a udf may return different data types
-        // and that would require converting their data types to evaluate the udf.
-        // For example decimal column added to an integer column would require integer column to be
-        // cast to decimal.
-        List<ExprNodeDesc> childExpressions = getChildExpressionsWithImplicitCast(expr.getGenericUDF(),
-            exprDesc.getChildren(), exprDesc.getTypeInfo());
-        ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
-            childExpressions, mode, exprDesc.getTypeInfo());
-        if (ve == null) {
-          // Ok, no vectorized class available.  No problem -- try to use the VectorUDFAdaptor
-          // when configured.
-          //
-          // NOTE: We assume if hiveVectorAdaptorUsageMode has not been set it because we are
-          // executing a test that didn't create a HiveConf, etc.  No usage of VectorUDFAdaptor in
-          // that case.
-          if (hiveVectorAdaptorUsageMode != null) {
-            switch (hiveVectorAdaptorUsageMode) {
-            case NONE:
-              // No VectorUDFAdaptor usage.
+      // Add cast expression if needed. Child expressions of a udf may return different data types
+      // and that would require converting their data types to evaluate the udf.
+      // For example decimal column added to an integer column would require integer column to be
+      // cast to decimal.
+         // Note: this is a no-op for custom UDFs
+      List<ExprNodeDesc> childExpressions = getChildExpressionsWithImplicitCast(expr.getGenericUDF(),
+          exprDesc.getChildren(), exprDesc.getTypeInfo());
+      ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
+          childExpressions, mode, exprDesc.getTypeInfo());
+      if (ve == null) {
+        // Ok, no vectorized class available.  No problem -- try to use the VectorUDFAdaptor
+        // when configured.
+        //
+        // NOTE: We assume if hiveVectorAdaptorUsageMode has not been set it because we are
+        // executing a test that didn't create a HiveConf, etc.  No usage of VectorUDFAdaptor in
+        // that case.
+        if (hiveVectorAdaptorUsageMode != null) {
+          switch (hiveVectorAdaptorUsageMode) {
+          case NONE:
+            // No VectorUDFAdaptor usage.
+            throw new HiveException(
+                "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()
+                  + " because hive.vectorized.adaptor.usage.mode=none");
+          case CHOSEN:
+            if (isNonVectorizedPathUDF(expr, mode)) {
+              ve = getCustomUDFExpression(expr, mode);
+            } else {
               throw new HiveException(
                   "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()
-                    + " because hive.vectorized.adaptor.usage.mode=none");
-            case CHOSEN:
-              if (isNonVectorizedPathUDF(expr, mode)) {
-                ve = getCustomUDFExpression(expr, mode);
-              } else {
-                throw new HiveException(
-                    "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()
-                      + " because hive.vectorized.adaptor.usage.mode=chosen "
-                      + " and the UDF wasn't one of the chosen ones");
-              }
-              break;
-            case ALL:
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("We will try to use the VectorUDFAdaptor for " + exprDesc.toString()
-                    + " because hive.vectorized.adaptor.usage.mode=all");
-              }
-              ve = getCustomUDFExpression(expr, mode);
-              break;
-            default:
-              throw new RuntimeException("Unknown hive vector adaptor usage mode " +
-                hiveVectorAdaptorUsageMode.name());
+                    + " because hive.vectorized.adaptor.usage.mode=chosen "
+                    + " and the UDF wasn't one of the chosen ones");
             }
-            if (ve == null) {
-              throw new HiveException(
-                  "Unable vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()
-                    + " even for the VectorUDFAdaptor");
+            break;
+          case ALL:
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("We will try to use the VectorUDFAdaptor for " + exprDesc.toString()
+                  + " because hive.vectorized.adaptor.usage.mode=all");
             }
+            ve = getCustomUDFExpression(expr, mode);
+            break;
+          default:
+            throw new RuntimeException("Unknown hive vector adaptor usage mode " +
+              hiveVectorAdaptorUsageMode.name());
+          }
+          if (ve == null) {
+            throw new HiveException(
+                "Unable vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()
+                  + " even for the VectorUDFAdaptor");
           }
         }
       }
@@ -650,8 +646,13 @@ public class VectorizationContext {
    */
   private List<ExprNodeDesc> getChildExpressionsWithImplicitCast(GenericUDF genericUDF,
       List<ExprNodeDesc> children, TypeInfo returnType) throws HiveException {
-    if (isExcludedFromCast(genericUDF)) {
 
+    if (isCustomUDF(genericUDF.getUdfName())) {
+      // no implicit casts possible
+      return children;
+    }
+
+    if (isExcludedFromCast(genericUDF)) {
       // No implicit cast needed
       return children;
     }
@@ -946,9 +947,12 @@ public class VectorizationContext {
   }
 
   // Return true if this is a custom UDF or custom GenericUDF.
-  // This is for use only in the planner. It will fail in a task.
+  // These two functions are for use only in the planner. They will fail in a task.
   public static boolean isCustomUDF(ExprNodeGenericFuncDesc expr) {
-    String udfName = expr.getFuncText();
+    return isCustomUDF(expr.getFuncText());
+  }
+
+  private static boolean isCustomUDF(String udfName) {
     if (udfName == null) {
       return false;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/d35ad067/ql/src/test/queries/clientpositive/vector_udf3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_udf3.q b/ql/src/test/queries/clientpositive/vector_udf3.q
new file mode 100644
index 0000000..8a4df79
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_udf3.q
@@ -0,0 +1,13 @@
+ADD JAR ivy://org.apache.hive.hive-it-custom-udfs:udf-vectorized-badexample:+;
+
+CREATE TEMPORARY FUNCTION rot13 as 'hive.it.custom.udfs.GenericUDFRot13';
+
+set hive.vectorized.execution.enabled=true;
+
+EXPLAIN SELECT rot13(cstring1) from alltypesorc;
+
+SELECT cstring1, rot13(cstring1) from alltypesorc order by cstring1 desc limit 10;
+
+set hive.vectorized.execution.enabled=false;
+
+SELECT cstring1, rot13(cstring1) from alltypesorc order by cstring1 desc limit 10;

http://git-wip-us.apache.org/repos/asf/hive/blob/d35ad067/ql/src/test/results/clientpositive/vector_udf3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_udf3.q.out b/ql/src/test/results/clientpositive/vector_udf3.q.out
new file mode 100644
index 0000000..7c6a90a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_udf3.q.out
@@ -0,0 +1,76 @@
+PREHOOK: query: CREATE TEMPORARY FUNCTION rot13 as 'hive.it.custom.udfs.GenericUDFRot13'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: rot13
+POSTHOOK: query: CREATE TEMPORARY FUNCTION rot13 as 'hive.it.custom.udfs.GenericUDFRot13'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: rot13
+PREHOOK: query: EXPLAIN SELECT rot13(cstring1) from alltypesorc
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT rot13(cstring1) from alltypesorc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: Rot13(cstring1) (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT cstring1, rot13(cstring1) from alltypesorc order by cstring1 desc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cstring1, rot13(cstring1) from alltypesorc order by cstring1 desc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+yy2GiGM        ll2TvTZ
+yxN0212hM17E8J8bJj8D7b lkA0212uZ17R8W8oWw8Q7o
+ywA68u76Jv06axCv451avL4        ljN68h76Wi06nkPi451niY4
+yvNv1q liAi1d
+yv3gnG4a33hD7bIm7oxE5rw        li3taT4n33uQ7oVz7bkR5ej
+yv1js  li1wf
+yujO07KWj      lhwB07XJw
+ytpx1RL8F2I    lgck1EY8S2V
+ytj7g5W        lgw7t5J
+ytgaJW1Gvrkv5wFUJU2y1S lgtnWJ1Tiexi5jSHWH2l1F
+PREHOOK: query: SELECT cstring1, rot13(cstring1) from alltypesorc order by cstring1 desc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cstring1, rot13(cstring1) from alltypesorc order by cstring1 desc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+yy2GiGM        Unvectorized
+yxN0212hM17E8J8bJj8D7b Unvectorized
+ywA68u76Jv06axCv451avL4        Unvectorized
+yvNv1q Unvectorized
+yv3gnG4a33hD7bIm7oxE5rw        Unvectorized
+yv1js  Unvectorized
+yujO07KWj      Unvectorized
+ytpx1RL8F2I    Unvectorized
+ytj7g5W        Unvectorized
+ytgaJW1Gvrkv5wFUJU2y1S Unvectorized

Reply via email to