Author: hashutosh
Date: Tue Sep 17 21:13:53 2013
New Revision: 1524226

URL: http://svn.apache.org/r1524226
Log:
HIVE-4961 : Create bridge for custom UDFs to operate in vectorized mode (Eric 
Hanson via Ashutosh Chauhan)

Added:
    
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/
    
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
    
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
    
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/
    
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java
    
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/
    
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/GenericUDFIsNull.java
    
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/
    
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/ConcatTextLongDoubleUDF.java
    
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/LongUDF.java
Modified:
    hive/branches/vectorization/build-common.xml
    
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
    
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java

Modified: hive/branches/vectorization/build-common.xml
URL: 
http://svn.apache.org/viewvc/hive/branches/vectorization/build-common.xml?rev=1524226&r1=1524225&r2=1524226&view=diff
==============================================================================
--- hive/branches/vectorization/build-common.xml (original)
+++ hive/branches/vectorization/build-common.xml Tue Sep 17 21:13:53 2013
@@ -478,7 +478,7 @@
       <batchtest todir="${test.build.dir}" unless="testcase">
         <fileset dir="${test.build.classes}"
                  includes="**/${test.include}.class"
-                 
excludes="**/ql/exec/vector/util/*.class,**/TestSerDe.class,**/TestHiveMetaStore.class,**/TestBeeLineDriver.class,**/TestHiveServer2Concurrency.class,**/*$*.class,${test.junit.exclude}"
 />
+                
excludes="**/ql/exec/vector/util/*.class,**/ql/exec/vector/udf/legacy/*.class,**/ql/exec/vector/udf/generic/*.class,**/TestSerDe.class,**/TestHiveMetaStore.class,**/TestBeeLineDriver.class,**/TestHiveServer2Concurrency.class,**/*$*.class,${test.junit.exclude}"
 />
       </batchtest>
       <batchtest todir="${test.build.dir}" if="testcase">
         <fileset dir="${test.build.classes}" includes="**/${testcase}.class"/>

Modified: 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: 
http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1524226&r1=1524225&r2=1524226&view=diff
==============================================================================
--- 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
 (original)
+++ 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
 Tue Sep 17 21:13:53 2013
@@ -31,6 +31,8 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression;
@@ -64,6 +66,8 @@ import org.apache.hadoop.hive.ql.exec.ve
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopLong;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDouble;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -141,6 +145,17 @@ public class VectorizationContext {
     }
   }
 
+  /* Return true if we are running in the planner, and false if we
+   * are running in a task.
+   */
+  /*
+  private boolean isPlanner() {
+
+    // This relies on the behavior that columnMap is null in the planner.
+    return columnMap == null;
+  }
+  */
+
   private class OutputColumnManager {
     private final int initialOutputCol;
     private int outputColCount = 0;
@@ -243,8 +258,12 @@ public class VectorizationContext {
       ve = getVectorExpression((ExprNodeColumnDesc) exprDesc);
     } else if (exprDesc instanceof ExprNodeGenericFuncDesc) {
       ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
-      ve = getVectorExpression(expr.getGenericUDF(),
-          expr.getChildExprs());
+      if (isCustomUDF(expr)) {
+        ve = getCustomUDFExpression(expr);
+      } else {
+        ve = getVectorExpression(expr.getGenericUDF(),
+            expr.getChildExprs());
+      }
     } else if (exprDesc instanceof ExprNodeConstantDesc) {
       ve = getConstantVectorExpression((ExprNodeConstantDesc) exprDesc);
     }
@@ -254,6 +273,21 @@ public class VectorizationContext {
     return ve;
   }
 
+  // Return true if this is a custom UDF or custom GenericUDF.
+  // This is for use only in the planner. It will fail in a task.
+  public static boolean isCustomUDF(ExprNodeGenericFuncDesc expr) {
+    String udfName = expr.getFuncText();
+    if (udfName == null) {
+      return false;
+    }
+    FunctionInfo funcInfo = FunctionRegistry.getFunctionInfo(udfName);
+    if (funcInfo == null) {
+      return false;
+    }
+    boolean isNativeFunc = funcInfo.isNative();
+    return !isNativeFunc;
+  }
+
   /**
    * Handles only the special case of unary operators on a constant.
    * @param exprDesc
@@ -474,6 +508,104 @@ public class VectorizationContext {
     throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not 
supported");
   }
 
+  /*
+   * Return vector expression for a custom (i.e. not built-in) UDF.
+   */
+  private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr)
+      throws HiveException {
+
+    //GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF();
+    List<ExprNodeDesc> childExprList = expr.getChildExprs();
+
+    // argument descriptors
+    VectorUDFArgDesc[] argDescs = new 
VectorUDFArgDesc[expr.getChildExprs().size()];
+    for (int i = 0; i < argDescs.length; i++) {
+      argDescs[i] = new VectorUDFArgDesc();
+    }
+
+    // positions of variable arguments (columns or non-constant expressions)
+    List<Integer> variableArgPositions = new ArrayList<Integer>();
+
+    // Column numbers of batch corresponding to expression result arguments
+    List<Integer> exprResultColumnNums = new ArrayList<Integer>();
+
+    // Prepare children
+    List<VectorExpression> vectorExprs = new ArrayList<VectorExpression>();
+
+    for (int i = 0; i < childExprList.size(); i++) {
+      ExprNodeDesc child = childExprList.get(i);
+      if (child instanceof ExprNodeGenericFuncDesc) {
+        VectorExpression e = getVectorExpression(child);
+        vectorExprs.add(e);
+        variableArgPositions.add(i);
+        exprResultColumnNums.add(e.getOutputColumn());
+        argDescs[i].setVariable(e.getOutputColumn());
+      } else if (child instanceof ExprNodeColumnDesc) {
+        variableArgPositions.add(i);
+        argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) 
child).getColumn()));
+      } else if (child instanceof ExprNodeConstantDesc) {
+
+        // this is a constant
+        argDescs[i].setConstant((ExprNodeConstantDesc) child);
+      } else {
+        throw new HiveException("Unable to vectorize Custom UDF");
+      }
+    }
+
+    // Allocate output column and get column number;
+    int outputCol = -1;
+    String resultColVectorType;
+    String resultType = expr.getTypeInfo().getTypeName();
+    if (resultType.equalsIgnoreCase("string")) {
+      resultColVectorType = "String";
+    } else if (isIntFamily(resultType)) {
+      resultColVectorType = "Long";
+    } else if (isFloatFamily(resultType)) {
+      resultColVectorType = "Double";
+    } else if (resultType.equalsIgnoreCase("timestamp")) {
+      resultColVectorType = "Long";
+    } else {
+      throw new HiveException("Unable to vectorize due to unsupported custom 
UDF return type "
+                                + resultType);
+    }
+    outputCol = ocm.allocateOutputColumn(resultColVectorType);
+
+    // Make vectorized operator
+    VectorExpression ve;
+    ve = new VectorUDFAdaptor(expr, outputCol, resultColVectorType, argDescs);
+
+    // Set child expressions
+    VectorExpression[] childVEs = null;
+    if (exprResultColumnNums.size() != 0) {
+      childVEs = new VectorExpression[exprResultColumnNums.size()];
+      for (int i = 0; i < childVEs.length; i++) {
+        childVEs[i] = vectorExprs.get(i);
+      }
+    }
+    ve.setChildExpressions(childVEs);
+
+    // Free output columns if inputs have non-leaf expression trees.
+    for (Integer i : exprResultColumnNums) {
+      ocm.freeOutputColumn(i);
+    }
+    return ve;
+  }
+
+  // return true if this is any kind of float
+  public static boolean isFloatFamily(String resultType) {
+    return resultType.equalsIgnoreCase("double")
+        || resultType.equalsIgnoreCase("float");
+  }
+
+  // Return true if this data type is handled in the output vector as an 
integer.
+  public static boolean isIntFamily(String resultType) {
+    return resultType.equalsIgnoreCase("tinyint")
+        || resultType.equalsIgnoreCase("smallint")
+        || resultType.equalsIgnoreCase("int")
+        || resultType.equalsIgnoreCase("bigint")
+        || resultType.equalsIgnoreCase("boolean");
+  }
+
   /* Return a unary string vector expression. This is used for functions like
    * UPPER() and LOWER().
    */

Added: 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
URL: 
http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java?rev=1524226&view=auto
==============================================================================
--- 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
 (added)
+++ 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
 Tue Sep 17 21:13:53 2013
@@ -0,0 +1,346 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.udf;
+
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
+import org.apache.hadoop.io.Text;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+
+/**
+ * A VectorUDFAdaptor is a vectorized expression for invoking a custom
+ * UDF on zero or more input vectors or constants which are the function 
arguments.
+ */
+public class VectorUDFAdaptor extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int outputColumn;
+  private String resultType;
+  private VectorUDFArgDesc[] argDescs;
+  private ExprNodeGenericFuncDesc expr;
+
+  private transient GenericUDF genericUDF;
+  private transient GenericUDF.DeferredObject[] deferredChildren;
+  private transient ObjectInspector outputOI;
+  private transient ObjectInspector[] childrenOIs;
+  private transient VectorExpressionWriter[] writers;
+
+  public VectorUDFAdaptor() {
+    super();
+  }
+
+  public VectorUDFAdaptor (
+      ExprNodeGenericFuncDesc expr,
+      int outputColumn,
+      String resultType,
+      VectorUDFArgDesc[] argDescs) throws HiveException {
+
+    this();
+    this.expr = expr;
+    this.outputColumn = outputColumn;
+    this.resultType = resultType;
+    this.argDescs = argDescs;
+  }
+
+  // Initialize transient fields. To be called after deserialization of other 
fields.
+  public void init() throws HiveException, UDFArgumentException {
+    genericUDF = expr.getGenericUDF();
+    deferredChildren = new 
GenericUDF.DeferredObject[expr.getChildExprs().size()];
+    childrenOIs = new ObjectInspector[expr.getChildExprs().size()];
+    writers = 
VectorExpressionWriterFactory.getExpressionWriters(expr.getChildExprs());
+    for (int i = 0; i < childrenOIs.length; i++) {
+      childrenOIs[i] = writers[i].getObjectInspector();
+    }
+    outputOI = VectorExpressionWriterFactory.genVectorExpressionWritable(expr)
+        .getObjectInspector();
+
+    genericUDF.initialize(childrenOIs);
+
+    // Initialize constant arguments
+    for (int i = 0; i < argDescs.length; i++) {
+      if (argDescs[i].isConstant()) {
+        argDescs[i].prepareConstant();
+      }
+    }
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (genericUDF == null) {
+      try {
+        init();
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    int[] sel = batch.selected;
+    int n = batch.size;
+    ColumnVector outV = batch.cols[outputColumn];
+
+    // If the output column is of type string, initialize the buffer to 
receive data.
+    if (outV instanceof BytesColumnVector) {
+      ((BytesColumnVector) outV).initBuffer();
+    }
+
+    if (n == 0) {
+      //Nothing to do
+      return;
+    }
+
+    batch.cols[outputColumn].noNulls = true;
+
+    /* If all input columns are repeating, just evaluate function
+     * for row 0 in the batch and set output repeating.
+     */
+    if (allInputColsRepeating(batch)) {
+      setResult(0, batch);
+      batch.cols[outputColumn].isRepeating = true;
+      return;
+    } else {
+      batch.cols[outputColumn].isRepeating = false;
+    }
+
+    if (batch.selectedInUse) {
+      for(int j = 0; j != n; j++) {
+        int i = sel[j];
+        setResult(i, batch);
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        setResult(i, batch);
+      }
+    }
+  }
+
+  /* Return false if any input column is non-repeating, otherwise true.
+   * This returns false if all the arguments are constant or there
+   * are zero arguments.
+   *
+   * A possible future optimization is to set the output to isRepeating
+   * for cases of all-constant arguments for deterministic functions.
+   */
+  private boolean allInputColsRepeating(VectorizedRowBatch batch) {
+    int varArgCount = 0;
+    for (int i = 0; i < argDescs.length; i++) {
+      if (argDescs[i].isVariable() && 
!batch.cols[argDescs[i].getColumnNum()].isRepeating) {
+        return false;
+      }
+      varArgCount += 1;
+    }
+    if (varArgCount > 0) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  /* Calculate the function result for row i of the batch and
+   * set the output column vector entry i to the result.
+   */
+  private void setResult(int i, VectorizedRowBatch b) {
+
+    // get arguments
+    for (int j = 0; j < argDescs.length; j++) {
+      deferredChildren[j] = argDescs[j].getDeferredJavaObject(i, b, j, 
writers);
+    }
+
+    // call function
+    Object result;
+    try {
+      result = genericUDF.evaluate(deferredChildren);
+    } catch (HiveException e) {
+
+      /* For UDFs that expect primitive types (like int instead of Integer or 
IntWritable),
+       * this will catch the exception that happens if they are passed a 
NULL value.
+       * Then the default NULL handling logic will apply, and the result will 
be NULL.
+       */
+      result = null;
+    }
+
+    // set output column vector entry
+    if (result == null) {
+      b.cols[outputColumn].noNulls = false;
+      b.cols[outputColumn].isNull[i] = true;
+    } else {
+      b.cols[outputColumn].isNull[i] = false;
+      setOutputCol(b.cols[outputColumn], i, result);
+    }
+  }
+
+  private void setOutputCol(ColumnVector colVec, int i, Object value) {
+
+    /* Depending on the output type, get the value, cast the result to the
+     * correct type if needed, and assign the result into the output vector.
+     */
+    if (outputOI instanceof WritableStringObjectInspector) {
+      BytesColumnVector bv = (BytesColumnVector) colVec;
+      Text t;
+      if (value instanceof String) {
+        t = new Text((String) value);
+      } else {
+        t = ((WritableStringObjectInspector) 
outputOI).getPrimitiveWritableObject(value);
+      }
+      bv.setVal(i, t.getBytes(), 0, t.getLength());
+    } else if (outputOI instanceof WritableIntObjectInspector) {
+      LongColumnVector lv = (LongColumnVector) colVec;
+      if (value instanceof Integer) {
+        lv.vector[i] = (Integer) value;
+      } else {
+        lv.vector[i] = ((WritableIntObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof WritableLongObjectInspector) {
+      LongColumnVector lv = (LongColumnVector) colVec;
+      if (value instanceof Long) {
+        lv.vector[i] = (Long) value;
+      } else {
+        lv.vector[i] = ((WritableLongObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof WritableDoubleObjectInspector) {
+      DoubleColumnVector dv = (DoubleColumnVector) colVec;
+      if (value instanceof Double) {
+        dv.vector[i] = (Double) value;
+      } else {
+        dv.vector[i] = ((WritableDoubleObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof WritableFloatObjectInspector) {
+      DoubleColumnVector dv = (DoubleColumnVector) colVec;
+      if (value instanceof Float) {
+        dv.vector[i] = (Float) value;
+      } else {
+        dv.vector[i] = ((WritableFloatObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof WritableShortObjectInspector) {
+      LongColumnVector lv = (LongColumnVector) colVec;
+      if (value instanceof Short) {
+        lv.vector[i] = (Short) value;
+      } else {
+        lv.vector[i] = ((WritableShortObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof WritableByteObjectInspector) {
+      LongColumnVector lv = (LongColumnVector) colVec;
+      if (value instanceof Byte) {
+        lv.vector[i] = (Byte) value;
+      } else {
+        lv.vector[i] = ((WritableByteObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof WritableTimestampObjectInspector) {
+      LongColumnVector lv = (LongColumnVector) colVec;
+      Timestamp ts;
+      if (value instanceof Timestamp) {
+        ts = (Timestamp) value;
+      } else {
+        ts = ((WritableTimestampObjectInspector) 
outputOI).getPrimitiveJavaObject(value);
+      }
+      /* Calculate the number of nanoseconds since the epoch as a long 
integer. By convention
+       * that is how Timestamp values are operated on in a vector.
+       */
+      long l = ts.getTime() * 1000000  // Shift the milliseconds value over by 
6 digits
+                                       // to scale for nanosecond precision.
+                                       // The milliseconds digits will by 
convention be all 0s.
+            + ts.getNanos() % 1000000; // Add on the remaining nanos.
+                                       // The % 1000000 operation removes the 
ms values
+                                       // so that the milliseconds are not 
counted twice.
+      lv.vector[i] = l;
+    } else if (outputOI instanceof WritableBooleanObjectInspector) {
+      LongColumnVector lv = (LongColumnVector) colVec;
+      if (value instanceof Boolean) {
+        lv.vector[i] = (Boolean) value ? 1 : 0;
+      } else {
+        lv.vector[i] = ((WritableBooleanObjectInspector) outputOI).get(value) 
? 1 : 0;
+      }
+    } else {
+      throw new RuntimeException("Unhandled object type " + 
outputOI.getTypeName());
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return resultType;
+  }
+
+  public String getResultType() {
+    return resultType;
+  }
+
+  public void setResultType(String resultType) {
+    this.resultType = resultType;
+  }
+
+  public VectorUDFArgDesc[] getArgDescs() {
+    return argDescs;
+  }
+
+  public void setArgDescs(VectorUDFArgDesc[] argDescs) {
+    this.argDescs = argDescs;
+  }
+
+  public ExprNodeGenericFuncDesc getExpr() {
+    return expr;
+  }
+
+  public void setExpr(ExprNodeGenericFuncDesc expr) {
+    this.expr = expr;
+  }
+}

Added: 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
URL: 
http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java?rev=1524226&view=auto
==============================================================================
--- 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
 (added)
+++ 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
 Tue Sep 17 21:13:53 2013
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.udf;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+
+/**
+ * Descriptor for function argument.
+ */
+public class VectorUDFArgDesc implements Serializable {
+
+  private static final long serialVersionUID = 1L;
+
+  private boolean isConstant;
+  private int columnNum;
+  private transient GenericUDF.DeferredJavaObject constObjVal;
+  private ExprNodeConstantDesc constExpr;
+
+  public VectorUDFArgDesc() {
+  }
+
+  /**
+   * Set this argument to a constant value extracted from the
+   * expression tree.
+   */
+  public void setConstant(ExprNodeConstantDesc expr) {
+    isConstant = true;
+    constExpr = expr;
+  }
+
+  /* Prepare the constant for use when the function is called. To be used
+   * during initialization.
+   */
+  public void prepareConstant() {
+    PrimitiveCategory pc = ((PrimitiveTypeInfo) constExpr.getTypeInfo())
+        .getPrimitiveCategory();
+
+    // Convert from Java to Writable
+    Object writableValue = PrimitiveObjectInspectorFactory
+        .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject(
+          constExpr.getValue());
+
+    constObjVal = new GenericUDF.DeferredJavaObject(writableValue);
+  }
+
+  /**
+   * Set this argument to be a "variable" one which is to be taken from
+   * a specified column vector number i.
+   */
+  public void setVariable(int i) {
+    columnNum = i;
+  }
+
+  public boolean isConstant() {
+    return isConstant;
+  }
+
+  public boolean isVariable() {
+    return !isConstant;
+  }
+
+  public int getColumn() {
+    return columnNum;
+  }
+
+  public DeferredObject getDeferredJavaObject(int row, VectorizedRowBatch b, 
int argPosition,
+      VectorExpressionWriter[] writers) {
+
+    if (isConstant()) {
+      return this.constObjVal;
+    } else {
+
+      // get column
+      ColumnVector cv = b.cols[columnNum];
+
+      // write value to object that can be inspected
+      Object o;
+      try {
+        o = writers[argPosition].writeValue(cv, row);
+        return new GenericUDF.DeferredJavaObject(o);
+      } catch (HiveException e) {
+        throw new RuntimeException("Unable to get Java object from 
VectorizedRowBatch");
+      }
+    }
+  }
+
+  public boolean getIsConstant() {
+    return isConstant;
+  }
+
+  public void setIsConstant(boolean isConstant) {
+    this.isConstant = isConstant;
+  }
+
+  public int getColumnNum() {
+    return columnNum;
+  }
+
+  public void setColumnNum(int columnNum) {
+    this.columnNum = columnNum;
+  }
+
+  public ExprNodeConstantDesc getConstExpr() {
+    return constExpr;
+  }
+
+  public void setConstExpr(ExprNodeConstantDesc constExpr) {
+    this.constExpr = constExpr;
+  }
+}

Modified: 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: 
http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1524226&r1=1524225&r2=1524226&view=diff
==============================================================================
--- 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
 (original)
+++ 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
 Tue Sep 17 21:13:53 2013
@@ -461,7 +461,7 @@ public class Vectorizer implements Physi
     }
     if (desc instanceof ExprNodeGenericFuncDesc) {
       ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
-      boolean r = validateGenericUdf(d.getGenericUDF());
+      boolean r = validateGenericUdf(d);
       if (!r) {
         return false;
       }
@@ -474,7 +474,11 @@ public class Vectorizer implements Physi
     return true;
   }
 
-  private boolean validateGenericUdf(GenericUDF genericUDF) {
+  private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) {
+    if (VectorizationContext.isCustomUDF(genericUDFExpr)) {
+      return true;
+    }
+    GenericUDF genericUDF = genericUDFExpr.getGenericUDF();
     if (genericUDF instanceof GenericUDFBridge) {
       Class<? extends UDF> udf = ((GenericUDFBridge) genericUDF).getUdfClass();
       return supportedGenericUDFs.contains(udf);

Modified: 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: 
http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1524226&r1=1524225&r2=1524226&view=diff
==============================================================================
--- 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
 (original)
+++ 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
 Tue Sep 17 21:13:53 2013
@@ -659,7 +659,7 @@ public final class TypeCheckProcFactory 
 
       List<ExprNodeDesc> childrenList = new 
ArrayList<ExprNodeDesc>(children.length);
       childrenList.addAll(Arrays.asList(children));
-      return ExprNodeGenericFuncDesc.newInstance(genericUDF, childrenList);
+      return ExprNodeGenericFuncDesc.newInstance(genericUDF, udfName, 
childrenList);
     }
 
     static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
@@ -724,7 +724,7 @@ public final class TypeCheckProcFactory 
           // Calculate TypeInfo
           TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo();
           desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry
-              .getGenericUDFForIndex(), children);
+              .getGenericUDFForIndex(), funcText, children);
         } else if (myt.getCategory() == Category.MAP) {
           // Only allow constant map key for now
           if (!(children.get(1) instanceof ExprNodeConstantDesc)) {
@@ -740,7 +740,7 @@ public final class TypeCheckProcFactory 
           // Calculate TypeInfo
           TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo();
           desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry
-              .getGenericUDFForIndex(), children);
+              .getGenericUDFForIndex(), funcText, children);
         } else {
           throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr,
               myt.getTypeName()));
@@ -861,7 +861,7 @@ public final class TypeCheckProcFactory 
           }
         }
 
-        desc = ExprNodeGenericFuncDesc.newInstance(fi.getGenericUDF(), 
children);
+        desc = ExprNodeGenericFuncDesc.newInstance(fi.getGenericUDF(), 
funcText, children);
       }
       // UDFOPPositive is a no-op.
       // However, we still create it, and then remove it here, to make sure we

Modified: 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
URL: 
http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java?rev=1524226&r1=1524225&r2=1524226&view=diff
==============================================================================
--- 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
 (original)
+++ 
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
 Tue Sep 17 21:13:53 2013
@@ -62,6 +62,7 @@ public class ExprNodeGenericFuncDesc ext
    */
   private GenericUDF genericUDF;
   private List<ExprNodeDesc> childExprs;
+  private transient String funcText;
   /**
    * This class uses a writableObjectInspector rather than a TypeInfo to store
    * the canonical type information for this NodeDesc.
@@ -73,13 +74,19 @@ public class ExprNodeGenericFuncDesc ext
   public ExprNodeGenericFuncDesc() {
   }
 
+  /* If the function has an explicit name like func(args) then call a
+   * constructor that explicitly provides the function name in the
+   * funcText argument.
+   */
   public ExprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF,
+      String funcText,
       List<ExprNodeDesc> children) {
     
this(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo),
-         genericUDF, children);
+         genericUDF, funcText, children);
   }
 
   public ExprNodeGenericFuncDesc(ObjectInspector oi, GenericUDF genericUDF,
+      String funcText,
       List<ExprNodeDesc> children) {
     super(TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
     this.writableObjectInspector =
@@ -87,6 +94,18 @@ public class ExprNodeGenericFuncDesc ext
     assert (genericUDF != null);
     this.genericUDF = genericUDF;
     this.childExprs = children;
+    this.funcText = funcText;
+  }
+
+  // Backward-compatibility interfaces for functions without a user-visible 
name.
+  public ExprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF,
+      List<ExprNodeDesc> children) {
+    this(typeInfo, genericUDF, null, children);
+  }
+
+  public ExprNodeGenericFuncDesc(ObjectInspector oi, GenericUDF genericUDF,
+      List<ExprNodeDesc> children) {
+    this(oi, genericUDF, null, children);
   }
 
   @Override
@@ -165,17 +184,20 @@ public class ExprNodeGenericFuncDesc ext
       cloneCh.add(ch.clone());
     }
     ExprNodeGenericFuncDesc clone = new ExprNodeGenericFuncDesc(typeInfo,
-        FunctionRegistry.cloneGenericUDF(genericUDF), cloneCh);
+        FunctionRegistry.cloneGenericUDF(genericUDF), funcText, cloneCh);
     return clone;
   }
 
   /**
-   * Create a exprNodeGenericFuncDesc based on the genericUDFClass and the
-   * children parameters.
+   * Create an ExprNodeGenericFuncDesc based on the genericUDFClass and the
+   * children parameters. If the function has an explicit name, the
+   * newInstance method should be passed the function name in the funcText
+   * argument.
    *
    * @throws UDFArgumentException
    */
   public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF,
+      String funcText,
       List<ExprNodeDesc> children) throws UDFArgumentException {
     ObjectInspector[] childrenOIs = new ObjectInspector[children.size()];
     for (int i = 0; i < childrenOIs.length; i++) {
@@ -232,7 +254,15 @@ public class ExprNodeGenericFuncDesc ext
       }
     }
 
-    return new ExprNodeGenericFuncDesc(oi, genericUDF, children);
+    return new ExprNodeGenericFuncDesc(oi, genericUDF, funcText, children);
+  }
+
+  /* Backward-compatibility interface for the case where there is no explicit
+   * name for the function.
+   */
+  public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF,
+    List<ExprNodeDesc> children) throws UDFArgumentException {
+    return newInstance(genericUDF, null, children);
   }
 
   @Override
@@ -285,4 +315,8 @@ public class ExprNodeGenericFuncDesc ext
   public void setSortedExpr(boolean isSortedExpr) {
     this.isSortedExpr = isSortedExpr;
   }
+
+  public String getFuncText() {
+    return this.funcText;
+  }
 }

Added: 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java
URL: 
http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java?rev=1524226&view=auto
==============================================================================
--- 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java
 (added)
+++ 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java
 Tue Sep 17 21:13:53 2013
@@ -0,0 +1,310 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.udf;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
+import org.apache.hadoop.hive.ql.exec.vector.udf.generic.GenericUDFIsNull;
+import 
org.apache.hadoop.hive.ql.exec.vector.udf.legacy.ConcatTextLongDoubleUDF;
+import org.apache.hadoop.hive.ql.exec.vector.udf.legacy.LongUDF;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.junit.Test;
+
+/*
+ * Test the vectorized UDF adaptor to verify that custom legacy and generic
+ * UDFs can be run in vectorized mode.
+ */
+
+public class TestVectorUDFAdaptor {
+
+  static byte[] blue = null;
+  static byte[] red = null;
+
+  static {
+    try {
+      blue = "blue".getBytes("UTF-8");
+      red = "red".getBytes("UTF-8");
+    } catch (Exception e) {
+      ; // do nothing
+    }
+  }
+
+  @Test
+  public void testLongUDF()  {
+
+    // create a syntax tree for a simple function call "longudf(col0)"
+    ExprNodeGenericFuncDesc funcDesc;
+    TypeInfo typeInfo = TypeInfoFactory.longTypeInfo;
+    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("longudf", false,
+        LongUDF.class.getName());
+    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+    ExprNodeColumnDesc colDesc
+       = new ExprNodeColumnDesc(typeInfo, "col0", "tablename", false);
+    children.add(colDesc);
+    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1];
+    argDescs[0] = new VectorUDFArgDesc();
+    argDescs[0].setVariable(0);
+    funcDesc = new ExprNodeGenericFuncDesc(typeInfo, genericUDFBridge,
+        genericUDFBridge.getUdfName(), children);
+
+    // create the adaptor for this function call to work in vector mode
+    VectorUDFAdaptor vudf = null;
+    try {
+      vudf = new VectorUDFAdaptor(funcDesc, 1, "Long", argDescs);
+    } catch (HiveException e) {
+
+      // We should never get here.
+      assertTrue(false);
+    }
+
+    VectorizedRowBatch b = getBatchLongInLongOut();
+    vudf.evaluate(b);
+
+    // verify output
+    LongColumnVector out = (LongColumnVector) b.cols[1];
+    assertEquals(1000, out.vector[0]);
+    assertEquals(1001, out.vector[1]);
+    assertEquals(1002, out.vector[2]);
+    assertTrue(out.noNulls);
+    assertFalse(out.isRepeating);
+
+    // with nulls
+    b = getBatchLongInLongOut();
+    out = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = false;
+    vudf.evaluate(b);
+    assertFalse(out.noNulls);
+    assertEquals(1000, out.vector[0]);
+    assertEquals(1001, out.vector[1]);
+    assertTrue(out.isNull[2]);
+    assertFalse(out.isRepeating);
+
+    // with repeating
+    b = getBatchLongInLongOut();
+    out = (LongColumnVector) b.cols[1];
+    b.cols[0].isRepeating = true;
+    vudf.evaluate(b);
+
+    // The implementation may or may not set the output's isRepeating flag.
+    // That is implementation-defined.
+    assertTrue(b.cols[1].isRepeating && out.vector[0] == 1000
+        || !b.cols[1].isRepeating && out.vector[2] == 1000);
+    assertEquals(3, b.size);
+  }
+
+  @Test
+  public void testMultiArgumentUDF() {
+
+    // create a syntax tree for a function call "testudf(col0, col1, col2)"
+    ExprNodeGenericFuncDesc funcDesc;
+    TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo;
+    TypeInfo typeInfoLong = TypeInfoFactory.longTypeInfo;
+    TypeInfo typeInfoDbl = TypeInfoFactory.doubleTypeInfo;
+    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("testudf", false,
+        ConcatTextLongDoubleUDF.class.getName());
+    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+    children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", 
false));
+    children.add(new ExprNodeColumnDesc(typeInfoLong, "col1", "tablename", 
false));
+    children.add(new ExprNodeColumnDesc(typeInfoDbl, "col2", "tablename", 
false));
+
+    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[3];
+    for (int i = 0; i < 3; i++) {
+      argDescs[i] = new VectorUDFArgDesc();
+      argDescs[i].setVariable(i);
+    }
+    funcDesc = new ExprNodeGenericFuncDesc(typeInfoStr, genericUDFBridge,
+        genericUDFBridge.getUdfName(), children);
+
+    // create the adaptor for this function call to work in vector mode
+    VectorUDFAdaptor vudf = null;
+    try {
+      vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs);
+    } catch (HiveException e) {
+
+      // We should never get here.
+      assertTrue(false);
+      throw new RuntimeException(e);
+    }
+
+    // with no nulls
+    VectorizedRowBatch b = getBatchStrDblLongWithStrOut();
+    vudf.evaluate(b);
+    byte[] result = null;
+    byte[] result2 = null;
+    try {
+      result = "red:1:1.0".getBytes("UTF-8");
+      result2 = "blue:0:0.0".getBytes("UTF-8");
+    } catch (Exception e) {
+      ;
+    }
+    BytesColumnVector out = (BytesColumnVector) b.cols[3];
+    int cmp = StringExpr.compare(result, 0, result.length, out.vector[1],
+        out.start[1], out.length[1]);
+    assertEquals(0, cmp);
+    assertTrue(out.noNulls);
+
+    // with nulls
+    b = getBatchStrDblLongWithStrOut();
+    b.cols[1].noNulls = false;
+    vudf.evaluate(b);
+    out = (BytesColumnVector) b.cols[3];
+    assertFalse(out.noNulls);
+    assertTrue(out.isNull[1]);
+
+    // with all input columns repeating
+    b = getBatchStrDblLongWithStrOut();
+    b.cols[0].isRepeating = true;
+    b.cols[1].isRepeating = true;
+    b.cols[2].isRepeating = true;
+    vudf.evaluate(b);
+
+    out = (BytesColumnVector) b.cols[3];
+    assertTrue(out.isRepeating);
+    cmp = StringExpr.compare(result2, 0, result2.length, out.vector[0],
+        out.start[0], out.length[0]);
+    assertEquals(0, cmp);
+    assertTrue(out.noNulls);
+  }
+
+  private VectorizedRowBatch getBatchLongInLongOut() {
+    VectorizedRowBatch b = new VectorizedRowBatch(2);
+    LongColumnVector in = new LongColumnVector();
+    LongColumnVector out = new LongColumnVector();
+    b.cols[0] = in;
+    b.cols[1] = out;
+    in.vector[0] = 0;
+    in.vector[1] = 1;
+    in.vector[2] = 2;
+    in.isNull[2] = true;
+    in.noNulls = true;
+    b.size = 3;
+    return b;
+  }
+
+  private VectorizedRowBatch getBatchStrDblLongWithStrOut() {
+    VectorizedRowBatch b = new VectorizedRowBatch(4);
+    BytesColumnVector strCol = new BytesColumnVector();
+    LongColumnVector longCol = new LongColumnVector();
+    DoubleColumnVector dblCol = new DoubleColumnVector();
+    BytesColumnVector outCol = new BytesColumnVector();
+    b.cols[0] = strCol;
+    b.cols[1] = longCol;
+    b.cols[2] = dblCol;
+    b.cols[3] = outCol;
+
+    strCol.initBuffer();
+    strCol.setVal(0, blue, 0, blue.length);
+    strCol.setVal(1, red, 0, red.length);
+    longCol.vector[0] = 0;
+    longCol.vector[1] = 1;
+    dblCol.vector[0] = 0.0;
+    dblCol.vector[1] = 1.0;
+
+    // set one null value for possible later use
+    longCol.isNull[1] = true;
+
+    // but have no nulls initially
+    longCol.noNulls = true;
+    strCol.noNulls = true;
+    dblCol.noNulls = true;
+    outCol.initBuffer();
+    b.size = 2;
+    return b;
+  }
+
+
+  // test the UDF adaptor for a generic UDF (as opposed to a legacy UDF)
+  @Test
+  public void testGenericUDF() {
+
+    // create a syntax tree for a function call 'myisnull(col0, "UNKNOWN")'
+    ExprNodeGenericFuncDesc funcDesc;
+    GenericUDF genericUDF = new GenericUDFIsNull();
+    TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo;
+
+    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+    children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", 
false));
+    children.add(new ExprNodeConstantDesc(typeInfoStr, "UNKNOWN"));
+
+    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[2];
+    for (int i = 0; i < 2; i++) {
+      argDescs[i] = new VectorUDFArgDesc();
+    }
+    argDescs[0].setVariable(0);
+    argDescs[1].setConstant((ExprNodeConstantDesc) children.get(1));
+    funcDesc = new ExprNodeGenericFuncDesc(typeInfoStr, genericUDF, 
"myisnull", children);
+
+    // create the adaptor for this function call to work in vector mode
+    VectorUDFAdaptor vudf = null;
+    try {
+      vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs);
+    } catch (HiveException e) {
+
+      // We should never get here.
+      assertTrue(false);
+    }
+
+    VectorizedRowBatch b;
+
+    byte[] red = null;
+    byte[] unknown = null;
+    try {
+      red = "red".getBytes("UTF-8");
+      unknown = "UNKNOWN".getBytes("UTF-8");
+    } catch (Exception e) {
+      ;
+    }
+    BytesColumnVector out;
+
+    // with nulls
+    b = getBatchStrDblLongWithStrOut();
+    b.cols[0].noNulls = false;
+    b.cols[0].isNull[0] = true; // set 1st entry to null
+    vudf.evaluate(b);
+    out = (BytesColumnVector) b.cols[3];
+
+    // verify outputs
+    int cmp = StringExpr.compare(red, 0, red.length,
+        out.vector[1], out.start[1], out.length[1]);
+    assertEquals(0, cmp);
+    cmp = StringExpr.compare(unknown, 0, unknown.length,
+        out.vector[0], out.start[0], out.length[0]);
+    assertEquals(0, cmp);
+
+    // output entry should not be null for null input for this particular 
generic UDF
+    assertTrue(out.noNulls || !out.isNull[0]);
+  }
+}

Added: 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/GenericUDFIsNull.java
URL: 
http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/GenericUDFIsNull.java?rev=1524226&view=auto
==============================================================================
--- 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/GenericUDFIsNull.java
 (added)
+++ 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/GenericUDFIsNull.java
 Tue Sep 17 21:13:53 2013
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+@Description(name = "myisnull",
+value = "_FUNC_(value,default_value) - Returns default value if value is null 
else returns value",
+extended = "Example:\n"
++ "  > SELECT _FUNC_(null,'bla') FROM src LIMIT 1;\n" + "  bla")
+/*
+ * This is a copy of GenericUDFNvl, which is built-in. We'll make it a generic
+ * custom UDF for test purposes.
+ */
+public class GenericUDFIsNull extends GenericUDF{
+  private transient GenericUDFUtils.ReturnObjectInspectorResolver 
returnOIResolver;
+  private transient ObjectInspector[] argumentOIs;
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws 
UDFArgumentException {
+    argumentOIs = arguments;
+    if (arguments.length != 2) {
+      throw new UDFArgumentLengthException(
+          "The operator 'MYISNULL'  accepts 2 arguments.");
+    }
+    returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
+    if (!(returnOIResolver.update(arguments[0]) && returnOIResolver
+        .update(arguments[1]))) {
+      throw new UDFArgumentTypeException(2,
+          "The first and seconds arguments of function MYISNULL should have 
the same type, "
+          + "but they are different: \"" + arguments[0].getTypeName()
+          + "\" and \"" + arguments[1].getTypeName() + "\"");
+    }
+    return returnOIResolver.get();
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    Object retVal = returnOIResolver.convertIfNecessary(arguments[0].get(),
+        argumentOIs[0]);
+    if (retVal == null ){
+      retVal = returnOIResolver.convertIfNecessary(arguments[1].get(),
+          argumentOIs[1]);
+    }
+    return retVal;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("if ");
+    sb.append(children[0]);
+    sb.append(" is null ");
+    sb.append("returns");
+    sb.append(children[1]);
+    return sb.toString() ;
+  }
+
+}

Added: 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/ConcatTextLongDoubleUDF.java
URL: 
http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/ConcatTextLongDoubleUDF.java?rev=1524226&view=auto
==============================================================================
--- 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/ConcatTextLongDoubleUDF.java
 (added)
+++ 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/ConcatTextLongDoubleUDF.java
 Tue Sep 17 21:13:53 2013
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.udf.legacy;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.io.Text;
+
+@Description(
+               name = "testudf",
+               value = "_FUNC_(str) - combines arguments to output string",
+               extended = "Example:\n" +
+               "  > SELECT testudf(name, dob, salary) FROM employee;\n" +
+               "  Jack"
+               )
+
+/* This is a test function that takes three different kinds
+ * of arguments, for use to verify vectorized UDF invocation.
+ */
+public class ConcatTextLongDoubleUDF extends UDF {
+       public Text evaluate(Text s, Long i, Double d) {
+
+               if (s == null
+                               || i == null
+                               || d == null) {
+                       return null;
+               }
+               StringBuilder sb = new StringBuilder();
+               sb.append(s.toString());
+               sb.append(":");
+               sb.append(i);
+               sb.append(":");
+               sb.append(d);
+               return new Text(sb.toString());
+       }
+}

Added: 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/LongUDF.java
URL: 
http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/LongUDF.java?rev=1524226&view=auto
==============================================================================
--- 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/LongUDF.java
 (added)
+++ 
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/LongUDF.java
 Tue Sep 17 21:13:53 2013
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.udf.legacy;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.LongWritable;
+
+/* A UDF like one a user would create, implementing the UDF interface.
+ * This is to be used to test the vectorized UDF adaptor for legacy-style UDFs.
+ */
+
+@Description(
+   name = "longudf",
+   value = "_FUNC_(arg) - returns arg + 1000",
+   extended = "Example:\n" +
+   "  > SELECT longudf(eno) FROM employee;\n"
+   )
+
+public class LongUDF extends UDF {
+ public LongWritable evaluate(LongWritable i) {
+   if (i == null) {
+     return null;
+   }
+   return new LongWritable(i.get() + 1000);
+ }
+}
\ No newline at end of file


Reply via email to