http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig deleted file mode 100644 index 20cc894..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig +++ /dev/null @@ -1,3771 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import java.lang.reflect.Constructor; -import java.nio.charset.StandardCharsets; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.regex.Pattern; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.hadoop.hive.common.type.Date; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.UDF; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType; -import org.apache.hadoop.hive.ql.exec.vector.expressions.*; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; -import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; -import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import 
org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; -import org.apache.hadoop.hive.ql.udf.*; -import org.apache.hadoop.hive.ql.udf.generic.*; -import org.apache.hadoop.hive.serde2.ByteStream.Output; -import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; -import org.apache.hadoop.hive.serde2.io.DateWritableV2; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hive.common.util.AnnotationUtils; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - -/** - * Context class for vectorization 
execution. - * Main role is to map column names to column indices and serves as a - * factory class for building vectorized expressions out of descriptors. - * - */ -public class VectorizationContext { - - private static final Logger LOG = LoggerFactory.getLogger( - VectorizationContext.class.getName()); - - private final String contextName; - private final int level; - - VectorExpressionDescriptor vMap; - - private final List<String> initialColumnNames; - private List<TypeInfo> initialTypeInfos; - private List<DataTypePhysicalVariation> initialDataTypePhysicalVariations; - - private List<Integer> projectedColumns; - private List<String> projectionColumnNames; - private Map<String, Integer> projectionColumnMap; - - //columnName to column position map - // private final Map<String, Integer> columnMap; - private int firstOutputColumnIndex; - - public enum HiveVectorAdaptorUsageMode { - NONE, - CHOSEN, - ALL; - - public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) { - String string = HiveConf.getVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_USAGE_MODE); - return valueOf(string.toUpperCase()); - } - } - - private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode; - private boolean testVectorAdaptorOverride; - - public enum HiveVectorIfStmtMode { - ADAPTOR, - GOOD, - BETTER; - - public static HiveVectorIfStmtMode getHiveConfValue(HiveConf hiveConf) { - String string = HiveConf.getVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE); - return valueOf(string.toUpperCase()); - } - } - - private HiveVectorIfStmtMode hiveVectorIfStmtMode; - - //when set to true use the overflow checked vector expressions - private boolean useCheckedVectorExpressions; - - private boolean reuseScratchColumns = - HiveConf.ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS.defaultBoolVal; - - private boolean adaptorSuppressEvaluateExceptions; - - private void setHiveConfVars(HiveConf hiveConf) { - hiveVectorAdaptorUsageMode = 
HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf); - testVectorAdaptorOverride = - HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE); - hiveVectorIfStmtMode = HiveVectorIfStmtMode.getHiveConfValue(hiveConf); - this.reuseScratchColumns = - HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS); - this.ocm.setReuseColumns(reuseScratchColumns); - useCheckedVectorExpressions = - HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_USE_CHECKED_EXPRESSIONS); - adaptorSuppressEvaluateExceptions = - HiveConf.getBoolVar( - hiveConf, HiveConf.ConfVars.HIVE_VECTORIZED_ADAPTOR_SUPPRESS_EVALUATE_EXCEPTIONS); - } - - private void copyHiveConfVars(VectorizationContext vContextEnvironment) { - hiveVectorAdaptorUsageMode = vContextEnvironment.hiveVectorAdaptorUsageMode; - testVectorAdaptorOverride = vContextEnvironment.testVectorAdaptorOverride; - hiveVectorIfStmtMode = vContextEnvironment.hiveVectorIfStmtMode; - this.reuseScratchColumns = vContextEnvironment.reuseScratchColumns; - useCheckedVectorExpressions = vContextEnvironment.useCheckedVectorExpressions; - adaptorSuppressEvaluateExceptions = vContextEnvironment.adaptorSuppressEvaluateExceptions; - this.ocm.setReuseColumns(reuseScratchColumns); - } - - // Convenient constructor for initial batch creation takes - // a list of columns names and maps them to 0..n-1 indices. 
- public VectorizationContext( - String contextName, - List<String> initialColumnNames, - List<TypeInfo> initialTypeInfos, - List<DataTypePhysicalVariation> initialDataTypePhysicalVariations, - HiveConf hiveConf) { - this.contextName = contextName; - level = 0; - this.initialColumnNames = initialColumnNames; - this.initialTypeInfos = initialTypeInfos; - this.initialDataTypePhysicalVariations = initialDataTypePhysicalVariations; - this.projectionColumnNames = initialColumnNames; - - projectedColumns = new ArrayList<Integer>(); - projectionColumnMap = new HashMap<String, Integer>(); - for (int i = 0; i < this.projectionColumnNames.size(); i++) { - projectedColumns.add(i); - projectionColumnMap.put(projectionColumnNames.get(i), i); - } - - int firstOutputColumnIndex = projectedColumns.size(); - this.ocm = new OutputColumnManager(firstOutputColumnIndex); - this.firstOutputColumnIndex = firstOutputColumnIndex; - vMap = new VectorExpressionDescriptor(); - - if (hiveConf != null) { - setHiveConfVars(hiveConf); - } - } - - // Convenient constructor for initial batch creation takes - // a list of columns names and maps them to 0..n-1 indices. 
- public VectorizationContext(String contextName, List<String> initialColumnNames, - HiveConf hiveConf) { - this.contextName = contextName; - level = 0; - this.initialColumnNames = initialColumnNames; - this.projectionColumnNames = initialColumnNames; - - projectedColumns = new ArrayList<Integer>(); - projectionColumnMap = new HashMap<String, Integer>(); - for (int i = 0; i < this.projectionColumnNames.size(); i++) { - projectedColumns.add(i); - projectionColumnMap.put(projectionColumnNames.get(i), i); - } - - int firstOutputColumnIndex = projectedColumns.size(); - this.ocm = new OutputColumnManager(firstOutputColumnIndex); - this.firstOutputColumnIndex = firstOutputColumnIndex; - vMap = new VectorExpressionDescriptor(); - - if (hiveConf != null) { - setHiveConfVars(hiveConf); - } - } - - public VectorizationContext(String contextName, List<String> initialColumnNames, - VectorizationContext vContextEnvironment) { - this(contextName, initialColumnNames, (HiveConf) null); - copyHiveConfVars(vContextEnvironment); - } - - @VisibleForTesting - public VectorizationContext(String contextName, List<String> initialColumnNames) { - this(contextName, initialColumnNames, (HiveConf) null); - } - - // Constructor to with the individual addInitialColumn method - // followed by a call to finishedAddingInitialColumns. 
- public VectorizationContext(String contextName, HiveConf hiveConf) { - this.contextName = contextName; - level = 0; - initialColumnNames = new ArrayList<String>(); - projectedColumns = new ArrayList<Integer>(); - projectionColumnNames = new ArrayList<String>(); - projectionColumnMap = new HashMap<String, Integer>(); - this.ocm = new OutputColumnManager(0); - this.firstOutputColumnIndex = 0; - vMap = new VectorExpressionDescriptor(); - - if (hiveConf != null) { - setHiveConfVars(hiveConf); - } - - } - - @VisibleForTesting - public VectorizationContext(String contextName) { - this(contextName, (HiveConf) null); - } - - // Constructor useful making a projection vectorization context. E.g. VectorSelectOperator. - // Use with resetProjectionColumns and addProjectionColumn. - // Keeps existing output column map, etc. - public VectorizationContext(String contextName, VectorizationContext vContext) { - this.contextName = contextName; - level = vContext.level + 1; - this.initialColumnNames = vContext.initialColumnNames; - this.initialTypeInfos = vContext.initialTypeInfos; - this.initialDataTypePhysicalVariations = vContext.initialDataTypePhysicalVariations; - this.projectedColumns = new ArrayList<Integer>(); - this.projectionColumnNames = new ArrayList<String>(); - this.projectionColumnMap = new HashMap<String, Integer>(); - - this.ocm = vContext.ocm; - this.firstOutputColumnIndex = vContext.firstOutputColumnIndex; - vMap = new VectorExpressionDescriptor(); - - copyHiveConfVars(vContext); - } - - // Add an initial column to a vectorization context when - // a vectorized row batch is being created. - public void addInitialColumn(String columnName) { - initialColumnNames.add(columnName); - int index = projectedColumns.size(); - projectedColumns.add(index); - projectionColumnNames.add(columnName); - projectionColumnMap.put(columnName, index); - } - - // Finishes the vectorization context after all the initial - // columns have been added. 
- @VisibleForTesting - public void finishedAddingInitialColumns() { - int firstOutputColumnIndex = projectedColumns.size(); - this.ocm = new OutputColumnManager(firstOutputColumnIndex); - this.ocm.setReuseColumns(this.reuseScratchColumns); - this.firstOutputColumnIndex = firstOutputColumnIndex; - } - - // Empties the projection columns. - public void resetProjectionColumns() { - projectedColumns = new ArrayList<Integer>(); - projectionColumnNames = new ArrayList<String>(); - projectionColumnMap = new HashMap<String, Integer>(); - } - - // Add a projection column to a projection vectorization context. - public void addProjectionColumn(String columnName, int vectorBatchColIndex) { - if (vectorBatchColIndex < 0) { - throw new RuntimeException("Negative projected column number"); - } - projectedColumns.add(vectorBatchColIndex); - projectionColumnNames.add(columnName); - projectionColumnMap.put(columnName, vectorBatchColIndex); - } - - public void setInitialTypeInfos(List<TypeInfo> initialTypeInfos) { - this.initialTypeInfos = initialTypeInfos; - final int size = initialTypeInfos.size(); - initialDataTypePhysicalVariations = new ArrayList<DataTypePhysicalVariation>(size); - for (int i = 0; i < size; i++) { - initialDataTypePhysicalVariations.add(DataTypePhysicalVariation.NONE); - } - } - - public void setInitialDataTypePhysicalVariations( - List<DataTypePhysicalVariation> initialDataTypePhysicalVariations) { - this.initialDataTypePhysicalVariations = initialDataTypePhysicalVariations; - } - - public List<String> getInitialColumnNames() { - return initialColumnNames; - } - - public List<Integer> getProjectedColumns() { - return projectedColumns; - } - - public List<String> getProjectionColumnNames() { - return projectionColumnNames; - } - - public Map<String, Integer> getProjectionColumnMap() { - return projectionColumnMap; - } - - public TypeInfo[] getInitialTypeInfos() { - return initialTypeInfos.toArray(new TypeInfo[0]); - } - - public TypeInfo getTypeInfo(int 
columnNum) throws HiveException { - if (initialTypeInfos == null) { - throw new HiveException("initialTypeInfos array is null in contextName " + contextName); - } - final int initialSize = initialTypeInfos.size(); - if (columnNum < initialSize) { - return initialTypeInfos.get(columnNum); - } else { - String typeName = ocm.getScratchTypeName(columnNum); - - // Replace unparsable synonyms. - typeName = VectorizationContext.mapTypeNameSynonyms(typeName); - - // Make CHAR and VARCHAR type info parsable. - if (typeName.equals("char")) { - typeName = "char(" + HiveChar.MAX_CHAR_LENGTH + ")"; - } else if (typeName.equals("varchar")) { - typeName = "varchar(" + HiveVarchar.MAX_VARCHAR_LENGTH + ")"; - } - - TypeInfo typeInfo = - TypeInfoUtils.getTypeInfoFromTypeString(typeName); - return typeInfo; - } - } - - public DataTypePhysicalVariation getDataTypePhysicalVariation(int columnNum) throws HiveException { - if (initialDataTypePhysicalVariations == null) { - return null; - } - if (columnNum < initialDataTypePhysicalVariations.size()) { - return initialDataTypePhysicalVariations.get(columnNum); - } - return ocm.getDataTypePhysicalVariation(columnNum); - } - - public TypeInfo[] getAllTypeInfos() throws HiveException { - final int size = initialTypeInfos.size() + ocm.outputColCount; - - TypeInfo[] result = new TypeInfo[size]; - for (int i = 0; i < size; i++) { - result[i] = getTypeInfo(i); - } - return result; - } - - public static final Pattern decimalTypePattern = Pattern.compile("decimal.*", - Pattern.CASE_INSENSITIVE); - - public static final Pattern charTypePattern = Pattern.compile("char.*", - Pattern.CASE_INSENSITIVE); - - public static final Pattern varcharTypePattern = Pattern.compile("varchar.*", - Pattern.CASE_INSENSITIVE); - - public static final Pattern charVarcharTypePattern = Pattern.compile("char.*|varchar.*", - Pattern.CASE_INSENSITIVE); - - public static final Pattern structTypePattern = Pattern.compile("struct.*", - Pattern.CASE_INSENSITIVE); - - public 
static final Pattern listTypePattern = Pattern.compile("array.*", - Pattern.CASE_INSENSITIVE); - - public static final Pattern mapTypePattern = Pattern.compile("map.*", - Pattern.CASE_INSENSITIVE); - - //Map column number to type (this is always non-null for a useful vec context) - private OutputColumnManager ocm; - - // Set of UDF classes for type casting data types in row-mode. - private static Set<Class<?>> castExpressionUdfs = new HashSet<Class<?>>(); - static { - castExpressionUdfs.add(GenericUDFToString.class); - castExpressionUdfs.add(GenericUDFToDecimal.class); - castExpressionUdfs.add(GenericUDFToBinary.class); - castExpressionUdfs.add(GenericUDFToDate.class); - castExpressionUdfs.add(GenericUDFToUnixTimeStamp.class); - castExpressionUdfs.add(GenericUDFToUtcTimestamp.class); - castExpressionUdfs.add(GenericUDFToChar.class); - castExpressionUdfs.add(GenericUDFToVarchar.class); - castExpressionUdfs.add(GenericUDFTimestamp.class); - castExpressionUdfs.add(GenericUDFToIntervalYearMonth.class); - castExpressionUdfs.add(GenericUDFToIntervalDayTime.class); - castExpressionUdfs.add(UDFToByte.class); - castExpressionUdfs.add(UDFToBoolean.class); - castExpressionUdfs.add(UDFToDouble.class); - castExpressionUdfs.add(UDFToFloat.class); - castExpressionUdfs.add(UDFToInteger.class); - castExpressionUdfs.add(UDFToLong.class); - castExpressionUdfs.add(UDFToShort.class); - } - - // Set of GenericUDFs which require need implicit type casting of decimal parameters. - // Vectorization for mathmatical functions currently depends on decimal params automatically - // being converted to the return type (see getImplicitCastExpression()), which is not correct - // in the general case. This set restricts automatic type conversion to just these functions. 
- private static Set<Class<?>> udfsNeedingImplicitDecimalCast = new HashSet<Class<?>>(); - static { - udfsNeedingImplicitDecimalCast.add(GenericUDFOPPlus.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPMinus.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPMultiply.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPDivide.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPMod.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFRound.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFBRound.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFFloor.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFCbrt.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFCeil.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFAbs.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFPosMod.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFPower.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFFactorial.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPPositive.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPNegative.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFCoalesce.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFElt.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFGreatest.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFLeast.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFIn.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqual.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqualNS.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPNotEqual.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPLessThan.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqualOrLessThan.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPGreaterThan.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqualOrGreaterThan.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFBetween.class); - udfsNeedingImplicitDecimalCast.add(UDFSqrt.class); 
- udfsNeedingImplicitDecimalCast.add(UDFRand.class); - udfsNeedingImplicitDecimalCast.add(UDFLn.class); - udfsNeedingImplicitDecimalCast.add(UDFLog2.class); - udfsNeedingImplicitDecimalCast.add(UDFSin.class); - udfsNeedingImplicitDecimalCast.add(UDFAsin.class); - udfsNeedingImplicitDecimalCast.add(UDFCos.class); - udfsNeedingImplicitDecimalCast.add(UDFAcos.class); - udfsNeedingImplicitDecimalCast.add(UDFLog10.class); - udfsNeedingImplicitDecimalCast.add(UDFLog.class); - udfsNeedingImplicitDecimalCast.add(UDFExp.class); - udfsNeedingImplicitDecimalCast.add(UDFDegrees.class); - udfsNeedingImplicitDecimalCast.add(UDFRadians.class); - udfsNeedingImplicitDecimalCast.add(UDFAtan.class); - udfsNeedingImplicitDecimalCast.add(UDFTan.class); - udfsNeedingImplicitDecimalCast.add(UDFOPLongDivide.class); - } - - protected boolean needsImplicitCastForDecimal(GenericUDF udf) { - Class<?> udfClass = udf.getClass(); - if (udf instanceof GenericUDFBridge) { - udfClass = ((GenericUDFBridge) udf).getUdfClass(); - } - return udfsNeedingImplicitDecimalCast.contains(udfClass); - } - - public int getInputColumnIndex(String name) throws HiveException { - if (name == null) { - throw new HiveException("Null column name"); - } - if (!projectionColumnMap.containsKey(name)) { - throw new HiveException(String.format("The column %s is not in the vectorization context column map %s.", - name, projectionColumnMap.toString())); - } - final int projectedColumnNum = projectionColumnMap.get(name); - if (projectedColumnNum < 0) { - throw new HiveException("Negative projected column number"); - } - return projectedColumnNum; - } - - protected int getInputColumnIndex(ExprNodeColumnDesc colExpr) throws HiveException { - // Call the regular method since it does error checking. 
- return getInputColumnIndex(colExpr.getColumn()); - } - - private static class OutputColumnManager { - private final int initialOutputCol; - private int outputColCount = 0; - private boolean reuseScratchColumns = true; - - protected OutputColumnManager(int initialOutputCol) { - this.initialOutputCol = initialOutputCol; - } - - //The complete list of output columns. These should be added to the - //Vectorized row batch for processing. The index in the row batch is - //equal to the index in this array plus initialOutputCol. - //Start with size 100 and double when needed. - private String[] scratchVectorTypeNames = new String[100]; - private DataTypePhysicalVariation[] scratchDataTypePhysicalVariations = - new DataTypePhysicalVariation[100]; - - private final Set<Integer> usedOutputColumns = new HashSet<Integer>(); - - int allocateOutputColumn(TypeInfo typeInfo) throws HiveException { - return allocateOutputColumn(typeInfo, DataTypePhysicalVariation.NONE); - } - - int allocateOutputColumn(TypeInfo typeInfo, - DataTypePhysicalVariation dataTypePhysicalVariation) throws HiveException { - - if (initialOutputCol < 0) { - // This is a test calling. - return 0; - } - - // CONCERN: We currently differentiate DECIMAL columns by their precision and scale..., - // which could lead to a lot of extra unnecessary scratch columns. - String vectorTypeName = getScratchName(typeInfo); - int relativeCol = allocateOutputColumnInternal(vectorTypeName, dataTypePhysicalVariation); - return initialOutputCol + relativeCol; - } - - private int allocateOutputColumnInternal(String columnType, DataTypePhysicalVariation dataTypePhysicalVariation) { - for (int i = 0; i < outputColCount; i++) { - - // Re-use an existing, available column of the same required type. 
- if (usedOutputColumns.contains(i) || - !(scratchVectorTypeNames[i].equalsIgnoreCase(columnType) && - scratchDataTypePhysicalVariations[i] == dataTypePhysicalVariation)) { - continue; - } - //Use i - usedOutputColumns.add(i); - return i; - } - //Out of allocated columns - if (outputColCount < scratchVectorTypeNames.length) { - int newIndex = outputColCount; - scratchVectorTypeNames[outputColCount] = columnType; - scratchDataTypePhysicalVariations[outputColCount++] = dataTypePhysicalVariation; - usedOutputColumns.add(newIndex); - return newIndex; - } else { - //Expand the array - scratchVectorTypeNames = Arrays.copyOf(scratchVectorTypeNames, 2*outputColCount); - scratchDataTypePhysicalVariations = Arrays.copyOf(scratchDataTypePhysicalVariations, 2*outputColCount); - int newIndex = outputColCount; - scratchVectorTypeNames[outputColCount] = columnType; - scratchDataTypePhysicalVariations[outputColCount++] = dataTypePhysicalVariation; - usedOutputColumns.add(newIndex); - return newIndex; - } - } - - void freeOutputColumn(int index) { - if (initialOutputCol < 0 || reuseScratchColumns == false) { - // This is a test - return; - } - int colIndex = index-initialOutputCol; - if (colIndex >= 0) { - usedOutputColumns.remove(index-initialOutputCol); - } - } - - public int[] currentScratchColumns() { - TreeSet<Integer> treeSet = new TreeSet<Integer>(); - for (Integer col : usedOutputColumns) { - treeSet.add(initialOutputCol + col); - } - return ArrayUtils.toPrimitive(treeSet.toArray(new Integer[0])); - } - - public String getScratchTypeName(int columnNum) { - return scratchVectorTypeNames[columnNum - initialOutputCol]; - } - - public DataTypePhysicalVariation getDataTypePhysicalVariation(int columnNum) { - if (scratchDataTypePhysicalVariations == null) { - return null; - } - return scratchDataTypePhysicalVariations[columnNum - initialOutputCol]; - } - - // Allow debugging by disabling column reuse (input cols are never reused by design, only - // scratch cols are) - public 
void setReuseColumns(boolean reuseColumns) { - this.reuseScratchColumns = reuseColumns; - } - } - - public int allocateScratchColumn(TypeInfo typeInfo) throws HiveException { - return ocm.allocateOutputColumn(typeInfo); - } - - public int[] currentScratchColumns() { - return ocm.currentScratchColumns(); - } - - private VectorExpression getFilterOnBooleanColumnExpression(ExprNodeColumnDesc exprDesc, - int columnNum) throws HiveException { - VectorExpression expr = null; - - // Evaluate the column as a boolean, converting if necessary. - TypeInfo typeInfo = exprDesc.getTypeInfo(); - if (typeInfo.getCategory() == Category.PRIMITIVE && - ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) { - expr = new SelectColumnIsTrue(columnNum); - - expr.setInputTypeInfos(typeInfo); - expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); - - } else { - // Ok, we need to convert. - ArrayList<ExprNodeDesc> exprAsList = new ArrayList<ExprNodeDesc>(1); - exprAsList.add(exprDesc); - - // First try our cast method that will handle a few special cases. - VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList); - if (castToBooleanExpr == null) { - - // Ok, try the UDF. 
- castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList, - VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo); - if (castToBooleanExpr == null) { - throw new HiveException("Cannot vectorize converting expression " + - exprDesc.getExprString() + " to boolean"); - } - } - - final int outputColumnNum = castToBooleanExpr.getOutputColumnNum(); - - expr = new SelectColumnIsTrue(outputColumnNum); - - expr.setChildExpressions(new VectorExpression[] {castToBooleanExpr}); - - expr.setInputTypeInfos(castToBooleanExpr.getOutputTypeInfo()); - expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); - } - return expr; - } - - private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, - VectorExpressionDescriptor.Mode mode) throws HiveException { - int columnNum = getInputColumnIndex(exprDesc.getColumn()); - VectorExpression expr = null; - switch (mode) { - case FILTER: - expr = getFilterOnBooleanColumnExpression(exprDesc, columnNum); - break; - case PROJECTION: - { - expr = new IdentityExpression(columnNum); - - TypeInfo identityTypeInfo = exprDesc.getTypeInfo(); - DataTypePhysicalVariation identityDataTypePhysicalVariation = - getDataTypePhysicalVariation(columnNum); - - expr.setInputTypeInfos(identityTypeInfo); - expr.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation); - - expr.setOutputTypeInfo(identityTypeInfo); - expr.setOutputDataTypePhysicalVariation(identityDataTypePhysicalVariation); - } - break; - default: - throw new RuntimeException("Unexpected mode " + mode); - } - return expr; - } - - public VectorExpression[] getVectorExpressionsUpConvertDecimal64(List<ExprNodeDesc> exprNodes) - throws HiveException { - VectorExpression[] vecExprs = - getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); - final int size = vecExprs.length; - for (int i = 0; i < size; i++) { - VectorExpression vecExpr = vecExprs[i]; - if 
(vecExpr.getOutputColumnVectorType() == ColumnVector.Type.DECIMAL_64) { - vecExprs[i] = wrapWithDecimal64ToDecimalConversion(vecExpr); - } - } - return vecExprs; - } - - public VectorExpression[] getVectorExpressions(List<ExprNodeDesc> exprNodes) throws HiveException { - return getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); - } - - public VectorExpression[] getVectorExpressions(List<ExprNodeDesc> exprNodes, VectorExpressionDescriptor.Mode mode) - throws HiveException { - - int i = 0; - if (null == exprNodes) { - return new VectorExpression[0]; - } - VectorExpression[] ret = new VectorExpression[exprNodes.size()]; - for (ExprNodeDesc e : exprNodes) { - ret[i++] = getVectorExpression(e, mode); - } - return ret; - } - - public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) throws HiveException { - return getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); - } - - /** - * Returns a vector expression for a given expression - * description. - * @param exprDesc, Expression description - * @param mode - * @return {@link VectorExpression} - * @throws HiveException - */ - public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException { - VectorExpression ve = null; - if (exprDesc instanceof ExprNodeColumnDesc) { - ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode); - } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { - ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc; - // push not through between... 
- if ("not".equals(expr.getFuncText())) { - if (expr.getChildren() != null && expr.getChildren().size() == 1) { - ExprNodeDesc child = expr.getChildren().get(0); - if (child instanceof ExprNodeGenericFuncDesc) { - ExprNodeGenericFuncDesc childExpr = (ExprNodeGenericFuncDesc) child; - if ("between".equals(childExpr.getFuncText())) { - ExprNodeConstantDesc flag = (ExprNodeConstantDesc) childExpr.getChildren().get(0); - List<ExprNodeDesc> newChildren = new ArrayList<>(); - if (Boolean.TRUE.equals(flag.getValue())) { - newChildren.add(new ExprNodeConstantDesc(Boolean.FALSE)); - } else { - newChildren.add(new ExprNodeConstantDesc(Boolean.TRUE)); - } - newChildren - .addAll(childExpr.getChildren().subList(1, childExpr.getChildren().size())); - expr.setTypeInfo(childExpr.getTypeInfo()); - expr.setGenericUDF(childExpr.getGenericUDF()); - expr.setChildren(newChildren); - } - } - } - } - // Add cast expression if needed. Child expressions of a udf may return different data types - // and that would require converting their data types to evaluate the udf. - // For example decimal column added to an integer column would require integer column to be - // cast to decimal. - // Note: this is a no-op for custom UDFs - List<ExprNodeDesc> childExpressions = getChildExpressionsWithImplicitCast(expr.getGenericUDF(), - exprDesc.getChildren(), exprDesc.getTypeInfo()); - - // Are we forcing the usage of VectorUDFAdaptor for test purposes? - if (!testVectorAdaptorOverride) { - ve = getGenericUdfVectorExpression(expr.getGenericUDF(), - childExpressions, mode, exprDesc.getTypeInfo()); - } - if (ve == null) { - // Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor - // when configured. - // - // NOTE: We assume if hiveVectorAdaptorUsageMode has not been set it because we are - // executing a test that didn't create a HiveConf, etc. No usage of VectorUDFAdaptor in - // that case. 
- if (hiveVectorAdaptorUsageMode != null) { - switch (hiveVectorAdaptorUsageMode) { - case NONE: - // No VectorUDFAdaptor usage. - throw new HiveException( - "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString() - + " because hive.vectorized.adaptor.usage.mode=none"); - case CHOSEN: - if (isNonVectorizedPathUDF(expr, mode)) { - ve = getCustomUDFExpression(expr, mode); - } else { - throw new HiveException( - "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString() - + " because hive.vectorized.adaptor.usage.mode=chosen" - + " and the UDF wasn't one of the chosen ones"); - } - break; - case ALL: - if (LOG.isDebugEnabled()) { - LOG.debug("We will try to use the VectorUDFAdaptor for " + exprDesc.toString() - + " because hive.vectorized.adaptor.usage.mode=all"); - } - ve = getCustomUDFExpression(expr, mode); - break; - default: - throw new RuntimeException("Unknown hive vector adaptor usage mode " + - hiveVectorAdaptorUsageMode.name()); - } - if (ve == null) { - throw new HiveException( - "Unable vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString() - + " even for the VectorUDFAdaptor"); - } - } - } - } else if (exprDesc instanceof ExprNodeConstantDesc) { - ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(), - mode); - } else if (exprDesc instanceof ExprNodeDynamicValueDesc) { - ve = getDynamicValueVectorExpression((ExprNodeDynamicValueDesc) exprDesc, mode); - } else if (exprDesc instanceof ExprNodeFieldDesc) { - // Get the GenericUDFStructField to process the field of Struct type - ve = getGenericUDFStructField((ExprNodeFieldDesc)exprDesc, - mode, exprDesc.getTypeInfo()); - } - if (ve == null) { - throw new HiveException( - "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()); - } - if (LOG.isDebugEnabled()) { - LOG.debug("Input Expression = " + exprDesc.toString() - + ", Vectorized Expression = " + 
ve.toString()); - } - - return ve; - } - - private VectorExpression getGenericUDFStructField(ExprNodeFieldDesc exprNodeFieldDesc, - VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { - // set the arguments for GenericUDFStructField - List<ExprNodeDesc> children = new ArrayList<>(2); - children.add(exprNodeFieldDesc.getDesc()); - children.add(new ExprNodeConstantDesc(getStructFieldIndex(exprNodeFieldDesc))); - - return getVectorExpressionForUdf(null, GenericUDFStructField.class, children, mode, returnType); - } - - /** - * The field of Struct is stored in StructColumnVector.fields[index]. - * Check the StructTypeInfo.getAllStructFieldNames() and compare to the field name, get the index. - */ - private int getStructFieldIndex(ExprNodeFieldDesc exprNodeFieldDesc) throws HiveException { - ExprNodeDesc structNodeDesc = exprNodeFieldDesc.getDesc(); - String fieldName = exprNodeFieldDesc.getFieldName(); - StructTypeInfo structTypeInfo = (StructTypeInfo) structNodeDesc.getTypeInfo(); - int index = 0; - boolean isFieldExist = false; - for (String fn : structTypeInfo.getAllStructFieldNames()) { - if (fieldName.equals(fn)) { - isFieldExist = true; - break; - } - index++; - } - if (isFieldExist) { - return index; - } else { - throw new HiveException("Could not vectorize expression:" + exprNodeFieldDesc.toString() - + ", the field " + fieldName + " doesn't exist."); - } - } - - /** - * Given a udf and its children, return the common type to which the children's type should be - * cast. 
- */ - private TypeInfo getCommonTypeForChildExpressions(GenericUDF genericUdf, - List<ExprNodeDesc> children, TypeInfo returnType) throws HiveException { - TypeInfo commonType; - if (genericUdf instanceof GenericUDFBaseCompare) { - - // Apply comparison rules - TypeInfo tLeft = children.get(0).getTypeInfo(); - TypeInfo tRight = children.get(1).getTypeInfo(); - commonType = FunctionRegistry.getCommonClassForComparison(tLeft, tRight); - if (commonType == null) { - commonType = returnType; - } - } else if (genericUdf instanceof GenericUDFIn) { - TypeInfo colTi = children.get(0).getTypeInfo(); - if (colTi.getCategory() != Category.PRIMITIVE) { - return colTi; // Handled later, only struct will be supported. - } - TypeInfo opTi = GenericUDFUtils.deriveInType(children); - if (opTi == null || opTi.getCategory() != Category.PRIMITIVE) { - throw new HiveException("Cannot vectorize IN() - common type is " + opTi); - } - if (((PrimitiveTypeInfo)colTi).getPrimitiveCategory() != - ((PrimitiveTypeInfo)opTi).getPrimitiveCategory()) { - throw new HiveException("Cannot vectorize IN() - casting a column is not supported. " - + "Column type is " + colTi + " but the common type is " + opTi); - } - return colTi; - } else { - // The children type should be converted to return type - commonType = returnType; - } - return commonType; - } - - /** - * Add a cast expression to the expression tree if needed. The output of child expressions of a given UDF might - * need a cast if their return type is different from the return type of the UDF. - * - * @param genericUDF The given UDF - * @param children Child expressions of the UDF that might require a cast. - * @param returnType The return type of the UDF. - * @return List of child expressions added with cast. 
- */ - private List<ExprNodeDesc> getChildExpressionsWithImplicitCast(GenericUDF genericUDF, - List<ExprNodeDesc> children, TypeInfo returnType) throws HiveException { - - if (isCustomUDF(genericUDF.getUdfName())) { - // no implicit casts possible - return children; - } - - if (isExcludedFromCast(genericUDF)) { - // No implicit cast needed - return children; - } - if (children == null) { - return null; - } - - TypeInfo commonType = getCommonTypeForChildExpressions(genericUDF, children, returnType); - - if (commonType == null) { - - // Couldn't determine common type, don't cast - return children; - } - - List<ExprNodeDesc> childrenWithCasts = new ArrayList<ExprNodeDesc>(); - boolean atleastOneCastNeeded = false; - if (genericUDF instanceof GenericUDFElt) { - int i = 0; - for (ExprNodeDesc child : children) { - TypeInfo castType = commonType; - if (i++ == 0) { - castType = isIntFamily(child.getTypeString()) ? child.getTypeInfo() : TypeInfoFactory.intTypeInfo; - } - ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, child, castType); - if (castExpression != null) { - atleastOneCastNeeded = true; - childrenWithCasts.add(castExpression); - } else { - childrenWithCasts.add(child); - } - } - } else { - for (ExprNodeDesc child : children) { - ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, child, commonType); - if (castExpression != null) { - atleastOneCastNeeded = true; - childrenWithCasts.add(castExpression); - } else { - childrenWithCasts.add(child); - } - } - } - if (atleastOneCastNeeded) { - return childrenWithCasts; - } else { - return children; - } - } - - private boolean isExcludedFromCast(GenericUDF genericUDF) { - boolean ret = castExpressionUdfs.contains(genericUDF.getClass()) - || (genericUDF instanceof GenericUDFRound) || (genericUDF instanceof GenericUDFBetween); - - if (ret) { - return ret; - } - - if (genericUDF instanceof GenericUDFBridge) { - Class<?> udfClass = ((GenericUDFBridge) genericUDF).getUdfClass(); - return 
castExpressionUdfs.contains(udfClass) - || UDFSign.class.isAssignableFrom(udfClass); - } - return false; - } - - /** - * Creates a DecimalTypeInfo object with appropriate precision and scale for the given - * inputTypeInfo. - */ - private TypeInfo updatePrecision(TypeInfo inputTypeInfo, DecimalTypeInfo returnType) { - if (!(inputTypeInfo instanceof PrimitiveTypeInfo)) { - return returnType; - } - PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) inputTypeInfo; - int precision = getPrecisionForType(ptinfo); - // TODO: precision and scale would be practically invalid for string conversion (38,38) - int scale = HiveDecimalUtils.getScaleForType(ptinfo); - return new DecimalTypeInfo(precision, scale); - } - - /** - * The GenericUDFs might need their children output to be cast to the given castType. - * This method returns a cast expression that would achieve the required casting. - */ - private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc child, TypeInfo castType) - throws HiveException { - TypeInfo inputTypeInfo = child.getTypeInfo(); - String inputTypeString = inputTypeInfo.getTypeName(); - String castTypeString = castType.getTypeName(); - - if (inputTypeString.equals(castTypeString)) { - // Nothing to be done - return null; - } - boolean inputTypeDecimal = false; - boolean castTypeDecimal = false; - if (decimalTypePattern.matcher(inputTypeString).matches()) { - inputTypeDecimal = true; - } - if (decimalTypePattern.matcher(castTypeString).matches()) { - castTypeDecimal = true; - } - - if (castTypeDecimal && !inputTypeDecimal) { - if (needsImplicitCastForDecimal(udf)) { - // Cast the input to decimal - // If castType is decimal, try not to lose precision for numeric types. 
- castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType); - GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal(); - castToDecimalUDF.setTypeInfo(castType); - List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); - children.add(child); - ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, castToDecimalUDF, children); - return desc; - } - } else if (!castTypeDecimal && inputTypeDecimal) { - if (needsImplicitCastForDecimal(udf)) { - // Cast decimal input to returnType - GenericUDF genericUdf = getGenericUDFForCast(castType); - List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); - children.add(child); - ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children); - return desc; - } - } else { - - // Casts to exact types including long to double etc. are needed in some special cases. - if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl - || udf instanceof GenericUDFElt) { - GenericUDF genericUdf = getGenericUDFForCast(castType); - List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); - children.add(child); - ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children); - return desc; - } - } - return null; - } - - private int getPrecisionForType(PrimitiveTypeInfo typeInfo) { - if (isFloatFamily(typeInfo.getTypeName())) { - return HiveDecimal.MAX_PRECISION; - } - return HiveDecimalUtils.getPrecisionForType(typeInfo); - } - - public static GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException { - UDF udfClass = null; - GenericUDF genericUdf = null; - switch (((PrimitiveTypeInfo) castType).getPrimitiveCategory()) { - case BYTE: - udfClass = new UDFToByte(); - break; - case SHORT: - udfClass = new UDFToShort(); - break; - case INT: - udfClass = new UDFToInteger(); - break; - case LONG: - udfClass = new UDFToLong(); - break; - case FLOAT: - udfClass = new UDFToFloat(); - break; - case DOUBLE: - udfClass = new UDFToDouble(); - break; - case 
STRING: - genericUdf = new GenericUDFToString(); - break; - case CHAR: - genericUdf = new GenericUDFToChar(); - break; - case VARCHAR: - genericUdf = new GenericUDFToVarchar(); - break; - case BOOLEAN: - udfClass = new UDFToBoolean(); - break; - case DATE: - genericUdf = new GenericUDFToDate(); - break; - case TIMESTAMP: - genericUdf = new GenericUDFTimestamp(); - break; - case INTERVAL_YEAR_MONTH: - genericUdf = new GenericUDFToIntervalYearMonth(); - break; - case INTERVAL_DAY_TIME: - genericUdf = new GenericUDFToIntervalDayTime(); - break; - case BINARY: - genericUdf = new GenericUDFToBinary(); - break; - case DECIMAL: - genericUdf = new GenericUDFToDecimal(); - break; - case VOID: - case UNKNOWN: - // fall-through to throw exception, its not expected for execution to reach here. - break; - } - if (genericUdf == null) { - if (udfClass == null) { - throw new HiveException("Could not add implicit cast for type "+castType.getTypeName()); - } - GenericUDFBridge genericUDFBridge = new GenericUDFBridge(); - genericUDFBridge.setUdfClassName(udfClass.getClass().getName()); - genericUDFBridge.setUdfName(udfClass.getClass().getSimpleName()); - genericUdf = genericUDFBridge; - } - if (genericUdf instanceof SettableUDF) { - ((SettableUDF) genericUdf).setTypeInfo(castType); - } - return genericUdf; - } - - /* Return true if this is one of a small set of functions for which - * it is significantly easier to use the old code path in vectorized - * mode instead of implementing a new, optimized VectorExpression. - * - * Depending on performance requirements and frequency of use, these - * may be implemented in the future with an optimized VectorExpression. - */ - public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr, - VectorExpressionDescriptor.Mode mode) { - GenericUDF gudf = expr.getGenericUDF(); - if (gudf instanceof GenericUDFBridge) { - GenericUDFBridge bridge = (GenericUDFBridge) gudf; - Class<? 
extends UDF> udfClass = bridge.getUdfClass(); - if (udfClass.equals(UDFHex.class) - || udfClass.equals(UDFRegExpExtract.class) - || udfClass.equals(UDFRegExpReplace.class) - || udfClass.equals(UDFConv.class) - || udfClass.equals(UDFFromUnixTime.class) && isIntFamily(arg0Type(expr)) - || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr)) - || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr))) { - return true; - } - } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr))) - - /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because - * of their complexity and generality. In the future, variations of these - * can be optimized to run faster for the vectorized code path. For example, - * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END - * is an example of a GenericUDFCase that has all constant arguments - * except for the first argument. This is probably a common case and a - * good candidate for a fast, special-purpose VectorExpression. Then - * the UDF Adaptor code path could be used as a catch-all for - * non-optimized general cases. - */ - || gudf instanceof GenericUDFCase - || gudf instanceof GenericUDFWhen) { - return true; - } else if ((gudf instanceof GenericUDFToString - || gudf instanceof GenericUDFToChar - || gudf instanceof GenericUDFToVarchar) && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } else if (gudf instanceof GenericUDFBetween && (mode == VectorExpressionDescriptor.Mode.PROJECTION)) { - // between has 4 args here, but can be vectorized like this - return true; - } - return false; - } - - public static boolean isCastToIntFamily(Class<? extends UDF> udfClass) { - return udfClass.equals(UDFToByte.class) - || udfClass.equals(UDFToShort.class) - || udfClass.equals(UDFToInteger.class) - || udfClass.equals(UDFToLong.class); - - // Boolean is purposely excluded. 
- } - - public static boolean isCastToFloatFamily(Class<? extends UDF> udfClass) { - return udfClass.equals(UDFToDouble.class) - || udfClass.equals(UDFToFloat.class); - } - - // Return the type string of the first argument (argument 0). - public static String arg0Type(ExprNodeGenericFuncDesc expr) { - String type = expr.getChildren().get(0).getTypeString(); - return type; - } - - // Return true if this is a custom UDF or custom GenericUDF. - // This two functions are for use only in the planner. It will fail in a task. - public static boolean isCustomUDF(ExprNodeGenericFuncDesc expr) { - return isCustomUDF(expr.getFuncText()); - } - - private static boolean isCustomUDF(String udfName) { - if (udfName == null) { - return false; - } - FunctionInfo funcInfo; - try { - funcInfo = FunctionRegistry.getFunctionInfo(udfName); - } catch (SemanticException e) { - LOG.warn("Failed to load " + udfName, e); - funcInfo = null; - } - if (funcInfo == null) { - return false; - } - boolean isNativeFunc = funcInfo.isNative(); - return !isNativeFunc; - } - - /** - * Handles only the special cases of cast/+ve/-ve operator on a constant. - * @param exprDesc - * @return The same expression if no evaluation done, else return the constant - * expression. 
- * @throws HiveException - */ - ExprNodeDesc evaluateCastOnConstants(ExprNodeDesc exprDesc) throws HiveException { - if (!(exprDesc instanceof ExprNodeGenericFuncDesc)) { - return exprDesc; - } - - if (exprDesc.getChildren() == null || (exprDesc.getChildren().size() != 1) ) { - return exprDesc; - } - - ExprNodeConstantDesc foldedChild = null; - if (!( exprDesc.getChildren().get(0) instanceof ExprNodeConstantDesc)) { - - // try recursive folding - ExprNodeDesc expr = evaluateCastOnConstants(exprDesc.getChildren().get(0)); - if (expr instanceof ExprNodeConstantDesc) { - foldedChild = (ExprNodeConstantDesc) expr; - } - } else { - foldedChild = (ExprNodeConstantDesc) exprDesc.getChildren().get(0); - } - - if (foldedChild == null) { - return exprDesc; - } - - ObjectInspector childoi = foldedChild.getWritableObjectInspector(); - GenericUDF gudf = ((ExprNodeGenericFuncDesc) exprDesc).getGenericUDF(); - - // Only evaluate +ve/-ve or cast on constant or recursive casting. - if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive || - castExpressionUdfs.contains(gudf.getClass()) - || ((gudf instanceof GenericUDFBridge) - && castExpressionUdfs.contains(((GenericUDFBridge) gudf).getUdfClass()))) { - ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprDesc); - ObjectInspector output = evaluator.initialize(childoi); - Object constant = evaluator.evaluate(null); - Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output); - return new ExprNodeConstantDesc(exprDesc.getTypeInfo(), java); - } - - return exprDesc; - } - - /* For cast on constant operator in all members of the input list and return new list - * containing results. 
- */ - private List<ExprNodeDesc> evaluateCastOnConstants(List<ExprNodeDesc> childExpr) - throws HiveException { - List<ExprNodeDesc> evaluatedChildren = new ArrayList<ExprNodeDesc>(); - if (childExpr != null) { - for (ExprNodeDesc expr : childExpr) { - expr = this.evaluateCastOnConstants(expr); - evaluatedChildren.add(expr); - } - } - return evaluatedChildren; - } - - private VectorExpression getConstantVectorExpression(Object constantValue, TypeInfo typeInfo, - VectorExpressionDescriptor.Mode mode) throws HiveException { - String typeName = typeInfo.getTypeName(); - VectorExpressionDescriptor.ArgumentType vectorArgType = - VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(typeName); - if (vectorArgType == VectorExpressionDescriptor.ArgumentType.NONE) { - throw new HiveException("No vector argument type for type name " + typeName); - } - int outCol = -1; - if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { - outCol = ocm.allocateOutputColumn(typeInfo); - } - if (constantValue == null) { - return new ConstantVectorExpression(outCol, typeInfo, true); - } - - // Boolean is special case. 
- if (typeName.equalsIgnoreCase("boolean")) { - if (mode == VectorExpressionDescriptor.Mode.FILTER) { - if (((Boolean) constantValue).booleanValue()) { - return new FilterConstantBooleanVectorExpression(1); - } else { - return new FilterConstantBooleanVectorExpression(0); - } - } else { - if (((Boolean) constantValue).booleanValue()) { - return new ConstantVectorExpression(outCol, 1, typeInfo); - } else { - return new ConstantVectorExpression(outCol, 0, typeInfo); - } - } - } - - switch (vectorArgType) { - case INT_FAMILY: - return new ConstantVectorExpression(outCol, ((Number) constantValue).longValue(), typeInfo); - case DATE: - return new ConstantVectorExpression(outCol, DateWritableV2.dateToDays((Date) constantValue), typeInfo); - case TIMESTAMP: - return new ConstantVectorExpression(outCol, - ((org.apache.hadoop.hive.common.type.Timestamp) constantValue).toSqlTimestamp(), typeInfo); - case INTERVAL_YEAR_MONTH: - return new ConstantVectorExpression(outCol, - ((HiveIntervalYearMonth) constantValue).getTotalMonths(), typeInfo); - case INTERVAL_DAY_TIME: - return new ConstantVectorExpression(outCol, (HiveIntervalDayTime) constantValue, typeInfo); - case FLOAT_FAMILY: - return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue(), typeInfo); - case DECIMAL: - return new ConstantVectorExpression(outCol, (HiveDecimal) constantValue, typeInfo); - case STRING: - return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes(), typeInfo); - case CHAR: - return new ConstantVectorExpression(outCol, ((HiveChar) constantValue), typeInfo); - case VARCHAR: - return new ConstantVectorExpression(outCol, ((HiveVarchar) constantValue), typeInfo); - default: - throw new HiveException("Unsupported constant type: " + typeName + ", object class " + constantValue.getClass().getSimpleName()); - } - } - - private VectorExpression getDynamicValueVectorExpression(ExprNodeDynamicValueDesc dynamicValueExpr, - VectorExpressionDescriptor.Mode mode) throws 
HiveException { - String typeName = dynamicValueExpr.getTypeInfo().getTypeName(); - VectorExpressionDescriptor.ArgumentType vectorArgType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(typeName); - if (vectorArgType == VectorExpressionDescriptor.ArgumentType.NONE) { - throw new HiveException("No vector argument type for type name " + typeName); - } - int outCol = -1; - if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { - outCol = ocm.allocateOutputColumn(dynamicValueExpr.getTypeInfo()); - } - - return new DynamicValueVectorExpression(outCol, dynamicValueExpr.getTypeInfo(), dynamicValueExpr.getDynamicValue()); - } - - /** - * Used as a fast path for operations that don't modify their input, like unary + - * and casting boolean to long. IdentityExpression and its children are always - * projections. - */ - private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList) - throws HiveException { - ExprNodeDesc childExpr = childExprList.get(0); - int identityCol; - TypeInfo identityTypeInfo; - DataTypePhysicalVariation identityDataTypePhysicalVariation; - VectorExpression v1 = null; - if (childExpr instanceof ExprNodeGenericFuncDesc) { - v1 = getVectorExpression(childExpr); - identityCol = v1.getOutputColumnNum(); - identityTypeInfo = v1.getOutputTypeInfo(); - identityDataTypePhysicalVariation = v1.getOutputDataTypePhysicalVariation(); - } else if (childExpr instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr; - identityCol = getInputColumnIndex(colDesc.getColumn()); - identityTypeInfo = colDesc.getTypeInfo(); - - // CONSIDER: Validation of type information - - identityDataTypePhysicalVariation = getDataTypePhysicalVariation(identityCol); - } else { - throw new HiveException("Expression not supported: "+childExpr); - } - - VectorExpression ve = new IdentityExpression(identityCol); - - if (v1 != null) { - ve.setChildExpressions(new VectorExpression [] {v1}); - } - - 
ve.setInputTypeInfos(identityTypeInfo); - ve.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation); - - ve.setOutputTypeInfo(identityTypeInfo); - ve.setOutputDataTypePhysicalVariation(identityDataTypePhysicalVariation); - - return ve; - } - - - private boolean checkExprNodeDescForDecimal64(ExprNodeDesc exprNodeDesc) throws HiveException { - if (exprNodeDesc instanceof ExprNodeColumnDesc) { - int colIndex = getInputColumnIndex((ExprNodeColumnDesc) exprNodeDesc); - DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex); - return (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64); - } else if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) { - - // Is the result Decimal64 precision? - TypeInfo returnType = exprNodeDesc.getTypeInfo(); - if (!checkTypeInfoForDecimal64(returnType)) { - return false; - } - DecimalTypeInfo returnDecimalType = (DecimalTypeInfo) returnType; - - GenericUDF udf = ((ExprNodeGenericFuncDesc) exprNodeDesc).getGenericUDF(); - Class<?> udfClass = udf.getClass(); - - // We have a class-level annotation that says whether the UDF's vectorization expressions - // support Decimal64. - VectorizedExpressionsSupportDecimal64 annotation = - AnnotationUtils.getAnnotation(udfClass, VectorizedExpressionsSupportDecimal64.class); - if (annotation == null) { - return false; - } - - // Carefully check the children to make sure they are Decimal64. - List<ExprNodeDesc> children = exprNodeDesc.getChildren(); - for (ExprNodeDesc childExprNodeDesc : children) { - - // Some cases were converted before calling getVectorExpressionForUdf. - // So, emulate those cases first. 
- - if (childExprNodeDesc instanceof ExprNodeConstantDesc) { - DecimalTypeInfo childDecimalTypeInfo = - decimalTypeFromCastToDecimal(childExprNodeDesc, returnDecimalType); - if (childDecimalTypeInfo == null) { - return false; - } - if (!checkTypeInfoForDecimal64(childDecimalTypeInfo)) { - return false; - } - continue; - } - - // Otherwise, recurse. - if (!checkExprNodeDescForDecimal64(childExprNodeDesc)) { - return false; - } - } - return true; - } else if (exprNodeDesc instanceof ExprNodeConstantDesc) { - return checkTypeInfoForDecimal64(exprNodeDesc.getTypeInfo()); - } - return false; - } - - private boolean checkTypeInfoForDecimal64(TypeInfo typeInfo) { - if (typeInfo instanceof DecimalTypeInfo) { - DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo; - return HiveDecimalWritable.isPrecisionDecimal64(decimalTypeInfo.precision()); - } - return false; - } - - public boolean haveCandidateForDecimal64VectorExpression(int numChildren, - List<ExprNodeDesc> childExpr, TypeInfo returnType) throws HiveException { - - // For now, just 2 Decimal64 inputs and a Decimal64 or boolean output. 
- return (numChildren == 2 && - checkExprNodeDescForDecimal64(childExpr.get(0)) && - checkExprNodeDescForDecimal64(childExpr.get(1)) && - (checkTypeInfoForDecimal64(returnType) || - returnType.equals(TypeInfoFactory.booleanTypeInfo))); - } - - private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUdf, - Class<?> udfClass, List<ExprNodeDesc> childExpr, int numChildren, - VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { - - ExprNodeDesc child1 = childExpr.get(0); - ExprNodeDesc child2 = childExpr.get(1); - - DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) child1.getTypeInfo(); - DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) child2.getTypeInfo(); - - DataTypePhysicalVariation dataTypePhysicalVariation1 = DataTypePhysicalVariation.DECIMAL_64; - DataTypePhysicalVariation dataTypePhysicalVariation2 = DataTypePhysicalVariation.DECIMAL_64; - - final int scale1 = decimalTypeInfo1.scale(); - final int scale2 = decimalTypeInfo2.scale(); - - VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder(); - builder.setNumArguments(numChildren); - builder.setMode(mode); - - boolean isColumnScaleEstablished = false; - int columnScale = 0; - boolean hasScalar = false; - builder.setArgumentType(0, ArgumentType.DECIMAL_64); - if (child1 instanceof ExprNodeGenericFuncDesc || - child1 instanceof ExprNodeColumnDesc) { - builder.setInputExpressionType(0, InputExpressionType.COLUMN); - isColumnScaleEstablished = true; - columnScale = scale1; - } else if (child1 instanceof ExprNodeConstantDesc) { - if (isNullConst(child1)) { - - // Cannot handle NULL scalar parameter. - return null; - } - hasScalar = true; - builder.setInputExpressionType(0, InputExpressionType.SCALAR); - } else { - - // Currently, only functions, columns, and scalars supported. 
- return null; - } - - builder.setArgumentType(1, ArgumentType.DECIMAL_64); - if (child2 instanceof ExprNodeGenericFuncDesc || - child2 instanceof ExprNodeColumnDesc) { - builder.setInputExpressionType(1, InputExpressionType.COLUMN); - if (!isColumnScaleEstablished) { - isColumnScaleEstablished = true; - columnScale = scale2; - } else if (columnScale != scale2) { - - // We only support Decimal64 on 2 columns when the have the same scale. - return null; - } - } else if (child2 instanceof ExprNodeConstantDesc) { - // Cannot have SCALAR, SCALAR. - if (!isColumnScaleEstablished) { - return null; - } - if (isNullConst(child2)) { - - // Cannot handle NULL scalar parameter. - return null; - } - hasScalar = true; - builder.setInputExpressionType(1, InputExpressionType.SCALAR); - } else { - - // Currently, only functions, columns, and scalars supported. - return null; - } - - VectorExpressionDescriptor.Descriptor descriptor = builder.build(); - Class<?> vectorClass = - this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions); - if (vectorClass == null) { - return null; - } - - VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass); - - /* - * Custom build arguments. 
- */ - - List<VectorExpression> children = new ArrayList<VectorExpression>(); - Object[] arguments = new Object[numChildren]; - - for (int i = 0; i < numChildren; i++) { - ExprNodeDesc child = childExpr.get(i); - if (child instanceof ExprNodeGenericFuncDesc) { - VectorExpression vChild = getVectorExpression(child, childrenMode); - children.add(vChild); - arguments[i] = vChild.getOutputColumnNum(); - } else if (child instanceof ExprNodeColumnDesc) { - int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); - if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) { - - VectorExpression filterExpr = - getFilterOnBooleanColumnExpression((ExprNodeColumnDesc) child, colIndex); - if (filterExpr == null) { - return null; - } - - children.add(filterExpr); - } - arguments[i] = colIndex; - } else { - Preconditions.checkState(child instanceof ExprNodeConstantDesc); - ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) child; - HiveDecimal hiveDecimal = (HiveDecimal) constDesc.getValue(); - if (hiveDecimal.scale() > columnScale) { - - // For now, bail out on decimal constants with larger scale than column scale. - return null; - } - final long decimal64Scalar = new HiveDecimalWritable(hiveDecimal).serialize64(columnScale); - arguments[i] = decimal64Scalar; - } - } - - /* - * Instantiate Decimal64 vector expression. - * - * The instantiateExpression method sets the output column and type information. 
*/
    VectorExpression vectorExpression =
        instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.DECIMAL_64, arguments);
    if (vectorExpression == null) {
      handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.DECIMAL_64, arguments);
    }

    vectorExpression.setInputTypeInfos(decimalTypeInfo1, decimalTypeInfo2);
    vectorExpression.setInputDataTypePhysicalVariations(dataTypePhysicalVariation1, dataTypePhysicalVariation2);

    if ((vectorExpression != null) && !children.isEmpty()) {
      vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
    }

    return vectorExpression;
  }

  /**
   * General lookup of a vectorized expression class for a UDF.
   *
   * Strategy visible in this method, in order:
   * <ol>
   *   <li>Multi-child (&gt;2) AND/OR in FILTER mode is special-cased to the
   *       dedicated multi-input classes {@code FilterExprOrExpr} /
   *       {@code FilterExprAndExpr}, provided every child is a LONG-typed
   *       column or function expression; otherwise returns null.</li>
   *   <li>If the children qualify, a Decimal64-specialized expression class is
   *       tried first via {@code getDecimal64VectorExpressionForUdf}.</li>
   *   <li>Otherwise a {@code VectorExpressionDescriptor} is built from each
   *       child's undecorated type name and input kind
   *       (COLUMN / SCALAR / DYNAMICVALUE) and looked up in {@code vMap}.</li>
   * </ol>
   *
   * @param genericUdf the GenericUDF instance; may be null when the caller has
   *                   substituted a bridged UDF class (see
   *                   {@code getGenericUdfVectorExpression})
   * @param udfClass   the class used for the descriptor lookup in {@code vMap}
   * @param childExpr  child expressions of the UDF call; may be null (treated
   *                   as zero children)
   * @param mode       FILTER or PROJECTION
   * @param returnType result type used to allocate the output column
   * @return a vectorized expression, or null when no vectorized class matches
   *         (caller is expected to fall back; fallback handling is outside
   *         this view)
   * @throws HiveException on null/unmatchable child type names or unsupported
   *                       child expression node kinds
   */
  private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf,
      Class<?> udfClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode,
      TypeInfo returnType) throws HiveException {

    int numChildren = (childExpr == null) ? 0 : childExpr.size();

    if (numChildren > 2 && genericUdf != null && mode == VectorExpressionDescriptor.Mode.FILTER &&
        ((genericUdf instanceof GenericUDFOPOr) || (genericUdf instanceof GenericUDFOPAnd))) {

      // Special case handling for Multi-OR and Multi-AND.

      for (int i = 0; i < numChildren; i++) {
        ExprNodeDesc child = childExpr.get(i);
        String childTypeString = child.getTypeString();
        if (childTypeString == null) {
          throw new HiveException("Null child type name string");
        }
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(childTypeString);
        Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
        // Multi-input AND/OR only supports boolean (LONG vector) children.
        if (columnVectorType != ColumnVector.Type.LONG){
          return null;
        }
        // Only function or column children qualify; constants etc. bail out.
        if (!(child instanceof ExprNodeGenericFuncDesc) && !(child instanceof ExprNodeColumnDesc)) {
          return null;
        }
      }
      Class<?> vclass;
      if (genericUdf instanceof GenericUDFOPOr) {
        vclass = FilterExprOrExpr.class;
      } else if (genericUdf instanceof GenericUDFOPAnd) {
        vclass = FilterExprAndExpr.class;
      } else {
        // Unreachable given the enclosing instanceof guard; defensive.
        throw new RuntimeException("Unexpected multi-child UDF");
      }
      VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
      return createVectorExpression(vclass, childExpr, childrenMode, returnType);
    }
    if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) {
      return null;
    }

    // Should we intercept here for a possible Decimal64 vector expression class?
    if (haveCandidateForDecimal64VectorExpression(numChildren, childExpr, returnType)) {
      VectorExpression result = getDecimal64VectorExpressionForUdf(genericUdf, udfClass,
          childExpr, numChildren, mode, returnType);
      if (result != null) {
        return result;
      }
      // Otherwise, fall through and proceed with non-Decimal64 vector expression classes...
    }

    // Build a descriptor describing each argument's type and kind, then use it
    // to look up a matching vectorized expression class.
    VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
    builder.setNumArguments(numChildren);
    builder.setMode(mode);
    for (int i = 0; i < numChildren; i++) {
      ExprNodeDesc child = childExpr.get(i);
      TypeInfo childTypeInfo = child.getTypeInfo();
      String childTypeString = childTypeInfo.toString();
      if (childTypeString == null) {
        throw new HiveException("Null child type name string");
      }
      String undecoratedTypeName = getUndecoratedName(childTypeString);
      if (undecoratedTypeName == null) {
        throw new HiveException("No match for type string " + childTypeString + " from undecorated type name method");
      }
      builder.setArgumentType(i, undecoratedTypeName);
      if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeColumnDesc)
          || (child instanceof ExprNodeFieldDesc)) {
        builder.setInputExpressionType(i, InputExpressionType.COLUMN);
      } else if (child instanceof ExprNodeConstantDesc) {
        if (isNullConst(child)) {
          // Cannot handle NULL scalar parameter.
          return null;
        }
        builder.setInputExpressionType(i, InputExpressionType.SCALAR);
      } else if (child instanceof ExprNodeDynamicValueDesc) {
        builder.setInputExpressionType(i, InputExpressionType.DYNAMICVALUE);
      } else {
        throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
      }
    }
    VectorExpressionDescriptor.Descriptor descriptor = builder.build();
    Class<?> vclass =
        this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions);
    if (vclass == null) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("No vector udf found for "+udfClass.getSimpleName() + ", descriptor: "+descriptor);
      }
      return null;
    }
    VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
    return createVectorExpression(vclass, childExpr, childrenMode, returnType);
  }

  /**
   * Creates a {@code ConvertDecimal64ToDecimal} expression that reads the
   * Decimal64 column at {@code colIndex} and writes a regular decimal into a
   * newly allocated output column (allocation happens inside
   * {@code instantiateExpression}).
   *
   * @param colIndex       input column holding the Decimal64 representation
   * @param resultTypeInfo decimal type info for both the input and the output
   * @return the conversion expression (never null; failure throws)
   * @throws HiveException if the conversion class cannot be instantiated
   */
  private VectorExpression createDecimal64ToDecimalConversion(int colIndex, TypeInfo resultTypeInfo)
      throws HiveException {
    Object [] conversionArgs = new Object[1];
    conversionArgs[0] = colIndex;
    VectorExpression vectorExpression =
        instantiateExpression(
            ConvertDecimal64ToDecimal.class,
            resultTypeInfo,
            DataTypePhysicalVariation.NONE,
            conversionArgs);
    if (vectorExpression == null) {
      handleCouldNotInstantiateVectorExpression(
          ConvertDecimal64ToDecimal.class, resultTypeInfo, DataTypePhysicalVariation.NONE,
          conversionArgs);
    }

    // Input and output share the same decimal type info; only the physical
    // representation differs (DECIMAL_64 in, regular decimal out).
    vectorExpression.setInputTypeInfos(resultTypeInfo);
    vectorExpression.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.DECIMAL_64);

    return vectorExpression;
  }

  /**
   * Wraps {@code inputExpression} with a Decimal64-to-decimal conversion that
   * reads the input's output column. For an {@code IdentityExpression} the
   * wrapper alone suffices (it reads the column directly), so the child is
   * dropped; otherwise the input becomes the wrapper's child expression.
   *
   * @param inputExpression expression whose output is in Decimal64 form
   * @return an expression producing the regular decimal representation
   * @throws HiveException if the conversion expression cannot be created
   */
  public VectorExpression wrapWithDecimal64ToDecimalConversion(VectorExpression inputExpression)
      throws HiveException {

    VectorExpression wrapExpression = createDecimal64ToDecimalConversion(
        inputExpression.getOutputColumnNum(), inputExpression.getOutputTypeInfo());
    if (inputExpression instanceof IdentityExpression) {
      return wrapExpression;
    }

    // CONCERN: Leaking scratch column?

    VectorExpression[] child = new VectorExpression[1];
    child[0] = inputExpression;
    wrapExpression.setChildExpressions(child);

    return wrapExpression;
  }

  /**
   * Instantiates {@code vectorClass} for the given children, turning each
   * child into a constructor argument:
   * <ul>
   *   <li>function/field children are vectorized recursively and contribute
   *       their output column number;</li>
   *   <li>column children contribute their column index (Decimal64 columns are
   *       first converted to regular decimal, since this path only handles
   *       non-Decimal64 vectors; in FILTER mode a {@code SelectColumnIsTrue}
   *       child is inserted because the column must be boolean);</li>
   *   <li>constant children contribute a scalar value (a null constant becomes
   *       a {@code ConstantVectorExpression});</li>
   *   <li>dynamic-value children contribute the {@code DynamicValue}.</li>
   * </ul>
   * Scratch output columns of all generated children are freed before
   * returning — safe here because the columns were only needed to number the
   * arguments, and the expression tree keeps its own references.
   *
   * @throws HiveException for unsupported child node kinds or instantiation
   *                       failure
   */
  private VectorExpression createVectorExpression(Class<?> vectorClass,
      List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode childrenMode, TypeInfo returnType) throws HiveException {
    int numChildren = childExpr == null ? 0: childExpr.size();

    TypeInfo[] inputTypeInfos = new TypeInfo[numChildren];
    DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[numChildren];

    List<VectorExpression> children = new ArrayList<VectorExpression>();
    Object[] arguments = new Object[numChildren];

    for (int i = 0; i < numChildren; i++) {
      ExprNodeDesc child = childExpr.get(i);
      TypeInfo childTypeInfo = child.getTypeInfo();

      inputTypeInfos[i] = childTypeInfo;
      inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE; // Assume.

      if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeFieldDesc)) {
        VectorExpression vChild = getVectorExpression(child, childrenMode);
        children.add(vChild);
        arguments[i] = vChild.getOutputColumnNum();

        // Update.
        inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation();
      } else if (child instanceof ExprNodeColumnDesc) {
        int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);

        // CONSIDER: Validate type information

        if (childTypeInfo instanceof DecimalTypeInfo) {

          // In this method, we must only process non-Decimal64 column vectors.
          // Convert Decimal64 columns to regular decimal.
          DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex);
          if (dataTypePhysicalVariation != null && dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {

            // FUTURE: Can we reuse this conversion?

            VectorExpression vChild = createDecimal64ToDecimalConversion(colIndex, childTypeInfo);
            children.add(vChild);
            arguments[i] = vChild.getOutputColumnNum();

            // Update.
            inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation();
            continue;
          }
        }
        if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) {

          // In filter mode, the column must be a boolean
          SelectColumnIsTrue selectColumnIsTrue = new SelectColumnIsTrue(colIndex);

          selectColumnIsTrue.setInputTypeInfos(childTypeInfo);
          selectColumnIsTrue.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);

          children.add(selectColumnIsTrue);
        }
        arguments[i] = colIndex;
      } else if (child instanceof ExprNodeConstantDesc) {
        Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child);
        arguments[i] = (null == scalarValue) ? getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue;
      } else if (child instanceof ExprNodeDynamicValueDesc) {
        arguments[i] = ((ExprNodeDynamicValueDesc) child).getDynamicValue();
      } else {
        throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
      }
    }
    VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments);
    if (vectorExpression == null) {
      handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments);
    }

    vectorExpression.setInputTypeInfos(inputTypeInfos);
    vectorExpression.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);

    if ((vectorExpression != null) && !children.isEmpty()) {
      vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
    }

    for (VectorExpression ve : children) {
      ocm.freeOutputColumn(ve.getOutputColumnNum());
    }

    return vectorExpression;
  }

  /**
   * Always throws: reports a failed instantiation of a vector expression class
   * with the offending arguments and return type in the message.
   *
   * @throws HiveException always
   */
  private void handleCouldNotInstantiateVectorExpression(Class<?> vectorClass, TypeInfo returnType,
      DataTypePhysicalVariation dataTypePhysicalVariation, Object[] arguments) throws HiveException {
    String displayString = "Could not instantiate vector expression class " + vectorClass.getName() +
        " for arguments " + Arrays.toString(arguments) + " return type " +
        VectorExpression.getTypeName(returnType, dataTypePhysicalVariation);
    throw new HiveException(displayString);
  }

  /**
   * Children of a FILTER-mode AND/OR are themselves evaluated as filters;
   * every other UDF's children are evaluated as projections.
   */
  private VectorExpressionDescriptor.Mode getChildrenMode(VectorExpressionDescriptor.Mode mode, Class<?> udf) {
    if (mode.equals(VectorExpressionDescriptor.Mode.FILTER) && (udf.equals(GenericUDFOPAnd.class) || udf.equals(GenericUDFOPOr.class))) {
      return VectorExpressionDescriptor.Mode.FILTER;
    }
    return VectorExpressionDescriptor.Mode.PROJECTION;
  }

  /**
   * Formats constructor arguments (values and their runtime classes) for error
   * messages. NOTE(review): a null element in {@code args} would NPE on
   * {@code obj.getClass()} — callers appear to pass non-null arguments only.
   */
  private String getNewInstanceArgumentString(Object [] args) {
    if (args == null) {
      return "arguments: NULL";
    }
    ArrayList<String> argClasses = new ArrayList<String>();
    for (Object obj : args) {
      argClasses.add(obj.getClass().getSimpleName());
    }
    return "arguments: " + Arrays.toString(args) + ", argument classes: " + argClasses.toString();
  }

  // Maximum number of stack frames rendered by getStackTraceAsSingleLine.
  private static final int STACK_LENGTH_LIMIT = 15;

  /**
   * Renders a throwable and up to {@link #STACK_LENGTH_LIMIT} stack frames as
   * one comma-separated line (appending ", ..." when truncated), for embedding
   * in exception messages.
   */
  public static String getStackTraceAsSingleLine(Throwable e) {
    StringBuilder sb = new StringBuilder();
    sb.append(e);
    sb.append(" stack trace: ");
    StackTraceElement[] stackTrace = e.getStackTrace();
    int length = stackTrace.length;
    boolean isTruncated = false;
    if (length > STACK_LENGTH_LIMIT) {
      length = STACK_LENGTH_LIMIT;
      isTruncated = true;
    }
    for (int i = 0; i < length; i++) {
      if (i > 0) {
        sb.append(", ");
      }
      sb.append(stackTrace[i]);
    }
    if (isTruncated) {
      sb.append(", ...");
    }

    // Attempt to cleanup stack trace elements that vary by VM.

    String cleaned = sb.toString().replaceAll("GeneratedConstructorAccessor[0-9]*", "GeneratedConstructorAccessor<omitted>");

    return cleaned;
  }

  /**
   * Reflectively instantiates a vector expression class, matching the supplied
   * arguments against the constructor's arity:
   * <ul>
   *   <li>0 constructor params: no-arg instantiation;</li>
   *   <li>params == args: pass arguments through;</li>
   *   <li>params == args + 1: the extra trailing parameter is the output
   *       column, allocated here from {@code ocm} using the return type, and
   *       output type info/variation are set on the result.</li>
   * </ul>
   * Also sets maxLength on {@code TruncStringOutput} expressions when the
   * return type is CHAR/VARCHAR. NOTE(review): if the constructor arity
   * matches none of the three cases, null is returned — callers check for it.
   *
   * @throws HiveException on reflection failure or missing return type info
   */
  public VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnTypeInfo,
      DataTypePhysicalVariation returnDataTypePhysicalVariation, Object...args)
      throws HiveException {
    VectorExpression ve = null;
    Constructor<?> ctor = getConstructor(vclass);
    int numParams = ctor.getParameterTypes().length;
    int argsLength = (args == null) ? 0 : args.length;
    if (numParams == 0) {
      try {
        ve = (VectorExpression) ctor.newInstance();
      } catch (Exception ex) {
        throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " +
            getStackTraceAsSingleLine(ex));
      }
    } else if (numParams == argsLength) {
      try {
        ve = (VectorExpression) ctor.newInstance(args);
      } catch (Exception ex) {
        throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " +
            getStackTraceAsSingleLine(ex));
      }
    } else if (numParams == argsLength + 1) {
      // Additional argument is needed, which is the outputcolumn.
      Object [] newArgs = null;
      try {
        if (returnTypeInfo == null) {
          throw new HiveException("Missing output type information");
        }
        String returnTypeName = returnTypeInfo.getTypeName();
        returnTypeName = VectorizationContext.mapTypeNameSynonyms(returnTypeName);

        // Special handling for decimal because decimal types need scale and precision parameter.
        // This special handling should be avoided by using returnType uniformly for all cases.
        final int outputColumnNum =
            ocm.allocateOutputColumn(returnTypeInfo, returnDataTypePhysicalVariation);

        newArgs = Arrays.copyOf(args, numParams);
        newArgs[numParams-1] = outputColumnNum;

        ve = (VectorExpression) ctor.newInstance(newArgs);

        /*
         * Caller is responsible for setting children and input type information.
         */
        ve.setOutputTypeInfo(returnTypeInfo);
        ve.setOutputDataTypePhysicalVariation(returnDataTypePhysicalVariation);

      } catch (Exception ex) {
        throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " +
            getStackTraceAsSingleLine(ex));
      }
    }
    // Add maxLength parameter to UDFs that have CHAR or VARCHAR output.
    if (ve instanceof TruncStringOutput) {
      TruncStringOutput truncStringOutput = (TruncStringOutput) ve;
      if (returnTypeInfo instanceof BaseCharTypeInfo) {
        BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) returnTypeInfo;
        truncStringOutput.setMaxLength(baseCharTypeInfo.getLength());
      }
    }
    return ve;
  }

  /**
   * Entry point for vectorizing a GenericUDF call: constants in the children
   * are pre-evaluated/cast, then a series of special-case handlers is tried
   * (BETWEEN filter, IN, IF/WHEN, unary plus as identity, COALESCE/NVL, ELT,
   * bridged UDFs, string/decimal/char/varchar/timestamp casts). Each handler
   * returns null when it cannot vectorize its case, in which case the general
   * descriptor-based lookup in {@code getVectorExpressionForUdf} runs. For
   * {@code GenericUDFBridge}, the wrapped UDF class is substituted and a null
   * GenericUDF is passed to the general lookup.
   *
   * @return a vectorized expression, or null when nothing matched
   * @throws HiveException propagated from handlers and the general lookup
   */
  private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
      List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {

    List<ExprNodeDesc> castedChildren = evaluateCastOnConstants(childExpr);
    childExpr = castedChildren;

    //First handle special cases. If one of the special case methods cannot handle it,
    // it returns null.
    VectorExpression ve = null;
    if (udf instanceof GenericUDFBetween && mode == VectorExpressionDescriptor.Mode.FILTER) {
      ve = getBetweenFilterExpression(childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFIn) {
      ve = getInExpression(childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFIf) {
      ve = getIfExpression((GenericUDFIf) udf, childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFWhen) {
      ve = getWhenExpression(childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFOPPositive) {
      // Unary plus is a no-op: pass the child through unchanged.
      ve = getIdentityExpression(childExpr);
    } else if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl) {

      // Coalesce is a special case because it can take variable number of arguments.
      // Nvl is a specialization of the Coalesce.
      ve = getCoalesceExpression(childExpr, returnType);
    } else if (udf instanceof GenericUDFElt) {

      // Elt is a special case because it can take variable number of arguments.
      ve = getEltExpression(childExpr, returnType);
    } else if (udf instanceof GenericUDFBridge) {
      ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode,
          returnType);
    } else if (udf instanceof GenericUDFToString) {
      ve = getCastToString(childExpr, returnType);
    } else if (udf instanceof GenericUDFToDecimal) {
      ve = getCastToDecimal(childExpr, returnType);
    } else if (udf instanceof GenericUDFToChar) {
      ve = getCastToChar(childExpr, returnType);
    } else if (udf instanceof GenericUDFToVarchar) {
      ve = getCastToVarChar(childExpr, returnType);
    } else if (udf instanceof GenericUDFTimestamp) {
      ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType);
    }
    if (ve != null) {
      return ve;
    }
    // Now do a general lookup
    Class<?> udfClass = udf.getClass();
    boolean isSubstituted = false;
    if (udf instanceof GenericUDFBridge) {
      udfClass = ((GenericUDFBridge) udf).getUdfClass();
      isSubstituted = true;
    }

    ve = getVectorExpressionForUdf((!isSubstituted ? udf : null),
        udfClass, castedChildren, mode, returnType);

    return ve;
  }

  /**
   * Vectorizes a cast-to-timestamp. When the UDF is configured to interpret
   * integers as milliseconds (not seconds) and the general lookup produced a
   * {@code CastLongToTimestamp}, it is replaced with
   * {@code CastMillisecondsLongToTimestamp}.
   */
  private VectorExpression getCastToTimestamp(GenericUDFTimestamp udf,
      List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    VectorExpression ve = getVectorExpressionForUdf(udf, udf.getClass(), childExpr, mode, returnType);

    // Replace with the milliseconds conversion
    if (!udf.isIntToTimestampInSeconds() && ve instanceof CastLongToTimestamp) {
      ve = createVectorExpression(CastMillisecondsLongToTimestamp.class,
          childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
    }

    return ve;
  }

  /**
   * Frees the scratch output columns of the given child expressions, skipping
   * {@code IdentityExpression}s (whose "output" is a real input column, not a
   * scratch column owned by {@code ocm}).
   */
  private void freeNonColumns(VectorExpression[] vectorChildren) {
    if (vectorChildren == null) {
      return;
    }
    for (VectorExpression v : vectorChildren) {
      if (!(v instanceof IdentityExpression)) {
        ocm.freeOutputColumn(v.getOutputColumnNum());
      }
    }
  }

  private VectorExpression getCoalesceExpression(List<ExprNodeDesc> childExpr, TypeInfo returnType)
      throws HiveException {
    int[] inputColumns = new int[childExpr.size()];
    VectorExpression[] vectorChildren =
        getVectorExpressions(childExpr, VectorExpressionDescriptor.Mode.PROJECTION);

    final int size = vectorChildren.length;
    TypeInfo[] inputTypeInfos = new TypeInfo[size];
    DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[size];
    int i = 0;
    for (VectorExpression ve : vectorChildren) {
      inputColumns[i] = ve.getOutputCo
<TRUNCATED>