http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index de0300a..894ef59 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.sql.Date; +import java.sql.Timestamp; import java.util.List; import org.slf4j.Logger; @@ -28,6 +30,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -39,609 +42,689 @@ import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -import org.apache.hive.common.util.DateUtils; +import org.apache.hadoop.io.Writable; + +import com.google.common.base.Preconditions; + /** - * This class assigns specified columns of a row from a Writable row Object[]. - * - * The caller provides the hive type names and target column numbers in the order desired to - * assign from the Writable row Object[]. + * This class assigns specified columns of a row from a Writable row objects. * - * This class is abstract to allow the subclasses to control batch reuse. + * The caller provides the data types and projection column numbers of a subset of the columns + * to assign. */ -public abstract class VectorAssignRow { +public class VectorAssignRow { + private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(VectorAssignRow.class); - protected abstract class Assigner { - protected int columnIndex; - - Assigner(int columnIndex) { - this.columnIndex = columnIndex; - } - - public int getColumnIndex() { - return columnIndex; - } - - abstract void setColumnVector(VectorizedRowBatch batch); - - abstract void forgetColumnVector(); - - abstract void assign(int batchIndex, Object object); - } - - private class VoidAssigner extends Assigner { - - VoidAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - } - - @Override - void forgetColumnVector() { - } - - @Override - void assign(int batchIndex, Object object) { - // This is no-op, there is no column to assign to and the object is expected to be null. - assert (object == null); - } + /* + * These members have information for assigning a row column objects into the VectorizedRowBatch + * columns. + * + * We say "target" because when there is conversion the data type being converted is the source. + */ + boolean[] isConvert; + // For each column, are we converting the row column object? + + int[] projectionColumnNums; + // Assigning can be a subset of columns, so this is the projection -- + // the batch column numbers. + + Category[] targetCategories; + // The data type category of each column being assigned. + + PrimitiveCategory[] targetPrimitiveCategories; + // The data type primitive category of each column being assigned. + + int[] maxLengths; + // For the CHAR and VARCHAR data types, the maximum character length of + // the columns. Otherwise, 0. + + /* + * These members have information for data type conversion. + * Not defined if there is no conversion. + */ + PrimitiveObjectInspector[] convertSourcePrimitiveObjectInspectors; + // The primitive object inspector of the source data type for any column being + // converted. Otherwise, null. + + Writable[] convertTargetWritables; + // Conversion to the target data type requires a "helper" target writable in a + // few cases. + + /* + * Allocate the target related arrays. + */ + private void allocateArrays(int count) { + isConvert = new boolean[count]; + projectionColumnNums = new int[count]; + targetCategories = new Category[count]; + targetPrimitiveCategories = new PrimitiveCategory[count]; + maxLengths = new int[count]; } - private abstract class AbstractLongAssigner extends Assigner { - - protected LongColumnVector colVector; - protected long[] vector; - - AbstractLongAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (LongColumnVector) batch.cols[columnIndex]; - vector = colVector.vector; - } - - @Override - void forgetColumnVector() { - colVector = null; - vector = null; - } + /* + * Allocate the source conversion related arrays (optional). + */ + private void allocateConvertArrays(int count) { + convertSourcePrimitiveObjectInspectors = new PrimitiveObjectInspector[count]; + convertTargetWritables = new Writable[count]; } - protected class BooleanAssigner extends AbstractLongAssigner { - - BooleanAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - BooleanWritable bw = (BooleanWritable) object; - vector[batchIndex] = (bw.get() ? 1 : 0); - colVector.isNull[batchIndex] = false; + /* + * Initialize one column's target related arrays. + */ + private void initTargetEntry(int logicalColumnIndex, int projectionColumnNum, TypeInfo typeInfo) { + isConvert[logicalColumnIndex] = false; + projectionColumnNums[logicalColumnIndex] = projectionColumnNum; + Category category = typeInfo.getCategory(); + targetCategories[logicalColumnIndex] = category; + if (category == Category.PRIMITIVE) { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + targetPrimitiveCategories[logicalColumnIndex] = primitiveCategory; + switch (primitiveCategory) { + case CHAR: + maxLengths[logicalColumnIndex] = ((CharTypeInfo) primitiveTypeInfo).getLength(); + break; + case VARCHAR: + maxLengths[logicalColumnIndex] = ((VarcharTypeInfo) primitiveTypeInfo).getLength(); + break; + default: + // No additional data type specific setting. + break; } } } - protected class ByteAssigner extends AbstractLongAssigner { - - ByteAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - ByteWritable bw = (ByteWritable) object; - vector[batchIndex] = bw.get(); - colVector.isNull[batchIndex] = false; + /* + * Initialize one column's source conversion related arrays. + * Assumes initTargetEntry has already been called. + */ + private void initConvertSourceEntry(int logicalColumnIndex, TypeInfo convertSourceTypeInfo) { + isConvert[logicalColumnIndex] = true; + Category convertSourceCategory = convertSourceTypeInfo.getCategory(); + if (convertSourceCategory == Category.PRIMITIVE) { + PrimitiveTypeInfo convertSourcePrimitiveTypeInfo = (PrimitiveTypeInfo) convertSourceTypeInfo; + convertSourcePrimitiveObjectInspectors[logicalColumnIndex] = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + convertSourcePrimitiveTypeInfo); + + // These need to be based on the target. + PrimitiveCategory targetPrimitiveCategory = targetPrimitiveCategories[logicalColumnIndex]; + switch (targetPrimitiveCategory) { + case DATE: + convertTargetWritables[logicalColumnIndex] = new DateWritable(); + break; + case STRING: + convertTargetWritables[logicalColumnIndex] = new Text(); + break; + default: + // No additional data type specific setting. + break; } } } - private class ShortAssigner extends AbstractLongAssigner { + /* + * Initialize using an StructObjectInspector and a column projection list. + */ + public void init(StructObjectInspector structObjectInspector, List<Integer> projectedColumns) + throws HiveException { - ShortAssigner(int columnIndex) { - super(columnIndex); - } + List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs(); + final int count = fields.size(); + allocateArrays(count); - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - ShortWritable sw = (ShortWritable) object; - vector[batchIndex] = sw.get(); - colVector.isNull[batchIndex] = false; - } - } - } + for (int i = 0; i < count; i++) { - private class IntAssigner extends AbstractLongAssigner { + int projectionColumnNum = projectedColumns.get(i); - IntAssigner(int columnIndex) { - super(columnIndex); - } + StructField field = fields.get(i); + ObjectInspector fieldInspector = field.getFieldObjectInspector(); + TypeInfo typeInfo = + TypeInfoUtils.getTypeInfoFromTypeString(fieldInspector.getTypeName()); - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - IntWritable iw = (IntWritable) object; - vector[batchIndex] = iw.get(); - colVector.isNull[batchIndex] = false; - } + initTargetEntry(i, projectionColumnNum, typeInfo); } } - private class LongAssigner extends AbstractLongAssigner { - - LongAssigner(int columnIndex) { - super(columnIndex); - } + /* + * Initialize using an StructObjectInspector. + * No projection -- the column range 0 .. fields.size()-1 + */ + public void init(StructObjectInspector structObjectInspector) throws HiveException { - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - LongWritable lw = (LongWritable) object; - vector[batchIndex] = lw.get(); - colVector.isNull[batchIndex] = false; - } - } - } + List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs(); + final int count = fields.size(); + allocateArrays(count); - private class DateAssigner extends AbstractLongAssigner { + for (int i = 0; i < count; i++) { - DateAssigner(int columnIndex) { - super(columnIndex); - } + StructField field = fields.get(i); + ObjectInspector fieldInspector = field.getFieldObjectInspector(); + TypeInfo typeInfo = + TypeInfoUtils.getTypeInfoFromTypeString(fieldInspector.getTypeName()); - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - DateWritable bw = (DateWritable) object; - vector[batchIndex] = bw.getDays(); - colVector.isNull[batchIndex] = false; - } + initTargetEntry(i, i, typeInfo); } } - private abstract class AbstractTimestampAssigner extends Assigner { + /* + * Initialize using target data type names. + * No projection -- the column range 0 .. types.size()-1 + */ + public void init(List<String> typeNames) throws HiveException { - protected TimestampColumnVector colVector; + final int count = typeNames.size(); + allocateArrays(count); - AbstractTimestampAssigner(int columnIndex) { - super(columnIndex); - } + for (int i = 0; i < count; i++) { - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (TimestampColumnVector) batch.cols[columnIndex]; - } + TypeInfo typeInfo = + TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i)); - @Override - void forgetColumnVector() { - colVector = null; + initTargetEntry(i, i, typeInfo); } } - private class TimestampAssigner extends AbstractTimestampAssigner { + /** + * Initialize for conversion from a provided (source) data types to the target data types + * desired in the VectorizedRowBatch. + * + * No projection -- the column range 0 .. count-1 + * + * where count is the minimum of the target data type array size, included array size, + * and source data type array size. + * + * @param sourceTypeInfos + * @param targetTypeInfos + * @param columnsToIncludeTruncated + * Flag array indicating which columns are to be included. + * "Truncated" because all false entries on the end of the array have been + * eliminated. + * @return the minimum count described above is returned. That is, the number of columns + * that will be processed by assign. + */ + public int initConversion(TypeInfo[] sourceTypeInfos, TypeInfo[] targetTypeInfos, + boolean[] columnsToIncludeTruncated) { + + int targetColumnCount; + if (columnsToIncludeTruncated == null) { + targetColumnCount = targetTypeInfos.length; + } else { + targetColumnCount = Math.min(targetTypeInfos.length, columnsToIncludeTruncated.length); + } + + int sourceColumnCount = Math.min(sourceTypeInfos.length, targetColumnCount); + + allocateArrays(sourceColumnCount); + allocateConvertArrays(sourceColumnCount); + + for (int i = 0; i < sourceColumnCount; i++) { + + if (columnsToIncludeTruncated != null && !columnsToIncludeTruncated[i]) { + + // Field not included in query. - TimestampAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); } else { - colVector.set(batchIndex, ((TimestampWritable) object).getTimestamp()); - colVector.isNull[batchIndex] = false; - } - } - } + TypeInfo targetTypeInfo = targetTypeInfos[i]; - private class IntervalYearMonthAssigner extends AbstractLongAssigner { + if (targetTypeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) { - IntervalYearMonthAssigner(int columnIndex) { - super(columnIndex); - } + // For now, we don't have an assigner for complex types... - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - HiveIntervalYearMonthWritable iymw = (HiveIntervalYearMonthWritable) object; - HiveIntervalYearMonth iym = iymw.getHiveIntervalYearMonth(); - vector[batchIndex] = iym.getTotalMonths(); - colVector.isNull[batchIndex] = false; - } - } - } + } else { + TypeInfo sourceTypeInfo = sourceTypeInfos[i]; - private abstract class AbstractIntervalDayTimeAssigner extends Assigner { + if (!sourceTypeInfo.equals(targetTypeInfo)) { - protected IntervalDayTimeColumnVector colVector; + if (VectorPartitionConversion.isImplicitVectorColumnConversion( + sourceTypeInfo, targetTypeInfo)) { - AbstractIntervalDayTimeAssigner(int columnIndex) { - super(columnIndex); - } + // Do implicit conversion accepting the source type and putting it in the same + // target type ColumnVector type. + initTargetEntry(i, i, sourceTypeInfo); - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (IntervalDayTimeColumnVector) batch.cols[columnIndex]; - } + } else { - @Override - void forgetColumnVector() { - colVector = null; - } - } + // Do formal conversion... + initTargetEntry(i, i, targetTypeInfo); + initConvertSourceEntry(i, sourceTypeInfo); - private class IntervalDayTimeAssigner extends AbstractIntervalDayTimeAssigner { + } + } else { - IntervalDayTimeAssigner(int columnIndex) { - super(columnIndex); - } + // No conversion. + initTargetEntry(i, i, targetTypeInfo); - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - HiveIntervalDayTimeWritable idtw = (HiveIntervalDayTimeWritable) object; - HiveIntervalDayTime idt = idtw.getHiveIntervalDayTime(); - colVector.set(batchIndex, idt); - colVector.isNull[batchIndex] = false; + } + } } } - } - - private abstract class AbstractDoubleAssigner extends Assigner { - - protected DoubleColumnVector colVector; - protected double[] vector; - - AbstractDoubleAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (DoubleColumnVector) batch.cols[columnIndex]; - vector = colVector.vector; - } - - @Override - void forgetColumnVector() { - colVector = null; - vector = null; - } - } - - private class FloatAssigner extends AbstractDoubleAssigner { - - FloatAssigner(int columnIndex) { - super(columnIndex); - } - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - FloatWritable fw = (FloatWritable) object; - vector[batchIndex] = fw.get(); - colVector.isNull[batchIndex] = false; - } - } + return sourceColumnCount; } - private class DoubleAssigner extends AbstractDoubleAssigner { - - DoubleAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - DoubleWritable dw = (DoubleWritable) object; - vector[batchIndex] = dw.get(); - colVector.isNull[batchIndex] = false; + /** + * Assign a row's column object to the ColumnVector at batchIndex in the VectorizedRowBatch. + * + * @param batch + * @param batchIndex + * @param logicalColumnIndex + * @param object The row column object whose type is the target data type. + */ + public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex, + Object object) { + final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + } + Category targetCategory = targetCategories[logicalColumnIndex]; + if (targetCategory == null) { + /* + * This is a column that we don't want (i.e. not included) -- we are done. + */ + return; + } + switch (targetCategory) { + case PRIMITIVE: + { + PrimitiveCategory targetPrimitiveCategory = targetPrimitiveCategories[logicalColumnIndex]; + switch (targetPrimitiveCategory) { + case VOID: + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + case BOOLEAN: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + (((BooleanWritable) object).get() ? 1 : 0); + break; + case BYTE: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((ByteWritable) object).get(); + break; + case SHORT: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((ShortWritable) object).get(); + break; + case INT: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((IntWritable) object).get(); + break; + case LONG: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongWritable) object).get(); + break; + case TIMESTAMP: + ((TimestampColumnVector) batch.cols[projectionColumnNum]).set( + batchIndex, ((TimestampWritable) object).getTimestamp()); + break; + case DATE: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((DateWritable) object).getDays(); + break; + case FLOAT: + ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((FloatWritable) object).get(); + break; + case DOUBLE: + ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((DoubleWritable) object).get(); + break; + case BINARY: + { + BytesWritable bw = (BytesWritable) object; + ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + batchIndex, bw.getBytes(), 0, bw.getLength()); + } + break; + case STRING: + { + Text tw = (Text) object; + ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + batchIndex, tw.getBytes(), 0, tw.getLength()); + } + break; + case VARCHAR: + { + // UNDONE: Performance problem with conversion to String, then bytes... + + // We store VARCHAR type stripped of pads. + HiveVarchar hiveVarchar; + if (object instanceof HiveVarchar) { + hiveVarchar = (HiveVarchar) object; + } else { + hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar(); + } + + // TODO: HIVE-13624 Do we need maxLength checking? + + byte[] bytes = hiveVarchar.getValue().getBytes(); + ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + batchIndex, bytes, 0, bytes.length); + } + break; + case CHAR: + { + // UNDONE: Performance problem with conversion to String, then bytes... + + // We store CHAR type stripped of pads. + HiveChar hiveChar; + if (object instanceof HiveChar) { + hiveChar = (HiveChar) object; + } else { + hiveChar = ((HiveCharWritable) object).getHiveChar(); + } + + // TODO: HIVE-13624 Do we need maxLength checking? + + // We store CHAR in vector row batch with padding stripped. + byte[] bytes = hiveChar.getStrippedValue().getBytes(); + ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + batchIndex, bytes, 0, bytes.length); + } + break; + case DECIMAL: + if (object instanceof HiveDecimal) { + ((DecimalColumnVector) batch.cols[projectionColumnNum]).set( + batchIndex, (HiveDecimal) object); + } else { + ((DecimalColumnVector) batch.cols[projectionColumnNum]).set( + batchIndex, (HiveDecimalWritable) object); + } + break; + case INTERVAL_YEAR_MONTH: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth().getTotalMonths(); + break; + case INTERVAL_DAY_TIME: + ((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).set( + batchIndex, ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime()); + break; + default: + throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + + " not supported"); + } } - } - } - - private abstract class AbstractBytesAssigner extends Assigner { - - protected BytesColumnVector colVector; - - AbstractBytesAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (BytesColumnVector) batch.cols[columnIndex]; - } - - @Override - void forgetColumnVector() { - colVector = null; - } - } - - private class BinaryAssigner extends AbstractBytesAssigner { - - BinaryAssigner(int columnIndex) { - super(columnIndex); + break; + default: + throw new RuntimeException("Category " + targetCategory.name() + " not supported"); } - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - BytesWritable bw = (BytesWritable) object; - colVector.setVal(batchIndex, bw.getBytes(), 0, bw.getLength()); - colVector.isNull[batchIndex] = false; - } - } + /* + * We always set the null flag to false when there is a value. + */ + batch.cols[projectionColumnNum].isNull[batchIndex] = false; } - private class StringAssigner extends AbstractBytesAssigner { - - StringAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - Text tw = (Text) object; - colVector.setVal(batchIndex, tw.getBytes(), 0, tw.getLength()); - colVector.isNull[batchIndex] = false; + /** + * Convert row's column object and then assign it the ColumnVector at batchIndex + * in the VectorizedRowBatch. + * + * Public so VectorDeserializeRow can use this method to convert a row's column object. + * + * @param batch + * @param batchIndex + * @param logicalColumnIndex + * @param object The row column object whose type is the VectorAssignRow.initConversion + * source data type. + * + */ + public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, + int logicalColumnIndex, Object object) { + Preconditions.checkState(isConvert[logicalColumnIndex]); + final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + } + try { + Category targetCategory = targetCategories[logicalColumnIndex]; + if (targetCategory == null) { + /* + * This is a column that we don't want (i.e. not included) -- we are done. + */ + return; } - } - } - - private class VarCharAssigner extends AbstractBytesAssigner { - - VarCharAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - // We store VARCHAR type stripped of pads. - HiveVarchar hiveVarchar; - if (object instanceof HiveVarchar) { - hiveVarchar = (HiveVarchar) object; - } else { - hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar(); + switch (targetCategory) { + case PRIMITIVE: + PrimitiveCategory targetPrimitiveCategory = targetPrimitiveCategories[logicalColumnIndex]; + switch (targetPrimitiveCategory) { + case VOID: + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + case BOOLEAN: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + (PrimitiveObjectInspectorUtils.getBoolean( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]) ? 1 : 0); + break; + case BYTE: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getByte( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + break; + case SHORT: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getShort( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + break; + case INT: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getInt( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + break; + case LONG: + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getLong( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + break; + case TIMESTAMP: + { + Timestamp timestamp = + PrimitiveObjectInspectorUtils.getTimestamp( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + if (timestamp == null) { + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + } + ((TimestampColumnVector) batch.cols[projectionColumnNum]).set( + batchIndex, timestamp); + } + break; + case DATE: + { + Date date = PrimitiveObjectInspectorUtils.getDate( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + if (date == null) { + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + } + DateWritable dateWritable = (DateWritable) convertTargetWritables[logicalColumnIndex]; + dateWritable.set(date); + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + dateWritable.getDays(); + } + break; + case FLOAT: + ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getFloat( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + break; + case DOUBLE: + ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getDouble( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + break; + case BINARY: + { + BytesWritable bytesWritable = + PrimitiveObjectInspectorUtils.getBinary( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + if (bytesWritable == null) { + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + } + ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + batchIndex, bytesWritable.getBytes(), 0, bytesWritable.getLength()); + } + break; + case STRING: + { + String string = PrimitiveObjectInspectorUtils.getString( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + if (string == null) { + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + } + Text text = (Text) convertTargetWritables[logicalColumnIndex]; + text.set(string); + ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + batchIndex, text.getBytes(), 0, text.getLength()); + } + break; + case VARCHAR: + { + // UNDONE: Performance problem with conversion to String, then bytes... + + HiveVarchar hiveVarchar = + PrimitiveObjectInspectorUtils.getHiveVarchar( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + if (hiveVarchar == null) { + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + } + + // TODO: Do we need maxLength checking? + + byte[] bytes = hiveVarchar.getValue().getBytes(); + ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + batchIndex, bytes, 0, bytes.length); + } + break; + case CHAR: + { + // UNDONE: Performance problem with conversion to String, then bytes... + + HiveChar hiveChar = + PrimitiveObjectInspectorUtils.getHiveChar( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + if (hiveChar == null) { + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + } + // We store CHAR in vector row batch with padding stripped. + + // TODO: Do we need maxLength checking? + + byte[] bytes = hiveChar.getStrippedValue().getBytes(); + ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + batchIndex, bytes, 0, bytes.length); + } + break; + case DECIMAL: + { + HiveDecimal hiveDecimal = + PrimitiveObjectInspectorUtils.getHiveDecimal( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + if (hiveDecimal == null) { + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + } + ((DecimalColumnVector) batch.cols[projectionColumnNum]).set( + batchIndex, hiveDecimal); + } + break; + case INTERVAL_YEAR_MONTH: + { + HiveIntervalYearMonth intervalYearMonth = + PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + if (intervalYearMonth == null) { + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + } + ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + intervalYearMonth.getTotalMonths(); + } + break; + case INTERVAL_DAY_TIME: + { + HiveIntervalDayTime intervalDayTime = + PrimitiveObjectInspectorUtils.getHiveIntervalDayTime( + object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + if (intervalDayTime == null) { + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; + } + ((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).set( + batchIndex, intervalDayTime); + } + break; + default: + throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + + " not supported"); } - byte[] bytes = hiveVarchar.getValue().getBytes(); - colVector.setVal(batchIndex, bytes, 0, bytes.length); - colVector.isNull[batchIndex] = false; + break; + default: + throw new RuntimeException("Category " + targetCategory.name() + " not supported"); } - } - } - - private class CharAssigner extends AbstractBytesAssigner { + } catch (NumberFormatException e) { - CharAssigner(int columnIndex) { - super(columnIndex); + // Some of the conversion methods throw this exception on numeric parsing errors. + VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + return; } - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - // We store CHAR type stripped of pads. - HiveChar hiveChar; - if (object instanceof HiveChar) { - hiveChar = (HiveChar) object; - } else { - hiveChar = ((HiveCharWritable) object).getHiveChar(); - } - - // We store CHAR in vector row batch with padding stripped. - byte[] bytes = hiveChar.getStrippedValue().getBytes(); - colVector.setVal(batchIndex, bytes, 0, bytes.length); - colVector.isNull[batchIndex] = false; - } - } + // We always set the null flag to false when there is a value. + batch.cols[projectionColumnNum].isNull[batchIndex] = false; } - private class DecimalAssigner extends Assigner { - - protected DecimalColumnVector colVector; - - DecimalAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (DecimalColumnVector) batch.cols[columnIndex]; - } - - @Override - void forgetColumnVector() { - colVector = null; - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + /* + * Assign a row from an array of objects. + */ + public void assignRow(VectorizedRowBatch batch, int batchIndex, Object[] objects) { + final int count = isConvert.length; + for (int i = 0; i < count; i++) { + if (isConvert[i]) { + assignConvertRowColumn(batch, batchIndex, i, objects[i]); } else { - if (object instanceof HiveDecimal) { - colVector.set(batchIndex, (HiveDecimal) object); - } else { - colVector.set(batchIndex, (HiveDecimalWritable) object); - } - colVector.isNull[batchIndex] = false; + assignRowColumn(batch, batchIndex, i, objects[i]); } } } - private Assigner createAssigner(PrimitiveTypeInfo primitiveTypeInfo, int columnIndex) throws HiveException { - PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); - Assigner assigner; - switch (primitiveCategory) { - case VOID: - assigner = new VoidAssigner(columnIndex); - break; - case BOOLEAN: - assigner = new BooleanAssigner(columnIndex); - break; - case BYTE: - assigner = new ByteAssigner(columnIndex); - break; - case SHORT: - assigner = new ShortAssigner(columnIndex); - break; - case INT: - assigner = new IntAssigner(columnIndex); - break; - case LONG: - assigner = new LongAssigner(columnIndex); - break; - case TIMESTAMP: - assigner = new TimestampAssigner(columnIndex); - break; - case DATE: - assigner = new DateAssigner(columnIndex); - break; - case FLOAT: - assigner = new FloatAssigner(columnIndex); - break; - case DOUBLE: - assigner = new DoubleAssigner(columnIndex); - break; - case BINARY: - assigner = new BinaryAssigner(columnIndex); - break; - case STRING: - assigner = new StringAssigner(columnIndex); - break; - case VARCHAR: - assigner = new VarCharAssigner(columnIndex); - break; - case CHAR: - assigner = new CharAssigner(columnIndex); - break; - case DECIMAL: - assigner = new DecimalAssigner(columnIndex); - break; - case INTERVAL_YEAR_MONTH: - assigner = new IntervalYearMonthAssigner(columnIndex); - break; - case INTERVAL_DAY_TIME: - assigner = new IntervalDayTimeAssigner(columnIndex); - break; - default: - throw new HiveException("No vector row assigner for primitive category " + - primitiveCategory); - } - return assigner; - } - - Assigner[] assigners; - - public void init(StructObjectInspector structObjectInspector, List<Integer> projectedColumns) throws HiveException { - - List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs(); - assigners = new Assigner[fields.size()]; - - int i = 0; - for (StructField field : fields) { - int columnIndex = projectedColumns.get(i); - ObjectInspector fieldInspector = field.getFieldObjectInspector(); - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString( - fieldInspector.getTypeName()); - assigners[i] = createAssigner(primitiveTypeInfo, columnIndex); - i++; - } - } - - public void init(List<String> typeNames) throws HiveException { - - assigners = new Assigner[typeNames.size()]; - - int i = 0; - for (String typeName : typeNames) { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); - assigners[i] = createAssigner(primitiveTypeInfo, i); - i++; - } - } + /* + * Assign a row from a list of standard objects up to a count + */ + public void assignRow(VectorizedRowBatch batch, int batchIndex, + List<Object> standardObjects, int columnCount) { - protected void setBatch(VectorizedRowBatch batch) throws HiveException { - for (int i = 0; i < assigners.length; i++) { - Assigner assigner = assigners[i]; - int columnIndex = assigner.getColumnIndex(); - if (batch.cols[columnIndex] == null) { - throw new HiveException("Unexpected null vector column " + columnIndex); + for (int i = 0; i < columnCount; i++) { + if (isConvert[i]) { + assignConvertRowColumn(batch, batchIndex, i, standardObjects.get(i)); + } else { + assignRowColumn(batch, batchIndex, i, standardObjects.get(i)); } - assigner.setColumnVector(batch); - } - } - - protected void forgetBatch() { - for (Assigner assigner : assigners) { - assigner.forgetColumnVector(); } } - - public void assignRowColumn(int batchIndex, int logicalColumnIndex, Object object) { - assigners[logicalColumnIndex].assign(batchIndex, object); - } - - public void assignRow(int batchIndex, Object[] objects) { - int i = 0; - for (Assigner assigner : assigners) { - assigner.assign(batchIndex, objects[i++]); - } - } - -} +} \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java deleted file mode 100644 index a696825..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import org.apache.hadoop.hive.ql.metadata.HiveException; - -/** - * This class assigns specified columns of a VectorizedRowBatch row from a Writable row Object[]. - * - * The caller provides the hive type names and target column numbers in the order desired to - * assign from the Writable row Object[]. - * - * This class is for use when the batch being assigned may change each time before processOp - * is called. - */ -public class VectorAssignRowDynBatch extends VectorAssignRow { - - public void setBatchOnEntry(VectorizedRowBatch batch) throws HiveException { - setBatch(batch); - } - - public void forgetBatchOnExit() { - forgetBatch(); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java deleted file mode 100644 index 8c7c2ad..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import org.apache.hadoop.hive.ql.metadata.HiveException; - -/** - * This class assigns specified columns of a VectorizedRowBatch row from a Writable row Object[]. - * - * The caller provides the hive type names and target column numbers in the order desired to - * assign from the Writable row Object[]. - * - * This class is for use when the batch being assigned is always the same. - */ -public class VectorAssignRowSameBatch extends VectorAssignRow { - - public void setOneBatch(VectorizedRowBatch batch) throws HiveException { - setBatch(batch); - } -} \ No newline at end of file
