http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 1ddd9be..4a156a2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -29,7 +29,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.Properties; import java.util.Set; import java.util.Stack; import java.util.regex.Pattern; @@ -38,7 +37,6 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; @@ -98,7 +96,6 @@ import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; @@ -113,6 +110,7 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; @@ -160,8 +158,12 @@ import org.apache.hadoop.hive.ql.udf.UDFYear; import org.apache.hadoop.hive.ql.udf.generic.*; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.NullStructSerDe; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -169,8 +171,10 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.mapred.SequenceFileInputFormat; +import org.apache.hadoop.mapred.TextInputFormat; -import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; public class Vectorizer implements PhysicalPlanResolver { @@ -184,8 +188,15 @@ public class Vectorizer implements PhysicalPlanResolver { Set<String> supportedAggregationUdfs = new HashSet<String>(); private HiveConf hiveConf; + private boolean isSpark; + boolean useVectorizedInputFileFormat; + boolean useVectorDeserialize; + boolean useRowDeserialize; + + boolean isSchemaEvolution; + public Vectorizer() { StringBuilder patternBuilder = new StringBuilder(); @@ -341,6 +352,7 @@ public class Vectorizer implements PhysicalPlanResolver { List<String> columnNames; List<TypeInfo> typeInfos; int partitionColumnCount; + boolean useVectorizedInputFileFormat; String[] scratchTypeNameArray; @@ -362,7 +374,9 @@ public class Vectorizer implements PhysicalPlanResolver { public void setScratchTypeNameArray(String[] scratchTypeNameArray) { this.scratchTypeNameArray = scratchTypeNameArray; } - + public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { + this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + } public void setNonVectorizedOps(Set<Operator<? extends OperatorDesc>> nonVectorizedOps) { this.nonVectorizedOps = nonVectorizedOps; } @@ -383,6 +397,8 @@ public class Vectorizer implements PhysicalPlanResolver { partitionColumnCount, scratchTypeNameArray); baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx); + + baseWork.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); } } @@ -443,6 +459,10 @@ public class Vectorizer implements PhysicalPlanResolver { + ReduceSinkOperator.getOperatorName()), np); } + /* + * Determine if there is only one TableScanOperator. Currently in Map vectorization, we do not + * try to vectorize multiple input trees. + */ private ImmutablePair<String, TableScanOperator> verifyOnlyOneTableScanOperator(MapWork mapWork) { // Eliminate MR plans with more than one TableScanOperator. @@ -476,8 +496,6 @@ public class Vectorizer implements PhysicalPlanResolver { private void getTableScanOperatorSchemaInfo(TableScanOperator tableScanOperator, List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList) { - TableScanDesc tableScanDesc = tableScanOperator.getConf(); - // Add all non-virtual columns to make a vectorization context for // the TableScan operator. RowSchema rowSchema = tableScanOperator.getSchema(); @@ -494,35 +512,141 @@ public class Vectorizer implements PhysicalPlanResolver { } } - private String getColumns(List<String> columnNames, int start, int length, - Character separator) { - return Joiner.on(separator).join(columnNames.subList(start, start + length)); - } - - private String getTypes(List<TypeInfo> typeInfos, int start, int length) { - return TypeInfoUtils.getTypesString(typeInfos.subList(start, start + length)); - } - - private boolean verifyAndSetVectorPartDesc(PartitionDesc pd) { + private String getHiveOptionsString() { + StringBuilder sb = new StringBuilder(); + sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname); + sb.append("="); + sb.append(useVectorizedInputFileFormat); + sb.append(", "); + sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname); + sb.append("="); + sb.append(useVectorDeserialize); + sb.append(", and "); + sb.append(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE.varname); + sb.append("="); + sb.append(useRowDeserialize); + return sb.toString(); + } + + /* + * There are 3 modes of reading for vectorization: + * + * 1) One for the Vectorized Input File Format which returns VectorizedRowBatch as the row. + * + * 2) One for using VectorDeserializeRow to deserialize each row into the VectorizedRowBatch. + * Currently, these Input File Formats: + * TEXTFILE + * SEQUENCEFILE + * + * 3) And one using the regular partition deserializer to get the row object and assigning + * the row object into the VectorizedRowBatch with VectorAssignRow. + * This picks up Input File Format not supported by the other two. + */ + private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable) { + + String inputFileFormatClassName = pd.getInputFileFormatClassName(); // Look for Pass-Thru case where InputFileFormat has VectorizedInputFormatInterface // and reads VectorizedRowBatch as a "row". - if (Utilities.isInputFileFormatVectorized(pd)) { + if (isAcidTable || useVectorizedInputFileFormat) { + + if (Utilities.isInputFileFormatVectorized(pd)) { + + if (!useVectorizedInputFileFormat) { + LOG.info("ACID tables con only be vectorized for the input file format -- " + + "i.e. when Hive Configuration option " + + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname + + "=true"); + return false; + } + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorizedInputFileFormat( + inputFileFormatClassName, Utilities.isInputFileFormatSelfDescribing(pd))); + + return true; + } + + // Today, ACID tables are only ORC and that format is vectorizable. Verify this + // assumption. + Preconditions.checkState(!isAcidTable); + } + + if (!(isSchemaEvolution || isAcidTable) && + (useVectorDeserialize || useRowDeserialize)) { + LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized" + + " when both " + HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION.varname + "=false and " + + " ACID table is " + isAcidTable + " and " + + " given the Hive Configuration options " + getHiveOptionsString()); + return false; + } + + String deserializerClassName = pd.getDeserializerClassName(); + + // Look for InputFileFormat / Serde combinations we can deserialize more efficiently + // using VectorDeserializeRow and a deserialize class with the DeserializeRead interface. + // + // Do the "vectorized" row-by-row deserialization into a VectorizedRowBatch in the + // VectorMapOperator. + + if (useVectorDeserialize) { + + // Currently, we support LazySimple deserialization: + // + // org.apache.hadoop.mapred.TextInputFormat + // org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + // + // AND + // + // org.apache.hadoop.mapred.SequenceFileInputFormat + // org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + if (inputFileFormatClassName.equals(TextInputFormat.class.getName()) && + deserializerClassName.equals(LazySimpleSerDe.class.getName())) { + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorDeserialize( + inputFileFormatClassName, VectorDeserializeType.LAZY_SIMPLE)); + + return true; + } else if (inputFileFormatClassName.equals(SequenceFileInputFormat.class.getName()) && + deserializerClassName.equals(LazyBinarySerDe.class.getName())) { - pd.setVectorPartitionDesc(VectorPartitionDesc.createVectorizedInputFileFormat()); + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorDeserialize( + inputFileFormatClassName, VectorDeserializeType.LAZY_BINARY)); + + return true; + } + } + + // Otherwise, if enabled, deserialize rows using regular Serde and add the object + // inspect-able Object[] row to a VectorizedRowBatch in the VectorMapOperator. + + if (useRowDeserialize) { + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createRowDeserialize( + inputFileFormatClassName, + Utilities.isInputFileFormatSelfDescribing(pd), + deserializerClassName)); return true; + } - LOG.info("Input format: " + pd.getInputFileFormatClassName() - + ", doesn't provide vectorized input"); + LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized" + + " given the Hive Configuration options " + getHiveOptionsString()); return false; } private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String alias, - TableScanOperator tableScanOperator, VectorTaskColumnInfo vectorTaskColumnInfo) { + TableScanOperator tableScanOperator, VectorTaskColumnInfo vectorTaskColumnInfo) + throws SemanticException { + + boolean isAcidTable = tableScanOperator.getConf().isAcidTable(); // These names/types are the data columns plus partition columns. final List<String> allColumnNameList = new ArrayList<String>(); @@ -531,23 +655,16 @@ public class Vectorizer implements PhysicalPlanResolver { getTableScanOperatorSchemaInfo(tableScanOperator, allColumnNameList, allTypeInfoList); final int allColumnCount = allColumnNameList.size(); - // Validate input format and schema evolution capability. - - // For the table, enter a null value in the multi-key map indicating no conversion necessary - // if the schema matches the table. - - HashMap<ImmutablePair, boolean[]> conversionMap = new HashMap<ImmutablePair, boolean[]>(); - + /* + * Validate input formats of all the partitions can be vectorized. + */ boolean isFirst = true; int dataColumnCount = 0; int partitionColumnCount = 0; - List<String> dataColumnList = null; - String dataColumnsString = ""; - List<TypeInfo> dataTypeInfoList = null; + List<String> tableDataColumnList = null; + List<TypeInfo> tableDataTypeInfoList = null; - // Validate the input format - VectorPartitionConversion partitionConversion = new VectorPartitionConversion(); LinkedHashMap<String, ArrayList<String>> pathToAliases = mapWork.getPathToAliases(); LinkedHashMap<String, PartitionDesc> pathToPartitionInfo = mapWork.getPathToPartitionInfo(); for (Entry<String, ArrayList<String>> entry: pathToAliases.entrySet()) { @@ -563,31 +680,18 @@ public class Vectorizer implements PhysicalPlanResolver { // We seen this already. continue; } - if (!verifyAndSetVectorPartDesc(partDesc)) { + if (!verifyAndSetVectorPartDesc(partDesc, isAcidTable)) { return false; } VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); - LOG.info("Vectorizer path: " + path + ", read type " + - vectorPartDesc.getVectorMapOperatorReadType().name() + ", aliases " + aliases); - - Properties partProps = partDesc.getProperties(); - - String nextDataColumnsString = - partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS); - String[] nextDataColumns = nextDataColumnsString.split(","); - - String nextDataTypesString = - partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES); - - // We convert to an array of TypeInfo using a library routine since it parses the information - // and can handle use of different separators, etc. We cannot use the raw type string - // for comparison in the map because of the different separators used. - List<TypeInfo> nextDataTypeInfoList = - TypeInfoUtils.getTypeInfosFromTypeString(nextDataTypesString); + if (LOG.isInfoEnabled()) { + LOG.info("Vectorizer path: " + path + ", " + vectorPartDesc.toString() + + ", aliases " + aliases); + } if (isFirst) { - // We establish with the first one whether the table is partitioned or not. + // Determine the data and partition columns using the first partition descriptor. LinkedHashMap<String, String> partSpec = partDesc.getPartSpec(); if (partSpec != null && partSpec.size() > 0) { @@ -598,85 +702,83 @@ public class Vectorizer implements PhysicalPlanResolver { dataColumnCount = allColumnCount; } - dataColumnList = allColumnNameList.subList(0, dataColumnCount); - dataColumnsString = getColumns(allColumnNameList, 0, dataColumnCount, ','); - dataTypeInfoList = allTypeInfoList.subList(0, dataColumnCount); - - // Add the table (non-partitioned) columns and types into the map as not needing - // conversion (i.e. null). - conversionMap.put( - new ImmutablePair(dataColumnsString, dataTypeInfoList), null); + tableDataColumnList = allColumnNameList.subList(0, dataColumnCount); + tableDataTypeInfoList = allTypeInfoList.subList(0, dataColumnCount); isFirst = false; } - ImmutablePair columnNamesAndTypesCombination = - new ImmutablePair(nextDataColumnsString, nextDataTypeInfoList); - - boolean[] conversionFlags; - if (conversionMap.containsKey(columnNamesAndTypesCombination)) { - - conversionFlags = conversionMap.get(columnNamesAndTypesCombination); - - } else { - - List<String> nextDataColumnList = Arrays.asList(nextDataColumns); - - // Validate the column names that are present are the same. Missing columns will be - // implicitly defaulted to null. + // We need to get the partition's column names from the partition serde. + // (e.g. Avro provides the table schema and ignores the partition schema..). + // + Deserializer deserializer; + StructObjectInspector partObjectInspector; + try { + deserializer = partDesc.getDeserializer(hiveConf); + partObjectInspector = (StructObjectInspector) deserializer.getObjectInspector(); + } catch (Exception e) { + throw new SemanticException(e); + } + String nextDataColumnsString = ObjectInspectorUtils.getFieldNames(partObjectInspector); + String[] nextDataColumns = nextDataColumnsString.split(","); + List<String> nextDataColumnList = Arrays.asList(nextDataColumns); + + /* + * Validate the column names that are present are the same. Missing columns will be + * implicitly defaulted to null. + */ + if (nextDataColumnList.size() > tableDataColumnList.size()) { + LOG.info( + String.format( + "Could not vectorize partition %s " + + "(deserializer " + deserializer.getClass().getName() + ")" + + "The partition column names %d is greater than the number of table columns %d", + path, nextDataColumnList.size(), tableDataColumnList.size())); + return false; + } + if (!(deserializer instanceof NullStructSerDe)) { - if (nextDataColumnList.size() > dataColumnList.size()) { - LOG.info( - String.format("Could not vectorize partition %s. The partition column names %d is greater than the number of table columns %d", - path, nextDataColumnList.size(), dataColumnList.size())); - return false; - } + // (Don't insist NullStructSerDe produce correct column names). for (int i = 0; i < nextDataColumnList.size(); i++) { String nextColumnName = nextDataColumnList.get(i); - String tableColumnName = dataColumnList.get(i); + String tableColumnName = tableDataColumnList.get(i); if (!nextColumnName.equals(tableColumnName)) { LOG.info( - String.format("Could not vectorize partition %s. The partition column name %s is does not match table column name %s", + String.format( + "Could not vectorize partition %s " + + "(deserializer " + deserializer.getClass().getName() + ")" + + "The partition column name %s is does not match table column name %s", path, nextColumnName, tableColumnName)); return false; } } + } - // The table column types might have been changed with ALTER. There are restrictions - // here for vectorization. - - // Some readers / deserializers take responsibility for conversion themselves. - - // If we need to check for conversion, the conversion object may come back null - // indicating from a vectorization point of view the conversion is implicit. That is, - // all implicit integer upgrades. + List<TypeInfo> nextDataTypeInfoList; + if (vectorPartDesc.getIsInputFileFormatSelfDescribing()) { - if (vectorPartDesc.getNeedsDataTypeConversionCheck() && - !nextDataTypeInfoList.equals(dataTypeInfoList)) { + /* + * Self-Describing Input Format will convert its data to the table schema. + */ + nextDataTypeInfoList = tableDataTypeInfoList; - // The results will be in 2 members: validConversion and conversionFlags - partitionConversion.validateConversion(nextDataTypeInfoList, dataTypeInfoList); - if (!partitionConversion.getValidConversion()) { - return false; - } - conversionFlags = partitionConversion.getResultConversionFlags(); - } else { - conversionFlags = null; - } - - // We enter this in our map so we don't have to check again for subsequent partitions. + } else { + String nextDataTypesString = ObjectInspectorUtils.getFieldTypes(partObjectInspector); - conversionMap.put(columnNamesAndTypesCombination, conversionFlags); + // We convert to an array of TypeInfo using a library routine since it parses the information + // and can handle use of different separators, etc. We cannot use the raw type string + // for comparison in the map because of the different separators used. + nextDataTypeInfoList = + TypeInfoUtils.getTypeInfosFromTypeString(nextDataTypesString); } - vectorPartDesc.setConversionFlags(conversionFlags); - - vectorPartDesc.setTypeInfos(nextDataTypeInfoList); + vectorPartDesc.setDataTypeInfos(nextDataTypeInfoList); } vectorTaskColumnInfo.setColumnNames(allColumnNameList); vectorTaskColumnInfo.setTypeInfos(allTypeInfoList); vectorTaskColumnInfo.setPartitionColumnCount(partitionColumnCount); + vectorTaskColumnInfo.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); return true; } @@ -1203,7 +1305,23 @@ public class Vectorizer implements PhysicalPlanResolver { LOG.info("Vectorization is disabled"); return physicalContext; } + isSpark = (HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")); + + useVectorizedInputFileFormat = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT); + useVectorDeserialize = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE); + useRowDeserialize = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE); + + isSchemaEvolution = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION); + // create dispatcher and graph walker Dispatcher disp = new VectorizationDispatcher(physicalContext); TaskGraphWalker ogw = new TaskGraphWalker(disp);
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index 429a058..20f787b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -28,6 +28,7 @@ import java.util.Stack; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; @@ -68,6 +69,8 @@ public abstract class BaseWork extends AbstractOperatorDesc { protected VectorizedRowBatchCtx vectorizedRowBatchCtx; + protected boolean useVectorizedInputFileFormat; + protected boolean llapMode = false; protected boolean uberMode = false; @@ -158,6 +161,9 @@ public abstract class BaseWork extends AbstractOperatorDesc { // ----------------------------------------------------------------------------------------------- + /* + * The vectorization context for creating the VectorizedRowBatch for the node. + */ public VectorizedRowBatchCtx getVectorizedRowBatchCtx() { return vectorizedRowBatchCtx; } @@ -166,6 +172,23 @@ public abstract class BaseWork extends AbstractOperatorDesc { this.vectorizedRowBatchCtx = vectorizedRowBatchCtx; } + /* + * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable + * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated + * vectorizing this node. + * + * When Vectorized Input File Format looks at this flag, it can determine whether it should + * operate vectorized or not. In some modes, the node can be vectorized but use row + * serialization. + */ + public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { + this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + } + + public boolean getUseVectorizedInputFileFormat() { + return useVectorizedInputFileFormat; + } + // ----------------------------------------------------------------------------------------------- /** http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 0851d9e..f034812 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; @@ -130,6 +131,8 @@ public class MapWork extends BaseWork { private boolean doSplitsGrouping = true; + private VectorizedRowBatch vectorizedRowBatch; + // bitsets can't be correctly serialized by Kryo's default serializer // BitSet::wordsInUse is transient, so force dumping into a lower form private byte[] includedBuckets; @@ -635,4 +638,12 @@ public class MapWork extends BaseWork { // see comment next to the field this.includedBuckets = includedBuckets.toByteArray(); } + + public void setVectorizedRowBatch(VectorizedRowBatch vectorizedRowBatch) { + this.vectorizedRowBatch = vectorizedRowBatch; + } + + public VectorizedRowBatch getVectorizedRowBatch() { + return vectorizedRowBatch; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java index 8fe298d..f46581a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java @@ -19,9 +19,6 @@ package org.apache.hadoop.hive.ql.plan; import java.util.HashMap; -import java.util.List; - -import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -33,134 +30,75 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; */ public class VectorPartitionConversion { - private static long serialVersionUID = 1L; - - private boolean validConversion; - private boolean[] resultConversionFlags; - - private TypeInfo invalidFromTypeInfo; - private TypeInfo invalidToTypeInfo; - - public boolean getValidConversion() { - return validConversion; - } - - public boolean[] getResultConversionFlags() { - return resultConversionFlags; - } - - public TypeInfo getInvalidFromTypeInfo() { - return invalidFromTypeInfo; - } - - public TypeInfo getInvalidToTypeInfo() { - return invalidToTypeInfo; - } - // Currently, we only support these no-precision-loss or promotion data type conversions: - // // - // Short -> Int IMPLICIT WITH VECTORIZATION - // Short -> BigInt IMPLICIT WITH VECTORIZATION - // Int --> BigInt IMPLICIT WITH VECTORIZATION // - // CONSIDER ADDING: - // Float -> Double IMPLICIT WITH VECTORIZATION - // (Char | VarChar) -> String IMPLICIT WITH VECTORIZATION + // TinyInt --> SmallInt + // TinyInt --> Int + // TinyInt --> BigInt + // + // SmallInt -> Int + // SmallInt -> BigInt + // + // Int --> BigInt // - private static HashMap<PrimitiveCategory, PrimitiveCategory[]> validFromPrimitiveMap = + // Float -> Double + // + // Since we stare Char without padding, it can become a String implicitly. + // (Char | VarChar) -> String + // + private static HashMap<PrimitiveCategory, PrimitiveCategory[]> implicitPrimitiveMap = new HashMap<PrimitiveCategory, PrimitiveCategory[]>(); static { - validFromPrimitiveMap.put( + implicitPrimitiveMap.put( + PrimitiveCategory.BOOLEAN, + new PrimitiveCategory[] { + PrimitiveCategory.BYTE, PrimitiveCategory.SHORT, PrimitiveCategory.INT, PrimitiveCategory.LONG }); + implicitPrimitiveMap.put( + PrimitiveCategory.BYTE, + new PrimitiveCategory[] { + PrimitiveCategory.SHORT, PrimitiveCategory.INT, PrimitiveCategory.LONG }); + implicitPrimitiveMap.put( PrimitiveCategory.SHORT, - new PrimitiveCategory[] { PrimitiveCategory.INT, PrimitiveCategory.LONG }); - validFromPrimitiveMap.put( + new PrimitiveCategory[] { + PrimitiveCategory.INT, PrimitiveCategory.LONG }); + implicitPrimitiveMap.put( PrimitiveCategory.INT, - new PrimitiveCategory[] { PrimitiveCategory.LONG }); + new PrimitiveCategory[] { + PrimitiveCategory.LONG }); + implicitPrimitiveMap.put( + PrimitiveCategory.FLOAT, + new PrimitiveCategory[] { + PrimitiveCategory.DOUBLE }); + implicitPrimitiveMap.put( + PrimitiveCategory.CHAR, + new PrimitiveCategory[] { + PrimitiveCategory.STRING }); + implicitPrimitiveMap.put( + PrimitiveCategory.VARCHAR, + new PrimitiveCategory[] { + PrimitiveCategory.STRING }); } - private boolean validateOne(TypeInfo fromTypeInfo, TypeInfo toTypeInfo) { - - if (fromTypeInfo.equals(toTypeInfo)) { - return false; - } + public static boolean isImplicitVectorColumnConversion(TypeInfo fromTypeInfo, + TypeInfo toTypeInfo) { if (fromTypeInfo.getCategory() == Category.PRIMITIVE && toTypeInfo.getCategory() == Category.PRIMITIVE) { - PrimitiveCategory fromPrimitiveCategory = ((PrimitiveTypeInfo) fromTypeInfo).getPrimitiveCategory(); - PrimitiveCategory toPrimitiveCategory = ((PrimitiveTypeInfo) toTypeInfo).getPrimitiveCategory(); - - PrimitiveCategory[] toPrimitiveCategories = - validFromPrimitiveMap.get(fromPrimitiveCategory); - if (toPrimitiveCategories == null || - !ArrayUtils.contains(toPrimitiveCategories, toPrimitiveCategory)) { - invalidFromTypeInfo = fromTypeInfo; - invalidToTypeInfo = toTypeInfo; - - // Tell caller a bad one was found. - validConversion = false; - return false; - } - } else { - // Ignore checking complex types. Assume they will not be included in the query. - } - - return true; - } - - public void validateConversion(List<TypeInfo> fromTypeInfoList, - List<TypeInfo> toTypeInfoList) { - - final int columnCount = fromTypeInfoList.size(); - resultConversionFlags = new boolean[columnCount]; - - // The method validateOne will turn this off when invalid conversion is found. - validConversion = true; - - boolean atLeastOneConversion = false; - for (int i = 0; i < columnCount; i++) { - TypeInfo fromTypeInfo = fromTypeInfoList.get(i); - TypeInfo toTypeInfo = toTypeInfoList.get(i); - - resultConversionFlags[i] = validateOne(fromTypeInfo, toTypeInfo); - if (!validConversion) { - return; - } - } - - if (atLeastOneConversion) { - // Leave resultConversionFlags set. - } else { - resultConversionFlags = null; - } - } - - public void validateConversion(TypeInfo[] fromTypeInfos, TypeInfo[] toTypeInfos) { - - final int columnCount = fromTypeInfos.length; - resultConversionFlags = new boolean[columnCount]; - - // The method validateOne will turn this off when invalid conversion is found. - validConversion = true; - - boolean atLeastOneConversion = false; - for (int i = 0; i < columnCount; i++) { - TypeInfo fromTypeInfo = fromTypeInfos[i]; - TypeInfo toTypeInfo = toTypeInfos[i]; - - resultConversionFlags[i] = validateOne(fromTypeInfo, toTypeInfo); - if (!validConversion) { - return; - } - if (resultConversionFlags[i]) { - atLeastOneConversion = true; + PrimitiveCategory fromPrimitiveCategory = + ((PrimitiveTypeInfo) fromTypeInfo).getPrimitiveCategory(); + PrimitiveCategory toPrimitiveCategory = + ((PrimitiveTypeInfo) toTypeInfo).getPrimitiveCategory(); + PrimitiveCategory[] toPrimitiveCategories = implicitPrimitiveMap.get(fromPrimitiveCategory); + if (toPrimitiveCategories != null) { + for (PrimitiveCategory candidatePrimitiveCategory : toPrimitiveCategories) { + if (candidatePrimitiveCategory == toPrimitiveCategory) { + return true; + } + } } + return false; } - - if (atLeastOneConversion) { - // Leave resultConversionFlags set. - } else { - resultConversionFlags = null; - } + return false; } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java index 45151f2..2b61ec0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java @@ -39,44 +39,120 @@ public class VectorPartitionDesc { // No data type conversion check? Assume ALTER TABLE prevented conversions that // VectorizedInputFileFormat cannot handle... // + // VECTOR_DESERIALIZE: + // LAZY_SIMPLE: + // Capable of converting on its own. + // LAZY_BINARY + // Partition schema assumed to match file contents. + // Conversion necessary from partition field values to vector columns. + // ROW_DESERIALIZE + // Partition schema assumed to match file contents. + // Conversion necessary from partition field values to vector columns. + // public static enum VectorMapOperatorReadType { NONE, - VECTORIZED_INPUT_FILE_FORMAT + VECTORIZED_INPUT_FILE_FORMAT, + VECTOR_DESERIALIZE, + ROW_DESERIALIZE } + public static enum VectorDeserializeType { + NONE, + LAZY_SIMPLE, + LAZY_BINARY + } private final VectorMapOperatorReadType vectorMapOperatorReadType; + private final VectorDeserializeType vectorDeserializeType; - private final boolean needsDataTypeConversionCheck; + private final String rowDeserializerClassName; + private final String inputFileFormatClassName; - private boolean[] conversionFlags; + boolean isInputFileFormatSelfDescribing; - private TypeInfo[] typeInfos; + private TypeInfo[] dataTypeInfos; - private VectorPartitionDesc(VectorMapOperatorReadType vectorMapOperatorReadType, - boolean needsDataTypeConversionCheck) { + private VectorPartitionDesc(String inputFileFormatClassName, + boolean isInputFileFormatSelfDescribing, VectorMapOperatorReadType vectorMapOperatorReadType) { this.vectorMapOperatorReadType = vectorMapOperatorReadType; - this.needsDataTypeConversionCheck = needsDataTypeConversionCheck; + this.vectorDeserializeType = VectorDeserializeType.NONE; + this.inputFileFormatClassName = inputFileFormatClassName; + rowDeserializerClassName = null; + this.isInputFileFormatSelfDescribing = isInputFileFormatSelfDescribing; + dataTypeInfos = null; + } - conversionFlags = null; - typeInfos = null; + /** + * Create a VECTOR_DESERIALIZE flavor object. + * @param vectorMapOperatorReadType + * @param vectorDeserializeType + * @param needsDataTypeConversionCheck + */ + private VectorPartitionDesc(String inputFileFormatClassName, + VectorDeserializeType vectorDeserializeType) { + this.vectorMapOperatorReadType = VectorMapOperatorReadType.VECTOR_DESERIALIZE; + this.vectorDeserializeType = vectorDeserializeType; + this.inputFileFormatClassName = inputFileFormatClassName; + rowDeserializerClassName = null; + isInputFileFormatSelfDescribing = false; + dataTypeInfos = null; } - public static VectorPartitionDesc createVectorizedInputFileFormat() { - return new VectorPartitionDesc(VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT, true); + /** + * Create a ROW_DESERIALIZE flavor object. + * @param rowDeserializerClassName + * @param inputFileFormatClassName + */ + private VectorPartitionDesc(String inputFileFormatClassName, + boolean isInputFileFormatSelfDescribing, String rowDeserializerClassName) { + this.vectorMapOperatorReadType = VectorMapOperatorReadType.ROW_DESERIALIZE; + this.vectorDeserializeType = VectorDeserializeType.NONE; + this.inputFileFormatClassName = inputFileFormatClassName; + this.rowDeserializerClassName = rowDeserializerClassName; + this.isInputFileFormatSelfDescribing = isInputFileFormatSelfDescribing; + dataTypeInfos = null; } + public static VectorPartitionDesc createVectorizedInputFileFormat(String inputFileFormatClassName, + boolean isInputFileFormatSelfDescribing) { + return new VectorPartitionDesc( + inputFileFormatClassName, + isInputFileFormatSelfDescribing, + VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT); + } + + public static VectorPartitionDesc createVectorDeserialize(String inputFileFormatClassName, + VectorDeserializeType vectorDeserializeType) { + return new VectorPartitionDesc(inputFileFormatClassName, vectorDeserializeType); + } + + public static VectorPartitionDesc createRowDeserialize(String inputFileFormatClassName, + boolean isInputFileFormatSelfDescribing, String rowDeserializerClassName) { + return new VectorPartitionDesc(rowDeserializerClassName, isInputFileFormatSelfDescribing, + inputFileFormatClassName); + } @Override public VectorPartitionDesc clone() { - VectorPartitionDesc result = - new VectorPartitionDesc(vectorMapOperatorReadType, - needsDataTypeConversionCheck); - result.conversionFlags = - (conversionFlags == null ? null : - Arrays.copyOf(conversionFlags, conversionFlags.length)); - result.typeInfos = Arrays.copyOf(typeInfos, typeInfos.length); + VectorPartitionDesc result; + switch (vectorMapOperatorReadType) { + case VECTORIZED_INPUT_FILE_FORMAT: + result = new VectorPartitionDesc(inputFileFormatClassName, isInputFileFormatSelfDescribing, + vectorMapOperatorReadType); + break; + case VECTOR_DESERIALIZE: + result = new VectorPartitionDesc(inputFileFormatClassName, vectorDeserializeType); + break; + case ROW_DESERIALIZE: + result = new VectorPartitionDesc(inputFileFormatClassName, isInputFileFormatSelfDescribing, + rowDeserializerClassName); + break; + default: + throw new RuntimeException("Unexpected vector map operator read type " + vectorMapOperatorReadType.name()); + } + result.dataTypeInfos = Arrays.copyOf(dataTypeInfos, dataTypeInfos.length); + return result; } @@ -84,27 +160,55 @@ public class VectorPartitionDesc { return vectorMapOperatorReadType; } - public boolean getNeedsDataTypeConversionCheck() { - return needsDataTypeConversionCheck; + public String getInputFileFormatClassName() { + return inputFileFormatClassName; + } + + public VectorDeserializeType getVectorDeserializeType() { + return vectorDeserializeType; } - public void setConversionFlags(boolean[] conversionFlags) { - this.conversionFlags = conversionFlags; + public String getRowDeserializerClassName() { + return rowDeserializerClassName; } - public boolean[] getConversionFlags() { - return conversionFlags; + public boolean getIsInputFileFormatSelfDescribing() { + return isInputFileFormatSelfDescribing; } - public TypeInfo[] getTypeInfos() { - return typeInfos; + public TypeInfo[] getDataTypeInfos() { + return dataTypeInfos; } - public void setTypeInfos(List<TypeInfo> typeInfoList) { - typeInfos = typeInfoList.toArray(new TypeInfo[0]); + public void setDataTypeInfos(List<TypeInfo> dataTypeInfoList) { + dataTypeInfos = dataTypeInfoList.toArray(new TypeInfo[0]); } - public int getNonPartColumnCount() { - return typeInfos.length; + public int getDataColumnCount() { + return dataTypeInfos.length; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("vector map operator read type "); + sb.append(vectorMapOperatorReadType.name()); + sb.append(", input file format class name "); + sb.append(inputFileFormatClassName); + switch (vectorMapOperatorReadType) { + case VECTORIZED_INPUT_FILE_FORMAT: + break; + case VECTOR_DESERIALIZE: + sb.append(", deserialize type "); + sb.append(vectorDeserializeType.name()); + break; + case ROW_DESERIALIZE: + sb.append(", deserializer class name "); + sb.append(rowDeserializerClassName); + break; + default: + throw new RuntimeException("Unexpected vector map operator read type " + vectorMapOperatorReadType.name()); + } + return sb.toString(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java index a5946d1..959a2af 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java @@ -34,13 +34,13 @@ import junit.framework.TestCase; */ public class TestVectorRowObject extends TestCase { - void examineBatch(VectorizedRowBatch batch, VectorExtractRowSameBatch vectorExtractRow, + void examineBatch(VectorizedRowBatch batch, VectorExtractRow vectorExtractRow, Object[][] randomRows, int firstRandomRowIndex ) { int rowSize = vectorExtractRow.getCount(); Object[] row = new Object[rowSize]; for (int i = 0; i < batch.size; i++) { - vectorExtractRow.extractRow(i, row); + vectorExtractRow.extractRow(batch, i, row); Object[] expectedRow = randomRows[firstRandomRowIndex + i]; for (int c = 0; c < rowSize; c++) { if (!row[c].equals(expectedRow[c])) { @@ -67,20 +67,18 @@ public class TestVectorRowObject extends TestCase { cv.noNulls = false; } - VectorAssignRowSameBatch vectorAssignRow = new VectorAssignRowSameBatch(); + VectorAssignRow vectorAssignRow = new VectorAssignRow(); vectorAssignRow.init(source.typeNames()); - vectorAssignRow.setOneBatch(batch); - - VectorExtractRowSameBatch vectorExtractRow = new VectorExtractRowSameBatch(); + + VectorExtractRow vectorExtractRow = new VectorExtractRow(); vectorExtractRow.init(source.typeNames()); - vectorExtractRow.setOneBatch(batch); Object[][] randomRows = source.randomRows(100000); int firstRandomRowIndex = 0; for (int i = 0; i < randomRows.length; i++) { Object[] row = randomRows[i]; - vectorAssignRow.assignRow(batch.size, row); + vectorAssignRow.assignRow(batch, batch.size, row); batch.size++; if (batch.size == batch.DEFAULT_SIZE) { examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex); http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java index 7c0c8d1..e37d2bf 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java @@ -59,6 +59,7 @@ import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -100,7 +101,7 @@ public class TestVectorSerDeRow extends TestCase { switch (primitiveCategory) { case BOOLEAN: { - Boolean value = deserializeRead.readBoolean(); + Boolean value = deserializeRead.currentBoolean; BooleanWritable expectedWritable = (BooleanWritable) expected; if (!value.equals(expectedWritable.get())) { TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")"); @@ -109,7 +110,7 @@ public class TestVectorSerDeRow extends TestCase { break; case BYTE: { - Byte value = deserializeRead.readByte(); + Byte value = deserializeRead.currentByte; ByteWritable expectedWritable = (ByteWritable) expected; if (!value.equals(expectedWritable.get())) { TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")"); @@ -118,7 +119,7 @@ public class TestVectorSerDeRow extends TestCase { break; case SHORT: { - Short value = deserializeRead.readShort(); + Short value = deserializeRead.currentShort; ShortWritable expectedWritable = (ShortWritable) expected; if (!value.equals(expectedWritable.get())) { TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")"); @@ -127,7 +128,7 @@ public class TestVectorSerDeRow extends TestCase { break; case INT: { - Integer value = deserializeRead.readInt(); + Integer value = deserializeRead.currentInt; IntWritable expectedWritable = (IntWritable) expected; if (!value.equals(expectedWritable.get())) { TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")"); @@ -136,7 +137,7 @@ public class TestVectorSerDeRow extends TestCase { break; case LONG: { - Long value = deserializeRead.readLong(); + Long value = deserializeRead.currentLong; LongWritable expectedWritable = (LongWritable) expected; if (!value.equals(expectedWritable.get())) { TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")"); @@ -145,18 +146,16 @@ public class TestVectorSerDeRow extends TestCase { break; case DATE: { - DeserializeRead.ReadDateResults readDateResults = deserializeRead.createReadDateResults(); - deserializeRead.readDate(readDateResults); - Date value = readDateResults.getDate(); + DateWritable value = deserializeRead.currentDateWritable; DateWritable expectedWritable = (DateWritable) expected; - if (!value.equals(expectedWritable.get())) { + if (!value.equals(expectedWritable)) { TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); } } break; case FLOAT: { - Float value = deserializeRead.readFloat(); + Float value = deserializeRead.currentFloat; FloatWritable expectedWritable = (FloatWritable) expected; if (!value.equals(expectedWritable.get())) { TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")"); @@ -165,7 +164,7 @@ public class TestVectorSerDeRow extends TestCase { break; case DOUBLE: { - Double value = deserializeRead.readDouble(); + Double value = deserializeRead.currentDouble; DoubleWritable expectedWritable = (DoubleWritable) expected; if (!value.equals(expectedWritable.get())) { TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")"); @@ -173,57 +172,69 @@ public class TestVectorSerDeRow extends TestCase { } break; case STRING: - { - DeserializeRead.ReadStringResults readStringResults = deserializeRead.createReadStringResults(); - deserializeRead.readString(readStringResults); - - char[] charsBuffer = new char[readStringResults.bytes.length]; - for (int c = 0; c < charsBuffer.length; c++) { - charsBuffer[c] = (char) (readStringResults.bytes[c] & 0xFF); - } - - byte[] stringBytes = Arrays.copyOfRange(readStringResults.bytes, readStringResults.start, readStringResults.start + readStringResults.length); - - char[] charsRange = new char[stringBytes.length]; - for (int c = 0; c < charsRange.length; c++) { - charsRange[c] = (char) (stringBytes[c] & 0xFF); - } - - Text text = new Text(stringBytes); - String value = text.toString(); - Text expectedWritable = (Text) expected; - if (!value.equals(expectedWritable.toString())) { - TestCase.fail("String field mismatch (expected '" + expectedWritable.toString() + "' found '" + value + "')"); - } - } - break; case CHAR: - { - DeserializeRead.ReadHiveCharResults readHiveCharResults = deserializeRead.createReadHiveCharResults(); - deserializeRead.readHiveChar(readHiveCharResults); - HiveChar hiveChar = readHiveCharResults.getHiveChar(); - HiveCharWritable expectedWritable = (HiveCharWritable) expected; - if (!hiveChar.equals(expectedWritable.getHiveChar())) { - TestCase.fail("Char field mismatch (expected '" + expectedWritable.getHiveChar() + "' found '" + hiveChar + "')"); - } - } - break; case VARCHAR: + case BINARY: { - DeserializeRead.ReadHiveVarcharResults readHiveVarcharResults = deserializeRead.createReadHiveVarcharResults(); - deserializeRead.readHiveVarchar(readHiveVarcharResults); - HiveVarchar hiveVarchar = readHiveVarcharResults.getHiveVarchar(); - HiveVarcharWritable expectedWritable = (HiveVarcharWritable) expected; - if (!hiveVarchar.equals(expectedWritable.getHiveVarchar())) { - TestCase.fail("Varchar field mismatch (expected '" + expectedWritable.getHiveVarchar() + "' found '" + hiveVarchar + "')"); + byte[] stringBytes = + Arrays.copyOfRange( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesStart + deserializeRead.currentBytesLength); + + Text text = new Text(stringBytes); + String string = text.toString(); + + switch (primitiveCategory) { + case STRING: + { + Text expectedWritable = (Text) expected; + if (!string.equals(expectedWritable.toString())) { + TestCase.fail("String field mismatch (expected '" + expectedWritable.toString() + "' found '" + string + "')"); + } + } + break; + case CHAR: + { + HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength()); + + HiveCharWritable expectedWritable = (HiveCharWritable) expected; + if (!hiveChar.equals(expectedWritable.getHiveChar())) { + TestCase.fail("Char field mismatch (expected '" + expectedWritable.getHiveChar() + "' found '" + hiveChar + "')"); + } + } + break; + case VARCHAR: + { + HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength()); + HiveVarcharWritable expectedWritable = (HiveVarcharWritable) expected; + if (!hiveVarchar.equals(expectedWritable.getHiveVarchar())) { + TestCase.fail("Varchar field mismatch (expected '" + expectedWritable.getHiveVarchar() + "' found '" + hiveVarchar + "')"); + } + } + break; + case BINARY: + { + BytesWritable expectedWritable = (BytesWritable) expected; + if (stringBytes.length != expectedWritable.getLength()){ + TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + stringBytes + ")"); + } + byte[] expectedBytes = expectedWritable.getBytes(); + for (int b = 0; b < stringBytes.length; b++) { + if (stringBytes[b] != expectedBytes[b]) { + TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + stringBytes + ")"); + } + } + } + break; + default: + throw new HiveException("Unexpected primitive category " + primitiveCategory); } } break; case DECIMAL: { - DeserializeRead.ReadDecimalResults readDecimalResults = deserializeRead.createReadDecimalResults(); - deserializeRead.readHiveDecimal(readDecimalResults); - HiveDecimal value = readDecimalResults.getHiveDecimal(); + HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal(); if (value == null) { TestCase.fail("Decimal field evaluated to NULL"); } @@ -238,9 +249,7 @@ public class TestVectorSerDeRow extends TestCase { break; case TIMESTAMP: { - DeserializeRead.ReadTimestampResults readTimestampResults = deserializeRead.createReadTimestampResults(); - deserializeRead.readTimestamp(readTimestampResults); - Timestamp value = readTimestampResults.getTimestamp(); + Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp(); TimestampWritable expectedWritable = (TimestampWritable) expected; if (!value.equals(expectedWritable.getTimestamp())) { TestCase.fail("Timestamp field mismatch (expected " + expectedWritable.getTimestamp() + " found " + value.toString() + ")"); @@ -249,9 +258,7 @@ public class TestVectorSerDeRow extends TestCase { break; case INTERVAL_YEAR_MONTH: { - DeserializeRead.ReadIntervalYearMonthResults readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); - deserializeRead.readIntervalYearMonth(readIntervalYearMonthResults); - HiveIntervalYearMonth value = readIntervalYearMonthResults.getHiveIntervalYearMonth(); + HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth(); HiveIntervalYearMonthWritable expectedWritable = (HiveIntervalYearMonthWritable) expected; HiveIntervalYearMonth expectedValue = expectedWritable.getHiveIntervalYearMonth(); if (!value.equals(expectedValue)) { @@ -261,9 +268,7 @@ public class TestVectorSerDeRow extends TestCase { break; case INTERVAL_DAY_TIME: { - DeserializeRead.ReadIntervalDayTimeResults readIntervalDayTimeResults = deserializeRead.createReadIntervalDayTimeResults(); - deserializeRead.readIntervalDayTime(readIntervalDayTimeResults); - HiveIntervalDayTime value = readIntervalDayTimeResults.getHiveIntervalDayTime(); + HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime(); HiveIntervalDayTimeWritable expectedWritable = (HiveIntervalDayTimeWritable) expected; HiveIntervalDayTime expectedValue = expectedWritable.getHiveIntervalDayTime(); if (!value.equals(expectedValue)) { @@ -271,26 +276,10 @@ public class TestVectorSerDeRow extends TestCase { } } break; - case BINARY: - { - DeserializeRead.ReadBinaryResults readBinaryResults = deserializeRead.createReadBinaryResults(); - deserializeRead.readBinary(readBinaryResults); - byte[] byteArray = Arrays.copyOfRange(readBinaryResults.bytes, readBinaryResults.start, readBinaryResults.start + readBinaryResults.length); - BytesWritable expectedWritable = (BytesWritable) expected; - if (byteArray.length != expectedWritable.getLength()){ - TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + byteArray + ")"); - } - byte[] expectedBytes = expectedWritable.getBytes(); - for (int b = 0; b < byteArray.length; b++) { - if (byteArray[b] != expectedBytes[b]) { - TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + byteArray + ")"); - } - } - } - break; - default: - throw new HiveException("Unexpected primitive category " + primitiveCategory); - } + + default: + throw new HiveException("Unexpected primitive category " + primitiveCategory); + } } deserializeRead.extraFieldsCheck(); TestCase.assertTrue(!deserializeRead.readBeyondConfiguredFieldsWarned()); @@ -331,9 +320,8 @@ public class TestVectorSerDeRow extends TestCase { batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames); VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); - VectorAssignRowSameBatch vectorAssignRow = new VectorAssignRowSameBatch(); + VectorAssignRow vectorAssignRow = new VectorAssignRow(); vectorAssignRow.init(source.typeNames()); - vectorAssignRow.setOneBatch(batch); int fieldCount = source.typeNames().size(); DeserializeRead deserializeRead; @@ -369,7 +357,7 @@ public class TestVectorSerDeRow extends TestCase { for (int i = 0; i < randomRows.length; i++) { Object[] row = randomRows[i]; - vectorAssignRow.assignRow(batch.size, row); + vectorAssignRow.assignRow(batch, batch.size, row); batch.size++; if (batch.size == batch.DEFAULT_SIZE) { serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex); @@ -382,13 +370,13 @@ public class TestVectorSerDeRow extends TestCase { } } - void examineBatch(VectorizedRowBatch batch, VectorExtractRowSameBatch vectorExtractRow, + void examineBatch(VectorizedRowBatch batch, VectorExtractRow vectorExtractRow, Object[][] randomRows, int firstRandomRowIndex ) { int rowSize = vectorExtractRow.getCount(); Object[] row = new Object[rowSize]; for (int i = 0; i < batch.size; i++) { - vectorExtractRow.extractRow(i, row); + vectorExtractRow.extractRow(batch, i, row); Object[] expectedRow = randomRows[firstRandomRowIndex + i]; @@ -603,9 +591,8 @@ public class TestVectorSerDeRow extends TestCase { cv.noNulls = false; } - VectorExtractRowSameBatch vectorExtractRow = new VectorExtractRowSameBatch(); + VectorExtractRow vectorExtractRow = new VectorExtractRow(); vectorExtractRow.init(source.typeNames()); - vectorExtractRow.setOneBatch(batch); Object[][] randomRows = source.randomRows(100000); int firstRandomRowIndex = 0; @@ -614,7 +601,7 @@ public class TestVectorSerDeRow extends TestCase { Output output = serializeRow(row, source, serializeWrite); vectorDeserializeRow.setBytes(output.getData(), 0, output.getLength()); - vectorDeserializeRow.deserializeByValue(batch, batch.size); + vectorDeserializeRow.deserialize(batch, batch.size); batch.size++; if (batch.size == batch.DEFAULT_SIZE) { examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex); http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 85923a8..4eb0249 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -80,6 +80,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDe; @@ -1700,7 +1701,10 @@ public class TestInputOutputFormat { Utilities.clearWorkMap(conf); conf.set("hive.exec.plan", workDir.toString()); conf.set("mapred.job.tracker", "local"); - conf.set("hive.vectorized.execution.enabled", Boolean.toString(isVectorized)); + String isVectorizedString = Boolean.toString(isVectorized); + conf.set("hive.vectorized.execution.enabled", isVectorizedString); + conf.set(Utilities.VECTOR_MODE, isVectorizedString); + conf.set(Utilities.USE_VECTORIZED_INPUT_FILE_FORMAT, isVectorizedString); conf.set("fs.mock.impl", MockFileSystem.class.getName()); conf.set("mapred.mapper.class", ExecMapper.class.getName()); Path root = new Path(warehouseDir, tableName); @@ -1767,6 +1771,10 @@ public class TestInputOutputFormat { LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>(); PartitionDesc part = new PartitionDesc(tbl, partSpec); + if (isVectorized) { + part.setVectorPartitionDesc( + VectorPartitionDesc.createVectorizedInputFileFormat("MockInputFileFormatClassName", false)); + } partMap.put(partPath[p], part); } mapWork.setPathToAliases(aliasMap); http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/avro_schema_evolution_native.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/avro_schema_evolution_native.q b/ql/src/test/queries/clientpositive/avro_schema_evolution_native.q index b32e1ec..efeb167 100644 --- a/ql/src/test/queries/clientpositive/avro_schema_evolution_native.q +++ b/ql/src/test/queries/clientpositive/avro_schema_evolution_native.q @@ -1,3 +1,4 @@ +set hive.cli.print.header=true; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS -- Verify that table scans work with partitioned Avro tables @@ -19,6 +20,7 @@ STORED AS AVRO; SET hive.exec.dynamic.partition.mode=nonstrict; INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes; +DESCRIBE FORMATTED episodes_partitioned; ALTER TABLE episodes_partitioned SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' @@ -51,7 +53,12 @@ SERDEPROPERTIES ('avro.schema.literal'='{ } ] }'); +DESCRIBE FORMATTED episodes_partitioned; +set hive.fetch.task.conversion=more; + +EXPLAIN +SELECT * FROM episodes_partitioned WHERE doctor_pt > 6; SELECT * FROM episodes_partitioned WHERE doctor_pt > 6; @@ -60,4 +67,15 @@ SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5; -- Fetch w/filter to specific partition SELECT * FROM episodes_partitioned WHERE doctor_pt = 6; -- Fetch w/non-existent partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5; + +set hive.fetch.task.conversion=none; + +EXPLAIN +SELECT * FROM episodes_partitioned WHERE doctor_pt > 6; + +SELECT * FROM episodes_partitioned WHERE doctor_pt > 6; + +SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5; +SELECT * FROM episodes_partitioned WHERE doctor_pt = 6; SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/bucket_groupby.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucket_groupby.q b/ql/src/test/queries/clientpositive/bucket_groupby.q index ea35bd7..a36c79d 100644 --- a/ql/src/test/queries/clientpositive/bucket_groupby.q +++ b/ql/src/test/queries/clientpositive/bucket_groupby.q @@ -1,3 +1,4 @@ +SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; create table clustergroupby(key string, value string) partitioned by(ds string); describe extended clustergroupby; @@ -6,16 +7,16 @@ alter table clustergroupby clustered by (key) into 1 buckets; insert overwrite table clustergroupby partition (ds='100') select key, value from src sort by key; explain -select key, count(1) from clustergroupby where ds='100' group by key limit 10; -select key, count(1) from clustergroupby where ds='100' group by key limit 10; +select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10; +select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10; describe extended clustergroupby; insert overwrite table clustergroupby partition (ds='101') select key, value from src distribute by key; --normal-- explain -select key, count(1) from clustergroupby where ds='101' group by key limit 10; -select key, count(1) from clustergroupby where ds='101' group by key limit 10; +select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10; +select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10; --function-- explain @@ -27,13 +28,13 @@ select abs(length(key)), count(1) from clustergroupby where ds='101' group by a --constant-- explain -select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10; -select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10; +select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10; +select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10; --subquery-- explain -select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10; -select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10; +select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10; +select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10; explain select key, count(1) from clustergroupby group by key; @@ -52,11 +53,11 @@ describe extended clustergroupby; insert overwrite table clustergroupby partition (ds='102') select key, value from src distribute by value sort by key, value; explain -select key, count(1) from clustergroupby where ds='102' group by key limit 10; -select key, count(1) from clustergroupby where ds='102' group by key limit 10; +select key, count(1) from clustergroupby where ds='102' group by key order by key limit 10; +select key, count(1) from clustergroupby where ds='102' group by key order by key limit 10; explain -select value, count(1) from clustergroupby where ds='102' group by value limit 10; -select value, count(1) from clustergroupby where ds='102' group by value limit 10; +select value, count(1) from clustergroupby where ds='102' group by value order by value limit 10; +select value, count(1) from clustergroupby where ds='102' group by value order by value limit 10; explain select key, count(1) from clustergroupby where ds='102' group by key, value limit 10; select key, count(1) from clustergroupby where ds='102' group by key, value limit 10; @@ -69,8 +70,8 @@ alter table clustergroupby clustered by (value, key) sorted by (key) into 1 buck describe extended clustergroupby; insert overwrite table clustergroupby partition (ds='103') select key, value from src distribute by value, key sort by key; explain -select key, count(1) from clustergroupby where ds='103' group by key limit 10; -select key, count(1) from clustergroupby where ds='103' group by key limit 10; +select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10; +select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10; explain -select key, count(1) from clustergroupby where ds='103' group by value, key limit 10; -select key, count(1) from clustergroupby where ds='103' group by value, key limit 10; +select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10; +select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10; http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/groupby_sort_10.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_10.q b/ql/src/test/queries/clientpositive/groupby_sort_10.q index 910a272..3517693 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_10.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_10.q @@ -2,6 +2,8 @@ set hive.mapred.mode=nonstrict; set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; +-- SORT_QUERY_RESULTS + CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q index fc935d5..b0e57fb 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q @@ -4,6 +4,9 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; SET hive.exec.schema.evolution=false; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q index e49a0f3..ca6822c 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q @@ -3,6 +3,9 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; SET hive.exec.schema.evolution=false; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q b/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q index 6c256ea..f05f02a 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q @@ -1,7 +1,5 @@ set hive.cli.print.header=true; set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; - SET hive.vectorized.execution.enabled=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q b/ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q index 30b19bb..da726c5 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q @@ -2,6 +2,9 @@ set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=more; set hive.exec.dynamic.partition.mode=nonstrict; http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q b/ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q index 6df2095..393967f 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q @@ -1,8 +1,9 @@ set hive.cli.print.header=true; -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; - +SET hive.exec.schema.evolution=true; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q b/ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q deleted file mode 100644 index 44f7264..0000000 --- a/ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q +++ /dev/null @@ -1,56 +0,0 @@ -set hive.cli.print.header=true; -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=false; -set hive.fetch.task.conversion=none; -set hive.exec.dynamic.partition.mode=nonstrict; - - --- SORT_QUERY_RESULTS --- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table --- --- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT ---- -CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; - -insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); - --- Table-Non-Cascade ADD COLUMNS ... -alter table table1 add columns(c int, d string); - -insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); - -insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); - --- SELECT permutation columns to make sure NULL defaulting works right -select a,b from table1; -select a,b,c from table1; -select a,b,c,d from table1; -select a,c,d from table1; -select a,d from table1; -select c from table1; -select d from table1; - --- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT --- smallint = (2-byte signed integer, from -32,768 to 32,767) --- -CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; - -insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - --- Table-Non-Cascade CHANGE COLUMNS ... -alter table table2 change column a a int; - -insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -insert into table table2 values(5000, 'new'),(90000, 'new'); - -select a,b from table2; - - -DROP TABLE table1; -DROP TABLE table2; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q b/ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q deleted file mode 100644 index 44f7264..0000000 --- a/ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q +++ /dev/null @@ -1,56 +0,0 @@ -set hive.cli.print.header=true; -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=false; -set hive.fetch.task.conversion=none; -set hive.exec.dynamic.partition.mode=nonstrict; - - --- SORT_QUERY_RESULTS --- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table --- --- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT ---- -CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; - -insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); - --- Table-Non-Cascade ADD COLUMNS ... -alter table table1 add columns(c int, d string); - -insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); - -insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); - --- SELECT permutation columns to make sure NULL defaulting works right -select a,b from table1; -select a,b,c from table1; -select a,b,c,d from table1; -select a,c,d from table1; -select a,d from table1; -select c from table1; -select d from table1; - --- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT --- smallint = (2-byte signed integer, from -32,768 to 32,767) --- -CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; - -insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - --- Table-Non-Cascade CHANGE COLUMNS ... -alter table table2 change column a a int; - -insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -insert into table table2 values(5000, 'new'),(90000, 'new'); - -select a,b from table2; - - -DROP TABLE table1; -DROP TABLE table2; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q b/ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q deleted file mode 100644 index 4d78642..0000000 --- a/ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q +++ /dev/null @@ -1,98 +0,0 @@ -set hive.mapred.mode=nonstrict; -set hive.cli.print.header=true; -SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=false; -set hive.fetch.task.conversion=more; -set hive.exec.dynamic.partition.mode=nonstrict; - - --- SORT_QUERY_RESULTS --- --- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned --- --- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT ---- -CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; - -insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); - --- Table-Non-Cascade ADD COLUMNS ... -alter table partitioned1 add columns(c int, d string); - -insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); - -insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); - --- SELECT permutation columns to make sure NULL defaulting works right -select part,a,b from partitioned1; -select part,a,b,c from partitioned1; -select part,a,b,c,d from partitioned1; -select part,a,c,d from partitioned1; -select part,a,d from partitioned1; -select part,c from partitioned1; -select part,d from partitioned1; - --- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT --- smallint = (2-byte signed integer, from -32,768 to 32,767) --- -CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; - -insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - --- Table-Non-Cascade CHANGE COLUMNS ... -alter table partitioned2 change column a a int; - -insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new'); - -select part,a,b from partitioned2; - - --- --- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT ---- -CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; - -insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); - --- Table-Non-Cascade ADD COLUMNS ... -alter table partitioned3 add columns(c int, d string); - -insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), - (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1); - --- SELECT permutation columns to make sure NULL defaulting works right -select part,a,b from partitioned1; -select part,a,b,c from partitioned1; -select part,a,b,c,d from partitioned1; -select part,a,c,d from partitioned1; -select part,a,d from partitioned1; -select part,c from partitioned1; -select part,d from partitioned1; - - --- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT --- smallint = (2-byte signed integer, from -32,768 to 32,767) --- -CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; - -insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - --- Table-Non-Cascade CHANGE COLUMNS ... -alter table partitioned4 change column a a int; - -insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), - (5000, 'new', 1),(90000, 'new', 1); - -select part,a,b from partitioned4; - - -DROP TABLE partitioned1; -DROP TABLE partitioned2; -DROP TABLE partitioned3; -DROP TABLE partitioned4; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q b/ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q deleted file mode 100644 index 0834351..0000000 --- a/ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q +++ /dev/null @@ -1,67 +0,0 @@ -set hive.cli.print.header=true; -SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=more; - --- SORT_QUERY_RESULTS --- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table --- --- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS ---- -CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; - -insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); - -select a,b from table1; - --- ADD COLUMNS -alter table table1 add columns(c int, d string); - -insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); - -select a,b,c,d from table1; - --- ADD COLUMNS -alter table table1 add columns(e string); - -insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2'); - -select a,b,c,d,e from table1; - - --- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN --- smallint = (2-byte signed integer, from -32,768 to 32,767) --- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE; - -insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - -select a,b from table3; - --- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int; - -insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -select a,b from table3; - --- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string); - -insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6'); - -select a,b from table3; - - --- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int; - -select a,b from table3; - - -DROP TABLE table1; -DROP TABLE table2; -DROP TABLE table3; \ No newline at end of file
