Repository: hive Updated Branches: refs/heads/master b9fc5fc10 -> 996fa0704
HIVE-17116: Vectorization: Add infrastructure for vectorization of ROW__ID struct (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/996fa070 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/996fa070 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/996fa070 Branch: refs/heads/master Commit: 996fa070410b673ebd47511b33c78da4c4757723 Parents: b9fc5fc Author: Matt McCline <[email protected]> Authored: Thu Jul 20 20:55:24 2017 -0500 Committer: Matt McCline <[email protected]> Committed: Thu Jul 20 20:55:24 2017 -0500 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 + .../test/resources/testconfiguration.properties | 1 + .../hive/llap/io/api/impl/LlapInputFormat.java | 4 +- .../hive/ql/exec/vector/VectorMapOperator.java | 41 +- .../ql/exec/vector/VectorizationContext.java | 2 + .../ql/exec/vector/VectorizedRowBatchCtx.java | 43 +- .../hadoop/hive/ql/metadata/VirtualColumn.java | 14 + .../hive/ql/optimizer/physical/Vectorizer.java | 164 +++-- .../queries/clientpositive/vector_row__id.q | 56 ++ .../clientpositive/llap/vector_row__id.q.out | 605 +++++++++++++++++++ .../results/clientpositive/vector_row__id.q.out | 491 +++++++++++++++ 11 files changed, 1377 insertions(+), 48 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index f360dfa..df45f2c 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2856,6 +2856,10 @@ public class HiveConf extends Configuration { "of aggregations that use complex types.\n", "For example, AVG uses a complex type (STRUCT) for partial aggregation results" + "The default value is true."), + HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED("hive.vectorized.row.identifier.enabled", false, + "This flag should be set to true to enable vectorization\n" + + "of ROW__ID.\n" + + "The default value is false."), HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " + "whether to check, convert, and normalize partition value to conform to its column type in " http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index cffe245..f66e19b 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -356,6 +356,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_reduce2.q,\ vector_reduce3.q,\ vector_reduce_groupby_decimal.q,\ + vector_row__id.q,\ vector_string_concat.q,\ vector_struct_in.q,\ vector_udf_character_length.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index 22ca025..79ec4ed 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -190,8 +190,10 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB } } } + // UNDONE: Virtual column support? return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]), - colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount, new String[0]); + colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount, + new VirtualColumn[0], new String[0]); } static TableScanOperator findTsOp(MapWork mapWork) throws HiveException { http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java index ed50df2..1ac8914 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.orc.OrcSerde; import org.apache.hadoop.hive.ql.io.orc.OrcStruct; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; @@ -125,6 +126,9 @@ public class VectorMapOperator extends AbstractMapOperator { private transient int dataColumnCount; private transient int partitionColumnCount; private transient Object[] partitionValues; + private transient int virtualColumnCount; + private transient boolean hasRowIdentifier; + private transient int rowIdentifierColumnNum; private transient boolean[] dataColumnsToIncludeTruncated; @@ -504,6 +508,19 @@ public class VectorMapOperator extends AbstractMapOperator { dataColumnCount = batchContext.getDataColumnCount(); partitionColumnCount = batchContext.getPartitionColumnCount(); partitionValues = new Object[partitionColumnCount]; + virtualColumnCount = batchContext.getVirtualColumnCount(); + rowIdentifierColumnNum = -1; + if (virtualColumnCount > 0) { + final int firstVirtualColumnNum = dataColumnCount + partitionColumnCount; + VirtualColumn[] neededVirtualColumns = batchContext.getNeededVirtualColumns(); + hasRowIdentifier = (neededVirtualColumns[0] == VirtualColumn.ROWID); + if (hasRowIdentifier) { + rowIdentifierColumnNum = firstVirtualColumnNum; + } + } else { + hasRowIdentifier = false; + } + dataColumnNums = batchContext.getDataColumnNums(); Preconditions.checkState(dataColumnNums != null); @@ -601,6 +618,13 @@ public class VectorMapOperator extends AbstractMapOperator { currentVectorPartContext.partName); } + private void setRowIdentiferToNull(VectorizedRowBatch batch) { + ColumnVector rowIdentifierColVector = batch.cols[rowIdentifierColumnNum]; + rowIdentifierColVector.isNull[0] = true; + rowIdentifierColVector.noNulls = false; + rowIdentifierColVector.isRepeating = true; + } + /* * Setup the context for reading from the next partition file. */ @@ -695,6 +719,12 @@ public class VectorMapOperator extends AbstractMapOperator { batchContext.addPartitionColsToBatch(deserializerBatch, partitionValues); } + if (hasRowIdentifier) { + + // No ACID in code path -- set ROW__ID to NULL. + setRowIdentiferToNull(deserializerBatch); + } + /* * Set or clear the rest of the reading variables based on {vector|row} deserialization. */ @@ -778,7 +808,16 @@ public class VectorMapOperator extends AbstractMapOperator { */ batchCounter++; if (value != null) { - numRows += ((VectorizedRowBatch) value).size; + VectorizedRowBatch batch = (VectorizedRowBatch) value; + numRows += batch.size; + if (hasRowIdentifier) { + + // UNDONE: Pass ROW__ID STRUCT column through IO Context to get filled in by ACID reader + // UNDONE: Or, perhaps tell it to do it before calling us, ... + // UNDONE: For now, set column to NULL. + + setRowIdentiferToNull(batch); + } } oneRootOperator.process(value, 0); if (oneRootOperator.getDone()) { http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 9e026f0..fcebb6f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -124,6 +124,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -221,6 +222,7 @@ public class VectorizationContext { projectedColumns.add(i); projectionColumnMap.put(projectionColumnNames.get(i), i); } + int firstOutputColumnIndex = projectedColumns.size(); this.ocm = new OutputColumnManager(firstOutputColumnIndex); this.firstOutputColumnIndex = firstOutputColumnIndex; http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 3c12e04..90d1372 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.IOPrepareCache; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; @@ -78,6 +79,8 @@ public class VectorizedRowBatchCtx { private int[] dataColumnNums; private int dataColumnCount; private int partitionColumnCount; + private int virtualColumnCount; + private VirtualColumn[] neededVirtualColumns; private String[] scratchColumnTypeNames; @@ -88,14 +91,17 @@ public class VectorizedRowBatchCtx { } public VectorizedRowBatchCtx(String[] rowColumnNames, TypeInfo[] rowColumnTypeInfos, - int[] dataColumnNums, int partitionColumnCount, String[] scratchColumnTypeNames) { + int[] dataColumnNums, int partitionColumnCount, VirtualColumn[] neededVirtualColumns, + String[] scratchColumnTypeNames) { this.rowColumnNames = rowColumnNames; this.rowColumnTypeInfos = rowColumnTypeInfos; this.dataColumnNums = dataColumnNums; this.partitionColumnCount = partitionColumnCount; + this.neededVirtualColumns = neededVirtualColumns; + this.virtualColumnCount = neededVirtualColumns.length; this.scratchColumnTypeNames = scratchColumnTypeNames; - dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount; + dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount - virtualColumnCount; } public String[] getRowColumnNames() { @@ -118,6 +124,14 @@ public class VectorizedRowBatchCtx { return partitionColumnCount; } + public int getVirtualColumnCount() { + return virtualColumnCount; + } + + public VirtualColumn[] getNeededVirtualColumns() { + return neededVirtualColumns; + } + public String[] getScratchColumnTypeNames() { return scratchColumnTypeNames; } @@ -138,6 +152,8 @@ public class VectorizedRowBatchCtx { rowColumnTypeInfos = VectorizedBatchUtil.typeInfosFromStructObjectInspector(structObjectInspector); dataColumnNums = null; partitionColumnCount = 0; + virtualColumnCount = 0; + neededVirtualColumns = new VirtualColumn[0]; dataColumnCount = rowColumnTypeInfos.length; // Scratch column information. @@ -204,13 +220,14 @@ public class VectorizedRowBatchCtx { */ public VectorizedRowBatch createVectorizedRowBatch() { - final int dataAndPartColumnCount = rowColumnTypeInfos.length; - final int totalColumnCount = dataAndPartColumnCount + scratchColumnTypeNames.length; + final int nonScratchColumnCount = rowColumnTypeInfos.length; + final int totalColumnCount = + nonScratchColumnCount + scratchColumnTypeNames.length; VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount); if (dataColumnNums == null) { // All data and partition columns. - for (int i = 0; i < dataAndPartColumnCount; i++) { + for (int i = 0; i < nonScratchColumnCount; i++) { TypeInfo typeInfo = rowColumnTypeInfos[i]; result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo); } @@ -218,24 +235,30 @@ public class VectorizedRowBatchCtx { // Create only needed/included columns data columns. for (int i = 0; i < dataColumnNums.length; i++) { int columnNum = dataColumnNums[i]; - Preconditions.checkState(columnNum < dataAndPartColumnCount); + Preconditions.checkState(columnNum < nonScratchColumnCount); TypeInfo typeInfo = rowColumnTypeInfos[columnNum]; result.cols[columnNum] = VectorizedBatchUtil.createColumnVector(typeInfo); } - // Always create partition columns. - final int endColumnNum = dataColumnCount + partitionColumnCount; - for (int partitionColumnNum = dataColumnCount; partitionColumnNum < endColumnNum; partitionColumnNum++) { + // Always create partition and virtual columns. + final int partitionEndColumnNum = dataColumnCount + partitionColumnCount; + for (int partitionColumnNum = dataColumnCount; partitionColumnNum < partitionEndColumnNum; partitionColumnNum++) { TypeInfo typeInfo = rowColumnTypeInfos[partitionColumnNum]; result.cols[partitionColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo); } + final int virtualEndColumnNum = partitionEndColumnNum + virtualColumnCount; + for (int virtualColumnNum = partitionEndColumnNum; virtualColumnNum < virtualEndColumnNum; virtualColumnNum++) { + TypeInfo typeInfo = rowColumnTypeInfos[virtualColumnNum]; + result.cols[virtualColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo); + } } for (int i = 0; i < scratchColumnTypeNames.length; i++) { String typeName = scratchColumnTypeNames[i]; - result.cols[rowColumnTypeInfos.length + i] = + result.cols[nonScratchColumnCount + i] = VectorizedBatchUtil.createColumnVector(typeName); } + // UNDONE: Also remember virtualColumnCount... result.setPartitionInfo(dataColumnCount, partitionColumnCount); result.reset(); http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java index 2435bf1..0032305 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java @@ -20,10 +20,13 @@ package org.apache.hadoop.hive.ql.metadata; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.ListIterator; +import java.util.Map; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import org.apache.hadoop.conf.Configuration; @@ -64,6 +67,17 @@ public enum VirtualColumn { ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), ROWOFFSET.getName(), RAWDATASIZE.getName(), GROUPINGID.getName(), ROWID.getName()); + public static final ImmutableMap<String, VirtualColumn> VIRTUAL_COLUMN_NAME_MAP = + new ImmutableMap.Builder<String, VirtualColumn>().putAll(getColumnNameMap()).build(); + + private static Map<String, VirtualColumn> getColumnNameMap() { + Map<String, VirtualColumn> map = new HashMap<String, VirtualColumn>(); + for (VirtualColumn virtualColumn : values()) { + map.put(virtualColumn.name, virtualColumn); + } + return map; + } + private final String name; private final TypeInfo typeInfo; private final boolean isHidden; http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 8183194..0913f40 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -75,6 +75,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.HiveVectorAdaptorUsageMode; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; @@ -229,6 +230,7 @@ import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hive.common.util.AnnotationUtils; import org.apache.hadoop.util.ReflectionUtils; +import com.google.common.collect.ImmutableSet; import com.google.common.base.Preconditions; public class Vectorizer implements PhysicalPlanResolver { @@ -274,6 +276,10 @@ public class Vectorizer implements PhysicalPlanResolver { private Set<String> supportedAggregationUdfs = new HashSet<String>(); + // The set of virtual columns that vectorized readers *MAY* support. + public static final ImmutableSet<VirtualColumn> vectorizableVirtualColumns = + ImmutableSet.of(VirtualColumn.ROWID); + private HiveConf hiveConf; private boolean useVectorizedInputFileFormat; @@ -283,6 +289,7 @@ public class Vectorizer implements PhysicalPlanResolver { private boolean isPtfVectorizationEnabled; private boolean isVectorizationComplexTypesEnabled; private boolean isVectorizationGroupByComplexTypesEnabled; + private boolean isVectorizedRowIdentifierEnabled; private boolean isSchemaEvolution; @@ -316,6 +323,9 @@ public class Vectorizer implements PhysicalPlanResolver { private long vectorizedVertexNum = -1; + private Set<VirtualColumn> availableVectorizedVirtualColumnSet = null; + private Set<VirtualColumn> neededVirtualColumnSet = null; + public Vectorizer() { /* @@ -453,6 +463,8 @@ public class Vectorizer implements PhysicalPlanResolver { List<Integer> dataColumnNums; int partitionColumnCount; + List<VirtualColumn> availableVirtualColumnList; + List<VirtualColumn> neededVirtualColumnList; boolean useVectorizedInputFileFormat; boolean groupByVectorOutput; @@ -488,6 +500,12 @@ public class Vectorizer implements PhysicalPlanResolver { public void setPartitionColumnCount(int partitionColumnCount) { this.partitionColumnCount = partitionColumnCount; } + public void setAvailableVirtualColumnList(List<VirtualColumn> availableVirtualColumnList) { + this.availableVirtualColumnList = availableVirtualColumnList; + } + public void setNeededVirtualColumnList(List<VirtualColumn> neededVirtualColumnList) { + this.neededVirtualColumnList = neededVirtualColumnList; + } public void setScratchTypeNameArray(String[] scratchTypeNameArray) { this.scratchTypeNameArray = scratchTypeNameArray; } @@ -522,6 +540,16 @@ public class Vectorizer implements PhysicalPlanResolver { public void transferToBaseWork(BaseWork baseWork) { + final int virtualColumnCount; + VirtualColumn[] neededVirtualColumns; + if (neededVirtualColumnList != null && neededVirtualColumnList.size() > 0) { + virtualColumnCount = neededVirtualColumnList.size(); + neededVirtualColumns = neededVirtualColumnList.toArray(new VirtualColumn[0]); + } else { + virtualColumnCount = 0; + neededVirtualColumns = new VirtualColumn[0]; + } + String[] allColumnNameArray = allColumnNames.toArray(new String[0]); TypeInfo[] allTypeInfoArray = allTypeInfos.toArray(new TypeInfo[0]); int[] dataColumnNumsArray; @@ -537,6 +565,7 @@ public class Vectorizer implements PhysicalPlanResolver { allTypeInfoArray, dataColumnNumsArray, partitionColumnCount, + neededVirtualColumns, scratchTypeNameArray); baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx); @@ -687,20 +716,41 @@ public class Vectorizer implements PhysicalPlanResolver { } private void getTableScanOperatorSchemaInfo(TableScanOperator tableScanOperator, - List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList) { + List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList, + List<VirtualColumn> availableVirtualColumnList) { - // Add all non-virtual columns to make a vectorization context for + // Add all columns to make a vectorization context for // the TableScan operator. RowSchema rowSchema = tableScanOperator.getSchema(); for (ColumnInfo c : rowSchema.getSignature()) { - // Validation will later exclude vectorization of virtual columns usage (HIVE-5560). - if (!isVirtualColumn(c)) { - String columnName = c.getInternalName(); - String typeName = c.getTypeName(); - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + // Validation will later exclude vectorization of virtual columns usage if necessary. + String columnName = c.getInternalName(); + + // Turns out partition columns get marked as virtual in ColumnInfo, so we need to + // check the VirtualColumn directly. + VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName); + if (virtualColumn == null) { logicalColumnNameList.add(columnName); - logicalTypeInfoList.add(typeInfo); + logicalTypeInfoList.add(TypeInfoUtils.getTypeInfoFromTypeString(c.getTypeName())); + } else { + + // The planner gives us a subset virtual columns available for this table scan. + // AND + // We only support some virtual columns in vectorization. + // + // So, create the intersection. Note these are available vectorizable virtual columns. + // Later we remember which virtual columns were *actually used* in the query so + // just those will be included in the Map VectorizedRowBatchCtx that has the + // information for creating the Map VectorizedRowBatch. + // + if (!vectorizableVirtualColumns.contains(virtualColumn)) { + continue; + } + if (virtualColumn == VirtualColumn.ROWID && !isVectorizedRowIdentifierEnabled) { + continue; + } + availableVirtualColumnList.add(virtualColumn); } } } @@ -893,14 +943,19 @@ public class Vectorizer implements PhysicalPlanResolver { boolean isAcidTable = tableScanOperator.getConf().isAcidTable(); // These names/types are the data columns plus partition columns. - final List<String> allColumnNameList = new ArrayList<String>(); - final List<TypeInfo> allTypeInfoList = new ArrayList<TypeInfo>(); + final List<String> dataAndPartColumnNameList = new ArrayList<String>(); + final List<TypeInfo> dataAndPartTypeInfoList = new ArrayList<TypeInfo>(); + + final List<VirtualColumn> availableVirtualColumnList = new ArrayList<VirtualColumn>(); - getTableScanOperatorSchemaInfo(tableScanOperator, allColumnNameList, allTypeInfoList); + getTableScanOperatorSchemaInfo( + tableScanOperator, + dataAndPartColumnNameList, dataAndPartTypeInfoList, + availableVirtualColumnList); final List<Integer> dataColumnNums = new ArrayList<Integer>(); - final int allColumnCount = allColumnNameList.size(); + final int dataAndPartColumnCount = dataAndPartColumnNameList.size(); /* * Validate input formats of all the partitions can be vectorized. @@ -956,17 +1011,17 @@ public class Vectorizer implements PhysicalPlanResolver { LinkedHashMap<String, String> partSpec = partDesc.getPartSpec(); if (partSpec != null && partSpec.size() > 0) { partitionColumnCount = partSpec.size(); - dataColumnCount = allColumnCount - partitionColumnCount; + dataColumnCount = dataAndPartColumnCount - partitionColumnCount; } else { partitionColumnCount = 0; - dataColumnCount = allColumnCount; + dataColumnCount = dataAndPartColumnCount; } - determineDataColumnNums(tableScanOperator, allColumnNameList, dataColumnCount, + determineDataColumnNums(tableScanOperator, dataAndPartColumnNameList, dataColumnCount, dataColumnNums); - tableDataColumnList = allColumnNameList.subList(0, dataColumnCount); - tableDataTypeInfoList = allTypeInfoList.subList(0, dataColumnCount); + tableDataColumnList = dataAndPartColumnNameList.subList(0, dataColumnCount); + tableDataTypeInfoList = dataAndPartTypeInfoList.subList(0, dataColumnCount); isFirst = false; } @@ -1038,10 +1093,14 @@ public class Vectorizer implements PhysicalPlanResolver { vectorPartDesc.setDataTypeInfos(nextDataTypeInfoList); } - vectorTaskColumnInfo.setAllColumnNames(allColumnNameList); - vectorTaskColumnInfo.setAllTypeInfos(allTypeInfoList); + // For now, we don't know which virtual columns are going to be included. We'll add them + // later... + vectorTaskColumnInfo.setAllColumnNames(dataAndPartColumnNameList); + vectorTaskColumnInfo.setAllTypeInfos(dataAndPartTypeInfoList); + vectorTaskColumnInfo.setDataColumnNums(dataColumnNums); vectorTaskColumnInfo.setPartitionColumnCount(partitionColumnCount); + vectorTaskColumnInfo.setAvailableVirtualColumnList(availableVirtualColumnList); vectorTaskColumnInfo.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); // Always set these so EXPLAIN can see. @@ -1082,6 +1141,14 @@ public class Vectorizer implements PhysicalPlanResolver { return false; } + // Set global member indicating which virtual columns are possible to be used by + // the Map vertex. + availableVectorizedVirtualColumnSet = new HashSet<VirtualColumn>(); + availableVectorizedVirtualColumnSet.addAll(vectorTaskColumnInfo.availableVirtualColumnList); + + // And, use set to remember which virtual columns were actually referenced. + neededVirtualColumnSet = new HashSet<VirtualColumn>(); + // Now we are enabled and any issues found from here on out are considered // not vectorized issues. mapWork.setVectorizationEnabled(true); @@ -1104,6 +1171,21 @@ public class Vectorizer implements PhysicalPlanResolver { } } } + + List<VirtualColumn> neededVirtualColumnList = new ArrayList<VirtualColumn>(); + if (!neededVirtualColumnSet.isEmpty()) { + + // Create needed in same order. + for (VirtualColumn virtualColumn : vectorTaskColumnInfo.availableVirtualColumnList) { + if (neededVirtualColumnSet.contains(virtualColumn)) { + neededVirtualColumnList.add(virtualColumn); + vectorTaskColumnInfo.allColumnNames.add(virtualColumn.getName()); + vectorTaskColumnInfo.allTypeInfos.add(virtualColumn.getTypeInfo()); + } + } + } + + vectorTaskColumnInfo.setNeededVirtualColumnList(neededVirtualColumnList); vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps()); return true; } @@ -1737,6 +1819,10 @@ public class Vectorizer implements PhysicalPlanResolver { HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED); + isVectorizedRowIdentifierEnabled = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED); + isSchemaEvolution = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION); @@ -2328,10 +2414,24 @@ public class Vectorizer implements PhysicalPlanResolver { VectorExpressionDescriptor.Mode mode, boolean allowComplex) { if (desc instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; - // Currently, we do not support vectorized virtual columns (see HIVE-5570). - if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) { - setExpressionIssue(expressionTitle, "Virtual columns not supported (" + c.getColumn() + ")"); - return false; + String columnName = c.getColumn(); + + if (availableVectorizedVirtualColumnSet != null) { + + // For Map, check for virtual columns. + VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName); + if (virtualColumn != null) { + + // We support some virtual columns in vectorization for this table scan. + + if (!availableVectorizedVirtualColumnSet.contains(virtualColumn)) { + setExpressionIssue(expressionTitle, "Virtual column " + columnName + " is not supported"); + return false; + } + + // Remember we used this one in the query. + neededVirtualColumnSet.add(virtualColumn); + } } } String typeName = desc.getTypeInfo().getTypeName(); @@ -4180,28 +4280,20 @@ public class Vectorizer implements PhysicalPlanResolver { return vectorOp; } - private boolean isVirtualColumn(ColumnInfo column) { - - // Not using method column.getIsVirtualCol() because partitioning columns are also - // treated as virtual columns in ColumnInfo. - if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getInternalName())) { - return true; - } - return false; - } - public void debugDisplayAllMaps(BaseWork work) { VectorizedRowBatchCtx vectorizedRowBatchCtx = work.getVectorizedRowBatchCtx(); String[] allColumnNames = vectorizedRowBatchCtx.getRowColumnNames(); - Object columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos(); + TypeInfo[] columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos(); int partitionColumnCount = vectorizedRowBatchCtx.getPartitionColumnCount(); + int virtualColumnCount = vectorizedRowBatchCtx.getVirtualColumnCount(); String[] scratchColumnTypeNames =vectorizedRowBatchCtx.getScratchColumnTypeNames(); - LOG.debug("debugDisplayAllMaps allColumnNames " + Arrays.toString(allColumnNames)); - LOG.debug("debugDisplayAllMaps columnTypeInfos " + Arrays.deepToString((Object[]) columnTypeInfos)); + LOG.debug("debugDisplayAllMaps rowColumnNames " + Arrays.toString(allColumnNames)); + LOG.debug("debugDisplayAllMaps rowColumnTypeInfos " + Arrays.toString(columnTypeInfos)); LOG.debug("debugDisplayAllMaps partitionColumnCount " + partitionColumnCount); + LOG.debug("debugDisplayAllMaps virtualColumnCount " + virtualColumnCount); LOG.debug("debugDisplayAllMaps scratchColumnTypeNames " + Arrays.toString(scratchColumnTypeNames)); } } http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/test/queries/clientpositive/vector_row__id.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_row__id.q b/ql/src/test/queries/clientpositive/vector_row__id.q new file mode 100644 index 0000000..11eda90 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_row__id.q @@ -0,0 +1,56 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +-- SORT_QUERY_RESULTS + +drop table if exists hello_acid; +create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true'); + +insert into hello_acid partition (load_date='2016-03-01') values (1, 1); +insert into hello_acid partition (load_date='2016-03-02') values (2, 2); +insert into hello_acid partition (load_date='2016-03-03') values (3, 3); + +set hive.vectorized.row.identifier.enabled=false; + +explain vectorization detail +select row__id, key, value from hello_acid order by key; + +select row__id, key, value from hello_acid order by key; + +explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; + +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; + +explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; + +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; + + + +set hive.vectorized.row.identifier.enabled=true; + +explain vectorization detail +select row__id, key, value from hello_acid order by key; + +select row__id, key, value from hello_acid order by key; + +explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; + +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; + +explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; + +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; + http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/test/results/clientpositive/llap/vector_row__id.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_row__id.q.out b/ql/src/test/results/clientpositive/llap/vector_row__id.q.out new file mode 100644 index 0000000..850e3a4 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_row__id.q.out @@ -0,0 +1,605 @@ +PREHOOK: query: drop table if exists hello_acid +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists hello_acid +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hello_acid +POSTHOOK: query: create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hello_acid +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-01 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-02 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-03 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-03 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int) + Execution mode: llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:struct<transactionid:bigint,bucketid:int,rowid:bigint>, VALUE._col1:int + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +{"transactionid":3,"bucketid":536870912,"rowid":0} 1 1 +{"transactionid":4,"bucketid":536870912,"rowid":0} 2 2 +{"transactionid":5,"bucketid":536870912,"rowid":0} 3 3 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3 +4 +5 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ROW__ID.transactionid = 3) (type: boolean) + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Virtual column ROW__ID is not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3 +PREHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1] + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [3, 1] + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 1 + partitionColumns: load_date:date + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:struct<transactionid:bigint,bucketid:int,rowid:bigint>, VALUE._col1:int + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +NULL 1 1 +NULL 2 2 +NULL 3 3 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[ROW__ID].transactionid + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3 +4 +5 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ROW__ID.transactionid = 3) (type: boolean) + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Cannot handle expression type: ExprNodeFieldDesc + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3 http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/test/results/clientpositive/vector_row__id.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_row__id.q.out b/ql/src/test/results/clientpositive/vector_row__id.q.out new file mode 100644 index 0000000..d48902b --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_row__id.q.out @@ -0,0 +1,491 @@ +PREHOOK: query: drop table if exists hello_acid +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists hello_acid +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hello_acid +POSTHOOK: query: create table hello_acid (key int, value int) +partitioned by (load_date date) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hello_acid +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-01 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-02 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) +PREHOOK: type: QUERY +PREHOOK: Output: default@hello_acid@load_date=2016-03-03 +POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@hello_acid@load_date=2016-03-03 +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +{"transactionid":3,"bucketid":536936448,"rowid":0} 1 1 +{"transactionid":4,"bucketid":537001984,"rowid":0} 2 2 +{"transactionid":5,"bucketid":536870912,"rowid":0} 3 3 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3 +4 +5 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ROW__ID.transactionid = 3) (type: boolean) + Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Virtual column ROW__ID is not supported + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3 +PREHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1] + Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 1 + partitionColumns: load_date:date + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select row__id, key, value from hello_acid order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select row__id, key, value from hello_acid order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +NULL 1 1 +NULL 2 2 +NULL 3 3 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[ROW__ID].transactionid + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3 +4 +5 +PREHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: hello_acid + Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ROW__ID.transactionid = 3) (type: boolean) + Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ROW__ID.transactionid (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Cannot handle expression type: ExprNodeFieldDesc + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@hello_acid +PREHOOK: Input: default@hello_acid@load_date=2016-03-01 +PREHOOK: Input: default@hello_acid@load_date=2016-03-02 +PREHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hello_acid +POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 +POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 +#### A masked pattern was here #### +3
