Repository: hive Updated Branches: refs/heads/master 996fa0704 -> aa5e9bfab
Revert "HIVE-17116: Vectorization: Add infrastructure for vectorization of ROW__ID struct (Matt McCline, reviewed by Teddy Choi)" This reverts commit 996fa070410b673ebd47511b33c78da4c4757723. Transaction ids vary in Hive QA runs and break vector_row__id.q.out query results. Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/aa5e9bfa Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/aa5e9bfa Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/aa5e9bfa Branch: refs/heads/master Commit: aa5e9bfab58de9985239bfc8c13dc1f1b21ff67d Parents: 996fa07 Author: Matt McCline <[email protected]> Authored: Fri Jul 21 01:40:22 2017 -0500 Committer: Matt McCline <[email protected]> Committed: Fri Jul 21 01:40:22 2017 -0500 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 - .../test/resources/testconfiguration.properties | 1 - .../hive/llap/io/api/impl/LlapInputFormat.java | 4 +- .../hive/ql/exec/vector/VectorMapOperator.java | 41 +- .../ql/exec/vector/VectorizationContext.java | 2 - .../ql/exec/vector/VectorizedRowBatchCtx.java | 43 +- .../hadoop/hive/ql/metadata/VirtualColumn.java | 14 - .../hive/ql/optimizer/physical/Vectorizer.java | 164 ++--- .../queries/clientpositive/vector_row__id.q | 56 -- .../clientpositive/llap/vector_row__id.q.out | 605 ------------------- .../results/clientpositive/vector_row__id.q.out | 491 --------------- 11 files changed, 48 insertions(+), 1377 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index df45f2c..f360dfa 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2856,10 +2856,6 @@ public class HiveConf extends Configuration { "of aggregations that use complex types.\n", "For example, AVG uses a complex type (STRUCT) for partial aggregation results" + "The default value is true."), - HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED("hive.vectorized.row.identifier.enabled", false, - "This flag should be set to true to enable vectorization\n" + - "of ROW__ID.\n" + - "The default value is false."), HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " + "whether to check, convert, and normalize partition value to conform to its column type in " http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index f66e19b..cffe245 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -356,7 +356,6 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_reduce2.q,\ vector_reduce3.q,\ vector_reduce_groupby_decimal.q,\ - vector_row__id.q,\ vector_string_concat.q,\ vector_struct_in.q,\ vector_udf_character_length.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index 79ec4ed..22ca025 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -190,10 +190,8 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB } } } - // UNDONE: Virtual column support? return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]), - colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount, - new VirtualColumn[0], new String[0]); + colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount, new String[0]); } static TableScanOperator findTsOp(MapWork mapWork) throws HiveException { http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java index 1ac8914..ed50df2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java @@ -39,7 +39,6 @@ import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.orc.OrcSerde; import org.apache.hadoop.hive.ql.io.orc.OrcStruct; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; @@ -126,9 +125,6 @@ public class VectorMapOperator extends AbstractMapOperator { private transient int dataColumnCount; private transient int partitionColumnCount; private transient Object[] partitionValues; - private transient int virtualColumnCount; - private transient boolean hasRowIdentifier; - private transient int rowIdentifierColumnNum; private transient boolean[] dataColumnsToIncludeTruncated; @@ -508,19 +504,6 @@ public class VectorMapOperator extends AbstractMapOperator { dataColumnCount = batchContext.getDataColumnCount(); partitionColumnCount = batchContext.getPartitionColumnCount(); partitionValues = new Object[partitionColumnCount]; - virtualColumnCount = batchContext.getVirtualColumnCount(); - rowIdentifierColumnNum = -1; - if (virtualColumnCount > 0) { - final int firstVirtualColumnNum = dataColumnCount + partitionColumnCount; - VirtualColumn[] neededVirtualColumns = batchContext.getNeededVirtualColumns(); - hasRowIdentifier = (neededVirtualColumns[0] == VirtualColumn.ROWID); - if (hasRowIdentifier) { - rowIdentifierColumnNum = firstVirtualColumnNum; - } - } else { - hasRowIdentifier = false; - } - dataColumnNums = batchContext.getDataColumnNums(); Preconditions.checkState(dataColumnNums != null); @@ -618,13 +601,6 @@ public class VectorMapOperator extends AbstractMapOperator { currentVectorPartContext.partName); } - private void setRowIdentiferToNull(VectorizedRowBatch batch) { - ColumnVector rowIdentifierColVector = batch.cols[rowIdentifierColumnNum]; - rowIdentifierColVector.isNull[0] = true; - rowIdentifierColVector.noNulls = false; - rowIdentifierColVector.isRepeating = true; - } - /* * Setup the context for reading from the next partition file. */ @@ -719,12 +695,6 @@ public class VectorMapOperator extends AbstractMapOperator { batchContext.addPartitionColsToBatch(deserializerBatch, partitionValues); } - if (hasRowIdentifier) { - - // No ACID in code path -- set ROW__ID to NULL. - setRowIdentiferToNull(deserializerBatch); - } - /* * Set or clear the rest of the reading variables based on {vector|row} deserialization. */ @@ -808,16 +778,7 @@ public class VectorMapOperator extends AbstractMapOperator { */ batchCounter++; if (value != null) { - VectorizedRowBatch batch = (VectorizedRowBatch) value; - numRows += batch.size; - if (hasRowIdentifier) { - - // UNDONE: Pass ROW__ID STRUCT column through IO Context to get filled in by ACID reader - // UNDONE: Or, perhaps tell it to do it before calling us, ... - // UNDONE: For now, set column to NULL. - - setRowIdentiferToNull(batch); - } + numRows += ((VectorizedRowBatch) value).size; } oneRootOperator.process(value, 0); if (oneRootOperator.getDone()) { http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index fcebb6f..9e026f0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -124,7 +124,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -222,7 +221,6 @@ public class VectorizationContext { projectedColumns.add(i); projectionColumnMap.put(projectionColumnNames.get(i), i); } - int firstOutputColumnIndex = projectedColumns.size(); this.ocm = new OutputColumnManager(firstOutputColumnIndex); this.firstOutputColumnIndex = firstOutputColumnIndex; http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 90d1372..3c12e04 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -38,7 +38,6 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.IOPrepareCache; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; @@ -79,8 +78,6 @@ public class VectorizedRowBatchCtx { private int[] dataColumnNums; private int dataColumnCount; private int partitionColumnCount; - private int virtualColumnCount; - private VirtualColumn[] neededVirtualColumns; private String[] scratchColumnTypeNames; @@ -91,17 +88,14 @@ public class VectorizedRowBatchCtx { } public VectorizedRowBatchCtx(String[] rowColumnNames, TypeInfo[] rowColumnTypeInfos, - int[] dataColumnNums, int partitionColumnCount, VirtualColumn[] neededVirtualColumns, - String[] scratchColumnTypeNames) { + int[] dataColumnNums, int partitionColumnCount, String[] scratchColumnTypeNames) { this.rowColumnNames = rowColumnNames; this.rowColumnTypeInfos = rowColumnTypeInfos; this.dataColumnNums = dataColumnNums; this.partitionColumnCount = partitionColumnCount; - this.neededVirtualColumns = neededVirtualColumns; - this.virtualColumnCount = neededVirtualColumns.length; this.scratchColumnTypeNames = scratchColumnTypeNames; - dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount - virtualColumnCount; + dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount; } public String[] getRowColumnNames() { @@ -124,14 +118,6 @@ public class VectorizedRowBatchCtx { return partitionColumnCount; } - public int getVirtualColumnCount() { - return virtualColumnCount; - } - - public VirtualColumn[] getNeededVirtualColumns() { - return neededVirtualColumns; - } - public String[] getScratchColumnTypeNames() { return scratchColumnTypeNames; } @@ -152,8 +138,6 @@ public class VectorizedRowBatchCtx { rowColumnTypeInfos = VectorizedBatchUtil.typeInfosFromStructObjectInspector(structObjectInspector); dataColumnNums = null; partitionColumnCount = 0; - virtualColumnCount = 0; - neededVirtualColumns = new VirtualColumn[0]; dataColumnCount = rowColumnTypeInfos.length; // Scratch column information. @@ -220,14 +204,13 @@ public class VectorizedRowBatchCtx { */ public VectorizedRowBatch createVectorizedRowBatch() { - final int nonScratchColumnCount = rowColumnTypeInfos.length; - final int totalColumnCount = - nonScratchColumnCount + scratchColumnTypeNames.length; + final int dataAndPartColumnCount = rowColumnTypeInfos.length; + final int totalColumnCount = dataAndPartColumnCount + scratchColumnTypeNames.length; VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount); if (dataColumnNums == null) { // All data and partition columns. - for (int i = 0; i < nonScratchColumnCount; i++) { + for (int i = 0; i < dataAndPartColumnCount; i++) { TypeInfo typeInfo = rowColumnTypeInfos[i]; result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo); } @@ -235,30 +218,24 @@ public class VectorizedRowBatchCtx { // Create only needed/included columns data columns. for (int i = 0; i < dataColumnNums.length; i++) { int columnNum = dataColumnNums[i]; - Preconditions.checkState(columnNum < nonScratchColumnCount); + Preconditions.checkState(columnNum < dataAndPartColumnCount); TypeInfo typeInfo = rowColumnTypeInfos[columnNum]; result.cols[columnNum] = VectorizedBatchUtil.createColumnVector(typeInfo); } - // Always create partition and virtual columns. - final int partitionEndColumnNum = dataColumnCount + partitionColumnCount; - for (int partitionColumnNum = dataColumnCount; partitionColumnNum < partitionEndColumnNum; partitionColumnNum++) { + // Always create partition columns. + final int endColumnNum = dataColumnCount + partitionColumnCount; + for (int partitionColumnNum = dataColumnCount; partitionColumnNum < endColumnNum; partitionColumnNum++) { TypeInfo typeInfo = rowColumnTypeInfos[partitionColumnNum]; result.cols[partitionColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo); } - final int virtualEndColumnNum = partitionEndColumnNum + virtualColumnCount; - for (int virtualColumnNum = partitionEndColumnNum; virtualColumnNum < virtualEndColumnNum; virtualColumnNum++) { - TypeInfo typeInfo = rowColumnTypeInfos[virtualColumnNum]; - result.cols[virtualColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo); - } } for (int i = 0; i < scratchColumnTypeNames.length; i++) { String typeName = scratchColumnTypeNames[i]; - result.cols[nonScratchColumnCount + i] = + result.cols[rowColumnTypeInfos.length + i] = VectorizedBatchUtil.createColumnVector(typeName); } - // UNDONE: Also remember virtualColumnCount... result.setPartitionInfo(dataColumnCount, partitionColumnCount); result.reset(); http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java index 0032305..2435bf1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java @@ -20,13 +20,10 @@ package org.apache.hadoop.hive.ql.metadata; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; import java.util.List; import java.util.ListIterator; -import java.util.Map; import com.google.common.collect.ImmutableSet; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import org.apache.hadoop.conf.Configuration; @@ -67,17 +64,6 @@ public enum VirtualColumn { ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), ROWOFFSET.getName(), RAWDATASIZE.getName(), GROUPINGID.getName(), ROWID.getName()); - public static final ImmutableMap<String, VirtualColumn> VIRTUAL_COLUMN_NAME_MAP = - new ImmutableMap.Builder<String, VirtualColumn>().putAll(getColumnNameMap()).build(); - - private static Map<String, VirtualColumn> getColumnNameMap() { - Map<String, VirtualColumn> map = new HashMap<String, VirtualColumn>(); - for (VirtualColumn virtualColumn : values()) { - map.put(virtualColumn.name, virtualColumn); - } - return map; - } - private final String name; private final TypeInfo typeInfo; private final boolean isHidden; http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 0913f40..8183194 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -75,7 +75,6 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.HiveVectorAdaptorUsageMode; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; @@ -230,7 +229,6 @@ import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hive.common.util.AnnotationUtils; import org.apache.hadoop.util.ReflectionUtils; -import com.google.common.collect.ImmutableSet; import com.google.common.base.Preconditions; public class Vectorizer implements PhysicalPlanResolver { @@ -276,10 +274,6 @@ public class Vectorizer implements PhysicalPlanResolver { private Set<String> supportedAggregationUdfs = new HashSet<String>(); - // The set of virtual columns that vectorized readers *MAY* support. - public static final ImmutableSet<VirtualColumn> vectorizableVirtualColumns = - ImmutableSet.of(VirtualColumn.ROWID); - private HiveConf hiveConf; private boolean useVectorizedInputFileFormat; @@ -289,7 +283,6 @@ public class Vectorizer implements PhysicalPlanResolver { private boolean isPtfVectorizationEnabled; private boolean isVectorizationComplexTypesEnabled; private boolean isVectorizationGroupByComplexTypesEnabled; - private boolean isVectorizedRowIdentifierEnabled; private boolean isSchemaEvolution; @@ -323,9 +316,6 @@ public class Vectorizer implements PhysicalPlanResolver { private long vectorizedVertexNum = -1; - private Set<VirtualColumn> availableVectorizedVirtualColumnSet = null; - private Set<VirtualColumn> neededVirtualColumnSet = null; - public Vectorizer() { /* @@ -463,8 +453,6 @@ public class Vectorizer implements PhysicalPlanResolver { List<Integer> dataColumnNums; int partitionColumnCount; - List<VirtualColumn> availableVirtualColumnList; - List<VirtualColumn> neededVirtualColumnList; boolean useVectorizedInputFileFormat; boolean groupByVectorOutput; @@ -500,12 +488,6 @@ public class Vectorizer implements PhysicalPlanResolver { public void setPartitionColumnCount(int partitionColumnCount) { this.partitionColumnCount = partitionColumnCount; } - public void setAvailableVirtualColumnList(List<VirtualColumn> availableVirtualColumnList) { - this.availableVirtualColumnList = availableVirtualColumnList; - } - public void setNeededVirtualColumnList(List<VirtualColumn> neededVirtualColumnList) { - this.neededVirtualColumnList = neededVirtualColumnList; - } public void setScratchTypeNameArray(String[] scratchTypeNameArray) { this.scratchTypeNameArray = scratchTypeNameArray; } @@ -540,16 +522,6 @@ public class Vectorizer implements PhysicalPlanResolver { public void transferToBaseWork(BaseWork baseWork) { - final int virtualColumnCount; - VirtualColumn[] neededVirtualColumns; - if (neededVirtualColumnList != null && neededVirtualColumnList.size() > 0) { - virtualColumnCount = neededVirtualColumnList.size(); - neededVirtualColumns = neededVirtualColumnList.toArray(new VirtualColumn[0]); - } else { - virtualColumnCount = 0; - neededVirtualColumns = new VirtualColumn[0]; - } - String[] allColumnNameArray = allColumnNames.toArray(new String[0]); TypeInfo[] allTypeInfoArray = allTypeInfos.toArray(new TypeInfo[0]); int[] dataColumnNumsArray; @@ -565,7 +537,6 @@ public class Vectorizer implements PhysicalPlanResolver { allTypeInfoArray, dataColumnNumsArray, partitionColumnCount, - neededVirtualColumns, scratchTypeNameArray); baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx); @@ -716,41 +687,20 @@ public class Vectorizer implements PhysicalPlanResolver { } private void getTableScanOperatorSchemaInfo(TableScanOperator tableScanOperator, - List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList, - List<VirtualColumn> availableVirtualColumnList) { + List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList) { - // Add all columns to make a vectorization context for + // Add all non-virtual columns to make a vectorization context for // the TableScan operator. RowSchema rowSchema = tableScanOperator.getSchema(); for (ColumnInfo c : rowSchema.getSignature()) { + // Validation will later exclude vectorization of virtual columns usage (HIVE-5560). + if (!isVirtualColumn(c)) { + String columnName = c.getInternalName(); + String typeName = c.getTypeName(); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); - // Validation will later exclude vectorization of virtual columns usage if necessary. - String columnName = c.getInternalName(); - - // Turns out partition columns get marked as virtual in ColumnInfo, so we need to - // check the VirtualColumn directly. - VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName); - if (virtualColumn == null) { logicalColumnNameList.add(columnName); - logicalTypeInfoList.add(TypeInfoUtils.getTypeInfoFromTypeString(c.getTypeName())); - } else { - - // The planner gives us a subset virtual columns available for this table scan. - // AND - // We only support some virtual columns in vectorization. - // - // So, create the intersection. Note these are available vectorizable virtual columns. - // Later we remember which virtual columns were *actually used* in the query so - // just those will be included in the Map VectorizedRowBatchCtx that has the - // information for creating the Map VectorizedRowBatch. - // - if (!vectorizableVirtualColumns.contains(virtualColumn)) { - continue; - } - if (virtualColumn == VirtualColumn.ROWID && !isVectorizedRowIdentifierEnabled) { - continue; - } - availableVirtualColumnList.add(virtualColumn); + logicalTypeInfoList.add(typeInfo); } } } @@ -943,19 +893,14 @@ public class Vectorizer implements PhysicalPlanResolver { boolean isAcidTable = tableScanOperator.getConf().isAcidTable(); // These names/types are the data columns plus partition columns. - final List<String> dataAndPartColumnNameList = new ArrayList<String>(); - final List<TypeInfo> dataAndPartTypeInfoList = new ArrayList<TypeInfo>(); - - final List<VirtualColumn> availableVirtualColumnList = new ArrayList<VirtualColumn>(); + final List<String> allColumnNameList = new ArrayList<String>(); + final List<TypeInfo> allTypeInfoList = new ArrayList<TypeInfo>(); - getTableScanOperatorSchemaInfo( - tableScanOperator, - dataAndPartColumnNameList, dataAndPartTypeInfoList, - availableVirtualColumnList); + getTableScanOperatorSchemaInfo(tableScanOperator, allColumnNameList, allTypeInfoList); final List<Integer> dataColumnNums = new ArrayList<Integer>(); - final int dataAndPartColumnCount = dataAndPartColumnNameList.size(); + final int allColumnCount = allColumnNameList.size(); /* * Validate input formats of all the partitions can be vectorized. @@ -1011,17 +956,17 @@ public class Vectorizer implements PhysicalPlanResolver { LinkedHashMap<String, String> partSpec = partDesc.getPartSpec(); if (partSpec != null && partSpec.size() > 0) { partitionColumnCount = partSpec.size(); - dataColumnCount = dataAndPartColumnCount - partitionColumnCount; + dataColumnCount = allColumnCount - partitionColumnCount; } else { partitionColumnCount = 0; - dataColumnCount = dataAndPartColumnCount; + dataColumnCount = allColumnCount; } - determineDataColumnNums(tableScanOperator, dataAndPartColumnNameList, dataColumnCount, + determineDataColumnNums(tableScanOperator, allColumnNameList, dataColumnCount, dataColumnNums); - tableDataColumnList = dataAndPartColumnNameList.subList(0, dataColumnCount); - tableDataTypeInfoList = dataAndPartTypeInfoList.subList(0, dataColumnCount); + tableDataColumnList = allColumnNameList.subList(0, dataColumnCount); + tableDataTypeInfoList = allTypeInfoList.subList(0, dataColumnCount); isFirst = false; } @@ -1093,14 +1038,10 @@ public class Vectorizer implements PhysicalPlanResolver { vectorPartDesc.setDataTypeInfos(nextDataTypeInfoList); } - // For now, we don't know which virtual columns are going to be included. We'll add them - // later... - vectorTaskColumnInfo.setAllColumnNames(dataAndPartColumnNameList); - vectorTaskColumnInfo.setAllTypeInfos(dataAndPartTypeInfoList); - + vectorTaskColumnInfo.setAllColumnNames(allColumnNameList); + vectorTaskColumnInfo.setAllTypeInfos(allTypeInfoList); vectorTaskColumnInfo.setDataColumnNums(dataColumnNums); vectorTaskColumnInfo.setPartitionColumnCount(partitionColumnCount); - vectorTaskColumnInfo.setAvailableVirtualColumnList(availableVirtualColumnList); vectorTaskColumnInfo.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); // Always set these so EXPLAIN can see. @@ -1141,14 +1082,6 @@ public class Vectorizer implements PhysicalPlanResolver { return false; } - // Set global member indicating which virtual columns are possible to be used by - // the Map vertex. - availableVectorizedVirtualColumnSet = new HashSet<VirtualColumn>(); - availableVectorizedVirtualColumnSet.addAll(vectorTaskColumnInfo.availableVirtualColumnList); - - // And, use set to remember which virtual columns were actually referenced. - neededVirtualColumnSet = new HashSet<VirtualColumn>(); - // Now we are enabled and any issues found from here on out are considered // not vectorized issues. mapWork.setVectorizationEnabled(true); @@ -1171,21 +1104,6 @@ public class Vectorizer implements PhysicalPlanResolver { } } } - - List<VirtualColumn> neededVirtualColumnList = new ArrayList<VirtualColumn>(); - if (!neededVirtualColumnSet.isEmpty()) { - - // Create needed in same order. - for (VirtualColumn virtualColumn : vectorTaskColumnInfo.availableVirtualColumnList) { - if (neededVirtualColumnSet.contains(virtualColumn)) { - neededVirtualColumnList.add(virtualColumn); - vectorTaskColumnInfo.allColumnNames.add(virtualColumn.getName()); - vectorTaskColumnInfo.allTypeInfos.add(virtualColumn.getTypeInfo()); - } - } - } - - vectorTaskColumnInfo.setNeededVirtualColumnList(neededVirtualColumnList); vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps()); return true; } @@ -1819,10 +1737,6 @@ public class Vectorizer implements PhysicalPlanResolver { HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED); - isVectorizedRowIdentifierEnabled = - HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED); - isSchemaEvolution = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION); @@ -2414,24 +2328,10 @@ public class Vectorizer implements PhysicalPlanResolver { VectorExpressionDescriptor.Mode mode, boolean allowComplex) { if (desc instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; - String columnName = c.getColumn(); - - if (availableVectorizedVirtualColumnSet != null) { - - // For Map, check for virtual columns. - VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName); - if (virtualColumn != null) { - - // We support some virtual columns in vectorization for this table scan. - - if (!availableVectorizedVirtualColumnSet.contains(virtualColumn)) { - setExpressionIssue(expressionTitle, "Virtual column " + columnName + " is not supported"); - return false; - } - - // Remember we used this one in the query. - neededVirtualColumnSet.add(virtualColumn); - } + // Currently, we do not support vectorized virtual columns (see HIVE-5570). + if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) { + setExpressionIssue(expressionTitle, "Virtual columns not supported (" + c.getColumn() + ")"); + return false; } } String typeName = desc.getTypeInfo().getTypeName(); @@ -4280,20 +4180,28 @@ public class Vectorizer implements PhysicalPlanResolver { return vectorOp; } + private boolean isVirtualColumn(ColumnInfo column) { + + // Not using method column.getIsVirtualCol() because partitioning columns are also + // treated as virtual columns in ColumnInfo. + if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getInternalName())) { + return true; + } + return false; + } + public void debugDisplayAllMaps(BaseWork work) { VectorizedRowBatchCtx vectorizedRowBatchCtx = work.getVectorizedRowBatchCtx(); String[] allColumnNames = vectorizedRowBatchCtx.getRowColumnNames(); - TypeInfo[] columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos(); + Object columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos(); int partitionColumnCount = vectorizedRowBatchCtx.getPartitionColumnCount(); - int virtualColumnCount = vectorizedRowBatchCtx.getVirtualColumnCount(); String[] scratchColumnTypeNames =vectorizedRowBatchCtx.getScratchColumnTypeNames(); - LOG.debug("debugDisplayAllMaps rowColumnNames " + Arrays.toString(allColumnNames)); - LOG.debug("debugDisplayAllMaps rowColumnTypeInfos " + Arrays.toString(columnTypeInfos)); + LOG.debug("debugDisplayAllMaps allColumnNames " + Arrays.toString(allColumnNames)); + LOG.debug("debugDisplayAllMaps columnTypeInfos " + Arrays.deepToString((Object[]) columnTypeInfos)); LOG.debug("debugDisplayAllMaps partitionColumnCount " + partitionColumnCount); - LOG.debug("debugDisplayAllMaps virtualColumnCount " + virtualColumnCount); LOG.debug("debugDisplayAllMaps scratchColumnTypeNames " + Arrays.toString(scratchColumnTypeNames)); } } http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/test/queries/clientpositive/vector_row__id.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_row__id.q b/ql/src/test/queries/clientpositive/vector_row__id.q deleted file mode 100644 index 11eda90..0000000 --- a/ql/src/test/queries/clientpositive/vector_row__id.q +++ /dev/null @@ -1,56 +0,0 @@ -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -SET hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=none; - --- SORT_QUERY_RESULTS - -drop table if exists hello_acid; -create table hello_acid (key int, value int) -partitioned by (load_date date) -clustered by(key) into 3 buckets -stored as orc tblproperties ('transactional'='true'); - -insert into hello_acid partition (load_date='2016-03-01') values (1, 1); -insert into hello_acid partition (load_date='2016-03-02') values (2, 2); -insert into hello_acid partition (load_date='2016-03-03') values (3, 3); - -set hive.vectorized.row.identifier.enabled=false; - -explain vectorization detail -select row__id, key, value from hello_acid order by key; - -select row__id, key, value from hello_acid order by key; - -explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; - -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; - -explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; - -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; - - - -set hive.vectorized.row.identifier.enabled=true; - -explain vectorization detail -select row__id, key, value from hello_acid order by key; - -select row__id, key, value from hello_acid order by key; - -explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; - -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid; - -explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; - -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3; - http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/test/results/clientpositive/llap/vector_row__id.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_row__id.q.out b/ql/src/test/results/clientpositive/llap/vector_row__id.q.out deleted file mode 100644 index 850e3a4..0000000 --- a/ql/src/test/results/clientpositive/llap/vector_row__id.q.out +++ /dev/null @@ -1,605 +0,0 @@ -PREHOOK: query: drop table if exists hello_acid -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists hello_acid -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table hello_acid (key int, value int) -partitioned by (load_date date) -clustered by(key) into 3 buckets -stored as orc tblproperties ('transactional'='true') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@hello_acid -POSTHOOK: query: create table hello_acid (key int, value int) -partitioned by (load_date date) -clustered by(key) into 3 buckets -stored as orc tblproperties ('transactional'='true') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@hello_acid -PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) -PREHOOK: type: QUERY -PREHOOK: Output: default@hello_acid@load_date=2016-03-01 -POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) -PREHOOK: type: QUERY -PREHOOK: Output: default@hello_acid@load_date=2016-03-02 -POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) -PREHOOK: type: QUERY -PREHOOK: Output: default@hello_acid@load_date=2016-03-03 -POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@hello_acid@load_date=2016-03-03 -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain vectorization detail -select row__id, key, value from hello_acid order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select row__id, key, value from hello_acid order by key -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int) - Execution mode: llap - LLAP IO: may be used (ACID table) - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported - vectorized: false - Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - dataColumns: KEY.reducesinkkey0:int, VALUE._col0:struct<transactionid:bigint,bucketid:int,rowid:bigint>, VALUE._col1:int - partitionColumnCount: 0 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0, 2] - Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select row__id, key, value from hello_acid order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select row__id, key, value from hello_acid order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -{"transactionid":3,"bucketid":536870912,"rowid":0} 1 1 -{"transactionid":4,"bucketid":536870912,"rowid":0} 2 2 -{"transactionid":5,"bucketid":536870912,"rowid":0} 3 3 -PREHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ROW__ID.transactionid (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: may be used (ACID table) - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported - vectorized: false - Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:bigint - partitionColumnCount: 0 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -3 -4 -5 -PREHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ROW__ID.transactionid = 3) (type: boolean) - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ROW__ID.transactionid (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap - LLAP IO: may be used (ACID table) - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Predicate expression for FILTER operator: Virtual column ROW__ID is not supported - vectorized: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -3 -PREHOOK: query: explain vectorization detail -select row__id, key, value from hello_acid order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select row__id, key, value from hello_acid order by key -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] - Select Operator - expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [3, 0, 1] - Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumns: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: [3, 1] - Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:int - partitionColumnCount: 1 - partitionColumns: load_date:date - Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - dataColumns: KEY.reducesinkkey0:int, VALUE._col0:struct<transactionid:bigint,bucketid:int,rowid:bigint>, VALUE._col1:int - partitionColumnCount: 0 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0, 2] - Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select row__id, key, value from hello_acid order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select row__id, key, value from hello_acid order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -NULL 1 1 -NULL 2 2 -NULL 3 3 -PREHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ROW__ID.transactionid (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: may be used (ACID table) - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[ROW__ID].transactionid - vectorized: false - Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:bigint - partitionColumnCount: 0 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -3 -4 -5 -PREHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ROW__ID.transactionid = 3) (type: boolean) - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ROW__ID.transactionid (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap - LLAP IO: may be used (ACID table) - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Predicate expression for FILTER operator: Cannot handle expression type: ExprNodeFieldDesc - vectorized: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -3 http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/test/results/clientpositive/vector_row__id.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_row__id.q.out b/ql/src/test/results/clientpositive/vector_row__id.q.out deleted file mode 100644 index d48902b..0000000 --- a/ql/src/test/results/clientpositive/vector_row__id.q.out +++ /dev/null @@ -1,491 +0,0 @@ -PREHOOK: query: drop table if exists hello_acid -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists hello_acid -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table hello_acid (key int, value int) -partitioned by (load_date date) -clustered by(key) into 3 buckets -stored as orc tblproperties ('transactional'='true') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@hello_acid -POSTHOOK: query: create table hello_acid (key int, value int) -partitioned by (load_date date) -clustered by(key) into 3 buckets -stored as orc tblproperties ('transactional'='true') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@hello_acid -PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) -PREHOOK: type: QUERY -PREHOOK: Output: default@hello_acid@load_date=2016-03-01 -POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) -PREHOOK: type: QUERY -PREHOOK: Output: default@hello_acid@load_date=2016-03-02 -POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) -PREHOOK: type: QUERY -PREHOOK: Output: default@hello_acid@load_date=2016-03-03 -POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@hello_acid@load_date=2016-03-03 -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain vectorization detail -select row__id, key, value from hello_acid order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select row__id, key, value from hello_acid order by key -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int) - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported - vectorized: false - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select row__id, key, value from hello_acid order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select row__id, key, value from hello_acid order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -{"transactionid":3,"bucketid":536936448,"rowid":0} 1 1 -{"transactionid":4,"bucketid":537001984,"rowid":0} 2 2 -{"transactionid":5,"bucketid":536870912,"rowid":0} 3 3 -PREHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: ROW__ID.transactionid (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported - vectorized: false - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -3 -4 -5 -PREHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (ROW__ID.transactionid = 3) (type: boolean) - Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ROW__ID.transactionid (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Predicate expression for FILTER operator: Virtual column ROW__ID is not supported - vectorized: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -3 -PREHOOK: query: explain vectorization detail -select row__id, key, value from hello_acid order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select row__id, key, value from hello_acid order by key -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3] - Select Operator - expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [3, 0, 1] - Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:int - partitionColumnCount: 1 - partitionColumns: load_date:date - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select row__id, key, value from hello_acid order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select row__id, key, value from hello_acid order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -NULL 1 1 -NULL 2 2 -NULL 3 3 -PREHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: ROW__ID.transactionid (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[ROW__ID].transactionid - vectorized: false - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -3 -4 -5 -PREHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: hello_acid - Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (ROW__ID.transactionid = 3) (type: boolean) - Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ROW__ID.transactionid (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Predicate expression for FILTER operator: Cannot handle expression type: ExprNodeFieldDesc - vectorized: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@hello_acid -PREHOOK: Input: default@hello_acid@load_date=2016-03-01 -PREHOOK: Input: default@hello_acid@load_date=2016-03-02 -PREHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@hello_acid -POSTHOOK: Input: default@hello_acid@load_date=2016-03-01 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-02 -POSTHOOK: Input: default@hello_acid@load_date=2016-03-03 -#### A masked pattern was here #### -3
