hive git commit: HIVE-17116: Vectorization: Add infrastructure for vectorization of ROW__ID struct (Matt McCline, reviewed by Teddy Choi)

mmccline Thu, 20 Jul 2017 18:56:04 -0700

Repository: hive
Updated Branches:
  refs/heads/master b9fc5fc10 -> 996fa0704



HIVE-17116: Vectorization: Add infrastructure for vectorization of ROW__ID struct (Matt McCline, reviewed by Teddy Choi)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/996fa070
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/996fa070
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/996fa070

Branch: refs/heads/master
Commit: 996fa070410b673ebd47511b33c78da4c4757723
Parents: b9fc5fc
Author: Matt McCline <[email protected]>
Authored: Thu Jul 20 20:55:24 2017 -0500
Committer: Matt McCline <[email protected]>
Committed: Thu Jul 20 20:55:24 2017 -0500

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   4 +
 .../test/resources/testconfiguration.properties |   1 +
 .../hive/llap/io/api/impl/LlapInputFormat.java  |   4 +-
 .../hive/ql/exec/vector/VectorMapOperator.java  |  41 +-
 .../ql/exec/vector/VectorizationContext.java    |   2 +
 .../ql/exec/vector/VectorizedRowBatchCtx.java   |  43 +-
 .../hadoop/hive/ql/metadata/VirtualColumn.java  |  14 +
 .../hive/ql/optimizer/physical/Vectorizer.java  | 164 +++--
 .../queries/clientpositive/vector_row__id.q     |  56 ++
 .../clientpositive/llap/vector_row__id.q.out    | 605 +++++++++++++++++++
 .../results/clientpositive/vector_row__id.q.out | 491 +++++++++++++++
 11 files changed, 1377 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f360dfa..df45f2c 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2856,6 +2856,10 @@ public class HiveConf extends Configuration {
         "of aggregations that use complex types.\n",
         "For example, AVG uses a complex type (STRUCT) for partial aggregation results" +
         "The default value is true."),
+    HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED("hive.vectorized.row.identifier.enabled", false,
+        "This flag should be set to true to enable vectorization\n" +
+        "of ROW__ID.\n" +
+        "The default value is false."),
 
     HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control "
         + "whether to check, convert, and normalize partition value to conform to its column type in "

http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index cffe245..f66e19b 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -356,6 +356,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vector_reduce2.q,\
   vector_reduce3.q,\
   vector_reduce_groupby_decimal.q,\
+  vector_row__id.q,\
   vector_string_concat.q,\
   vector_struct_in.q,\
   vector_udf_character_length.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index 22ca025..79ec4ed 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -190,8 +190,10 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB
         }
       }
     }
+    // UNDONE: Virtual column support?
     return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]),
-        colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount, new String[0]);
+        colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount,
+        new VirtualColumn[0], new String[0]);
   }
 
   static TableScanOperator findTsOp(MapWork mapWork) throws HiveException {

http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index ed50df2..1ac8914 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
 import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -125,6 +126,9 @@ public class VectorMapOperator extends AbstractMapOperator {
   private transient int dataColumnCount;
   private transient int partitionColumnCount;
   private transient Object[] partitionValues;
+  private transient int virtualColumnCount;
+  private transient boolean hasRowIdentifier;
+  private transient int rowIdentifierColumnNum;
 
   private transient boolean[] dataColumnsToIncludeTruncated;
 
@@ -504,6 +508,19 @@ public class VectorMapOperator extends AbstractMapOperator 
{
     dataColumnCount = batchContext.getDataColumnCount();
     partitionColumnCount = batchContext.getPartitionColumnCount();
     partitionValues = new Object[partitionColumnCount];
+    virtualColumnCount = batchContext.getVirtualColumnCount();
+    rowIdentifierColumnNum = -1;
+    if (virtualColumnCount > 0) {
+      final int firstVirtualColumnNum = dataColumnCount + partitionColumnCount;
+      VirtualColumn[] neededVirtualColumns = 
batchContext.getNeededVirtualColumns();
+      hasRowIdentifier = (neededVirtualColumns[0] == VirtualColumn.ROWID);
+      if (hasRowIdentifier) {
+        rowIdentifierColumnNum = firstVirtualColumnNum;
+      }
+    } else {
+      hasRowIdentifier = false;
+    }
+    
 
     dataColumnNums = batchContext.getDataColumnNums();
     Preconditions.checkState(dataColumnNums != null);
@@ -601,6 +618,13 @@ public class VectorMapOperator extends AbstractMapOperator 
{
         currentVectorPartContext.partName);
   }
 
+  private void setRowIdentiferToNull(VectorizedRowBatch batch) {
+    ColumnVector rowIdentifierColVector = batch.cols[rowIdentifierColumnNum];
+    rowIdentifierColVector.isNull[0] = true;
+    rowIdentifierColVector.noNulls = false;
+    rowIdentifierColVector.isRepeating = true;
+  }
+
   /*
    * Setup the context for reading from the next partition file.
    */
@@ -695,6 +719,12 @@ public class VectorMapOperator extends AbstractMapOperator 
{
         batchContext.addPartitionColsToBatch(deserializerBatch, 
partitionValues);
       }
 
+      if (hasRowIdentifier) {
+
+        // No ACID in code path -- set ROW__ID to NULL.
+        setRowIdentiferToNull(deserializerBatch);
+      }
+
       /*
        * Set or clear the rest of the reading variables based on {vector|row} 
deserialization.
        */
@@ -778,7 +808,16 @@ public class VectorMapOperator extends AbstractMapOperator 
{
            */
           batchCounter++;
           if (value != null) {
-            numRows += ((VectorizedRowBatch) value).size;
+            VectorizedRowBatch batch = (VectorizedRowBatch) value;
+            numRows += batch.size;
+            if (hasRowIdentifier) {
+
+              // UNDONE: Pass ROW__ID STRUCT column through IO Context to get filled in by ACID reader
+              // UNDONE: Or, perhaps tell it to do it before calling us, ...
+              // UNDONE: For now, set column to NULL.
+
+              setRowIdentiferToNull(batch);
+            }
           }
           oneRootOperator.process(value, 0);
           if (oneRootOperator.getDone()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 9e026f0..fcebb6f 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -124,6 +124,7 @@ import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*;
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -221,6 +222,7 @@ public class VectorizationContext {
       projectedColumns.add(i);
       projectionColumnMap.put(projectionColumnNames.get(i), i);
     }
+
     int firstOutputColumnIndex = projectedColumns.size();
     this.ocm = new OutputColumnManager(firstOutputColumnIndex);
     this.firstOutputColumnIndex = firstOutputColumnIndex;

http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 3c12e04..90d1372 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.io.IOPrepareCache;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.plan.Explain;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -78,6 +79,8 @@ public class VectorizedRowBatchCtx {
   private int[] dataColumnNums;
   private int dataColumnCount;
   private int partitionColumnCount;
+  private int virtualColumnCount;
+  private VirtualColumn[] neededVirtualColumns;
 
   private String[] scratchColumnTypeNames;
 
@@ -88,14 +91,17 @@ public class VectorizedRowBatchCtx {
   }
 
   public VectorizedRowBatchCtx(String[] rowColumnNames, TypeInfo[] 
rowColumnTypeInfos,
-      int[] dataColumnNums, int partitionColumnCount, String[] 
scratchColumnTypeNames) {
+      int[] dataColumnNums, int partitionColumnCount, VirtualColumn[] 
neededVirtualColumns,
+      String[] scratchColumnTypeNames) {
     this.rowColumnNames = rowColumnNames;
     this.rowColumnTypeInfos = rowColumnTypeInfos;
     this.dataColumnNums = dataColumnNums;
     this.partitionColumnCount = partitionColumnCount;
+    this.neededVirtualColumns = neededVirtualColumns;
+    this.virtualColumnCount = neededVirtualColumns.length;
     this.scratchColumnTypeNames = scratchColumnTypeNames;
 
-    dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount;
+    dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount - 
virtualColumnCount;
   }
 
   public String[] getRowColumnNames() {
@@ -118,6 +124,14 @@ public class VectorizedRowBatchCtx {
     return partitionColumnCount;
   }
 
+  public int getVirtualColumnCount() {
+    return virtualColumnCount;
+  }
+
+  public VirtualColumn[] getNeededVirtualColumns() {
+    return neededVirtualColumns;
+  }
+
   public String[] getScratchColumnTypeNames() {
     return scratchColumnTypeNames;
   }
@@ -138,6 +152,8 @@ public class VectorizedRowBatchCtx {
     rowColumnTypeInfos = 
VectorizedBatchUtil.typeInfosFromStructObjectInspector(structObjectInspector);
     dataColumnNums = null;
     partitionColumnCount = 0;
+    virtualColumnCount = 0;
+    neededVirtualColumns = new VirtualColumn[0];
     dataColumnCount = rowColumnTypeInfos.length;
 
     // Scratch column information.
@@ -204,13 +220,14 @@ public class VectorizedRowBatchCtx {
    */
   public VectorizedRowBatch createVectorizedRowBatch()
   {
-    final int dataAndPartColumnCount = rowColumnTypeInfos.length;
-    final int totalColumnCount = dataAndPartColumnCount + 
scratchColumnTypeNames.length;
+    final int nonScratchColumnCount = rowColumnTypeInfos.length;
+    final int totalColumnCount =
+        nonScratchColumnCount + scratchColumnTypeNames.length;
     VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount);
 
     if (dataColumnNums == null) {
         // All data and partition columns.
-      for (int i = 0; i < dataAndPartColumnCount; i++) {
+      for (int i = 0; i < nonScratchColumnCount; i++) {
         TypeInfo typeInfo = rowColumnTypeInfos[i];
         result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo);
       }
@@ -218,24 +235,30 @@ public class VectorizedRowBatchCtx {
       // Create only needed/included columns data columns.
       for (int i = 0; i < dataColumnNums.length; i++) {
         int columnNum = dataColumnNums[i];
-        Preconditions.checkState(columnNum < dataAndPartColumnCount);
+        Preconditions.checkState(columnNum < nonScratchColumnCount);
         TypeInfo typeInfo = rowColumnTypeInfos[columnNum];
         result.cols[columnNum] = 
VectorizedBatchUtil.createColumnVector(typeInfo);
       }
-      // Always create partition columns.
-      final int endColumnNum = dataColumnCount + partitionColumnCount;
-      for (int partitionColumnNum = dataColumnCount; partitionColumnNum < 
endColumnNum; partitionColumnNum++) {
+      // Always create partition and virtual columns.
+      final int partitionEndColumnNum = dataColumnCount + partitionColumnCount;
+      for (int partitionColumnNum = dataColumnCount; partitionColumnNum < 
partitionEndColumnNum; partitionColumnNum++) {
         TypeInfo typeInfo = rowColumnTypeInfos[partitionColumnNum];
         result.cols[partitionColumnNum] = 
VectorizedBatchUtil.createColumnVector(typeInfo);
       }
+      final int virtualEndColumnNum = partitionEndColumnNum + 
virtualColumnCount;
+      for (int virtualColumnNum = partitionEndColumnNum; virtualColumnNum < 
virtualEndColumnNum; virtualColumnNum++) {
+        TypeInfo typeInfo = rowColumnTypeInfos[virtualColumnNum];
+        result.cols[virtualColumnNum] = 
VectorizedBatchUtil.createColumnVector(typeInfo);
+      }
     }
 
     for (int i = 0; i < scratchColumnTypeNames.length; i++) {
       String typeName = scratchColumnTypeNames[i];
-      result.cols[rowColumnTypeInfos.length + i] =
+      result.cols[nonScratchColumnCount + i] =
           VectorizedBatchUtil.createColumnVector(typeName);
     }
 
+    // UNDONE: Also remember virtualColumnCount...
     result.setPartitionInfo(dataColumnCount, partitionColumnCount);
 
     result.reset();

http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
index 2435bf1..0032305 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
@@ -20,10 +20,13 @@ package org.apache.hadoop.hive.ql.metadata;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.List;
 import java.util.ListIterator;
+import java.util.Map;
 
 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Iterables;
 
 import org.apache.hadoop.conf.Configuration;
@@ -64,6 +67,17 @@ public enum VirtualColumn {
       ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), 
ROWOFFSET.getName(),
           RAWDATASIZE.getName(), GROUPINGID.getName(), ROWID.getName());
 
+  public static final ImmutableMap<String, VirtualColumn> 
VIRTUAL_COLUMN_NAME_MAP =
+       new ImmutableMap.Builder<String, 
VirtualColumn>().putAll(getColumnNameMap()).build();
+
+  private static Map<String, VirtualColumn> getColumnNameMap() {
+    Map<String, VirtualColumn> map = new HashMap<String, VirtualColumn>();
+    for (VirtualColumn virtualColumn : values()) {
+      map.put(virtualColumn.name, virtualColumn);
+    }
+    return map;
+  }
+
   private final String name;
   private final TypeInfo typeInfo;
   private final boolean isHidden;

http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 8183194..0913f40 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -75,6 +75,7 @@ import 
org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import 
org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.HiveVectorAdaptorUsageMode;
 import 
org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
@@ -229,6 +230,7 @@ import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hive.common.util.AnnotationUtils;
 import org.apache.hadoop.util.ReflectionUtils;
 
+import com.google.common.collect.ImmutableSet;
 import com.google.common.base.Preconditions;
 
 public class Vectorizer implements PhysicalPlanResolver {
@@ -274,6 +276,10 @@ public class Vectorizer implements PhysicalPlanResolver {
 
   private Set<String> supportedAggregationUdfs = new HashSet<String>();
 
+  // The set of virtual columns that vectorized readers *MAY* support.
+  public static final ImmutableSet<VirtualColumn> vectorizableVirtualColumns =
+      ImmutableSet.of(VirtualColumn.ROWID);
+
   private HiveConf hiveConf;
 
   private boolean useVectorizedInputFileFormat;
@@ -283,6 +289,7 @@ public class Vectorizer implements PhysicalPlanResolver {
   private boolean isPtfVectorizationEnabled;
   private boolean isVectorizationComplexTypesEnabled;
   private boolean isVectorizationGroupByComplexTypesEnabled;
+  private boolean isVectorizedRowIdentifierEnabled;
 
   private boolean isSchemaEvolution;
 
@@ -316,6 +323,9 @@ public class Vectorizer implements PhysicalPlanResolver {
 
   private long vectorizedVertexNum = -1;
 
+  private Set<VirtualColumn> availableVectorizedVirtualColumnSet = null;
+  private Set<VirtualColumn> neededVirtualColumnSet = null;
+
   public Vectorizer() {
 
     /*
@@ -453,6 +463,8 @@ public class Vectorizer implements PhysicalPlanResolver {
     List<Integer> dataColumnNums;
 
     int partitionColumnCount;
+    List<VirtualColumn> availableVirtualColumnList;
+    List<VirtualColumn> neededVirtualColumnList;
     boolean useVectorizedInputFileFormat;
 
     boolean groupByVectorOutput;
@@ -488,6 +500,12 @@ public class Vectorizer implements PhysicalPlanResolver {
     public void setPartitionColumnCount(int partitionColumnCount) {
       this.partitionColumnCount = partitionColumnCount;
     }
+    public void setAvailableVirtualColumnList(List<VirtualColumn> 
availableVirtualColumnList) {
+      this.availableVirtualColumnList = availableVirtualColumnList;
+    }
+    public void setNeededVirtualColumnList(List<VirtualColumn> 
neededVirtualColumnList) {
+      this.neededVirtualColumnList = neededVirtualColumnList;
+    }
     public void setScratchTypeNameArray(String[] scratchTypeNameArray) {
       this.scratchTypeNameArray = scratchTypeNameArray;
     }
@@ -522,6 +540,16 @@ public class Vectorizer implements PhysicalPlanResolver {
 
     public void transferToBaseWork(BaseWork baseWork) {
 
+      final int virtualColumnCount;
+      VirtualColumn[] neededVirtualColumns;
+      if (neededVirtualColumnList != null && neededVirtualColumnList.size() > 
0) {
+        virtualColumnCount = neededVirtualColumnList.size();
+        neededVirtualColumns = neededVirtualColumnList.toArray(new 
VirtualColumn[0]);
+      } else {
+        virtualColumnCount = 0;
+        neededVirtualColumns = new VirtualColumn[0];
+      }
+
       String[] allColumnNameArray = allColumnNames.toArray(new String[0]);
       TypeInfo[] allTypeInfoArray = allTypeInfos.toArray(new TypeInfo[0]);
       int[] dataColumnNumsArray;
@@ -537,6 +565,7 @@ public class Vectorizer implements PhysicalPlanResolver {
             allTypeInfoArray,
             dataColumnNumsArray,
             partitionColumnCount,
+            neededVirtualColumns,
             scratchTypeNameArray);
       baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
 
@@ -687,20 +716,41 @@ public class Vectorizer implements PhysicalPlanResolver {
     }
 
     private void getTableScanOperatorSchemaInfo(TableScanOperator 
tableScanOperator,
-        List<String> logicalColumnNameList, List<TypeInfo> 
logicalTypeInfoList) {
+        List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList,
+        List<VirtualColumn> availableVirtualColumnList) {
 
-      // Add all non-virtual columns to make a vectorization context for
+      // Add all columns to make a vectorization context for
       // the TableScan operator.
       RowSchema rowSchema = tableScanOperator.getSchema();
       for (ColumnInfo c : rowSchema.getSignature()) {
-        // Validation will later exclude vectorization of virtual columns 
usage (HIVE-5560).
-        if (!isVirtualColumn(c)) {
-          String columnName = c.getInternalName();
-          String typeName = c.getTypeName();
-          TypeInfo typeInfo = 
TypeInfoUtils.getTypeInfoFromTypeString(typeName);
 
+        // Validation will later exclude vectorization of virtual columns usage if necessary.
+        String columnName = c.getInternalName();
+
+        // Turns out partition columns get marked as virtual in ColumnInfo, so we need to
+        // check the VirtualColumn directly.
+        VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
+        if (virtualColumn == null) {
           logicalColumnNameList.add(columnName);
-          logicalTypeInfoList.add(typeInfo);
+          
logicalTypeInfoList.add(TypeInfoUtils.getTypeInfoFromTypeString(c.getTypeName()));
+        } else {
+
+          // The planner gives us a subset virtual columns available for this table scan.
+          //    AND
+          // We only support some virtual columns in vectorization.
+          //
+          // So, create the intersection.  Note these are available vectorizable virtual columns.
+          // Later we remember which virtual columns were *actually used* in the query so
+          // just those will be included in the Map VectorizedRowBatchCtx that has the
+          // information for creating the Map VectorizedRowBatch.
+          if (!vectorizableVirtualColumns.contains(virtualColumn)) {
+            continue;
+          }
+          if (virtualColumn == VirtualColumn.ROWID && 
!isVectorizedRowIdentifierEnabled) {
+            continue;
+          }
+          availableVirtualColumnList.add(virtualColumn);
         }
       }
     }
@@ -893,14 +943,19 @@ public class Vectorizer implements PhysicalPlanResolver {
       boolean isAcidTable = tableScanOperator.getConf().isAcidTable();
 
       // These names/types are the data columns plus partition columns.
-      final List<String> allColumnNameList = new ArrayList<String>();
-      final List<TypeInfo> allTypeInfoList = new ArrayList<TypeInfo>();
+      final List<String> dataAndPartColumnNameList = new ArrayList<String>();
+      final List<TypeInfo> dataAndPartTypeInfoList = new ArrayList<TypeInfo>();
+
+      final List<VirtualColumn> availableVirtualColumnList = new 
ArrayList<VirtualColumn>();
 
-      getTableScanOperatorSchemaInfo(tableScanOperator, allColumnNameList, 
allTypeInfoList);
+      getTableScanOperatorSchemaInfo(
+          tableScanOperator,
+          dataAndPartColumnNameList, dataAndPartTypeInfoList,
+          availableVirtualColumnList);
 
       final List<Integer> dataColumnNums = new ArrayList<Integer>();
 
-      final int allColumnCount = allColumnNameList.size();
+      final int dataAndPartColumnCount = dataAndPartColumnNameList.size();
 
       /*
        * Validate input formats of all the partitions can be vectorized.
@@ -956,17 +1011,17 @@ public class Vectorizer implements PhysicalPlanResolver {
           LinkedHashMap<String, String> partSpec = partDesc.getPartSpec();
           if (partSpec != null && partSpec.size() > 0) {
             partitionColumnCount = partSpec.size();
-            dataColumnCount = allColumnCount - partitionColumnCount;
+            dataColumnCount = dataAndPartColumnCount - partitionColumnCount;
           } else {
             partitionColumnCount = 0;
-            dataColumnCount = allColumnCount;
+            dataColumnCount = dataAndPartColumnCount;
           }
 
-          determineDataColumnNums(tableScanOperator, allColumnNameList, 
dataColumnCount,
+          determineDataColumnNums(tableScanOperator, 
dataAndPartColumnNameList, dataColumnCount,
               dataColumnNums);
 
-          tableDataColumnList = allColumnNameList.subList(0, dataColumnCount);
-          tableDataTypeInfoList = allTypeInfoList.subList(0, dataColumnCount);
+          tableDataColumnList = dataAndPartColumnNameList.subList(0, 
dataColumnCount);
+          tableDataTypeInfoList = dataAndPartTypeInfoList.subList(0, 
dataColumnCount);
 
           isFirst = false;
         }
@@ -1038,10 +1093,14 @@ public class Vectorizer implements PhysicalPlanResolver 
{
         vectorPartDesc.setDataTypeInfos(nextDataTypeInfoList);
       }
 
-      vectorTaskColumnInfo.setAllColumnNames(allColumnNameList);
-      vectorTaskColumnInfo.setAllTypeInfos(allTypeInfoList);
+      // For now, we don't know which virtual columns are going to be included.  We'll add them
+      // later...
+      vectorTaskColumnInfo.setAllColumnNames(dataAndPartColumnNameList);
+      vectorTaskColumnInfo.setAllTypeInfos(dataAndPartTypeInfoList);
+
       vectorTaskColumnInfo.setDataColumnNums(dataColumnNums);
       vectorTaskColumnInfo.setPartitionColumnCount(partitionColumnCount);
+      
vectorTaskColumnInfo.setAvailableVirtualColumnList(availableVirtualColumnList);
       
vectorTaskColumnInfo.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat);
 
       // Always set these so EXPLAIN can see.
@@ -1082,6 +1141,14 @@ public class Vectorizer implements PhysicalPlanResolver {
         return false;
       }
 
+      // Set global member indicating which virtual columns are possible to be used by
+      // the Map vertex.
+      availableVectorizedVirtualColumnSet = new HashSet<VirtualColumn>();
+      
availableVectorizedVirtualColumnSet.addAll(vectorTaskColumnInfo.availableVirtualColumnList);
+
+      // And, use set to remember which virtual columns were actually referenced.
+      neededVirtualColumnSet = new HashSet<VirtualColumn>();
+
       // Now we are enabled and any issues found from here on out are 
considered
       // not vectorized issues.
       mapWork.setVectorizationEnabled(true);
@@ -1104,6 +1171,21 @@ public class Vectorizer implements PhysicalPlanResolver {
           }
         }
       }
+
+      List<VirtualColumn> neededVirtualColumnList = new 
ArrayList<VirtualColumn>();
+      if (!neededVirtualColumnSet.isEmpty()) {
+
+        // Create needed in same order.
+        for (VirtualColumn virtualColumn : 
vectorTaskColumnInfo.availableVirtualColumnList) {
+          if (neededVirtualColumnSet.contains(virtualColumn)) {
+            neededVirtualColumnList.add(virtualColumn);
+            vectorTaskColumnInfo.allColumnNames.add(virtualColumn.getName());
+            vectorTaskColumnInfo.allTypeInfos.add(virtualColumn.getTypeInfo());
+          }
+        }
+      }
+
+      vectorTaskColumnInfo.setNeededVirtualColumnList(neededVirtualColumnList);
       vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps());
       return true;
     }
@@ -1737,6 +1819,10 @@ public class Vectorizer implements PhysicalPlanResolver {
         HiveConf.getBoolVar(hiveConf,
             
HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED);
 
+    isVectorizedRowIdentifierEnabled =
+        HiveConf.getBoolVar(hiveConf,
+            HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED);
+
     isSchemaEvolution =
         HiveConf.getBoolVar(hiveConf,
             HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION);
@@ -2328,10 +2414,24 @@ public class Vectorizer implements PhysicalPlanResolver 
{
       VectorExpressionDescriptor.Mode mode, boolean allowComplex) {
     if (desc instanceof ExprNodeColumnDesc) {
       ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
-      // Currently, we do not support vectorized virtual columns (see 
HIVE-5570).
-      if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) {
-        setExpressionIssue(expressionTitle, "Virtual columns not supported (" 
+ c.getColumn() + ")");
-        return false;
+      String columnName = c.getColumn();
+
+      if (availableVectorizedVirtualColumnSet != null) {
+
+        // For Map, check for virtual columns.
+        VirtualColumn virtualColumn = 
VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
+        if (virtualColumn != null) {
+
+          // We support some virtual columns in vectorization for this table 
scan.
+
+          if (!availableVectorizedVirtualColumnSet.contains(virtualColumn)) {
+            setExpressionIssue(expressionTitle, "Virtual column " + columnName 
+ " is not supported");
+            return false;
+          }
+
+          // Remember we used this one in the query.
+          neededVirtualColumnSet.add(virtualColumn);
+        }
       }
     }
     String typeName = desc.getTypeInfo().getTypeName();
@@ -4180,28 +4280,20 @@ public class Vectorizer implements PhysicalPlanResolver {
     return vectorOp;
   }
 
-  private boolean isVirtualColumn(ColumnInfo column) {
-
-    // Not using method column.getIsVirtualCol() because partitioning columns are also
-    // treated as virtual columns in ColumnInfo.
-    if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getInternalName())) {
-        return true;
-    }
-    return false;
-  }
-
   public void debugDisplayAllMaps(BaseWork work) {
 
     VectorizedRowBatchCtx vectorizedRowBatchCtx = work.getVectorizedRowBatchCtx();
 
     String[] allColumnNames = vectorizedRowBatchCtx.getRowColumnNames();
-    Object columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
+    TypeInfo[] columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
     int partitionColumnCount = vectorizedRowBatchCtx.getPartitionColumnCount();
+    int virtualColumnCount = vectorizedRowBatchCtx.getVirtualColumnCount();
     String[] scratchColumnTypeNames =vectorizedRowBatchCtx.getScratchColumnTypeNames();
 
-    LOG.debug("debugDisplayAllMaps allColumnNames " + Arrays.toString(allColumnNames));
-    LOG.debug("debugDisplayAllMaps columnTypeInfos " + Arrays.deepToString((Object[]) columnTypeInfos));
+    LOG.debug("debugDisplayAllMaps rowColumnNames " + Arrays.toString(allColumnNames));
+    LOG.debug("debugDisplayAllMaps rowColumnTypeInfos " + Arrays.toString(columnTypeInfos));
     LOG.debug("debugDisplayAllMaps partitionColumnCount " + partitionColumnCount);
+    LOG.debug("debugDisplayAllMaps virtualColumnCount " + virtualColumnCount);
     LOG.debug("debugDisplayAllMaps scratchColumnTypeNames " + Arrays.toString(scratchColumnTypeNames));
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/test/queries/clientpositive/vector_row__id.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_row__id.q b/ql/src/test/queries/clientpositive/vector_row__id.q
new file mode 100644
index 0000000..11eda90
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_row__id.q
@@ -0,0 +1,56 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+-- SORT_QUERY_RESULTS
+
+drop table if exists hello_acid;
+create table hello_acid (key int, value int)
+partitioned by (load_date date)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true');
+
+insert into hello_acid partition (load_date='2016-03-01') values (1, 1);
+insert into hello_acid partition (load_date='2016-03-02') values (2, 2);
+insert into hello_acid partition (load_date='2016-03-03') values (3, 3);
+
+set hive.vectorized.row.identifier.enabled=false;
+
+explain vectorization detail
+select row__id, key, value from hello_acid order by key;
+
+select row__id, key, value from hello_acid order by key;
+
+explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
+
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
+
+explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
+
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
+
+
+
+set hive.vectorized.row.identifier.enabled=true;
+
+explain vectorization detail
+select row__id, key, value from hello_acid order by key;
+
+select row__id, key, value from hello_acid order by key;
+
+explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
+
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
+
+explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
+
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/test/results/clientpositive/llap/vector_row__id.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_row__id.q.out b/ql/src/test/results/clientpositive/llap/vector_row__id.q.out
new file mode 100644
index 0000000..850e3a4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_row__id.q.out
@@ -0,0 +1,605 @@
+PREHOOK: query: drop table if exists hello_acid
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists hello_acid
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table hello_acid (key int, value int)
+partitioned by (load_date date)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hello_acid
+POSTHOOK: query: create table hello_acid (key int, value int)
+partitioned by (load_date date)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hello_acid
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') 
values (1, 1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-01
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') 
values (1, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') 
values (2, 2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-02
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') 
values (2, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') 
values (3, 3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-03
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') 
values (3, 3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-03
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: hello_acid
+                  Statistics: Num rows: 231 Data size: 3696 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value 
(type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 231 Data size: 3696 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: int)
+                      sort order: +
+                      Statistics: Num rows: 231 Data size: 3696 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
+            Execution mode: llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: 
Virtual column ROW__ID is not supported
+                vectorized: false
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY.reducesinkkey0:int, 
VALUE._col0:struct<transactionid:bigint,bucketid:int,rowid:bigint>, 
VALUE._col1:int
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 
(type: int), VALUE._col1 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 0, 2]
+                Statistics: Num rows: 231 Data size: 3696 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 231 Data size: 3696 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+{"transactionid":3,"bucketid":536870912,"rowid":0}     1       1
+{"transactionid":4,"bucketid":536870912,"rowid":0}     2       2
+{"transactionid":5,"bucketid":536870912,"rowid":0}     3       3
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
order by tid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
order by tid
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: hello_acid
+                  Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ROW__ID.transactionid (type: bigint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: 
Virtual column ROW__ID is not supported
+                vectorized: false
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:bigint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: bigint)
+                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
+                Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub order by tid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+4
+5
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
where tid = 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
where tid = 3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: hello_acid
+                  Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (ROW__ID.transactionid = 3) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ROW__ID.transactionid (type: bigint)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Predicate expression for FILTER operator: 
Virtual column ROW__ID is not supported
+                vectorized: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: hello_acid
+                  Statistics: Num rows: 231 Data size: 3696 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
+                  Select Operator
+                    expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value 
(type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3, 0, 1]
+                    Statistics: Num rows: 231 Data size: 3696 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: int)
+                      sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkObjectHashOperator
+                          keyColumns: [0]
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: [3, 1]
+                      Statistics: Num rows: 231 Data size: 3696 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:int
+                    partitionColumnCount: 1
+                    partitionColumns: load_date:date
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY.reducesinkkey0:int, 
VALUE._col0:struct<transactionid:bigint,bucketid:int,rowid:bigint>, 
VALUE._col1:int
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 
(type: int), VALUE._col1 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 0, 2]
+                Statistics: Num rows: 231 Data size: 3696 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 231 Data size: 3696 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+NULL   1       1
+NULL   2       2
+NULL   3       3
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
order by tid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
order by tid
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: hello_acid
+                  Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ROW__ID.transactionid (type: bigint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: 
Could not vectorize expression (mode = PROJECTION): 
Column[ROW__ID].transactionid
+                vectorized: false
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:bigint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: bigint)
+                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
+                Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub order by tid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+4
+5
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
where tid = 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
where tid = 3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: hello_acid
+                  Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (ROW__ID.transactionid = 3) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ROW__ID.transactionid (type: bigint)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 1 Data size: 1856 Basic stats: 
COMPLETE Column stats: NONE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: may be used (ACID table)
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Predicate expression for FILTER operator: 
Cannot handle expression type: ExprNodeFieldDesc
+                vectorized: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3

http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/test/results/clientpositive/vector_row__id.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_row__id.q.out b/ql/src/test/results/clientpositive/vector_row__id.q.out
new file mode 100644
index 0000000..d48902b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_row__id.q.out
@@ -0,0 +1,491 @@
+PREHOOK: query: drop table if exists hello_acid
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists hello_acid
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table hello_acid (key int, value int)
+partitioned by (load_date date)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hello_acid
+POSTHOOK: query: create table hello_acid (key int, value int)
+partitioned by (load_date date)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hello_acid
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') 
values (1, 1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-01
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') 
values (1, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') 
values (2, 2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-02
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') 
values (2, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') 
values (3, 3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-03
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') 
values (3, 3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-03
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value 
(type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: int)
+                sort order: +
+                Statistics: Num rows: 380 Data size: 3054 Basic stats: 
COMPLETE Column stats: NONE
+                value expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Virtual 
column ROW__ID is not supported
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 
(type: int), VALUE._col1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+{"transactionid":3,"bucketid":536936448,"rowid":0}     1       1
+{"transactionid":4,"bucketid":537001984,"rowid":0}     2       2
+{"transactionid":5,"bucketid":536870912,"rowid":0}     3       3
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
order by tid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
order by tid
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL 
Column stats: NONE
+            Select Operator
+              expressions: ROW__ID.transactionid (type: bigint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: bigint)
+                sort order: +
+                Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL 
Column stats: NONE
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Virtual 
column ROW__ID is not supported
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: bigint)
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub order by tid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+4
+5
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
where tid = 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
where tid = 3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL 
Column stats: NONE
+            Filter Operator
+              predicate: (ROW__ID.transactionid = 3) (type: boolean)
+              Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: ROW__ID.transactionid (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 3054 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Predicate expression for FILTER operator: 
Virtual column ROW__ID is not supported
+          vectorized: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
+            Select Operator
+              expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value 
(type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [3, 0, 1]
+              Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: int)
+                sort order: +
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
+                Statistics: Num rows: 380 Data size: 3054 Basic stats: 
COMPLETE Column stats: NONE
+                value expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: key:int, value:int
+              partitionColumnCount: 1
+              partitionColumns: load_date:date
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 
(type: int), VALUE._col1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+NULL   1       1
+NULL   2       2
+NULL   3       3
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
order by tid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
order by tid
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL 
Column stats: NONE
+            Select Operator
+              expressions: ROW__ID.transactionid (type: bigint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: bigint)
+                sort order: +
+                Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL 
Column stats: NONE
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Select expression for SELECT operator: Could 
not vectorize expression (mode = PROJECTION): Column[ROW__ID].transactionid
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: bigint)
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub order by tid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+4
+5
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
where tid = 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub 
where tid = 3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hello_acid
+            Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL 
Column stats: NONE
+            Filter Operator
+              predicate: (ROW__ID.transactionid = 3) (type: boolean)
+              Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: ROW__ID.transactionid (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 3054 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          notVectorizedReason: Predicate expression for FILTER operator: 
Cannot handle expression type: ExprNodeFieldDesc
+          vectorized: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from 
hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3

hive git commit: HIVE-17116: Vectorization: Add infrastructure for vectorization of ROW__ID struct (Matt McCline, reviewed by Teddy Choi)

Reply via email to