[1/5] hive git commit: HIVE-19019: Vectorization: When vectorized, orc_merge_incompat_schema.q throws HiveException "Not implemented yet" from VectorExpressionWriterMap (Matt McCline, reviewed by Teddy Choi)

mmccline Fri, 30 Mar 2018 21:46:41 -0700

Repository: hive
Updated Branches:
  refs/heads/master ba8a99e11 -> fc48d7218



http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out 
b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
deleted file mode 100644
index 5cd757a..0000000
--- a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
+++ /dev/null
@@ -1,946 +0,0 @@
-WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc 
WHERE cbigint < cdouble and cint > 0 limit 7
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM 
alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 183488 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 
0)) (type: boolean)
-                    Statistics: Num rows: 1365 Data size: 20400 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: cbigint (type: bigint), cdouble (type: 
double)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1365 Data size: 16320 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Limit
-                        Number of rows: 7
-                        Statistics: Num rows: 7 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 7 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
-                          table:
-                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: 7
-      Processor Tree:
-        ListSink
-
-WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < 
cdouble and cint > 0 limit 7
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < 
cdouble and cint > 0 limit 7
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Output: hdfs://### HDFS PATH ###
--1887561756    -10011.0
--1887561756    -13877.0
--1887561756    -2281.0
--1887561756    -8881.0
--1887561756    10361.0
--1887561756    1839.0
--1887561756    9531.0
-PREHOOK: query: explain vectorization detail
-select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null 
order by ctinyint,cdouble limit 20
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null 
order by ctinyint,cdouble limit 20
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 146796 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  TableScan Vectorization:
-                      native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 
1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 
5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 
8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 
11:cboolean2:boolean, 
12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
-                  Filter Operator
-                    Filter Vectorization:
-                        className: VectorFilterOperator
-                        native: true
-                        predicateExpression: SelectColumnIsNotNull(col 
0:tinyint)
-                    predicate: ctinyint is not null (type: boolean)
-                    Statistics: Num rows: 9173 Data size: 109584 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: ctinyint (type: tinyint), cdouble (type: 
double), csmallint (type: smallint)
-                      outputColumnNames: _col0, _col1, _col2
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 5, 1]
-                      Statistics: Num rows: 9173 Data size: 109584 Basic 
stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: tinyint), _col1 (type: 
double)
-                        sort order: ++
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 5]
-                            native: true
-                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [1]
-                        Statistics: Num rows: 9173 Data size: 109584 Basic 
stats: COMPLETE Column stats: COMPLETE
-                        TopN Hash Memory Usage: 0.3
-                        value expressions: _col2 (type: smallint)
-            Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 1, 5]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, 
cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, 
cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, 
cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: aa
-                reduceColumnSortOrder: ++
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 3
-                    dataColumns: KEY.reducesinkkey0:tinyint, 
KEY.reducesinkkey1:double, VALUE._col0:smallint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: tinyint), 
KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
-                outputColumnNames: _col0, _col1, _col2
-                Select Vectorization:
-                    className: VectorSelectOperator
-                    native: true
-                    projectedOutputColumnNums: [0, 1, 2]
-                Statistics: Num rows: 9173 Data size: 109584 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Limit
-                  Number of rows: 20
-                  Limit Vectorization:
-                      className: VectorLimitOperator
-                      native: true
-                  Statistics: Num rows: 20 Data size: 256 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
-                    Statistics: Num rows: 20 Data size: 256 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: 20
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where 
ctinyint is not null order by ctinyint,cdouble limit 20
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where 
ctinyint is not null order by ctinyint,cdouble limit 20
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Output: hdfs://### HDFS PATH ###
--64    -10462.0        -10462
--64    -15920.0        -15920
--64    -1600.0 -1600
--64    -200.0  -200
--64    -2919.0 -2919
--64    -3097.0 -3097
--64    -3586.0 -3586
--64    -4018.0 -4018
--64    -4040.0 -4040
--64    -4803.0 -4803
--64    -6907.0 -6907
--64    -7196.0 -7196
--64    -7196.0 -7196
--64    -7196.0 -7196
--64    -7196.0 -7196
--64    -7196.0 -7196
--64    -7196.0 -7196
--64    -7196.0 -7196
--64    -8080.0 -8080
--64    -9842.0 -9842
-PREHOOK: query: explain vectorization detail
-select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by 
ctinyint limit 20
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by 
ctinyint limit 20
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 110096 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  TableScan Vectorization:
-                      native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 
1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 
5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 
8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 
11:cboolean2:boolean, 
12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), (cdouble + 1.0D) 
(type: double)
-                    outputColumnNames: _col0, _col1
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [0, 13]
-                        selectExpressions: DoubleColAddDoubleScalar(col 
5:double, val 1.0) -> 13:double
-                    Statistics: Num rows: 12288 Data size: 110096 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: sum(_col1), count(_col1)
-                      Group By Vectorization:
-                          aggregators: VectorUDAFSumDouble(col 13:double) -> 
double, VectorUDAFCount(col 13:double) -> bigint
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:tinyint
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: [0, 1]
-                      keys: _col0 (type: tinyint)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 128 Data size: 2436 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: tinyint)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: tinyint)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: [1, 2]
-                        Statistics: Num rows: 128 Data size: 2436 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        TopN Hash Memory Usage: 0.3
-                        value expressions: _col1 (type: double), _col2 (type: 
bigint)
-            Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 5]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, 
cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, 
cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, 
cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [double]
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 3
-                    dataColumns: KEY._col0:tinyint, VALUE._col0:double, 
VALUE._col1:bigint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0), count(VALUE._col1)
-                Group By Vectorization:
-                    aggregators: VectorUDAFSumDouble(col 1:double) -> double, 
VectorUDAFCountMerge(col 2:bigint) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:tinyint
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1]
-                keys: KEY._col0 (type: tinyint)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 128 Data size: 2436 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: tinyint), (_col1 / _col2) (type: 
double)
-                  outputColumnNames: _col0, _col1
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 3]
-                      selectExpressions: DoubleColDivideLongColumn(col 
1:double, col 2:bigint) -> 3:double
-                  Statistics: Num rows: 128 Data size: 1412 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Limit
-                    Number of rows: 20
-                    Limit Vectorization:
-                        className: VectorLimitOperator
-                        native: true
-                    Statistics: Num rows: 20 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    File Output Operator
-                      compressed: false
-                      File Sink Vectorization:
-                          className: VectorFileSinkOperator
-                          native: false
-                      Statistics: Num rows: 20 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: 20
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by 
ctinyint order by ctinyint limit 20
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by 
ctinyint order by ctinyint limit 20
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Output: hdfs://### HDFS PATH ###
--46    3033.55
--47    -574.6428571428571
--48    1672.909090909091
--49    768.7659574468086
--50    -960.0192307692307
--51    -96.46341463414635
--52    2810.705882352941
--53    -532.7567567567568
--54    2712.7272727272725
--55    2385.595744680851
--56    2595.818181818182
--57    1867.0535714285713
--58    3483.2444444444445
--59    318.27272727272725
--60    1071.82
--61    914.3404255319149
--62    245.69387755102042
--63    2178.7272727272725
--64    373.52941176470586
-NULL   9370.0945309795
-PREHOOK: query: explain vectorization detail
-select distinct(ctinyint) from alltypesorc limit 20
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select distinct(ctinyint) from alltypesorc limit 20
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 36696 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  TableScan Vectorization:
-                      native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 
1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 
5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 
8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 
11:cboolean2:boolean, 
12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
-                  Select Operator
-                    expressions: ctinyint (type: tinyint)
-                    outputColumnNames: ctinyint
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [0]
-                    Statistics: Num rows: 12288 Data size: 36696 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:tinyint
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
-                      keys: ctinyint (type: tinyint)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 128 Data size: 388 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: tinyint)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: tinyint)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkLongOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
-                        Statistics: Num rows: 128 Data size: 388 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        TopN Hash Memory Usage: 0.3
-            Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, 
cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, 
cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, 
cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY._col0:tinyint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-            Reduce Operator Tree:
-              Group By Operator
-                Group By Vectorization:
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:tinyint
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: []
-                keys: KEY._col0 (type: tinyint)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Limit
-                  Number of rows: 20
-                  Limit Vectorization:
-                      className: VectorLimitOperator
-                      native: true
-                  Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
-                    Statistics: Num rows: 20 Data size: 64 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: 20
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Output: hdfs://### HDFS PATH ###
--46
--47
--48
--49
--50
--51
--52
--53
--54
--55
--56
--57
--58
--59
--60
--61
--62
--63
--64
-NULL
-PREHOOK: query: explain vectorization detail
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint 
order by ctinyint limit 20
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint 
order by ctinyint limit 20
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 110096 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  TableScan Vectorization:
-                      native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 
1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 
5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 
8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 
11:cboolean2:boolean, 
12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
-                  Select Operator
-                    expressions: ctinyint (type: tinyint), cdouble (type: 
double)
-                    outputColumnNames: ctinyint, cdouble
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [0, 5]
-                    Statistics: Num rows: 12288 Data size: 110096 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:tinyint, col 5:double
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
-                      keys: ctinyint (type: tinyint), cdouble (type: double)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 6144 Data size: 55052 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: tinyint), _col1 (type: 
double)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: tinyint)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
-                            native: true
-                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: []
-                        Statistics: Num rows: 6144 Data size: 55052 Basic 
stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 5]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, 
cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, 
cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, 
cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: aa
-                reduceColumnSortOrder: ++
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    dataColumns: KEY._col0:tinyint, KEY._col1:double
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-            Reduce Operator Tree:
-              Group By Operator
-                Group By Vectorization:
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:tinyint, col 1:double
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: []
-                keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 6144 Data size: 55052 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Group By Operator
-                  aggregations: count(_col1)
-                  Group By Vectorization:
-                      aggregators: VectorUDAFCount(col 1:double) -> bigint
-                      className: VectorGroupByOperator
-                      groupByMode: COMPLETE
-                      keyExpressions: col 0:tinyint
-                      native: false
-                      vectorProcessingMode: STREAMING
-                      projectedOutputColumnNums: [0]
-                  keys: _col0 (type: tinyint)
-                  mode: complete
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 128 Data size: 1412 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Limit
-                    Number of rows: 20
-                    Limit Vectorization:
-                        className: VectorLimitOperator
-                        native: true
-                    Statistics: Num rows: 20 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    File Output Operator
-                      compressed: false
-                      File Sink Vectorization:
-                          className: VectorFileSinkOperator
-                          native: false
-                      Statistics: Num rows: 20 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: 20
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc 
group by ctinyint order by ctinyint limit 20
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc 
group by ctinyint order by ctinyint limit 20
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Output: hdfs://### HDFS PATH ###
--46    24
--47    22
--48    29
--49    26
--50    30
--51    21
--52    33
--53    22
--54    26
--55    29
--56    36
--57    35
--58    23
--59    31
--60    27
--61    25
--62    27
--63    19
--64    24
-NULL   2932
-PREHOOK: query: explain vectorization detail
-select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-0 is a root stage
-
-STAGE PLANS:
-  Stage: Stage-0
-    Fetch Operator
-      limit: 0
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint 
limit 0
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint 
limit 0
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Output: hdfs://### HDFS PATH ###
-PREHOOK: query: explain vectorization detail
-select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not 
null group by cdouble order by sum, cdouble limit 20
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not 
null group by cdouble order by sum, cdouble limit 20
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 110096 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  TableScan Vectorization:
-                      native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 
1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 
5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 
8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 
11:cboolean2:boolean, 
12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
-                  Filter Operator
-                    Filter Vectorization:
-                        className: VectorFilterOperator
-                        native: true
-                        predicateExpression: SelectColumnIsNotNull(col 
0:tinyint)
-                    predicate: ctinyint is not null (type: boolean)
-                    Statistics: Num rows: 9173 Data size: 82188 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: sum(ctinyint)
-                      Group By Vectorization:
-                          aggregators: VectorUDAFSumLong(col 0:tinyint) -> 
bigint
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 5:double
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: [0]
-                      keys: cdouble (type: double)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 4159 Data size: 58120 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: double)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: double)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkMultiKeyOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [1]
-                        Statistics: Num rows: 4159 Data size: 58120 Basic 
stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: bigint)
-            Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 5]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, 
cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, 
cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, 
cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-        Reducer 2 
-            Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    dataColumns: KEY._col0:double, VALUE._col0:bigint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0)
-                Group By Vectorization:
-                    aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:double
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0]
-                keys: KEY._col0 (type: double)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 4159 Data size: 58120 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col1 (type: bigint), _col0 (type: double)
-                  sort order: ++
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkObjectHashOperator
-                      keyColumnNums: [1, 0]
-                      native: true
-                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      valueColumnNums: []
-                  Statistics: Num rows: 4159 Data size: 58120 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  TopN Hash Memory Usage: 0.3
-        Reducer 3 
-            Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: aa
-                reduceColumnSortOrder: ++
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    dataColumns: KEY.reducesinkkey0:bigint, 
KEY.reducesinkkey1:double
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey1 (type: double), 
KEY.reducesinkkey0 (type: bigint)
-                outputColumnNames: _col0, _col1
-                Select Vectorization:
-                    className: VectorSelectOperator
-                    native: true
-                    projectedOutputColumnNums: [1, 0]
-                Statistics: Num rows: 4159 Data size: 58120 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Limit
-                  Number of rows: 20
-                  Limit Vectorization:
-                      className: VectorLimitOperator
-                      native: true
-                  Statistics: Num rows: 20 Data size: 288 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
-                    Statistics: Num rows: 20 Data size: 288 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: 20
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where 
ctinyint is not null group by cdouble order by sum, cdouble limit 20
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where 
ctinyint is not null group by cdouble order by sum, cdouble limit 20
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Output: hdfs://### HDFS PATH ###
--10462.0       -64
--1121.0        -89
--11322.0       -101
--11492.0       -78
--15920.0       -64
--4803.0        -64
--6907.0        -64
--7196.0        -2009
--8080.0        -64
--8118.0        -80
--9842.0        -64
-10496.0        -67
-15601.0        -1733
-3520.0 -86
-4811.0 -115
-5241.0 -80
-557.0  -75
-7705.0 -88
-9452.0 -76
-NULL   -32768

http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/results/clientpositive/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_limit.q.out 
b/ql/src/test/results/clientpositive/vectorization_limit.q.out
index 517cb07..7474547 100644
--- a/ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -1,7 +1,9 @@
 WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc 
WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: query: explain vectorization
+SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 
order by cbigint, cdouble limit 7
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM 
alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: query: explain vectorization
+SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 
order by cbigint, cdouble limit 7
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -25,16 +27,11 @@ STAGE PLANS:
                 expressions: cbigint (type: bigint), cdouble (type: double)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1365 Data size: 293479 Basic stats: 
COMPLETE Column stats: NONE
-                Limit
-                  Number of rows: 7
-                  Statistics: Num rows: 7 Data size: 1505 Basic stats: 
COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 7 Data size: 1505 Basic stats: 
COMPLETE Column stats: NONE
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                Reduce Output Operator
+                  key expressions: _col0 (type: bigint), _col1 (type: double)
+                  sort order: ++
+                  Statistics: Num rows: 1365 Data size: 293479 Basic stats: 
COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.1
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -45,6 +42,25 @@ STAGE PLANS:
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 
(type: double)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 7
+            Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -53,26 +69,26 @@ STAGE PLANS:
         ListSink
 
 WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < 
cdouble and cint > 0 limit 7
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < 
cdouble and cint > 0 order by cbigint, cdouble limit 7
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < 
cdouble and cint > 0 limit 7
+POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < 
cdouble and cint > 0 order by cbigint, cdouble limit 7
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
--1887561756    -10011.0
--1887561756    -13877.0
--1887561756    -2281.0
--1887561756    -8881.0
--1887561756    10361.0
--1887561756    1839.0
--1887561756    9531.0
+-1887561756    -15891.0
+-1887561756    -15951.0
+-1887561756    -16008.0
+-1887561756    -16183.0
+-1887561756    -16225.0
+-1887561756    -16243.0
+-1887561756    -16296.0
 PREHOOK: query: explain vectorization detail
-select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null 
order by ctinyint,cdouble limit 20
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null 
order by ctinyint,cdouble,csmallint limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain vectorization detail
-select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null 
order by ctinyint,cdouble limit 20
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null 
order by ctinyint,cdouble,csmallint limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -108,8 +124,8 @@ STAGE PLANS:
                     projectedOutputColumnNums: [0, 5, 1]
                 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: tinyint), _col1 (type: double)
-                  sort order: ++
+                  key expressions: _col0 (type: tinyint), _col1 (type: 
double), _col2 (type: smallint)
+                  sort order: +++
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkOperator
                       native: false
@@ -117,7 +133,6 @@ STAGE PLANS:
                       nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.3
-                  value expressions: _col2 (type: smallint)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -140,7 +155,7 @@ STAGE PLANS:
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
       Reduce Operator Tree:
         Select Operator
-          expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 
(type: double), VALUE._col0 (type: smallint)
+          expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 
(type: double), KEY.reducesinkkey2 (type: smallint)
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE 
Column stats: NONE
           Limit
@@ -160,11 +175,11 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where 
ctinyint is not null order by ctinyint,cdouble limit 20
+PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where 
ctinyint is not null order by ctinyint,cdouble,csmallint limit 20
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where 
ctinyint is not null order by ctinyint,cdouble limit 20
+POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where 
ctinyint is not null order by ctinyint,cdouble,csmallint limit 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
@@ -189,10 +204,10 @@ POSTHOOK: Input: default@alltypesorc
 -64    -8080.0 -8080
 -64    -9842.0 -9842
 PREHOOK: query: explain vectorization detail
-select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by 
ctinyint limit 20
+select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint 
order by ctinyint, cavg limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain vectorization detail
-select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by 
ctinyint limit 20
+select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint 
order by ctinyint, cavg limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -200,7 +215,8 @@ PLAN VECTORIZATION:
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -245,7 +261,6 @@ STAGE PLANS:
                       nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
                       nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
-                  TopN Hash Memory Usage: 0.3
                   value expressions: _col1 (type: double), _col2 (type: bigint)
       Execution mode: vectorized
       Map Vectorization:
@@ -278,16 +293,65 @@ STAGE PLANS:
             expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
-            Limit
-              Number of rows: 20
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:_col0:tinyint, 1:_col1:double]
+            Reduce Output Operator
+              key expressions: _col0 (type: tinyint), _col1 (type: double)
+              sort order: ++
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
+              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: _col0:tinyint, _col1:double
+              partitionColumnCount: 0
+              scratchColumnTypeNames: []
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 
(type: double)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
               Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
-                table:
-                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -295,11 +359,11 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by 
ctinyint order by ctinyint limit 20
+PREHOOK: query: select ctinyint,avg(cdouble + 1) as cavg from alltypesorc 
group by ctinyint order by ctinyint, cavg limit 20
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by 
ctinyint order by ctinyint limit 20
+POSTHOOK: query: select ctinyint,avg(cdouble + 1) as cavg from alltypesorc 
group by ctinyint order by ctinyint, cavg limit 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
@@ -324,10 +388,10 @@ POSTHOOK: Input: default@alltypesorc
 -64    373.52941176470586
 NULL   9370.0945309795
 PREHOOK: query: explain vectorization detail
-select distinct(ctinyint) from alltypesorc limit 20
+select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct 
limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain vectorization detail
-select distinct(ctinyint) from alltypesorc limit 20
+select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct 
limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -421,11 +485,11 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20
+PREHOOK: query: select distinct(ctinyint) as cdistinct from alltypesorc order 
by cdistinct limit 20
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20
+POSTHOOK: query: select distinct(ctinyint) as cdistinct from alltypesorc order 
by cdistinct limit 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
@@ -450,10 +514,10 @@ POSTHOOK: Input: default@alltypesorc
 -64
 NULL
 PREHOOK: query: explain vectorization detail
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint 
order by ctinyint limit 20
+select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc 
group by ctinyint order by ctinyint, count_distinct limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain vectorization detail
-select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint 
order by ctinyint limit 20
+select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc 
group by ctinyint order by ctinyint, count_distinct limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -461,7 +525,8 @@ PLAN VECTORIZATION:
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -485,7 +550,6 @@ STAGE PLANS:
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: tinyint)
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
-                  TopN Hash Memory Usage: 0.3
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true
@@ -503,6 +567,55 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:_col0:tinyint, 1:_col1:bigint]
+            Reduce Output Operator
+              key expressions: _col0 (type: tinyint), _col1 (type: bigint)
+              sort order: ++
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
+              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.3
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: _col0:tinyint, _col1:bigint
+              partitionColumnCount: 0
+              scratchColumnTypeNames: []
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 
(type: bigint)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
           Limit
             Number of rows: 20
             Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE 
Column stats: NONE
@@ -520,11 +633,11 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc 
group by ctinyint order by ctinyint limit 20
+PREHOOK: query: select ctinyint, count(distinct(cdouble)) as count_distinct 
from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc 
group by ctinyint order by ctinyint limit 20
+POSTHOOK: query: select ctinyint, count(distinct(cdouble)) as count_distinct 
from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
@@ -549,10 +662,10 @@ POSTHOOK: Input: default@alltypesorc
 -64    24
 NULL   2932
 PREHOOK: query: explain vectorization detail
-select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0
 PREHOOK: type: QUERY
 POSTHOOK: query: explain vectorization detail
-select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -568,19 +681,19 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint 
limit 0
+PREHOOK: query: select ctinyint,cdouble from alltypesorc order by 
ctinyint,cdouble limit 0
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint 
limit 0
+POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by 
ctinyint,cdouble limit 0
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 PREHOOK: query: explain vectorization detail
-select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not 
null group by cdouble order by sum, cdouble limit 20
+select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not 
null group by cdouble order by csum, cdouble limit 20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain vectorization detail
-select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not 
null group by cdouble order by sum, cdouble limit 20
+select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not 
null group by cdouble order by csum, cdouble limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -726,11 +839,11 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where 
ctinyint is not null group by cdouble order by sum, cdouble limit 20
+PREHOOK: query: select cdouble, sum(ctinyint) as csum from alltypesorc where 
ctinyint is not null group by cdouble order by csum, cdouble limit 20
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where 
ctinyint is not null group by cdouble order by sum, cdouble limit 20
+POSTHOOK: query: select cdouble, sum(ctinyint) as csum from alltypesorc where 
ctinyint is not null group by cdouble order by csum, cdouble limit 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####

[1/5] hive git commit: HIVE-19019: Vectorization: When vectorized, orc_merge_incompat_schema.q throws HiveException "Not implemented yet" from VectorExpressionWriterMap (Matt McCline, reviewed by Teddy Choi)

Reply via email to