Repository: hive Updated Branches: refs/heads/master ba8a99e11 -> fc48d7218
http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out deleted file mode 100644 index 5cd757a..0000000 --- a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out +++ /dev/null @@ -1,946 +0,0 @@ -WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 183488 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean) - Statistics: Num rows: 1365 Data size: 20400 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cbigint (type: bigint), cdouble (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 7 - Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - - Stage: Stage-0 - Fetch Operator - limit: 7 - Processor Tree: - ListSink - -WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --1887561756 -10011.0 --1887561756 -13877.0 --1887561756 -2281.0 --1887561756 -8881.0 --1887561756 10361.0 --1887561756 1839.0 --1887561756 9531.0 -PREHOOK: query: explain vectorization detail -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:tinyint) - predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5, 1] - Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: double) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 5] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] - Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: smallint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 1, 5] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, VALUE._col0:smallint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --64 -10462.0 -10462 --64 -15920.0 -15920 --64 -1600.0 -1600 --64 -200.0 -200 --64 -2919.0 -2919 --64 -3097.0 -3097 --64 -3586.0 -3586 --64 -4018.0 -4018 --64 -4040.0 -4040 --64 -4803.0 -4803 --64 -6907.0 -6907 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -7196.0 -7196 --64 -8080.0 -8080 --64 -9842.0 -9842 -PREHOOK: query: explain vectorization detail -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] - Select Operator - expressions: ctinyint (type: tinyint), (cdouble + 1.0D) (type: double) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 13] - selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double - Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1), count(_col1) - Group By Vectorization: - aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 13:double) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] - keys: _col0 (type: tinyint) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [1, 2] - Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double), _col2 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 5] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [double] - Reducer 2 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - dataColumns: KEY._col0:tinyint, VALUE._col0:double, VALUE._col1:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) - Group By Vectorization: - aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1] - keys: KEY._col0 (type: tinyint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3] - selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 3:double - Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --46 3033.55 --47 -574.6428571428571 --48 1672.909090909091 --49 768.7659574468086 --50 -960.0192307692307 --51 -96.46341463414635 --52 2810.705882352941 --53 -532.7567567567568 --54 2712.7272727272725 --55 2385.595744680851 --56 2595.818181818182 --57 1867.0535714285713 --58 3483.2444444444445 --59 318.27272727272725 --60 1071.82 --61 914.3404255319149 --62 245.69387755102042 --63 2178.7272727272725 --64 373.52941176470586 -NULL 9370.0945309795 -PREHOOK: query: explain vectorization detail -select distinct(ctinyint) from alltypesorc limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select distinct(ctinyint) from alltypesorc limit 20 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: ctinyint (type: tinyint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY._col0:tinyint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [] - keys: KEY._col0 (type: tinyint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --46 --47 --48 --49 --50 --51 --52 --53 --54 --55 --56 --57 --58 --59 --60 --61 --62 --63 --64 -NULL -PREHOOK: query: explain vectorization detail -select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] - Select Operator - expressions: ctinyint (type: tinyint), cdouble (type: double) - outputColumnNames: ctinyint, cdouble - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5] - Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:tinyint, col 5:double - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: ctinyint (type: tinyint), cdouble (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] - Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 5] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY._col0:tinyint, KEY._col1:double - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:tinyint, col 1:double - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [] - keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - Group By Vectorization: - aggregators: VectorUDAFCount(col 1:double) -> bigint - className: VectorGroupByOperator - groupByMode: COMPLETE - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0] - keys: _col0 (type: tinyint) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --46 24 --47 22 --48 29 --49 26 --50 30 --51 21 --52 33 --53 22 --54 26 --55 29 --56 36 --57 35 --58 23 --59 31 --60 27 --61 25 --62 27 --63 19 --64 24 -NULL 2932 -PREHOOK: query: explain vectorization detail -select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 0 - Processor Tree: - ListSink - -PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### -PREHOOK: query: explain vectorization detail -select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:tinyint) - predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(ctinyint) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 5:double - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: cdouble (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 5] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY._col0:double, VALUE._col0:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:double - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0] - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: bigint), _col0 (type: double) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - Reducer 3 - Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:double - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 0] - Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink - -PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Output: hdfs://### HDFS PATH ### --10462.0 -64 --1121.0 -89 --11322.0 -101 --11492.0 -78 --15920.0 -64 --4803.0 -64 --6907.0 -64 --7196.0 -2009 --8080.0 -64 --8118.0 -80 --9842.0 -64 -10496.0 -67 -15601.0 -1733 -3520.0 -86 -4811.0 -115 -5241.0 -80 -557.0 -75 -7705.0 -88 -9452.0 -76 -NULL -32768 http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/results/clientpositive/vectorization_limit.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vectorization_limit.q.out b/ql/src/test/results/clientpositive/vectorization_limit.q.out index 517cb07..7474547 100644 --- a/ql/src/test/results/clientpositive/vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -1,7 +1,9 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: explain vectorization +SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: explain vectorization +SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -25,16 +27,11 @@ STAGE PLANS: expressions: cbigint (type: bigint), cdouble (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 7 - Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: double) + sort order: ++ + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: enabled: true @@ -45,6 +42,25 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 7 + Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 1505 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -53,26 +69,26 @@ STAGE PLANS: ListSink WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --1887561756 -10011.0 --1887561756 -13877.0 --1887561756 -2281.0 --1887561756 -8881.0 --1887561756 10361.0 --1887561756 1839.0 --1887561756 9531.0 +-1887561756 -15891.0 +-1887561756 -15951.0 +-1887561756 -16008.0 +-1887561756 -16183.0 +-1887561756 -16225.0 +-1887561756 -16243.0 +-1887561756 -16296.0 PREHOOK: query: explain vectorization detail -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -108,8 +124,8 @@ STAGE PLANS: projectedOutputColumnNums: [0, 5, 1] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: double) - sort order: ++ + key expressions: _col0 (type: tinyint), _col1 (type: double), _col2 (type: smallint) + sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false @@ -117,7 +133,6 @@ STAGE PLANS: nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: smallint) Execution mode: vectorized Map Vectorization: enabled: true @@ -140,7 +155,7 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint) + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: smallint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Limit @@ -160,11 +175,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20 +POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -189,10 +204,10 @@ POSTHOOK: Input: default@alltypesorc -64 -8080.0 -8080 -64 -9842.0 -9842 PREHOOK: query: explain vectorization detail -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -200,7 +215,8 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -245,7 +261,6 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -278,16 +293,65 @@ STAGE PLANS: expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:tinyint, 1:_col1:double] + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: double) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: _col0:tinyint, _col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -295,11 +359,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: query: select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: query: select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -324,10 +388,10 @@ POSTHOOK: Input: default@alltypesorc -64 373.52941176470586 NULL 9370.0945309795 PREHOOK: query: explain vectorization detail -select distinct(ctinyint) from alltypesorc limit 20 +select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select distinct(ctinyint) from alltypesorc limit 20 +select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -421,11 +485,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 +PREHOOK: query: select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 +POSTHOOK: query: select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -450,10 +514,10 @@ POSTHOOK: Input: default@alltypesorc -64 NULL PREHOOK: query: explain vectorization detail -select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -461,7 +525,8 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -485,7 +550,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true @@ -503,6 +567,55 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:tinyint, 1:_col1:bigint] + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: _col0:tinyint, _col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE @@ -520,11 +633,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +PREHOOK: query: select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 +POSTHOOK: query: select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -549,10 +662,10 @@ POSTHOOK: Input: default@alltypesorc -64 24 NULL 2932 PREHOOK: query: explain vectorization detail -select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -568,19 +681,19 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0 +POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### PREHOOK: query: explain vectorization detail -select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20 PREHOOK: type: QUERY POSTHOOK: query: explain vectorization detail -select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -726,11 +839,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +PREHOOK: query: select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20 +POSTHOOK: query: select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here ####
