http://git-wip-us.apache.org/repos/asf/hive/blob/a5d5473f/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out deleted file mode 100644 index a790fc1..0000000 --- a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out +++ /dev/null @@ -1,31543 +0,0 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT MIN(ctinyint) as c1, - MAX(ctinyint), - COUNT(ctinyint), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT MIN(ctinyint) as c1, - MAX(ctinyint), - COUNT(ctinyint), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesparquet - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3] - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:tinyint, 1:_col1:tinyint, 2:_col2:bigint, 3:_col3:bigint] - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 4 - includeColumns: [0, 1, 2, 3] - dataColumns: _col0:tinyint, _col1:tinyint, _col2:bigint, _col3:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT MIN(ctinyint) as c1, - MAX(ctinyint), - COUNT(ctinyint), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: SELECT MIN(ctinyint) as c1, - MAX(ctinyint), - COUNT(ctinyint), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --64 62 9173 12288 -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT SUM(ctinyint) as c1 -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT SUM(ctinyint) as c1 -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesparquet - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(ctinyint) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:bigint] - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: _col0:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT SUM(ctinyint) as c1 -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(ctinyint) as c1 -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --39856 -PREHOOK: query: EXPLAIN VECTORIZATION -SELECT - avg(ctinyint) as c1, - variance(ctinyint), - var_pop(ctinyint), - var_samp(ctinyint), - std(ctinyint), - stddev(ctinyint), - stddev_pop(ctinyint), - stddev_samp(ctinyint) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION -SELECT - avg(ctinyint) as c1, - variance(ctinyint), - var_pop(ctinyint), - var_samp(ctinyint), - std(ctinyint), - stddev(ctinyint), - stddev_pop(ctinyint), - stddev_samp(ctinyint) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesparquet - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct<count:bigint,sum:double,input:tinyint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - avg(ctinyint) as c1, - variance(ctinyint), - var_pop(ctinyint), - var_samp(ctinyint), - std(ctinyint), - stddev(ctinyint), - stddev_pop(ctinyint), - stddev_samp(ctinyint) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: SELECT - avg(ctinyint) as c1, - variance(ctinyint), - var_pop(ctinyint), - var_samp(ctinyint), - std(ctinyint), - stddev(ctinyint), - stddev_pop(ctinyint), - stddev_samp(ctinyint) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT MIN(cbigint) as c1, - MAX(cbigint), - COUNT(cbigint), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT MIN(cbigint) as c1, - MAX(cbigint), - COUNT(cbigint), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesparquet - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: cbigint - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [3] - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(cbigint), max(cbigint), count(cbigint), count() - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 3:bigint) -> bigint, VectorUDAFMaxLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3] - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [3] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:bigint, 1:_col1:bigint, 2:_col2:bigint, 3:_col3:bigint] - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 4 - includeColumns: [0, 1, 2, 3] - dataColumns: _col0:bigint, _col1:bigint, _col2:bigint, _col3:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT MIN(cbigint) as c1, - MAX(cbigint), - COUNT(cbigint), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: SELECT MIN(cbigint) as c1, - MAX(cbigint), - COUNT(cbigint), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --2147311592 2145498388 9173 12288 -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT SUM(cbigint) as c1 -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT SUM(cbigint) as c1 -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesparquet - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: cbigint - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [3] - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(cbigint) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [3] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:bigint] - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: _col0:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT SUM(cbigint) as c1 -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(cbigint) as c1 -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --1698460028409 -PREHOOK: query: EXPLAIN VECTORIZATION -SELECT - avg(cbigint) as c1, - variance(cbigint), - var_pop(cbigint), - var_samp(cbigint), - std(cbigint), - stddev(cbigint), - stddev_pop(cbigint), - stddev_samp(cbigint) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION -SELECT - avg(cbigint) as c1, - variance(cbigint), - var_pop(cbigint), - var_samp(cbigint), - std(cbigint), - stddev(cbigint), - stddev_pop(cbigint), - stddev_samp(cbigint) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesparquet - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: cbigint - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - avg(cbigint) as c1, - variance(cbigint), - var_pop(cbigint), - var_samp(cbigint), - std(cbigint), - stddev(cbigint), - stddev_pop(cbigint), - stddev_samp(cbigint) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: SELECT - avg(cbigint) as c1, - variance(cbigint), - var_pop(cbigint), - var_samp(cbigint), - std(cbigint), - stddev(cbigint), - stddev_pop(cbigint), - stddev_samp(cbigint) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT MIN(cfloat) as c1, - MAX(cfloat), - COUNT(cfloat), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT MIN(cfloat) as c1, - MAX(cfloat), - COUNT(cfloat), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesparquet - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] - Select Operator - expressions: cfloat (type: float) - outputColumnNames: cfloat - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [4] - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(cfloat), max(cfloat), count(cfloat), count() - Group By Vectorization: - aggregators: VectorUDAFMinDouble(col 4:float) -> float, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3] - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [4] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:float, 1:_col1:float, 2:_col2:bigint, 3:_col3:bigint] - Reduce Output Operator - key expressions: _col0 (type: float) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 4 - includeColumns: [0, 1, 2, 3] - dataColumns: _col0:float, _col1:float, _col2:bigint, _col3:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT MIN(cfloat) as c1, - MAX(cfloat), - COUNT(cfloat), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: SELECT MIN(cfloat) as c1, - MAX(cfloat), - COUNT(cfloat), - COUNT(*) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --64.0 79.553 9173 12288 -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT SUM(cfloat) as c1 -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT SUM(cfloat) as c1 -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesparquet - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] - Select Operator - expressions: cfloat (type: float) - outputColumnNames: cfloat - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [4] - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(cfloat) - Group By Vectorization: - aggregators: VectorUDAFSumDouble(col 4:float) -> double - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [4] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:_col0:double] - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: _col0:double - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT SUM(cfloat) as c1 -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(cfloat) as c1 -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --39479.635992884636 -PREHOOK: query: EXPLAIN VECTORIZATION -SELECT - avg(cfloat) as c1, - variance(cfloat), - var_pop(cfloat), - var_samp(cfloat), - std(cfloat), - stddev(cfloat), - stddev_pop(cfloat), - stddev_samp(cfloat) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION -SELECT - avg(cfloat) as c1, - variance(cfloat), - var_pop(cfloat), - var_samp(cfloat), - std(cfloat), - stddev(cfloat), - stddev_pop(cfloat), - stddev_samp(cfloat) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesparquet - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cfloat (type: float) - outputColumnNames: cfloat - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct<count:bigint,sum:double,input:float>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - avg(cfloat) as c1, - variance(cfloat), - var_pop(cfloat), - var_samp(cfloat), - std(cfloat), - stddev(cfloat), - stddev_pop(cfloat), - stddev_samp(cfloat) -FROM alltypesparquet -ORDER BY c1 -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: SELECT - avg(cfloat) as c1, - variance(cfloat), - var_pop(cfloat), - var_samp(cfloat), - std(cfloat), - stddev(cfloat), - stddev_pop(cfloat), - stddev_samp(cfloat) -FROM alltypesparquet -ORDER BY c1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 -WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT AVG(cbigint), - (-(AVG(cbigint))), - (-6432 + AVG(cbigint)), - STDDEV_POP(cbigint), - (-((-6432 + AVG(cbigint)))), - ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), - VAR_SAMP(cbigint), - (-((-6432 + AVG(cbigint)))), - (-6432 + (-((-6432 + AVG(cbigint))))), - (-((-6432 + AVG(cbigint)))), - ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), - COUNT(*), - SUM(cfloat), - (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), - (-(VAR_SAMP(cbigint))), - ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), - MIN(ctinyint), - (-(MIN(ctinyint))) -FROM alltypesparquet -WHERE (((cstring2 LIKE '%b%') - OR ((79.553 != cint) - OR (cbigint < cdouble))) - OR ((ctinyint >= csmallint) - AND ((cboolean2 = 1) - AND (3569 = ctinyint)))) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL -SELECT AVG(cbigint), - (-(AVG(cbigint))), - (-6432 + AVG(cbigint)), - STDDEV_POP(cbigint), - (-((-6432 + AVG(cbigint)))), - ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), - VAR_SAMP(cbigint), - (-((-6432 + AVG(cbigint)))), - (-6432 + (-((-6432 + AVG(cbigint))))), - (-((-6432 + AVG(cbigint)))), - ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), - COUNT(*), - SUM(cfloat), - (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), - (-(VAR_SAMP(cbigint))), - ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), - MIN(ctinyint), - (-(MIN(ctinyint))) -FROM alltypesparquet -WHERE (((cstring2 LIKE '%b%') - OR ((79.553 != cint) - OR (cbigint < cdouble))) - OR ((ctinyint >= csmallint) - AND ((cboolean2 = 1) - AND (3569 = ctinyint)))) -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesparquet - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 13:decimal(13,3))(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterLongColEqualLongScalar(col 11:boolean, val 1), FilterLongScalarEqualLongColumn(val 3569, col 0:int)(children: col 0:tinyint))) - predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint))) or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cbigint (type: bigint), cfloat (type: float) - outputColumnNames: ctinyint, cbigint, cfloat - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3, 4] - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) - Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 3:bigint) -> struct<count:bigint,sum:double,input:bigint>, VectorUDAFVarLong(col 3:bigint) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFVarLong(col 3:bigint) -> struct<count:bigint,sum:double,variance:double> aggregation: var_samp, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 1, 2, 3, 4, 5, 7, 11] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(13,3), double] - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: SELECT AVG(cbigint), - (-(AVG(cbigint))), - (-6432 + AVG(cbigint)), - STDDEV_POP(cbigint), - (-((-6432 + AVG(cbigint)))), - ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), - VAR_SAMP(cbigint), - (-((-6432 + AVG(cbigint)))), - (-6432 + (-((-6432 + AVG(cbigint))))), - (-((-6432 + AVG(cbigint)))), - ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), - COUNT(*), - SUM(cfloat), - (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), - (-(VAR_SAMP(cbigint))), - ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), - MIN(ctinyint), - (-(MIN(ctinyint))) -FROM alltypesparquet -WHERE (((cstring2 LIKE '%b%') - OR ((79.553 != cint) - OR (cbigint < cdouble))) - OR ((ctinyint >= csmallint) - AND ((cboolean2 = 1) - AND (3569 = ctinyint)))) -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: SELECT AVG(cbigint), - (-(AVG(cbigint))), - (-6432 + AVG(cbigint)), - STDDEV_POP(cbigint), - (-((-6432 + AVG(cbigint)))), - ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), - VAR_SAMP(cbigint), - (-((-6432 + AVG(cbigint)))), - (-6432 + (-((-6432 + AVG(cbigint))))), - (-((-6432 + AVG(cbigint)))), - ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), - COUNT(*), - SUM(cfloat), - (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), - (-(VAR_SAMP(cbigint))), - ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), - MIN(ctinyint), - (-(MIN(ctinyint))) -FROM alltypesparquet -WHERE (((cstring2 LIKE '%b%') - OR ((79.553 != cint) - OR (cbigint < cdouble))) - OR ((ctinyint >= csmallint) - AND ((cboolean2 = 1) - AND (3569 = ctinyint)))) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64 -PREHOOK: query: EXPLAIN extended -select count(*) from alltypesparquet - where (((cstring1 LIKE 'a%') or ((cstring1 like 'b%') or (cstring1 like 'c%'))) or - ((length(cstring1) < 50 ) and ((cstring1 like '%n') and (length(cstring1) > 0)))) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN extended -select count(*) from alltypesparquet - where (((cstring1 LIKE 'a%') or ((cstring1 like 'b%') or (cstring1 like 'c%'))) or - ((length(cstring1) < 50 ) and ((cstring1 like '%n') and (length(cstring1) > 0)))) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesparquet - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (((length(cstring1) < 50) and (cstring1 like '%n') and (length(cstring1) > 0)) or (cstring1 like 'a%') or (cstring1 like 'b%') or (cstring1 like 'c%')) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: vectorized - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: alltypesparquet - input format: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - output format: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} - bucket_count -1 - column.name.delimiter , - columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 - columns.comments - columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean -#### A masked pattern was here #### - name default.alltypesparquet - numFiles 1 - numRows 12288 - rawDataSize 147456 - serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 594976 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - - input format: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - output format: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cbigint":"true","cboolean1":"true","cboolean2":"true","cdouble":"true","cfloat":"true","cint":"true","csmallint":"true","cstring1":"true","cstring2":"true","ctimestamp1":"true","ctimestamp2":"true","ctinyint":"true"}} - bucket_count -1 - column.name.delimiter , - columns ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2 - columns.comments - columns.types tinyint:smallint:int:bigint:float:double:string:string:timestamp:timestamp:boolean:boolean -#### A masked pattern was here #### - name default.alltypesparquet - numFiles 1 - numRows 12288 - rawDataSize 147456 - serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - totalSize 594976 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe - name: default.alltypesparquet - name: default.alltypesparquet - Truncated Path -> Alias: - /alltypesparquet [alltypesparquet] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from alltypesparquet - where (((cstring1 LIKE 'a%') or ((cstring1 like 'b%') or (cstring1 like 'c%'))) or - ((length(cstring1) < 50 ) and ((cstring1 like '%n') and (length(cstring1) > 0)))) -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from alltypesparquet - where (((cstring1 LIKE 'a%') or ((cstring1 like 'b%') or (cstring1 like 'c%'))) or - ((length(cstring1) < 50 ) and ((cstring1 like '%n') and (length(cstring1) > 0)))) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -1370 -PREHOOK: query: select min(ctinyint), max(ctinyint), sum(ctinyint), avg(ctinyint) from alltypesparquet -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: select min(ctinyint), max(ctinyint), sum(ctinyint), avg(ctinyint) from alltypesparquet -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --64 62 -39856 -4.344925324321378 -PREHOOK: query: select min(csmallint), max(csmallint), sum(csmallint), avg(csmallint) from alltypesparquet -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: select min(csmallint), max(csmallint), sum(csmallint), avg(csmallint) from alltypesparquet -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --16379 16376 7435990 810.5504687159363 -PREHOOK: query: select min(cint), max(cint), sum(cint), avg(cint) from alltypesparquet -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: select min(cint), max(cint), sum(cint), avg(cint) from alltypesparquet -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --1073279343 1073680599 1438050863785 1.567699622571678E8 -PREHOOK: query: select min(cbigint), max(cbigint), sum(cbigint), avg(cbigint) from alltypesparquet -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: select min(cbigint), max(cbigint), sum(cbigint), avg(cbigint) from alltypesparquet -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### --2147311592 2145498388 -1698460028409 -1.8515862077935246E8 -PREHOOK: query: select min(cdouble), max(cdouble), sum(cdouble), avg(cdouble) from alltypesparquet -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesparquet -#### A masked pattern was here #### -POSTHOOK: query: select min(cdouble), max(cdouble), sum(cdouble), avg(cdouble) from alltypesparquet -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypespa
<TRUNCATED>