http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
index 903e74b..a445b44 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT cfloat,
        cstring1,
        cint,
@@ -22,7 +22,7 @@ WHERE (((cbigint > -23)
          OR (cfloat = cdouble))))
 ORDER BY cbigint, cfloat
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT cfloat,
        cstring1,
        cint,
@@ -69,7 +69,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -91,10 +90,8 @@ STAGE PLANS:
                       sort order: ++
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
-                          keyColumnNums: [3, 4]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18]
                       Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
             Execution mode: vectorized
@@ -107,27 +104,14 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 8]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [decimal(13,3), double, double, bigint, double, double, double, double, decimal(19,0), decimal(11,4), double]
         Reducer 2
             Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: zz
-                reduceColumnSortOrder: ++
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 14
-                    dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:float, VALUE._col0:string, VALUE._col1:int, VALUE._col2:timestamp, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, VALUE._col9:double, VALUE._col10:decimal(11,4), VALUE._col11:double
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double)
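Every hunk in this commit swaps the golden files from EXPLAIN VECTORIZATION DETAIL to EXPLAIN VECTORIZATION EXPRESSION. The level keyword only controls how much vectorization metadata is printed, not the plan itself, which is why the DETAIL-only lines (vectorizationSchemaColumns, keyColumnNums/valueColumnNums, rowBatchContext, reduceColumnNullOrder/reduceColumnSortOrder) drop out of the expected output while everything else is unchanged. A minimal sketch of the syntax, using a simplified query against the same alltypesparquet test table (the column list here is illustrative, not the full query from the diff):

    -- Levels, roughly least to most verbose: SUMMARY, OPERATOR, EXPRESSION, DETAIL.
    -- EXPRESSION stops short of the per-batch column maps being removed above.
    EXPLAIN VECTORIZATION EXPRESSION
    SELECT cfloat, cstring1, cint
    FROM alltypesparquet
    WHERE cbigint > -23
    ORDER BY cbigint, cfloat;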
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
index 390b1df..5323ab3 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT AVG(csmallint),
        (AVG(csmallint) % -563),
        (AVG(csmallint) + 762),
@@ -21,7 +21,7 @@ WHERE (((ctimestamp1 < ctimestamp2)
         AND ((-10669 != ctimestamp2)
          OR (359 > cint))))
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT AVG(csmallint),
        (AVG(csmallint) % -563),
        (AVG(csmallint) + 762),
@@ -67,7 +67,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -100,10 +99,8 @@ STAGE PLANS:
                       sort order:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkEmptyKeyOperator
-                          keyColumnNums: []
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                       Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: bigint)
             Execution mode: vectorized
@@ -116,27 +113,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 1, 2, 3, 4, 5, 7, 8, 9]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [double, double, double, double]
         Reducer 2
             Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder:
-                reduceColumnSortOrder:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 10
-                    dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:bigint, VALUE._col7:tinyint, VALUE._col8:double, VALUE._col9:bigint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), count(VALUE._col6), min(VALUE._col7), sum(VALUE._col8), count(VALUE._col9)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
index 140f009..62dd3f5 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
@@ -1,5 +1,5 @@
 WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT STDDEV_SAMP(csmallint),
        (STDDEV_SAMP(csmallint) - 10.175),
        STDDEV_POP(ctinyint),
@@ -24,7 +24,7 @@ WHERE (((cint <= cfloat)
         AND ((79.553 <= csmallint)
          AND (ctimestamp1 > ctimestamp2))))
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT STDDEV_SAMP(csmallint),
        (STDDEV_SAMP(csmallint) - 10.175),
        STDDEV_POP(ctinyint),
@@ -72,7 +72,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -105,10 +104,8 @@ STAGE PLANS:
                       sort order:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkEmptyKeyOperator
-                          keyColumnNums: []
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
                       Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: double), _col13 (type: double)
             Execution mode: vectorized
@@ -121,27 +118,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 1, 2, 3, 4, 5, 8, 9]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3), double, double, double, double, double, double, double, double]
         Reducer 2
             Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder:
-                reduceColumnSortOrder:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 14
-                    dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:bigint, VALUE._col9:double, VALUE._col10:bigint, VALUE._col11:bigint, VALUE._col12:double, VALUE._col13:double
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
index adf6d60..0f544a4 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cint),
        (SUM(cint) * -563),
        (-3728 + SUM(cint)),
@@ -21,7 +21,7 @@ WHERE (((csmallint >= cint)
         AND ((ctinyint != cbigint)
          OR (-3728 >= cdouble))))
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cint),
        (SUM(cint) * -563),
        (-3728 + SUM(cint)),
@@ -67,7 +67,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -100,10 +99,8 @@ STAGE PLANS:
                       sort order:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkEmptyKeyOperator
-                          keyColumnNums: []
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [0, 1, 2, 3, 4]
                       Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: tinyint)
             Execution mode: vectorized
@@ -116,27 +113,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 1, 2, 3, 5]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [double]
         Reducer 2
             Execution mode: vectorized
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder:
-                reduceColumnSortOrder:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 5
-                    dataColumns: VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:bigint, VALUE._col4:tinyint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), min(VALUE._col4)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
index 5f2e2ca..40205d2 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MAX(csmallint),
        (MAX(csmallint) * -75),
        COUNT(*),
@@ -18,7 +18,7 @@ WHERE (((cboolean2 IS NOT NULL)
         AND ((ctimestamp2 IS NOT NULL)
          AND (cstring2 LIKE 'a'))))
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MAX(csmallint),
        (MAX(csmallint) * -75),
        COUNT(*),
@@ -61,7 +61,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -93,10 +92,8 @@ STAGE PLANS:
                       sort order:
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkEmptyKeyOperator
-                          keyColumnNums: []
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [0, 1, 2, 3, 4]
                       Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint)
             Execution mode: vectorized
@@ -109,27 +106,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 1, 2, 5, 6, 7, 9, 11]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [double]
         Reducer 2
             Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder:
-                reduceColumnSortOrder:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 5
-                    dataColumns: VALUE._col0:smallint, VALUE._col1:bigint, VALUE._col2:smallint, VALUE._col3:bigint, VALUE._col4:tinyint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), max(VALUE._col4)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out
index 5b7f309..362d19c 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        cfloat,
        cstring1,
@@ -19,7 +19,7 @@ WHERE ((ctinyint != 0)
         AND ((cstring2 LIKE '%a')
          OR (cfloat <= -257))))))
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        cfloat,
        cstring1,
@@ -61,7 +61,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -98,12 +97,6 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 10, 11]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [bigint, bigint, double, double, double, bigint, double, bigint, bigint, bigint]

   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
index 9e93897..e5d6345 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        cbigint,
        csmallint,
@@ -25,7 +25,7 @@ WHERE ((ctinyint != 0)
 ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9
 LIMIT 25
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT cboolean1,
        cbigint,
        csmallint,
@@ -75,7 +75,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -97,10 +96,8 @@ STAGE PLANS:
                       sort order: +++++++++++++++
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
-                          keyColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: []
                       Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
                       TopN Hash Memory Usage: 0.1
             Execution mode: vectorized
@@ -113,27 +110,14 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 1, 2, 3, 5, 6, 7, 8, 9, 10]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [double, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint]
         Reducer 2
             Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: zzzzzzzzzzzzzzz
-                reduceColumnSortOrder: +++++++++++++++
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 15
-                    dataColumns: KEY.reducesinkkey0:boolean, KEY.reducesinkkey1:bigint, KEY.reducesinkkey2:smallint, KEY.reducesinkkey3:tinyint, KEY.reducesinkkey4:timestamp, KEY.reducesinkkey5:string, KEY.reducesinkkey6:bigint, KEY.reducesinkkey7:int, KEY.reducesinkkey8:smallint, KEY.reducesinkkey9:tinyint, KEY.reducesinkkey10:int, KEY.reducesinkkey11:bigint, KEY.reducesinkkey12:int, KEY.reducesinkkey13:tinyint, KEY.reducesinkkey14:tinyint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
index 8ac270d..b10b550 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
@@ -23,7 +23,7 @@ WHERE (((cstring2 IS NOT NULL)
 ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9
 LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
@@ -71,7 +71,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -93,10 +92,8 @@ STAGE PLANS:
                       sort order: ++++++++++++++
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
-                          keyColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: []
                       Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                       TopN Hash Memory Usage: 0.1
             Execution mode: vectorized
@@ -109,27 +106,14 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [double, double, double, double, double, double, double, double, double, double, double]
         Reducer 2
             Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: zzzzzzzzzzzzzz
-                reduceColumnSortOrder: ++++++++++++++
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 14
-                    dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:double, KEY.reducesinkkey2:boolean, KEY.reducesinkkey3:string, KEY.reducesinkkey4:float, KEY.reducesinkkey5:double, KEY.reducesinkkey6:double, KEY.reducesinkkey7:double, KEY.reducesinkkey8:float, KEY.reducesinkkey9:double, KEY.reducesinkkey10:double, KEY.reducesinkkey11:float, KEY.reducesinkkey12:float, KEY.reducesinkkey13:double
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
index a85f4d3..c8ad650 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT cstring1,
        cdouble,
        ctimestamp1,
@@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%')
          OR (cstring1 < 'a')))
 GROUP BY cstring1, cdouble, ctimestamp1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT cstring1,
        cdouble,
        ctimestamp1,
@@ -61,7 +61,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -98,10 +97,8 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkMultiKeyOperator
-                            keyColumnNums: [0, 1, 2]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [3, 4, 5, 6]
                         Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double)
             Execution mode: vectorized
@@ -114,27 +111,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [5, 6, 7, 8]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [double]
         Reducer 2
             Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: aaa
-                reduceColumnSortOrder: +++
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 7
-                    dataColumns: KEY._col0:string, KEY._col1:double, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), min(VALUE._col3)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
index 97194a2..4c6fbca 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
@@ -72,10 +72,10 @@ POSTHOOK: Input: default@alltypesparquet
 -1887561756	10361.0
 -1887561756	1839.0
 -1887561756	9531.0
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain VECTORIZATION EXPRESSION
 select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain VECTORIZATION EXPRESSION
 select ctinyint,cdouble,csmallint from alltypesparquet where ctinyint is not null order by ctinyint,cdouble limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -101,7 +101,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -122,10 +121,8 @@ STAGE PLANS:
                       sort order: ++
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
-                          keyColumnNums: [0, 5]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [1]
                       Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                       TopN Hash Memory Usage: 0.3
                       value expressions: _col2 (type: smallint)
@@ -139,27 +136,14 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 1, 5]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2
            Execution mode: vectorized
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-               reduceColumnNullOrder: zz
-               reduceColumnSortOrder: ++
               allNative: false
               usesVectorUDFAdaptor: false
              vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 3
-                  dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, VALUE._col0:smallint
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
@@ -220,10 +204,10 @@ POSTHOOK: Input: default@alltypesparquet
 -64	-7196.0	-7196
 -64	-8080.0	-8080
 -64	-9842.0	-9842
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain VECTORIZATION EXPRESSION
 select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain VECTORIZATION EXPRESSION
 select ctinyint,avg(cdouble + 1) from alltypesparquet group by ctinyint order by ctinyint limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -248,7 +232,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: ctinyint (type: tinyint), (cdouble + 1.0D) (type: double)
                     outputColumnNames: _col0, _col1
@@ -278,11 +261,8 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: tinyint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: [1, 2]
                         Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.3
                         value expressions: _col1 (type: double), _col2 (type: bigint)
@@ -296,27 +276,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 5]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [double]
         Reducer 2
             Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: z
-                reduceColumnSortOrder: +
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 3
-                    dataColumns: KEY._col0:tinyint, VALUE._col0:double, VALUE._col1:bigint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), count(VALUE._col1)
@@ -392,10 +359,10 @@ POSTHOOK: Input: default@alltypesparquet
 -62	245.69387755102042
 -63	2178.7272727272725
 -64	373.52941176470586
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain VECTORIZATION EXPRESSION
 select distinct(ctinyint) from alltypesparquet limit 20
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain VECTORIZATION EXPRESSION
 select distinct(ctinyint) from alltypesparquet limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -420,7 +387,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: ctinyint (type: tinyint)
                     outputColumnNames: ctinyint
@@ -447,10 +413,8 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: tinyint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkLongOperator
-                            keyColumnNums: [0]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
                         Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.3
             Execution mode: vectorized
@@ -463,27 +427,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
        Reducer 2
            Execution mode: vectorized
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-               reduceColumnNullOrder: a
-               reduceColumnSortOrder: +
               allNative: false
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 1
-                  dataColumns: KEY._col0:tinyint
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
            Reduce Operator Tree:
              Group By Operator
                Group By Vectorization:
@@ -548,10 +499,10 @@ POSTHOOK: Input: default@alltypesparquet
 -63
 -64
 NULL
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain VECTORIZATION EXPRESSION
 select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain VECTORIZATION EXPRESSION
 select ctinyint, count(distinct(cdouble)) from alltypesparquet group by ctinyint order by ctinyint limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -576,7 +527,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: ctinyint (type: tinyint), cdouble (type: double)
                     outputColumnNames: ctinyint, cdouble
@@ -603,11 +553,8 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: tinyint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: []
                         Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
             Map Vectorization:
@@ -619,27 +566,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 5]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
        Reducer 2
            Execution mode: vectorized
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-               reduceColumnNullOrder: za
-               reduceColumnSortOrder: ++
               allNative: false
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  dataColumns: KEY._col0:tinyint, KEY._col1:double
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
            Reduce Operator Tree:
              Group By Operator
                Group By Vectorization:
@@ -718,10 +652,10 @@ POSTHOOK: Input: default@alltypesparquet
 -62	27
 -63	19
 -64	24
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain VECTORIZATION EXPRESSION
 select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain VECTORIZATION EXPRESSION
 select ctinyint,cdouble from alltypesparquet order by ctinyint limit 0
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -746,10 +680,10 @@ POSTHOOK: query: select ctinyint,cdouble from alltypesparquet order by ctinyint
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesparquet
 #### A masked pattern was here ####
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain VECTORIZATION EXPRESSION
 select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by cdouble order by sum, cdouble limit 20
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain VECTORIZATION EXPRESSION
 select cdouble, sum(ctinyint) as sum from alltypesparquet where ctinyint is not null group by cdouble order by sum, cdouble limit 20
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -776,7 +710,6 @@ STAGE PLANS:
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -804,10 +737,8 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: double)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkMultiKeyOperator
-                            keyColumnNums: [0]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [1]
                         Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized
@@ -820,27 +751,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 12
-                    includeColumns: [0, 5]
-                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
        Reducer 2
            Execution mode: vectorized
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-               reduceColumnNullOrder: a
-               reduceColumnSortOrder: +
               allNative: false
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  dataColumns: KEY._col0:double, VALUE._col0:bigint
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
@@ -861,10 +779,8 @@ STAGE PLANS:
                       sort order: ++
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
-                          keyColumnNums: [1, 0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: []
                       Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
                       TopN Hash Memory Usage: 0.3
         Reducer 3
@@ -872,16 +788,9 @@ STAGE PLANS:
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: zz
-                reduceColumnSortOrder: ++
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:double
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/semijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/semijoin.q.out b/ql/src/test/results/clientpositive/spark/semijoin.q.out
index 5455b36..87d8d40 100644
--- a/ql/src/test/results/clientpositive/spark/semijoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/semijoin.q.out
@@ -1885,7 +1885,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
                Left Semi Join 1 to 2
           keys:
             0 key (type: int)
@@ -2284,7 +2284,7 @@ STAGE PLANS:
         Join Operator
           condition map:
                Left Semi Join 0 to 1
-               Outer Join 0 to 2
+               Full Outer Join 0 to 2
           keys:
             0 key (type: int)
            1 _col0 (type: int)
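The remaining files in this commit change only the join condition maps: the label "Outer Join" in EXPLAIN output becomes the unambiguous "Full Outer Join". For orientation, a sketch of a query whose plan prints such a condition map, using hypothetical tables t1 and t2 (not taken from these tests):

    -- EXPLAIN on a full outer join; the plan's Join Operator now prints
    --   Full Outer Join 0 to 1
    -- where the old golden files said just "Outer Join 0 to 1".
    EXPLAIN
    SELECT t1.key, t2.value
    FROM t1
    FULL OUTER JOIN t2 ON t1.key = t2.key;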
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out
index e365f5f..31bcf23 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out
@@ -281,7 +281,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
           outputColumnNames: _col0, _col1, _col2, _col3
           Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
@@ -295,7 +295,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
           outputColumnNames: _col0, _col1, _col2, _col3
           Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE
           File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out
index 183ba44..8175984 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out
@@ -293,7 +293,7 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE
                   Sorted Merge Bucket Map Join Operator
                     condition map:
-                         Outer Join 0 to 1
+                         Full Outer Join 0 to 1
                     keys:
                       0 key (type: int)
                       1 key (type: int)
@@ -539,7 +539,7 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE
                   Sorted Merge Bucket Map Join Operator
                     condition map:
-                         Outer Join 0 to 1
+                         Full Outer Join 0 to 1
                     keys:
                       0 key (type: int)
                       1 key (type: int)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out
index ea44efc..8ce2eb2 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out
@@ -253,7 +253,7 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE
                   Sorted Merge Bucket Map Join Operator
                     condition map:
-                         Outer Join 0 to 1
+                         Full Outer Join 0 to 1
                     keys:
                       0 key (type: int)
                       1 key (type: int)
@@ -503,7 +503,7 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE
                   Sorted Merge Bucket Map Join Operator
                     condition map:
-                         Outer Join 0 to 1
+                         Full Outer Join 0 to 1
                     keys:
                       0 key (type: int)
                       1 key (type: int)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out
index 11fb39d..80f38fa 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out
@@ -252,7 +252,7 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE
                   Sorted Merge Bucket Map Join Operator
                     condition map:
-                         Outer Join 0 to 1
+                         Full Outer Join 0 to 1
                     keys:
                       0 key (type: int)
                       1 key (type: int)
@@ -500,7 +500,7 @@ STAGE PLANS:
                   Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE
                   Sorted Merge Bucket Map Join Operator
                     condition map:
-                         Outer Join 0 to 1
+                         Full Outer Join 0 to 1
                     keys:
                       0 key (type: int)
                       1 key (type: int)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out
index eda1afa..399edaf 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out
@@ -375,7 +375,7 @@ STAGE PLANS:
         Join Operator
           condition map:
                Left Outer Join 0 to 1
-               Outer Join 1 to 2
+               Full Outer Join 1 to 2
           keys:
             0 _col0 (type: int)
            1 _col0 (type: int)
@@ -685,7 +685,7 @@ STAGE PLANS:
         Join Operator
           condition map:
                Right Outer Join 0 to 1
-               Outer Join 1 to 2
+               Full Outer Join 1 to 2
           keys:
             0 _col0 (type: int)
            1 _col0 (type: int)
@@ -861,7 +861,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
                Left Outer Join 1 to 2
           keys:
             0 _col0 (type: int)
@@ -973,7 +973,7 @@ STAGE PLANS:
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
               Right Outer Join 1 to 2
          keys:
            0 _col0 (type: int)
@@ -1082,8 +1082,8 @@ STAGE PLANS:
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Outer Join 0 to 1
-               Outer Join 1 to 2
+               Full Outer Join 0 to 1
+               Full Outer Join 1 to 2
          keys:
            0 _col0 (type: int)
            1 _col0 (type: int)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out
index 9abc627..1b71cdb 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out
@@ -375,7 +375,7 @@ STAGE PLANS:
        Join Operator
          condition map:
               Left Outer Join 0 to 1
-               Outer Join 1 to 2
+               Full Outer Join 1 to 2
          keys:
            0 _col0 (type: int)
            1 _col0 (type: int)
@@ -685,7 +685,7 @@ STAGE PLANS:
        Join Operator
          condition map:
               Right Outer Join 0 to 1
-               Outer Join 1 to 2
+               Full Outer Join 1 to 2
          keys:
            0 _col0 (type: int)
            1 _col0 (type: int)
@@ -861,7 +861,7 @@ STAGE PLANS:
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
               Left Outer Join 1 to 2
          keys:
            0 _col0 (type: int)
@@ -973,7 +973,7 @@ STAGE PLANS:
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
               Right Outer Join 1 to 2
          keys:
            0 _col0 (type: int)
@@ -1082,8 +1082,8 @@ STAGE PLANS:
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Outer Join 0 to 1
-               Outer Join 1 to 2
+               Full Outer Join 0 to 1
+               Full Outer Join 1 to 2
          keys:
            0 _col0 (type: int)
            1 _col0 (type: int)

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
index 610abab..1732927 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
@@ -631,7 +631,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Sorted Merge Bucket Map Join Operator
                     condition map:
-                         Outer Join 0 to 1
+                         Full Outer Join 0 to 1
                     keys:
                       0 key (type: int)
                       1 key (type: int)