[1/2] hive git commit: HIVE-18529: Vectorization: Add a debug config option to disable scratch column reuse (Gopal V, reviewed by Matt McCline)

gopalv Mon, 29 Jan 2018 14:40:43 -0800

Repository: hive
Updated Branches:
  refs/heads/master 8a6f36e6f -> d2a14fe28



http://git-wip-us.apache.org/repos/asf/hive/blob/d2a14fe2/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out 
b/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
new file mode 100644
index 0000000..6528b6f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
@@ -0,0 +1,412 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(cint),
+       (AVG(cint) + -3728),
+       (-((AVG(cint) + -3728))),
+       (-((-((AVG(cint) + -3728))))),
+       ((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)),
+       SUM(cdouble),
+       (-(AVG(cint))),
+       STDDEV_POP(cint),
+       (((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)) * 
(-((-((AVG(cint) + -3728)))))),
+       STDDEV_SAMP(csmallint),
+       (-(STDDEV_POP(cint))),
+       (STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))),
+       ((STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))) * STDDEV_POP(cint)),
+       VAR_SAMP(cint),
+       AVG(cfloat),
+       (10.175 - VAR_SAMP(cint)),
+       (-((10.175 - VAR_SAMP(cint)))),
+       ((-(STDDEV_POP(cint))) / -563),
+       STDDEV_SAMP(cint),
+       (-(((-(STDDEV_POP(cint))) / -563))),
+       (AVG(cint) / SUM(cdouble)),
+       MIN(ctinyint),
+       COUNT(csmallint),
+       (MIN(ctinyint) / ((-(STDDEV_POP(cint))) / -563)),
+       (-((AVG(cint) / SUM(cdouble))))
+FROM   alltypesorc
+WHERE  ((762 = cbigint)
+        OR ((csmallint < cfloat)
+            AND ((ctimestamp2 > -5)
+                 AND (cdouble != cint)))
+        OR (cstring1 = 'a')
+           OR ((cbigint <= -1.389)
+               AND ((cstring2 != 'a')
+                    AND ((79.553 != cint)
+                         AND (cboolean2 != cboolean1)))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(cint),
+       (AVG(cint) + -3728),
+       (-((AVG(cint) + -3728))),
+       (-((-((AVG(cint) + -3728))))),
+       ((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)),
+       SUM(cdouble),
+       (-(AVG(cint))),
+       STDDEV_POP(cint),
+       (((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)) * 
(-((-((AVG(cint) + -3728)))))),
+       STDDEV_SAMP(csmallint),
+       (-(STDDEV_POP(cint))),
+       (STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))),
+       ((STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))) * STDDEV_POP(cint)),
+       VAR_SAMP(cint),
+       AVG(cfloat),
+       (10.175 - VAR_SAMP(cint)),
+       (-((10.175 - VAR_SAMP(cint)))),
+       ((-(STDDEV_POP(cint))) / -563),
+       STDDEV_SAMP(cint),
+       (-(((-(STDDEV_POP(cint))) / -563))),
+       (AVG(cint) / SUM(cdouble)),
+       MIN(ctinyint),
+       COUNT(csmallint),
+       (MIN(ctinyint) / ((-(STDDEV_POP(cint))) / -563)),
+       (-((AVG(cint) / SUM(cdouble))))
+FROM   alltypesorc
+WHERE  ((762 = cbigint)
+        OR ((csmallint < cfloat)
+            AND ((ctimestamp2 > -5)
+                 AND (cdouble != cint)))
+        OR (cstring1 = 'a')
+           OR ((cbigint <= -1.389)
+               AND ((cstring2 != 'a')
+                    AND ((79.553 != cint)
+                         AND (cboolean2 != cboolean1)))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2601650 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 
1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 
5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 
8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 
11:cboolean2:boolean, 
12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: 
FilterLongScalarEqualLongColumn(val 762, col 3:bigint), 
FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 13:float, col 
4:float)(children: CastLongToFloatViaLongToDouble(col 1:smallint) -> 13:float), 
FilterDoubleColGreaterDoubleScalar(col 13:double, val -5.0)(children: 
CastTimestampToDouble(col 9:timestamp) -> 13:double), 
FilterDoubleColNotEqualDoubleColumn(col 5:double, col 13:double)(children: 
CastLongToDouble(col 2:int) -> 13:double)), 
FilterStringGroupColEqualStringScalar(col 6:string, val a), 
FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 
14:decimal(22,3), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 
14:decimal(22,3)), FilterStringGroupColNotEqualStringScalar(col 7:string, val 
a), FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 
15:decimal(13,3))(children: CastLongToDecimal(col 2:int) -> 15:decimal(13,3)), 
FilterLongColNotEqualLongColumn(col 11:boolean, col 10:boolean)))
+                    predicate: (((CAST( cbigint AS decimal(22,3)) <= -1.389) 
and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and 
(cboolean2 <> cboolean1)) or ((UDFToFloat(csmallint) < cfloat) and 
(UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (762 = 
cbigint) or (cstring1 = 'a')) (type: boolean)
+                    Statistics: Num rows: 5465 Data size: 1157230 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cfloat (type: float), cdouble (type: double)
+                      outputColumnNames: ctinyint, csmallint, cint, cfloat, 
cdouble
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1, 2, 4, 5]
+                      Statistics: Num rows: 5465 Data size: 1157230 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: avg(cint), sum(cdouble), 
stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), 
stddev_samp(cint), min(ctinyint), count(csmallint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFAvgLong(col 2:int) -> 
struct<count:bigint,sum:double,input:int>, VectorUDAFSumDouble(col 5:double) -> 
double, VectorUDAFVarLong(col 2:int) -> 
struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, 
VectorUDAFVarLong(col 1:smallint) -> 
struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, 
VectorUDAFVarLong(col 2:int) -> struct<count:bigint,sum:double,variance:double> 
aggregation: var_samp, VectorUDAFAvgDouble(col 4:float) -> 
struct<count:bigint,sum:double,input:float>, VectorUDAFVarLong(col 2:int) -> 
struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, 
VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFCount(col 1:smallint) -> 
bigint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 
7, 8]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+                        Statistics: Num rows: 1 Data size: 492 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumnNums: []
+                              native: true
+                              nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                          Statistics: Num rows: 1 Data size: 492 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: 
struct<count:bigint,sum:double,input:int>), _col1 (type: double), _col2 (type: 
struct<count:bigint,sum:double,variance:double>), _col3 (type: 
struct<count:bigint,sum:double,variance:double>), _col4 (type: 
struct<count:bigint,sum:double,variance:double>), _col5 (type: 
struct<count:bigint,sum:double,input:float>), _col6 (type: 
struct<count:bigint,sum:double,variance:double>), _col7 (type: tinyint), _col8 
(type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, 
cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, 
cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, 
cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [double, decimal(22,3), 
decimal(13,3)]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    dataColumns: 
VALUE._col0:struct<count:bigint,sum:double,input:int>, VALUE._col1:double, 
VALUE._col2:struct<count:bigint,sum:double,variance:double>, 
VALUE._col3:struct<count:bigint,sum:double,variance:double>, 
VALUE._col4:struct<count:bigint,sum:double,variance:double>, 
VALUE._col5:struct<count:bigint,sum:double,input:float>, 
VALUE._col6:struct<count:bigint,sum:double,variance:double>, 
VALUE._col7:tinyint, VALUE._col8:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0), sum(VALUE._col1), 
stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), 
avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
+                Group By Vectorization:
+                    aggregators: VectorUDAFAvgFinal(col 
0:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFSumDouble(col 
1:double) -> double, VectorUDAFVarFinal(col 
2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFVarFinal(col 
3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFVarFinal(col 
4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_samp, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:float>) 
-> double, VectorUDAFVarFinal(col 
6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, 
VectorUDAFCountMerge(col 8:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: double), (_col0 + -3728.0) (type: 
double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: 
double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 
(type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 
+ -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), 
_col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + 
-3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) 
(type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) 
(type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) 
(type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), 
(_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), 
(UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) 
(type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 9, 11, 10, 14, 1, 12, 2, 
15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25]
+                      selectExpressions: DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 
10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 
10:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: 
DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 10:double) -> 12:double) -> 10:double, 
DoubleColMultiplyDoubleColumn(col 12:double, col 13:double)(children: 
DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 
12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 
12:double) -> 13:double) -> 12:double, DoubleColAddDoubleScalar(col 0:double, 
val -3728.0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 0:double) -> 
12:double, DoubleColMultiplyDoubleColumn(col 16:double, col 
13:double)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 
15:double)(children: DoubleColUnaryMinus(col 15:double)(children: 
DoubleColUnaryMinus(col 13:double)(children: DoubleColAddDoubleScalar(col 0:double, val 
-3728.0) -> 13:double) -> 15:double) -> 13:double, DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 
15:double)(children: DoubleColUnaryMinus(col 13:double)(children: 
DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) 
-> 13:double) -> 15:double, DoubleColUnaryMinus(col 2:double) -> 13:double, 
DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: 
DoubleColUnaryMinus(col 17:double)(children: DoubleColUnaryMinus(col 
16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 
16:double) -> 17:double) -> 16:double) -> 17:double, 
DoubleColMultiplyDoubleColumn(col 18:double, col 2:double)(children: 
DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: 
DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 
16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) 
-> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, 
DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 18:double, 
DoubleColUnaryMinus(col 19:double)(children: 
DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 19:double) -> 
20:double, DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: 
DoubleColUnaryMinus(col 2:double) -> 19:double) -> 21:double, 
DoubleColUnaryMinus(col 22:double)(children: DoubleColDivideDoubleScalar(col 
19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 
19:double) -> 22:double) -> 19:double, DoubleColDivideDoubleColumn(col 
0:double, col 1:double) -> 22:double, DoubleColDivideDoubleColumn(col 
23:double, col 25:double)(children: CastLongToDouble(col 7:tinyint) -> 
23:double, DoubleColDivideDoubleScalar(col 24:double, val -563.0)(children: 
DoubleColUnaryMinus(col 2:double) -> 24:double) -> 25:double) -> 24:double, 
DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleColumn(col 
0:double, col 1:double) -> 23:double) -> 25:double
+                  Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 196 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(cint),
+       (AVG(cint) + -3728),
+       (-((AVG(cint) + -3728))),
+       (-((-((AVG(cint) + -3728))))),
+       ((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)),
+       SUM(cdouble),
+       (-(AVG(cint))),
+       STDDEV_POP(cint),
+       (((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)) * 
(-((-((AVG(cint) + -3728)))))),
+       STDDEV_SAMP(csmallint),
+       (-(STDDEV_POP(cint))),
+       (STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))),
+       ((STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))) * STDDEV_POP(cint)),
+       VAR_SAMP(cint),
+       AVG(cfloat),
+       (10.175 - VAR_SAMP(cint)),
+       (-((10.175 - VAR_SAMP(cint)))),
+       ((-(STDDEV_POP(cint))) / -563),
+       STDDEV_SAMP(cint),
+       (-(((-(STDDEV_POP(cint))) / -563))),
+       (AVG(cint) / SUM(cdouble)),
+       MIN(ctinyint),
+       COUNT(csmallint),
+       (MIN(ctinyint) / ((-(STDDEV_POP(cint))) / -563)),
+       (-((AVG(cint) / SUM(cdouble))))
+FROM   alltypesorc
+WHERE  ((762 = cbigint)
+        OR ((csmallint < cfloat)
+            AND ((ctimestamp2 > -5)
+                 AND (cdouble != cint)))
+        OR (cstring1 = 'a')
+           OR ((cbigint <= -1.389)
+               AND ((cstring2 != 'a')
+                    AND ((79.553 != cint)
+                         AND (cboolean2 != cboolean1)))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(cint),
+       (AVG(cint) + -3728),
+       (-((AVG(cint) + -3728))),
+       (-((-((AVG(cint) + -3728))))),
+       ((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)),
+       SUM(cdouble),
+       (-(AVG(cint))),
+       STDDEV_POP(cint),
+       (((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)) * 
(-((-((AVG(cint) + -3728)))))),
+       STDDEV_SAMP(csmallint),
+       (-(STDDEV_POP(cint))),
+       (STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))),
+       ((STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))) * STDDEV_POP(cint)),
+       VAR_SAMP(cint),
+       AVG(cfloat),
+       (10.175 - VAR_SAMP(cint)),
+       (-((10.175 - VAR_SAMP(cint)))),
+       ((-(STDDEV_POP(cint))) / -563),
+       STDDEV_SAMP(cint),
+       (-(((-(STDDEV_POP(cint))) / -563))),
+       (AVG(cint) / SUM(cdouble)),
+       MIN(ctinyint),
+       COUNT(csmallint),
+       (MIN(ctinyint) / ((-(STDDEV_POP(cint))) / -563)),
+       (-((AVG(cint) / SUM(cdouble))))
+FROM   alltypesorc
+WHERE  ((762 = cbigint)
+        OR ((csmallint < cfloat)
+            AND ((ctimestamp2 > -5)
+                 AND (cdouble != cint)))
+        OR (cstring1 = 'a')
+           OR ((cbigint <= -1.389)
+               AND ((cstring2 != 'a')
+                    AND ((79.553 != cint)
+                         AND (cboolean2 != cboolean1)))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2601650 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 
1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 
5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 
8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 
11:cboolean2:boolean, 
12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: 
FilterLongScalarEqualLongColumn(val 762, col 3:bigint), 
FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 13:float, col 
4:float)(children: CastLongToFloatViaLongToDouble(col 1:smallint) -> 13:float), 
FilterDoubleColGreaterDoubleScalar(col 14:double, val -5.0)(children: 
CastTimestampToDouble(col 9:timestamp) -> 14:double), 
FilterDoubleColNotEqualDoubleColumn(col 5:double, col 15:double)(children: 
CastLongToDouble(col 2:int) -> 15:double)), 
FilterStringGroupColEqualStringScalar(col 6:string, val a), 
FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 
16:decimal(22,3), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 
16:decimal(22,3)), FilterStringGroupColNotEqualStringScalar(col 7:string, val 
a), FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 
17:decimal(13,3))(children: CastLongToDecimal(col 2:int) -> 17:decimal(13,3)), 
FilterLongColNotEqualLongColumn(col 11:boolean, col 10:boolean)))
+                    predicate: (((CAST( cbigint AS decimal(22,3)) <= -1.389) 
and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and 
(cboolean2 <> cboolean1)) or ((UDFToFloat(csmallint) < cfloat) and 
(UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (762 = 
cbigint) or (cstring1 = 'a')) (type: boolean)
+                    Statistics: Num rows: 5465 Data size: 1157230 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cfloat (type: float), cdouble (type: double)
+                      outputColumnNames: ctinyint, csmallint, cint, cfloat, 
cdouble
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1, 2, 4, 5]
+                      Statistics: Num rows: 5465 Data size: 1157230 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: avg(cint), sum(cdouble), 
stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), 
stddev_samp(cint), min(ctinyint), count(csmallint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFAvgLong(col 2:int) -> 
struct<count:bigint,sum:double,input:int>, VectorUDAFSumDouble(col 5:double) -> 
double, VectorUDAFVarLong(col 2:int) -> 
struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, 
VectorUDAFVarLong(col 1:smallint) -> 
struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, 
VectorUDAFVarLong(col 2:int) -> struct<count:bigint,sum:double,variance:double> 
aggregation: var_samp, VectorUDAFAvgDouble(col 4:float) -> 
struct<count:bigint,sum:double,input:float>, VectorUDAFVarLong(col 2:int) -> 
struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, 
VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFCount(col 1:smallint) -> 
bigint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 
7, 8]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+                        Statistics: Num rows: 1 Data size: 492 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumnNums: []
+                              native: true
+                              nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                          Statistics: Num rows: 1 Data size: 492 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: 
struct<count:bigint,sum:double,input:int>), _col1 (type: double), _col2 (type: 
struct<count:bigint,sum:double,variance:double>), _col3 (type: 
struct<count:bigint,sum:double,variance:double>), _col4 (type: 
struct<count:bigint,sum:double,variance:double>), _col5 (type: 
struct<count:bigint,sum:double,input:float>), _col6 (type: 
struct<count:bigint,sum:double,variance:double>), _col7 (type: tinyint), _col8 
(type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, 
cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, 
cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, 
cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [double, double, double, 
decimal(22,3), decimal(13,3)]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    dataColumns: 
VALUE._col0:struct<count:bigint,sum:double,input:int>, VALUE._col1:double, 
VALUE._col2:struct<count:bigint,sum:double,variance:double>, 
VALUE._col3:struct<count:bigint,sum:double,variance:double>, 
VALUE._col4:struct<count:bigint,sum:double,variance:double>, 
VALUE._col5:struct<count:bigint,sum:double,input:float>, 
VALUE._col6:struct<count:bigint,sum:double,variance:double>, 
VALUE._col7:tinyint, VALUE._col8:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0), sum(VALUE._col1), 
stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), 
avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
+                Group By Vectorization:
+                    aggregators: VectorUDAFAvgFinal(col 
0:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFSumDouble(col 
1:double) -> double, VectorUDAFVarFinal(col 
2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFVarFinal(col 
3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFVarFinal(col 
4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_samp, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:float>) 
-> double, VectorUDAFVarFinal(col 
6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, 
VectorUDAFCountMerge(col 8:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: double), (_col0 + -3728.0) (type: 
double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: 
double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 
(type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 
+ -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), 
_col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + 
-3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) 
(type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) 
(type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) 
(type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), 
(_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), 
(UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) 
(type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 9, 11, 14, 19, 1, 20, 2, 
29, 3, 30, 34, 39, 4, 5, 40, 42, 44, 6, 47, 48, 7, 8, 52, 54]
+                      selectExpressions: DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 
10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 
10:double) -> 11:double, DoubleColUnaryMinus(col 13:double)(children: 
DoubleColUnaryMinus(col 12:double)(children: DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 12:double) -> 13:double) -> 14:double, 
DoubleColMultiplyDoubleColumn(col 17:double, col 18:double)(children: 
DoubleColUnaryMinus(col 16:double)(children: DoubleColUnaryMinus(col 
15:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 
15:double) -> 16:double) -> 17:double, DoubleColAddDoubleScalar(col 0:double, 
val -3728.0) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 0:double) -> 
20:double, DoubleColMultiplyDoubleColumn(col 25:double, col 
28:double)(children: DoubleColMultiplyDoubleColumn(col 23:double, col 
24:double)(children: DoubleColUnaryMinus(col 22:double)(children: 
DoubleColUnaryMinus(col 21:double)(children: DoubleColAddDoubleScalar(col 0:double, val 
-3728.0) -> 21:double) -> 22:double) -> 23:double, DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 24:double) -> 25:double, DoubleColUnaryMinus(col 
27:double)(children: DoubleColUnaryMinus(col 26:double)(children: 
DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 26:double) -> 27:double) 
-> 28:double) -> 29:double, DoubleColUnaryMinus(col 2:double) -> 30:double, 
DoubleColSubtractDoubleColumn(col 2:double, col 33:double)(children: 
DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 
31:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 
31:double) -> 32:double) -> 33:double) -> 34:double, 
DoubleColMultiplyDoubleColumn(col 38:double, col 2:double)(children: 
DoubleColSubtractDoubleColumn(col 2:double, col 37:double)(children: 
DoubleColUnaryMinus(col 36:double)(children: DoubleColUnaryMinus(col 
35:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) 
-> 35:double) -> 36:double) -> 37:double) -> 38:double) -> 39:double, 
DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 40:double, 
DoubleColUnaryMinus(col 41:double)(children: 
DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 41:double) -> 
42:double, DoubleColDivideDoubleScalar(col 43:double, val -563.0)(children: 
DoubleColUnaryMinus(col 2:double) -> 43:double) -> 44:double, 
DoubleColUnaryMinus(col 46:double)(children: DoubleColDivideDoubleScalar(col 
45:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 
45:double) -> 46:double) -> 47:double, DoubleColDivideDoubleColumn(col 
0:double, col 1:double) -> 48:double, DoubleColDivideDoubleColumn(col 
49:double, col 51:double)(children: CastLongToDouble(col 7:tinyint) -> 
49:double, DoubleColDivideDoubleScalar(col 50:double, val -563.0)(children: 
DoubleColUnaryMinus(col 2:double) -> 50:double) -> 51:double) -> 52:double, 
DoubleColUnaryMinus(col 53:double)(children: DoubleColDivideDoubleColumn(col 
0:double, col 1:double) -> 53:double) -> 54:double
+                  Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 196 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

[1/2] hive git commit: HIVE-18529: Vectorization: Add a debug config option to disable scratch column reuse (Gopal V, reviewed by Matt McCline)

Reply via email to