http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out b/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out index b17bf57..1443bef 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out @@ -45,16 +45,20 @@ POSTHOOK: Input: default@src 0 val_0 10 val_10 100 val_100 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization only select key, value from char_2 order by key asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization only select key, value from char_2 order by key asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -62,51 +66,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: char(10)), value (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(10)) - sort order: + - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: char(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator - limit: 5 - Processor Tree: - ListSink PREHOOK: query: select key, value from char_2 @@ -146,16 +131,20 @@ POSTHOOK: Input: default@src 97 val_97 97 val_97 96 val_96 -PREHOOK: query: explain select key, value +PREHOOK: query: explain vectorization only select key, value from char_2 order by key desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value +POSTHOOK: query: explain vectorization only select key, value from char_2 order by key desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -163,51 +152,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: char(10)), value (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(10)) - sort order: - - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: char(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator - limit: 5 - Processor Tree: - ListSink PREHOOK: query: select key, value from char_2 @@ -248,12 +218,16 @@ POSTHOOK: query: create table char_3 ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_3 -PREHOOK: query: explain +PREHOOK: query: explain vectorization only operator insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization only operator insert into table char_3 select cint from alltypesorc limit 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -263,68 +237,63 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cint (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: int) + TableScan Vectorization: + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Limit Vectorization: + className: VectorLimitOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: CAST( _col0 AS CHAR(12) (type: char(12)) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.char_3 + Select Vectorization: + className: VectorSelectOperator + native: true + Limit Vectorization: + className: VectorLimitOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: CastLongToChar(col 0, maxLength 12) -> 1:Char + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-2 - Dependency Collection Stage: Stage-0 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.char_3 Stage: Stage-3 - Stats-Aggr Operator PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out index f4634d3..944d38d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out @@ -1,15 +1,19 @@ -PREHOOK: query: EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc WHERE (cdouble IS NULL) ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -17,53 +21,62 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 1045942 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cdouble is null (type: boolean) - Statistics: Num rows: 3114 Data size: 265164 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring1 (type: string), cint (type: int), cfloat (type: float), csmallint (type: smallint), COALESCE(null,cstring1,cint,cfloat,csmallint) (type: string) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string) - sort order: +++++ - Statistics: Num rows: 3114 Data size: 819540 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 5) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6, 2, 4, 1, 16] + selectExpressions: VectorCoalesce(columns [12, 6, 13, 14, 15])(children: ConstantVectorExpression(val null) -> 12:string, col 6, CastLongToString(col 2) -> 13:String, VectorUDFAdaptor(null(cfloat)) -> 14:string, CastLongToString(col 1) -> 15:String) -> 16:string + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: null (type: double), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: float), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3114 Data size: 246572 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 0, 1, 2, 3, 4] + selectExpressions: ConstantVectorExpression(val null) -> 5:double + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c FROM alltypesorc @@ -91,18 +104,22 @@ NULL NULL -738306196 -51.0 NULL -738306196 NULL NULL -819152895 8.0 NULL -819152895 NULL NULL -827212561 8.0 NULL -827212561 NULL NULL -949587513 11.0 NULL -949587513 -PREHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc WHERE (ctinyint IS NULL) ORDER BY ctinyint, cdouble, cint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -110,53 +127,62 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 146792 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ctinyint is null (type: boolean) - Statistics: Num rows: 3115 Data size: 37224 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cdouble (type: double), cint (type: int), COALESCE(null,(cdouble + log2(cint)),0) (type: double) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: double), _col2 (type: int), _col3 (type: double) - sort order: +++ - Statistics: Num rows: 3115 Data size: 52844 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 0) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 2, 15] + selectExpressions: VectorCoalesce(columns [12, 14, 13])(children: ConstantVectorExpression(val null) -> 12:double, DoubleColAddDoubleColumn(col 5, col 13)(children: FuncLog2LongToDouble(col 2) -> 13:double) -> 14:double, ConstantVectorExpression(val 0.0) -> 13:double) -> 15:double + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: null (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3115 Data size: 27928 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 3:tinyint + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctinyint, cdouble, cint, coalesce(ctinyint+10, (cdouble+log2(cint)), 0) as c FROM alltypesorc @@ -184,18 +210,22 @@ NULL NULL -850295959 0.0 NULL NULL -886426182 0.0 NULL NULL -899422227 0.0 NULL NULL -971543377 0.0 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -203,50 +233,61 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: null (type: float), null (type: bigint), 0.0 (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 0:float, ConstantVectorExpression(val null) -> 1:bigint, ConstantVectorExpression(val 0.0) -> 2:double + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint, 0) as c FROM alltypesorc @@ -274,18 +315,22 @@ NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 NULL NULL 0.0 -PREHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc WHERE ctimestamp1 IS NOT NULL OR ctimestamp2 IS NOT NULL ORDER BY ctimestamp1, ctimestamp2, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -293,53 +338,61 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (ctimestamp1 is not null or ctimestamp2 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), COALESCE(ctimestamp1,ctimestamp2) (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: timestamp) - sort order: +++ - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: SelectColumnIsNotNull(col 8) -> boolean, SelectColumnIsNotNull(col 9) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorCoalesce(columns [8, 9])(children: col 8, col 9) -> 12:timestamp + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: timestamp), KEY.reducesinkkey2 (type: timestamp) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT ctimestamp1, ctimestamp2, coalesce(ctimestamp1, ctimestamp2) as c FROM alltypesorc @@ -367,18 +420,22 @@ NULL 1969-12-31 15:59:43.684 1969-12-31 15:59:43.684 NULL 1969-12-31 15:59:43.703 1969-12-31 15:59:43.703 NULL 1969-12-31 15:59:43.704 1969-12-31 15:59:43.704 NULL 1969-12-31 15:59:43.709 1969-12-31 15:59:43.709 -PREHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc WHERE (cfloat IS NULL AND cbigint IS NULL) ORDER BY cfloat, cbigint, c LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -386,50 +443,61 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez -#### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 790 Data size: 7092 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 4) -> boolean, SelectColumnIsNull(col 3) -> boolean) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: - Select Operator - expressions: null (type: float), null (type: bigint), null (type: float) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + selectExpressions: ConstantVectorExpression(val null) -> 0:float, ConstantVectorExpression(val null) -> 1:bigint, ConstantVectorExpression(val null) -> 2:float + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - ListSink PREHOOK: query: SELECT cfloat, cbigint, coalesce(cfloat, cbigint) as c FROM alltypesorc @@ -457,34 +525,61 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -PREHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc WHERE cbigint IS NULL LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNull(col 3) -> boolean + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 0, 14] + selectExpressions: ConstantVectorExpression(val null) -> 12:bigint, VectorCoalesce(columns [13, 0])(children: ConstantVectorExpression(val null) -> 13:tinyint, col 0) -> 14:tinyint + Limit Vectorization: + className: VectorLimitOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator - limit: 10 - Processor Tree: - TableScan - alias: alltypesorc - Filter Operator - predicate: cbigint is null (type: boolean) - Select Operator - expressions: null (type: bigint), ctinyint (type: tinyint), COALESCE(null,ctinyint) (type: tinyint) - outputColumnNames: _col0, _col1, _col2 - Limit - Number of rows: 10 - ListSink PREHOOK: query: SELECT cbigint, ctinyint, coalesce(cbigint, ctinyint) as c FROM alltypesorc http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index 18f45ff..6abf92d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -14,18 +14,22 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@str_str_orc POSTHOOK: Lineage: str_str_orc.str1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: str_str_orc.str2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -49,6 +53,10 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -66,6 +74,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -104,14 +116,18 @@ POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### X 0.02 y 0.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -160,18 +176,22 @@ POSTHOOK: Input: default@str_str_orc 0 1 0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT str2, ROUND(sum(cast(COALESCE(str1, 0) as int))/60, 2) as result from str_str_orc GROUP BY str2 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -189,12 +209,27 @@ STAGE PLANS: TableScan alias: str_str_orc Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 4] + selectExpressions: VectorUDFAdaptor(UDFToInteger(COALESCE(str1,0)))(children: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string) -> 4:int Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0] keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -203,15 +238,41 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -219,9 +280,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 2)(children: DoubleColDivideDoubleScalar(col 2, val 60.0)(children: CastLongToDouble(col 1) -> 2:double) -> 3:double) -> 2:double Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -250,14 +319,18 @@ POSTHOOK: Input: default@str_str_orc #### A masked pattern was here #### X 0.02 y 0.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COALESCE(str1, 0) as result from str_str_orc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -272,12 +345,23 @@ STAGE PLANS: TableScan alias: str_str_orc Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: COALESCE(str1,0) (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + selectExpressions: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -285,6 +369,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index 89507ee..5ea4b0f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -17,13 +17,17 @@ POSTHOOK: Output: default@test POSTHOOK: Lineage: test.a SIMPLE [] POSTHOOK: Lineage: test.b EXPRESSION [] _c0 _c1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from alltypesorc join test where alltypesorc.cint=test.a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from alltypesorc join test where alltypesorc.cint=test.a POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -67,6 +71,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Small Table expression for MAPJOIN operator: Data type map<int,string> of Column[_col1] not supported + vectorized: false Map 2 Map Operator Tree: TableScan @@ -87,6 +97,12 @@ STAGE PLANS: value expressions: _col1 (type: map<int,string>) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type map<int,string> of Column[b] not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -140,13 +156,17 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@test2b POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select * from test2b join test2a on test2b.a = test2a.a[1] PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select * from test2b join test2a on test2b.a = test2a.a[1] POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -190,6 +210,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Small Table expression for MAPJOIN operator: Data type array<int> of Column[a] not supported + vectorized: false Map 2 Map Operator Tree: TableScan @@ -206,6 +232,12 @@ STAGE PLANS: value expressions: a (type: array<int>) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Predicate expression for FILTER operator: Data type array<int> of Column[a] not supported + vectorized: false Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_const.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_const.q.out b/ql/src/test/results/clientpositive/llap/vector_const.q.out new file mode 100644 index 0000000..f7b958e --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_const.q.out @@ -0,0 +1,64 @@ +PREHOOK: query: CREATE TEMPORARY TABLE varchar_const_1 (c1 int) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_const_1 +POSTHOOK: query: CREATE TEMPORARY TABLE varchar_const_1 (c1 int) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_const_1 +PREHOOK: query: INSERT INTO varchar_const_1 values(42) +PREHOOK: type: QUERY +PREHOOK: Output: default@varchar_const_1 +POSTHOOK: query: INSERT INTO varchar_const_1 values(42) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@varchar_const_1 +POSTHOOK: Lineage: varchar_const_1.c1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN +SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: varchar_const_1 + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'FF' (type: varchar(4)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_const_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CONCAT(CAST('F' AS CHAR(2)), CAST('F' AS VARCHAR(2))) FROM VARCHAR_CONST_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_const_1 +#### A masked pattern was here #### +FF
