Repository: hive Updated Branches: refs/heads/master 24f7d2473 -> beccce398
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out index 02aa87a..cd178cf 100644 --- a/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out @@ -458,7 +458,7 @@ Storage Desc Params: serialization.format 1 PREHOOK: query: merge into t as t using upd_t as u ON t.a = u.a WHEN MATCHED THEN DELETE -WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b) +WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b) PREHOOK: type: QUERY PREHOOK: Input: default@t PREHOOK: Input: default@upd_t @@ -467,7 +467,7 @@ PREHOOK: Output: default@t PREHOOK: Output: default@t POSTHOOK: query: merge into t as t using upd_t as u ON t.a = u.a WHEN MATCHED THEN DELETE -WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b) +WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b) POSTHOOK: type: QUERY POSTHOOK: Input: default@t POSTHOOK: Input: default@upd_t @@ -522,3 +522,2064 @@ Bucket Columns: [a] Sort Columns: [] Storage Desc Params: serialization.format 1 +PREHOOK: query: create table t2(a int, b int, c int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2(a int, b int, c int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: create table upd_t2_1(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@upd_t2_1 +POSTHOOK: query: create table upd_t2_1(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@upd_t2_1 +PREHOOK: query: create table upd_t2_2(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@upd_t2_2 +POSTHOOK: query: create table upd_t2_2(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@upd_t2_2 +PREHOOK: query: create table upd_t2_3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@upd_t2_3 +POSTHOOK: query: create table upd_t2_3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@upd_t2_3 +PREHOOK: query: create table upd_t2_4(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@upd_t2_4 +POSTHOOK: query: create table upd_t2_4(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@upd_t2_4 +PREHOOK: query: desc formatted t2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@t2 +POSTHOOK: query: desc formatted t2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@t2 +# col_name data_type comment +a int +b int +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} + bucketing_version 2 + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 + transactional true + transactional_properties default +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [a] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 + +# Constraints + +# Default Constraints +Table: default.t2 +Constraint Name: #### A masked pattern was here #### +Column Name:c Default Value:1 + +PREHOOK: query: insert into t2 (a, b) values (1,1), (3,3), (5,5), (7,7) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t2 +POSTHOOK: query: insert into t2 (a, b) values (1,1), (3,3), (5,5), (7,7) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.a SCRIPT [] +POSTHOOK: Lineage: t2.b SCRIPT [] +POSTHOOK: Lineage: t2.c SIMPLE [] +PREHOOK: query: insert into upd_t2_1 values (1,1),(2,2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@upd_t2_1 +POSTHOOK: query: insert into upd_t2_1 values (1,1),(2,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@upd_t2_1 +POSTHOOK: Lineage: upd_t2_1.a SCRIPT [] +POSTHOOK: Lineage: upd_t2_1.b SCRIPT [] +PREHOOK: query: insert into upd_t2_2 values (3,3),(4,4) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@upd_t2_2 +POSTHOOK: query: insert into upd_t2_2 values (3,3),(4,4) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@upd_t2_2 +POSTHOOK: Lineage: upd_t2_2.a SCRIPT [] +POSTHOOK: Lineage: upd_t2_2.b SCRIPT [] +PREHOOK: query: insert into upd_t2_3 values (5,5),(6,6) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@upd_t2_3 +POSTHOOK: query: insert into upd_t2_3 values (5,5),(6,6) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@upd_t2_3 +POSTHOOK: Lineage: upd_t2_3.a SCRIPT [] +POSTHOOK: Lineage: upd_t2_3.b SCRIPT [] +PREHOOK: query: insert into upd_t2_4 values (7,7),(8,8) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@upd_t2_4 +POSTHOOK: query: insert into upd_t2_4 values (7,7),(8,8) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@upd_t2_4 +POSTHOOK: Lineage: upd_t2_4.a SCRIPT [] +POSTHOOK: Lineage: upd_t2_4.b SCRIPT [] +PREHOOK: query: explain merge into t2 as t using upd_t2_1 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 99 +WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default) +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@upd_t2_1 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: explain merge into t2 as t using upd_t2_1 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 99 +WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@upd_t2_1 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t2 +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-3 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c (type: int), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 7 + Map Operator Tree: + TableScan + alias: u + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: b (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 a (type: int) + 1 a (type: int) + outputColumnNames: _col0, _col2, _col5, _col6, _col7 + Statistics: Num rows: 4 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 = _col6) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col3 (type: int) + Filter Operator + predicate: (_col0 = _col6) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col5 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col6 (type: int), _col7 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 99 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + outputColumnNames: a, b, c + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE + + Stage: Stage-5 + Stats Work + Basic Stats Work: + + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: INSERT + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b, c + Column Types: int, int, int + Table: default.t2 + +PREHOOK: query: merge into t2 as t using upd_t2_1 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 99 +WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default) +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@upd_t2_1 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: merge into t2 as t using upd_t2_1 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 99 +WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@upd_t2_1 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(t2)t.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ] +POSTHOOK: Lineage: t2.a SIMPLE [(upd_t2_1)u.FieldSchema(name:a, type:int, comment:null), ] +POSTHOOK: Lineage: t2.b SIMPLE [(upd_t2_1)u.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: t2.c SIMPLE [] +PREHOOK: query: explain merge into t2 as t using upd_t2_2 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 98 +WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@upd_t2_2 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: explain merge into t2 as t using upd_t2_2 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 98 +WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@upd_t2_2 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t2 +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-3 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c (type: int), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 7 + Map Operator Tree: + TableScan + alias: u + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: b (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 a (type: int) + 1 a (type: int) + outputColumnNames: _col0, _col2, _col5, _col6, _col7 + Statistics: Num rows: 4 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 = _col6) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col3 (type: int) + Filter Operator + predicate: (_col0 = _col6) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col5 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col6 (type: int), _col7 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 98 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + outputColumnNames: a, b, c + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE + + Stage: Stage-5 + Stats Work + Basic Stats Work: + + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: INSERT + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b, c + Column Types: int, int, int + Table: default.t2 + +PREHOOK: query: merge into t2 as t using upd_t2_2 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 98 +WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@upd_t2_2 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: merge into t2 as t using upd_t2_2 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 98 +WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@upd_t2_2 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(t2)t.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ] +POSTHOOK: Lineage: t2.a SIMPLE [(upd_t2_2)u.FieldSchema(name:a, type:int, comment:null), ] +POSTHOOK: Lineage: t2.b SIMPLE [(upd_t2_2)u.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: t2.c SIMPLE [] +PREHOOK: query: explain merge into t2 as t using upd_t2_3 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 97 +WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default) +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@upd_t2_3 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: explain merge into t2 as t using upd_t2_3 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 97 +WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@upd_t2_3 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t2 +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-3 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c (type: int), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 7 + Map Operator Tree: + TableScan + alias: u + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: b (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 a (type: int) + 1 a (type: int) + outputColumnNames: _col0, _col2, _col5, _col6, _col7 + Statistics: Num rows: 5 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 = _col6) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col3 (type: int) + Filter Operator + predicate: (_col0 = _col6) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col5 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col6 (type: int), _col7 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 97 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + outputColumnNames: a, b, c + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE + + Stage: Stage-5 + Stats Work + Basic Stats Work: + + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: INSERT + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b, c + Column Types: int, int, int + Table: default.t2 + +PREHOOK: query: merge into t2 as t using upd_t2_3 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 97 +WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default) +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@upd_t2_3 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: merge into t2 as t using upd_t2_3 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 97 +WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@upd_t2_3 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(t2)t.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ] +POSTHOOK: Lineage: t2.a SIMPLE [(upd_t2_3)u.FieldSchema(name:a, type:int, comment:null), ] +POSTHOOK: Lineage: t2.b SIMPLE [(upd_t2_3)u.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: t2.c SIMPLE [] +PREHOOK: query: explain merge into t2 as t using upd_t2_4 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 96 +WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a) +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@upd_t2_4 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: explain merge into t2 as t using upd_t2_4 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 96 +WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@upd_t2_4 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t2 +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-3 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: c (type: int), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 7 + Map Operator Tree: + TableScan + alias: u + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: b (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 a (type: int) + 1 a (type: int) + outputColumnNames: _col0, _col2, _col5, _col6, _col7 + Statistics: Num rows: 5 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 = _col6) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col3 (type: int) + Filter Operator + predicate: (_col0 = _col6) (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col5 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col6 (type: int), _col7 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 96 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + outputColumnNames: a, b, c + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: UPDATE + + Stage: Stage-5 + Stats Work + Basic Stats Work: + + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t2 + Write Type: INSERT + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b, c + Column Types: int, int, int + Table: default.t2 + +PREHOOK: query: merge into t2 as t using upd_t2_4 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 96 +WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a) +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@upd_t2_4 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: merge into t2 as t using upd_t2_4 as u ON t.a = u.a +WHEN MATCHED THEN UPDATE SET b = 96 +WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@upd_t2_4 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(t2)t.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ] +POSTHOOK: Lineage: t2.a SIMPLE [(upd_t2_4)u.FieldSchema(name:a, type:int, comment:null), ] +POSTHOOK: Lineage: t2.b SIMPLE [(upd_t2_4)u.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: t2.c SIMPLE [] +PREHOOK: query: select * from t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 2 1 +1 99 1 +4 4 1 +3 98 1 +6 6 1 +5 97 1 +8 8 1 +7 96 1 +PREHOOK: query: create table t3(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t3 +POSTHOOK: query: create table t3(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t3 +PREHOOK: query: create table upd_t3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@upd_t3 +POSTHOOK: query: create table upd_t3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@upd_t3 +PREHOOK: query: insert into t3 values (1,2), (2,4) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t3 +POSTHOOK: query: insert into t3 values (1,2), (2,4) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t3 +POSTHOOK: Lineage: t3.a SCRIPT [] +POSTHOOK: Lineage: t3.b SCRIPT [] +PREHOOK: query: insert into upd_t3 values (1,3), (3,5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@upd_t3 +POSTHOOK: query: insert into upd_t3 values (1,3), (3,5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@upd_t3 +POSTHOOK: Lineage: upd_t3.a SCRIPT [] +POSTHOOK: Lineage: upd_t3.b SCRIPT [] +PREHOOK: query: explain merge into t3 as t using upd_t3 as u ON t.a = u.a +WHEN MATCHED THEN DELETE +WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +PREHOOK: Input: default@upd_t3 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@t3 +PREHOOK: Output: default@t3 +POSTHOOK: query: explain merge into t3 as t using upd_t3 as u ON t.a = u.a +WHEN MATCHED THEN DELETE +WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@upd_t3 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@t3 +POSTHOOK: Output: default@t3 +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-3 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 7 + Map Operator Tree: + TableScan + alias: u + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: b (type: int) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 a (type: int) + 1 a (type: int) + outputColumnNames: _col0, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 = _col5) (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 = _col5) (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col4 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col6 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t3 + Write Type: DELETE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t3 + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), 1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t3 + Write Type: DELETE + + Stage: Stage-5 + Stats Work + Basic Stats Work: + + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.t3 + Write Type: INSERT + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.t3 + +PREHOOK: query: merge into t3 as t using upd_t3 as u ON t.a = u.a +WHEN MATCHED THEN DELETE +WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +PREHOOK: Input: default@upd_t3 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@t3 +PREHOOK: Output: default@t3 +POSTHOOK: query: merge into t3 as t using upd_t3 as u ON t.a = u.a +WHEN MATCHED THEN DELETE +WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@upd_t3 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@t3 +POSTHOOK: Output: default@t3 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(t3)t.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ] +POSTHOOK: Lineage: t3.a SIMPLE [(upd_t3)u.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: t3.b SIMPLE [] +PREHOOK: query: select * from t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 4 +5 1 +PREHOOK: query: create table t4(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t4 +POSTHOOK: query: create table t4(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t4 +PREHOOK: query: create table upd_t4(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@upd_t4 +POSTHOOK: query: create table upd_t4(a int, b int) cl <TRUNCATED>
