Repository: hive Updated Branches: refs/heads/master 959e77257 -> e7c15d234
HIVE-19129: Support DEFAULT keyword with MERGE(Vineet Garg, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e7c15d23 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e7c15d23 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e7c15d23 Branch: refs/heads/master Commit: e7c15d2348602b6890aff24a088c2b04fb46af8a Parents: 959e772 Author: Vineet Garg <vg...@apache.org> Authored: Mon Apr 9 10:51:46 2018 -0700 Committer: Vineet Garg <vg...@apache.org> Committed: Mon Apr 9 10:51:46 2018 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 26 +- .../ql/parse/UpdateDeleteSemanticAnalyzer.java | 1 + .../insert_into_default_keyword.q | 49 ++ .../llap/insert_into_default_keyword.q.out | 787 +++++++++++++++++++ 4 files changed, 861 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/e7c15d23/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index ff0a2e6..3b74aba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -665,13 +665,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { /** * This method creates a list of default constraints which corresponds to - * given schema (taretSchema) or target table's column schema (if targetSchema is null) + * given schema (targetSchema) or target table's column schema (if targetSchema is null) * @param tbl * @param targetSchema * @return List of default constraints (including NULL if there is no default) * @throws 
SemanticException */ - private List<String> getDefaultConstraints(Table tbl, List<String> targetSchema) throws SemanticException{ + private static List<String> getDefaultConstraints(Table tbl, List<String> targetSchema) throws SemanticException{ Map<String, String> colNameToDefaultVal = null; try { DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(tbl.getDbName(), tbl.getTableName()); @@ -718,6 +718,28 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { return newNode; } + public static String replaceDefaultKeywordForMerge(String valueClause,Table targetTable) + throws SemanticException { + List<String> defaultConstraints = null; + String[] values = valueClause.trim().split(","); + StringBuilder newValueClause = new StringBuilder(); + for (int i = 0; i < values.length; i++) { + if (values[i].trim().toLowerCase().equals("`default`")) { + if (defaultConstraints == null) { + defaultConstraints = getDefaultConstraints(targetTable, null); + } + newValueClause.append(defaultConstraints.get(i)); + } + else { + newValueClause.append(values[i]); + } + if(i != values.length-1) { + newValueClause.append(","); + } + } + return newValueClause.toString(); + } + /** * This method replaces ASTNode corresponding to DEFAULT keyword with either DEFAULT constraint * expression if exists or NULL otherwise http://git-wip-us.apache.org/repos/asf/hive/blob/e7c15d23/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index a660747..0effd92 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -1101,6 +1101,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer { 
List<FieldSchema> partCols = targetTable.getPartCols(); String valuesClause = getMatchedText((ASTNode)getWhenClauseOperation(whenNotMatchedClause).getChild(0)); valuesClause = valuesClause.substring(1, valuesClause.length() - 1);//strip '(' and ')' + valuesClause = SemanticAnalyzer.replaceDefaultKeywordForMerge(valuesClause, targetTable); rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target)); addPartitionColsToInsert(partCols, rewrittenQueryStr); http://git-wip-us.apache.org/repos/asf/hive/blob/e7c15d23/ql/src/test/queries/clientpositive/insert_into_default_keyword.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/insert_into_default_keyword.q b/ql/src/test/queries/clientpositive/insert_into_default_keyword.q index 14f91fe..2e92e91 100644 --- a/ql/src/test/queries/clientpositive/insert_into_default_keyword.q +++ b/ql/src/test/queries/clientpositive/insert_into_default_keyword.q @@ -114,3 +114,52 @@ INSERT INTO tpart partition(ds='1')(i,j) values(10, DEFAULT); SELECT * FROM tpart; TRUNCATE table tpart; DROP TABLE tpart; + +-- MERGE +set hive.mapred.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table nonacid (key int, a1 string, value string) stored as orc; +insert into nonacid values(1, 'a11', 'val1'); +insert into nonacid values(2, 'a12', 'val2'); + +create table acidTable(key int NOT NULL enable, a1 string DEFAULT 'a1', value string) +clustered by (value) into 2 buckets stored as orc +tblproperties ("transactional"="true"); +insert into acidTable values(1, 'a10','val100'); + +-- only insert +explain MERGE INTO acidTable as t using nonacid as s ON t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (s.key, DEFAULT, DEFAULT); + +MERGE INTO acidTable as t using nonacid as s ON t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (s.key, DEFAULT, DEFAULT); +select * from acidTable; 
+truncate table acidTable; +insert into acidTable values(1, 'a10','val100'); + +-- insert + update + delete +explain MERGE INTO acidTable as t using nonacid as s ON t.key = s.key +WHEN MATCHED AND s.key < 3 THEN DELETE +WHEN MATCHED AND s.key > 3 THEN UPDATE set a1 = DEFAULT +WHEN NOT MATCHED THEN INSERT VALUES (s.key, s.a1, DEFAULT); +MERGE INTO acidTable as t using nonacid as s ON t.key = s.key +WHEN MATCHED AND s.key < 3 THEN DELETE +WHEN MATCHED AND s.key > 3 THEN UPDATE set a1 = DEFAULT +WHEN NOT MATCHED THEN INSERT VALUES (s.key, s.a1, DEFAULT); +select * from acidTable; +truncate table acidTable; + +create table acidTable2(key int DEFAULT 404) clustered by (key) into 2 buckets stored as orc +tblproperties ("transactional"="true"); + +explain MERGE INTO acidTable2 as t using nonacid as s ON t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (DEFAULT); +MERGE INTO acidTable2 as t using nonacid as s ON t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (DEFAULT); +select * from acidTable2; + +DROP TABLE acidTable; +DROP TABLE acidTable2; +DROP TABLE nonacid; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/e7c15d23/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out b/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out index a8d8fd3..addef62 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out @@ -2289,3 +2289,790 @@ POSTHOOK: query: DROP TABLE tpart POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tpart POSTHOOK: Output: default@tpart +PREHOOK: query: create table nonacid (key int, a1 string, value string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nonacid 
+POSTHOOK: query: create table nonacid (key int, a1 string, value string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nonacid +PREHOOK: query: insert into nonacid values(1, 'a11', 'val1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@nonacid +POSTHOOK: query: insert into nonacid values(1, 'a11', 'val1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@nonacid +POSTHOOK: Lineage: nonacid.a1 SCRIPT [] +POSTHOOK: Lineage: nonacid.key SCRIPT [] +POSTHOOK: Lineage: nonacid.value SCRIPT [] +PREHOOK: query: insert into nonacid values(2, 'a12', 'val2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@nonacid +POSTHOOK: query: insert into nonacid values(2, 'a12', 'val2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@nonacid +POSTHOOK: Lineage: nonacid.a1 SCRIPT [] +POSTHOOK: Lineage: nonacid.key SCRIPT [] +POSTHOOK: Lineage: nonacid.value SCRIPT [] +PREHOOK: query: create table acidTable(key int NOT NULL enable, a1 string DEFAULT 'a1', value string) +clustered by (value) into 2 buckets stored as orc +tblproperties ("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acidTable +POSTHOOK: query: create table acidTable(key int NOT NULL enable, a1 string DEFAULT 'a1', value string) +clustered by (value) into 2 buckets stored as orc +tblproperties ("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acidTable +PREHOOK: query: insert into acidTable values(1, 'a10','val100') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@acidtable +POSTHOOK: query: insert into acidTable values(1, 'a10','val100') +POSTHOOK: type: QUERY +POSTHOOK: Input: 
_dummy_database@_dummy_table +POSTHOOK: Output: default@acidtable +POSTHOOK: Lineage: acidtable.a1 SCRIPT [] +POSTHOOK: Lineage: acidtable.key SCRIPT [] +POSTHOOK: Lineage: acidtable.value SCRIPT [] +PREHOOK: query: explain MERGE INTO acidTable as t using nonacid as s ON t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (s.key, DEFAULT, DEFAULT) +PREHOOK: type: QUERY +POSTHOOK: query: explain MERGE INTO acidTable as t using nonacid as s ON t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (s.key, DEFAULT, DEFAULT) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 19 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: enforce_constraint(key is not null) (type: boolean) + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: enforce_constraint(key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 
Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col6 + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: null (type: string) + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), 'a1' (type: string), null (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), 'a1' (type: string), null (type: string) + outputColumnNames: key, a1, value + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable + Write Type: INSERT + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, a1, value + Column Types: int, string, string + Table: default.acidtable + +PREHOOK: query: MERGE INTO acidTable as t using nonacid as s ON t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (s.key, DEFAULT, DEFAULT) +PREHOOK: type: QUERY +PREHOOK: Input: default@acidtable +PREHOOK: Input: default@nonacid +PREHOOK: Output: default@acidtable +POSTHOOK: query: MERGE INTO acidTable as t using nonacid as s ON 
t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (s.key, DEFAULT, DEFAULT) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidtable +POSTHOOK: Input: default@nonacid +POSTHOOK: Output: default@acidtable +POSTHOOK: Lineage: acidtable.a1 SIMPLE [] +POSTHOOK: Lineage: acidtable.key SIMPLE [(nonacid)s.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: acidtable.value EXPRESSION [] +PREHOOK: query: select * from acidTable +PREHOOK: type: QUERY +PREHOOK: Input: default@acidtable +#### A masked pattern was here #### +POSTHOOK: query: select * from acidTable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidtable +#### A masked pattern was here #### +1 a10 val100 +2 a1 NULL +PREHOOK: query: truncate table acidTable +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@acidtable +POSTHOOK: query: truncate table acidTable +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@acidtable +PREHOOK: query: insert into acidTable values(1, 'a10','val100') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@acidtable +POSTHOOK: query: insert into acidTable values(1, 'a10','val100') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@acidtable +POSTHOOK: Lineage: acidtable.a1 SCRIPT [] +POSTHOOK: Lineage: acidtable.key SCRIPT [] +POSTHOOK: Lineage: acidtable.value SCRIPT [] +PREHOOK: query: explain MERGE INTO acidTable as t using nonacid as s ON t.key = s.key +WHEN MATCHED AND s.key < 3 THEN DELETE +WHEN MATCHED AND s.key > 3 THEN UPDATE set a1 = DEFAULT +WHEN NOT MATCHED THEN INSERT VALUES (s.key, s.a1, DEFAULT) +PREHOOK: type: QUERY +POSTHOOK: query: explain MERGE INTO acidTable as t using nonacid as s ON t.key = s.key +WHEN MATCHED AND s.key < 3 THEN DELETE +WHEN MATCHED AND s.key > 3 THEN UPDATE set a1 = DEFAULT +WHEN NOT MATCHED THEN INSERT VALUES (s.key, s.a1, DEFAULT) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-5 depends on 
stages: Stage-4 + Stage-0 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-5 + Stage-9 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-4 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 8 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: a1 (type: string) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col2, _col5, _col6, _col7 + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: 
((_col0 = _col6) and (_col6 < 3)) (type: boolean) + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 = _col6) and (_col6 > 3) and (_col6 >= 3) and enforce_constraint(_col0 is not null)) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: string) + Filter Operator + predicate: (_col0 = _col6) (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col5 + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: (_col0 is null and enforce_constraint(_col6 is not null)) (type: boolean) + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: null (type: string) + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable + Write Type: DELETE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 'a1' (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable + Write Type: UPDATE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), null (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: string), null (type: string) + outputColumnNames: key, a1, value + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(a1, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable + Write Type: DELETE + + Stage: Stage-6 + Stats Work + Basic Stats Work: + + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable + Write Type: UPDATE + + Stage: Stage-7 + Stats Work + Basic Stats Work: + + Stage: Stage-3 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-8 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable + Write Type: INSERT + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, a1, value + Column Types: int, string, string + Table: default.acidtable + +PREHOOK: query: MERGE INTO acidTable as t using nonacid as s ON t.key = s.key +WHEN MATCHED AND s.key < 3 THEN DELETE +WHEN MATCHED AND s.key > 3 THEN UPDATE set a1 = DEFAULT +WHEN NOT MATCHED THEN INSERT VALUES (s.key, s.a1, DEFAULT) +PREHOOK: type: QUERY +PREHOOK: Input: default@acidtable +PREHOOK: Input: default@nonacid +PREHOOK: Output: default@acidtable +PREHOOK: Output: default@acidtable +PREHOOK: Output: default@acidtable +PREHOOK: Output: default@merge_tmp_table +POSTHOOK: query: MERGE INTO acidTable as t using nonacid as s ON t.key = s.key +WHEN MATCHED AND s.key < 3 THEN DELETE +WHEN MATCHED AND s.key > 3 THEN UPDATE set a1 = DEFAULT +WHEN NOT MATCHED THEN INSERT VALUES (s.key, s.a1, DEFAULT) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidtable +POSTHOOK: Input: default@nonacid +POSTHOOK: Output: default@acidtable +POSTHOOK: Output: default@acidtable +POSTHOOK: Output: default@acidtable +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Lineage: acidtable.a1 SIMPLE [(nonacid)s.FieldSchema(name:a1, type:string, comment:null), ] +POSTHOOK: Lineage: acidtable.key SIMPLE [(nonacid)s.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: acidtable.value EXPRESSION [] +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(acidtable)t.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ] +PREHOOK: query: select * from acidTable +PREHOOK: type: QUERY +PREHOOK: Input: default@acidtable +#### A masked pattern was here #### +POSTHOOK: query: select * from acidTable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidtable +#### A masked pattern was here #### +2 a12 NULL +PREHOOK: query: truncate table acidTable +PREHOOK: type: TRUNCATETABLE +PREHOOK: 
Output: default@acidtable +POSTHOOK: query: truncate table acidTable +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@acidtable +PREHOOK: query: create table acidTable2(key int DEFAULT 404) clustered by (key) into 2 buckets stored as orc +tblproperties ("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acidTable2 +POSTHOOK: query: create table acidTable2(key int DEFAULT 404) clustered by (key) into 2 buckets stored as orc +tblproperties ("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acidTable2 +PREHOOK: query: explain MERGE INTO acidTable2 as t using nonacid as s ON t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (DEFAULT) +PREHOOK: type: QUERY +POSTHOOK: query: explain MERGE INTO acidTable2 as t using nonacid as s ON t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (DEFAULT) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: int) + sort 
order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: 404 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: 404 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable2 + Write Type: INSERT + Select Operator + expressions: 404 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtable2 + Write Type: INSERT + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.acidtable2 + +PREHOOK: query: MERGE INTO acidTable2 as t using nonacid as s ON t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (DEFAULT) +PREHOOK: type: QUERY +PREHOOK: Input: default@acidtable2 +PREHOOK: Input: default@nonacid +PREHOOK: Output: default@acidtable2 +POSTHOOK: query: MERGE INTO acidTable2 as t using nonacid as s ON t.key = s.key +WHEN NOT MATCHED THEN INSERT VALUES (DEFAULT) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidtable2 +POSTHOOK: Input: default@nonacid +POSTHOOK: Output: default@acidtable2 +POSTHOOK: Lineage: acidtable2.key SIMPLE [] +PREHOOK: query: select * from acidTable2 +PREHOOK: type: QUERY +PREHOOK: Input: default@acidtable2 +#### A masked pattern was here #### +POSTHOOK: query: select * from acidTable2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acidtable2 +#### A masked pattern was here #### +404 +404 
+PREHOOK: query: DROP TABLE acidTable +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@acidtable +PREHOOK: Output: default@acidtable +POSTHOOK: query: DROP TABLE acidTable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@acidtable +POSTHOOK: Output: default@acidtable +PREHOOK: query: DROP TABLE acidTable2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@acidtable2 +PREHOOK: Output: default@acidtable2 +POSTHOOK: query: DROP TABLE acidTable2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@acidtable2 +POSTHOOK: Output: default@acidtable2 +PREHOOK: query: DROP TABLE nonacid +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@nonacid +PREHOOK: Output: default@nonacid +POSTHOOK: query: DROP TABLE nonacid +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@nonacid +POSTHOOK: Output: default@nonacid