Repository: hive Updated Branches: refs/heads/hive-14535 e3fb11671 -> 7a40541f5
HIVE-17013: Delete request with a subquery based on select over a view (Eugene Koifman, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f772ba3c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f772ba3c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f772ba3c Branch: refs/heads/hive-14535 Commit: f772ba3cdcc5e039c8ba3af4ca6215108483ac0e Parents: d2d9170 Author: Eugene Koifman <ekoif...@apache.org> Authored: Sun Sep 17 00:33:52 2017 -0500 Committer: Matt McCline <mmccl...@hortonworks.com> Committed: Sun Sep 17 00:33:52 2017 -0500 ---------------------------------------------------------------------- .../java/org/apache/hadoop/hive/ql/Context.java | 12 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +- .../queries/clientpositive/acid_view_delete.q | 35 +++++ .../clientpositive/acid_view_delete.q.out | 141 +++++++++++++++++++ 4 files changed, 188 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f772ba3c/ql/src/java/org/apache/hadoop/hive/ql/Context.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index f04aed4..d6046d1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -56,6 +56,7 @@ import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.QB; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; @@ -169,8 +170,17 @@ public class Context { /** * The suffix is always relative to a given ASTNode */ - public DestClausePrefix getDestNamePrefix(ASTNode curNode) { + public DestClausePrefix getDestNamePrefix(ASTNode curNode, QB queryBlock) { assert curNode != null : "must supply curNode"; + if(queryBlock.isInsideView() || queryBlock.getParseInfo().getIsSubQ()) { + /** + * Views get inlined in the logical plan but not in the AST + * {@link org.apache.hadoop.hive.ql.parse.SemanticAnalyzer#replaceViewReferenceWithDefinition(QB, Table, String, String)} + * Since here we only care to identify clauses representing Update/Delete which are not + * possible inside a view/subquery, we can immediately return the default {@link DestClausePrefix.INSERT} + */ + return DestClausePrefix.INSERT; + } if(curNode.getType() != HiveParser.TOK_INSERT_INTO) { //select statement assert curNode.getType() == HiveParser.TOK_DESTINATION; http://git-wip-us.apache.org/repos/asf/hive/blob/f772ba3c/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1c74779..db29e3a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -1490,7 +1490,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { qbp.addInsertIntoTable(tab_name, ast); case HiveParser.TOK_DESTINATION: - ctx_1.dest = this.ctx.getDestNamePrefix(ast).toString() + ctx_1.nextNum; + ctx_1.dest = this.ctx.getDestNamePrefix(ast, qb).toString() + ctx_1.nextNum; ctx_1.nextNum++; boolean isTmpFileDest = false; if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) { http://git-wip-us.apache.org/repos/asf/hive/blob/f772ba3c/ql/src/test/queries/clientpositive/acid_view_delete.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/acid_view_delete.q b/ql/src/test/queries/clientpositive/acid_view_delete.q new file mode 100644 index 0000000..69f0aeb --- /dev/null +++ b/ql/src/test/queries/clientpositive/acid_view_delete.q @@ -0,0 +1,35 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.strict.checks.cartesian.product=false; +set hive.mapred.mode=nostrict; + +drop table if exists mydim; +drop table if exists updates_staging_table; +drop view if exists updates_staging_view; + +create table mydim (key int, name string, zip string, is_current boolean) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true'); + +insert into mydim values + (1, 'bob', '95136', true), + (2, 'joe', '70068', true), + (3, 'steve', '22150', true); + +create table updates_staging_table (ks int, newzip string); +insert into updates_staging_table values (1, 87102), (3, 45220); + +create view updates_staging_view (kv, newzip) as select ks, newzip from updates_staging_table; + +delete from mydim +where mydim.key in (select kv from updates_staging_view where kv >=3); +select * from mydim order by key; + +update mydim set is_current = false +where mydim.key not in(select kv from updates_staging_view); +select * from mydim order by key; + +update mydim set name = 'Olaf' +where mydim.key in(select kv from updates_staging_view); +select * from mydim order by key; http://git-wip-us.apache.org/repos/asf/hive/blob/f772ba3c/ql/src/test/results/clientpositive/acid_view_delete.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/acid_view_delete.q.out b/ql/src/test/results/clientpositive/acid_view_delete.q.out new file mode 100644 index 0000000..71b42e1 --- /dev/null +++ b/ql/src/test/results/clientpositive/acid_view_delete.q.out @@ -0,0 +1,141 @@ +PREHOOK: query: drop table if exists mydim +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists mydim +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists updates_staging_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists updates_staging_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop view if exists updates_staging_view +PREHOOK: type: DROPVIEW +POSTHOOK: query: drop view if exists updates_staging_view +POSTHOOK: type: DROPVIEW +PREHOOK: query: create table mydim (key int, name string, zip string, is_current boolean) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@mydim +POSTHOOK: query: create table mydim (key int, name string, zip string, is_current boolean) +clustered by(key) into 3 buckets +stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mydim +PREHOOK: query: insert into mydim values + (1, 'bob', '95136', true), + (2, 'joe', '70068', true), + (3, 'steve', '22150', true) +PREHOOK: type: QUERY +PREHOOK: Output: default@mydim +POSTHOOK: query: insert into mydim values + (1, 'bob', '95136', true), + (2, 'joe', '70068', true), + (3, 'steve', '22150', true) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@mydim +POSTHOOK: Lineage: mydim.is_current EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: mydim.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: mydim.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: mydim.zip SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: create table updates_staging_table (ks int, newzip string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@updates_staging_table +POSTHOOK: query: create table updates_staging_table (ks int, newzip string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@updates_staging_table +PREHOOK: query: insert into updates_staging_table values (1, 87102), (3, 45220) +PREHOOK: type: QUERY +PREHOOK: Output: default@updates_staging_table +POSTHOOK: query: insert into updates_staging_table values (1, 87102), (3, 45220) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@updates_staging_table +POSTHOOK: Lineage: updates_staging_table.ks EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: updates_staging_table.newzip SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: create view updates_staging_view (kv, newzip) as select ks, newzip from updates_staging_table +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@updates_staging_table +PREHOOK: Output: database:default +PREHOOK: Output: default@updates_staging_view +POSTHOOK: query: create view updates_staging_view (kv, newzip) as select ks, newzip from updates_staging_table +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@updates_staging_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@updates_staging_view +POSTHOOK: Lineage: updates_staging_view.kv SIMPLE [(updates_staging_table)updates_staging_table.FieldSchema(name:ks, type:int, comment:null), ] +POSTHOOK: Lineage: updates_staging_view.newzip SIMPLE [(updates_staging_table)updates_staging_table.FieldSchema(name:newzip, type:string, comment:null), ] +PREHOOK: query: delete from mydim +where mydim.key in (select kv from updates_staging_view where kv >=3) +PREHOOK: type: QUERY +PREHOOK: Input: default@mydim +PREHOOK: Input: default@updates_staging_table +PREHOOK: Input: default@updates_staging_view +PREHOOK: Output: default@mydim +POSTHOOK: query: delete from mydim +where mydim.key in (select kv from updates_staging_view where kv >=3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mydim +POSTHOOK: Input: default@updates_staging_table +POSTHOOK: Input: default@updates_staging_view +POSTHOOK: Output: default@mydim +PREHOOK: query: select * from mydim order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@mydim +#### A masked pattern was here #### +POSTHOOK: query: select * from mydim order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mydim +#### A masked pattern was here #### +1 bob 95136 true +2 joe 70068 true +Warning: Shuffle Join JOIN[20][tables = [mydim, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: update mydim set is_current = false +where mydim.key not in(select kv from updates_staging_view) +PREHOOK: type: QUERY +PREHOOK: Input: default@mydim +PREHOOK: Input: default@updates_staging_table +PREHOOK: Input: default@updates_staging_view +PREHOOK: Output: default@mydim +POSTHOOK: query: update mydim set is_current = false +where mydim.key not in(select kv from updates_staging_view) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mydim +POSTHOOK: Input: default@updates_staging_table +POSTHOOK: Input: default@updates_staging_view +POSTHOOK: Output: default@mydim +PREHOOK: query: select * from mydim order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@mydim +#### A masked pattern was here #### +POSTHOOK: query: select * from mydim order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mydim +#### A masked pattern was here #### +1 bob 95136 true +2 joe 70068 false +PREHOOK: query: update mydim set name = 'Olaf' +where mydim.key in(select kv from updates_staging_view) +PREHOOK: type: QUERY +PREHOOK: Input: default@mydim +PREHOOK: Input: default@updates_staging_table +PREHOOK: Input: default@updates_staging_view +PREHOOK: Output: default@mydim +POSTHOOK: query: update mydim set name = 'Olaf' +where mydim.key in(select kv from updates_staging_view) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mydim +POSTHOOK: Input: default@updates_staging_table +POSTHOOK: Input: default@updates_staging_view +POSTHOOK: Output: default@mydim +PREHOOK: query: select * from mydim order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@mydim +#### A masked pattern was here #### +POSTHOOK: query: select * from mydim order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mydim +#### A masked pattern was here #### +1 Olaf 95136 true +2 joe 70068 false