Repository: hive
Updated Branches:
  refs/heads/hive-14535 e3fb11671 -> 7a40541f5


HIVE-17013: Delete request with a subquery based on select over a view (Eugene 
Koifman, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f772ba3c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f772ba3c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f772ba3c

Branch: refs/heads/hive-14535
Commit: f772ba3cdcc5e039c8ba3af4ca6215108483ac0e
Parents: d2d9170
Author: Eugene Koifman <ekoif...@apache.org>
Authored: Sun Sep 17 00:33:52 2017 -0500
Committer: Matt McCline <mmccl...@hortonworks.com>
Committed: Sun Sep 17 00:33:52 2017 -0500

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hive/ql/Context.java |  12 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   2 +-
 .../queries/clientpositive/acid_view_delete.q   |  35 +++++
 .../clientpositive/acid_view_delete.q.out       | 141 +++++++++++++++++++
 4 files changed, 188 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f772ba3c/ql/src/java/org/apache/hadoop/hive/ql/Context.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java 
b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index f04aed4..d6046d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.ExplainConfiguration;
 import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
 import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.QB;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.shims.ShimLoader;
@@ -169,8 +170,17 @@ public class Context {
   /**
    * The suffix is always relative to a given ASTNode
    */
-  public DestClausePrefix getDestNamePrefix(ASTNode curNode) {
+  public DestClausePrefix getDestNamePrefix(ASTNode curNode, QB queryBlock) {
     assert curNode != null : "must supply curNode";
+    if(queryBlock.isInsideView() || queryBlock.getParseInfo().getIsSubQ()) {
+      /**
+       * Views get inlined in the logical plan but not in the AST
+       * {@link 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer#replaceViewReferenceWithDefinition(QB,
 Table, String, String)}
+       * Since here we only care to identify clauses representing 
Update/Delete which are not
+       * possible inside a view/subquery, we can immediately return the 
default {@link DestClausePrefix.INSERT}
+       */
+      return DestClausePrefix.INSERT;
+    }
     if(curNode.getType() != HiveParser.TOK_INSERT_INTO) {
       //select statement
       assert curNode.getType() == HiveParser.TOK_DESTINATION;

http://git-wip-us.apache.org/repos/asf/hive/blob/f772ba3c/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1c74779..db29e3a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1490,7 +1490,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
         qbp.addInsertIntoTable(tab_name, ast);
 
       case HiveParser.TOK_DESTINATION:
-        ctx_1.dest = this.ctx.getDestNamePrefix(ast).toString() + 
ctx_1.nextNum;
+        ctx_1.dest = this.ctx.getDestNamePrefix(ast, qb).toString() + 
ctx_1.nextNum;
         ctx_1.nextNum++;
         boolean isTmpFileDest = false;
         if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {

http://git-wip-us.apache.org/repos/asf/hive/blob/f772ba3c/ql/src/test/queries/clientpositive/acid_view_delete.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/acid_view_delete.q 
b/ql/src/test/queries/clientpositive/acid_view_delete.q
new file mode 100644
index 0000000..69f0aeb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/acid_view_delete.q
@@ -0,0 +1,35 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.strict.checks.cartesian.product=false;
+set hive.mapred.mode=nostrict;
+
+drop table if exists mydim;
+drop table if exists updates_staging_table;
+drop view if exists updates_staging_view;
+
+create table mydim (key int, name string, zip string, is_current boolean)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true');
+
+insert into mydim values
+  (1, 'bob',   '95136', true),
+  (2, 'joe',   '70068', true),
+  (3, 'steve', '22150', true);
+
+create table updates_staging_table (ks int, newzip string);
+insert into updates_staging_table values (1, 87102), (3, 45220);
+
+create view updates_staging_view (kv, newzip) as select ks, newzip from 
updates_staging_table;
+
+delete from mydim
+where mydim.key in (select kv from updates_staging_view where kv >=3);
+select * from mydim order by key;
+
+update mydim set is_current = false
+where mydim.key not in(select kv from updates_staging_view);
+select * from mydim order by key;
+
+update mydim set name = 'Olaf'
+where mydim.key in(select kv from updates_staging_view);
+select * from mydim order by key;

http://git-wip-us.apache.org/repos/asf/hive/blob/f772ba3c/ql/src/test/results/clientpositive/acid_view_delete.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_view_delete.q.out 
b/ql/src/test/results/clientpositive/acid_view_delete.q.out
new file mode 100644
index 0000000..71b42e1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/acid_view_delete.q.out
@@ -0,0 +1,141 @@
+PREHOOK: query: drop table if exists mydim
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists mydim
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists updates_staging_table
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists updates_staging_table
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop view if exists updates_staging_view
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: drop view if exists updates_staging_view
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: create table mydim (key int, name string, zip string, 
is_current boolean)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mydim
+POSTHOOK: query: create table mydim (key int, name string, zip string, 
is_current boolean)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mydim
+PREHOOK: query: insert into mydim values
+  (1, 'bob',   '95136', true),
+  (2, 'joe',   '70068', true),
+  (3, 'steve', '22150', true)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@mydim
+POSTHOOK: query: insert into mydim values
+  (1, 'bob',   '95136', true),
+  (2, 'joe',   '70068', true),
+  (3, 'steve', '22150', true)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@mydim
+POSTHOOK: Lineage: mydim.is_current EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
+POSTHOOK: Lineage: mydim.key EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: mydim.name SIMPLE 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: mydim.zip SIMPLE 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+PREHOOK: query: create table updates_staging_table (ks int, newzip string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@updates_staging_table
+POSTHOOK: query: create table updates_staging_table (ks int, newzip string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@updates_staging_table
+PREHOOK: query: insert into updates_staging_table values (1, 87102), (3, 45220)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@updates_staging_table
+POSTHOOK: query: insert into updates_staging_table values (1, 87102), (3, 
45220)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@updates_staging_table
+POSTHOOK: Lineage: updates_staging_table.ks EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: updates_staging_table.newzip SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: create view updates_staging_view (kv, newzip) as select ks, 
newzip from updates_staging_table
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@updates_staging_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@updates_staging_view
+POSTHOOK: query: create view updates_staging_view (kv, newzip) as select ks, 
newzip from updates_staging_table
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@updates_staging_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@updates_staging_view
+POSTHOOK: Lineage: updates_staging_view.kv SIMPLE 
[(updates_staging_table)updates_staging_table.FieldSchema(name:ks, type:int, 
comment:null), ]
+POSTHOOK: Lineage: updates_staging_view.newzip SIMPLE 
[(updates_staging_table)updates_staging_table.FieldSchema(name:newzip, 
type:string, comment:null), ]
+PREHOOK: query: delete from mydim
+where mydim.key in (select kv from updates_staging_view where kv >=3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mydim
+PREHOOK: Input: default@updates_staging_table
+PREHOOK: Input: default@updates_staging_view
+PREHOOK: Output: default@mydim
+POSTHOOK: query: delete from mydim
+where mydim.key in (select kv from updates_staging_view where kv >=3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mydim
+POSTHOOK: Input: default@updates_staging_table
+POSTHOOK: Input: default@updates_staging_view
+POSTHOOK: Output: default@mydim
+PREHOOK: query: select * from mydim order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mydim
+#### A masked pattern was here ####
+POSTHOOK: query: select * from mydim order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mydim
+#### A masked pattern was here ####
+1      bob     95136   true
+2      joe     70068   true
+Warning: Shuffle Join JOIN[20][tables = [mydim, sq_1_notin_nullcheck]] in 
Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: update mydim set is_current = false
+where mydim.key not in(select kv from updates_staging_view)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mydim
+PREHOOK: Input: default@updates_staging_table
+PREHOOK: Input: default@updates_staging_view
+PREHOOK: Output: default@mydim
+POSTHOOK: query: update mydim set is_current = false
+where mydim.key not in(select kv from updates_staging_view)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mydim
+POSTHOOK: Input: default@updates_staging_table
+POSTHOOK: Input: default@updates_staging_view
+POSTHOOK: Output: default@mydim
+PREHOOK: query: select * from mydim order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mydim
+#### A masked pattern was here ####
+POSTHOOK: query: select * from mydim order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mydim
+#### A masked pattern was here ####
+1      bob     95136   true
+2      joe     70068   false
+PREHOOK: query: update mydim set name = 'Olaf'
+where mydim.key in(select kv from updates_staging_view)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mydim
+PREHOOK: Input: default@updates_staging_table
+PREHOOK: Input: default@updates_staging_view
+PREHOOK: Output: default@mydim
+POSTHOOK: query: update mydim set name = 'Olaf'
+where mydim.key in(select kv from updates_staging_view)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mydim
+POSTHOOK: Input: default@updates_staging_table
+POSTHOOK: Input: default@updates_staging_view
+POSTHOOK: Output: default@mydim
+PREHOOK: query: select * from mydim order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mydim
+#### A masked pattern was here ####
+POSTHOOK: query: select * from mydim order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mydim
+#### A masked pattern was here ####
+1      Olaf    95136   true
+2      joe     70068   false

Reply via email to