Repository: hive

Updated Branches:
  refs/heads/master 28b24dbf5 -> 4a30574d3


HIVE-20354 : Semijoin hints don't work with merge statements (Deepak Jaiswal, reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4a30574d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4a30574d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4a30574d

Branch: refs/heads/master
Commit: 4a30574d38fc71771b3abffa225285dedf77c56a
Parents: 28b24db
Author: Deepak Jaiswal <djais...@apache.org>
Authored: Sun Aug 12 01:47:42 2018 -0700
Committer: Deepak Jaiswal <djais...@apache.org>
Committed: Sun Aug 12 01:47:42 2018 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/parse/HiveParser.g    |   4 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  32 +-
 .../ql/parse/UpdateDeleteSemanticAnalyzer.java  |  61 +-
 .../test/queries/clientpositive/semijoin_hint.q |  21 +
 .../clientpositive/llap/semijoin_hint.q.out     | 714 +++++++++++++++++++
 5 files changed, 804 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4a30574d/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index f4d12ae..15d4edf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -2967,8 +2967,8 @@ mergeStatement
 @init { pushMsg("MERGE statement", state); }
 @after { popMsg(state); }
    :
-   KW_MERGE KW_INTO tableName (KW_AS? identifier)? KW_USING joinSourcePart KW_ON expression whenClauses ->
-    ^(TOK_MERGE ^(TOK_TABREF tableName identifier?) joinSourcePart expression whenClauses)
+   KW_MERGE QUERY_HINT? KW_INTO tableName (KW_AS? identifier)? KW_USING joinSourcePart KW_ON expression whenClauses
+   -> ^(TOK_MERGE ^(TOK_TABREF tableName identifier?) joinSourcePart expression QUERY_HINT? whenClauses)
    ;
 
 /*
 Allow 0,1 or 2 WHEN MATCHED clauses and 0 or 1 WHEN NOT MATCHED
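[Editor's note] The grammar change above makes the parser accept an optional hint token between MERGE and INTO and carry it into the AST as a QUERY_HINT child of TOK_MERGE. As a minimal sketch of the now-parseable shape (reusing the acidTbl/nonAcidOrcTbl tables defined in the test added later in this commit):

    merge /*+ semi(s, a, t, 1000) */ into acidTbl as t
    using nonAcidOrcTbl s on t.a = s.a
    when matched then update set b = 7
    when not matched then insert values (s.a, s.b);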
http://git-wip-us.apache.org/repos/asf/hive/blob/4a30574d/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index a63aabe..2ee562a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1569,19 +1569,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       qbp.setSelExprForClause(ctx_1.dest, ast);
 
       int posn = 0;
-      if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.QUERY_HINT) {
-        ParseDriver pd = new ParseDriver();
-        String queryHintStr = ast.getChild(0).getText();
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("QUERY HINT: "+queryHintStr);
-        }
-        try {
-          ASTNode hintNode = pd.parseHint(queryHintStr);
-          qbp.setHints(hintNode);
-          posn++;
-        } catch (ParseException e) {
-          throw new SemanticException("failed to parse query hint: "+e.getMessage(), e);
-        }
+      if (((ASTNode) ast.getChild(0)).getType() == HiveParser.QUERY_HINT) {
+        posn = processQueryHint((ASTNode)ast.getChild(0), qbp, posn);
       }
 
       if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM)) {
@@ -1881,6 +1870,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       case HiveParser.TOK_CTE:
         processCTE(qb, ast);
         break;
+      case HiveParser.QUERY_HINT:
+        processQueryHint(ast, qbp, 0);
       default:
         skipRecursion = false;
         break;
@@ -1899,6 +1890,21 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return phase1Result;
   }
 
+  private int processQueryHint(ASTNode ast, QBParseInfo qbp, int posn) throws SemanticException{
+    ParseDriver pd = new ParseDriver();
+    String queryHintStr = ast.getText();
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("QUERY HINT: "+queryHintStr);
+    }
+    try {
+      ASTNode hintNode = pd.parseHint(queryHintStr);
+      qbp.setHints(hintNode);
+    } catch (ParseException e) {
+      throw new SemanticException("failed to parse query hint: "+e.getMessage(), e);
+    }
+    return posn + 1;
+  }
+
   /**
    * This is phase1 of supporting specifying schema in insert statement
    * insert into foo(z,y) select a,b from bar;
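[Editor's note] For orientation before the next diff: UpdateDeleteSemanticAnalyzer rewrites a MERGE into a single multi-insert query, and the change below threads the hint text into the select of only the first rewritten branch (tracked by the hintProcessed flag), so the hint reaches the planner exactly once. Roughly, for the test query added later in this commit, the rewritten statement takes a shape like the following (a simplified sketch; the exact text, including the cardinality-check branch, is built by handleUpdate/handleDelete/handleInsert, and the WHERE predicates here are approximate):

    FROM
      default.acidTbl t RIGHT OUTER JOIN nonAcidOrcTbl s
      ON t.a = s.a
    INSERT INTO default.acidTbl    -- delete clause
      select /*+ semi(s, a, t, 1000) */ t.ROW__ID
      WHERE t.a = s.a AND s.a > 8
    INSERT INTO default.acidTbl    -- update clause
      select t.ROW__ID, t.a, 7
      WHERE t.a = s.a AND NOT(s.a > 8)
    INSERT INTO default.acidTbl    -- insert clause
      select s.a, s.b
      WHERE t.a IS NULL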
http://git-wip-us.apache.org/repos/asf/hive/blob/4a30574d/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 8df2904..0d80ed3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -888,11 +888,20 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     ASTNode onClause = (ASTNode) tree.getChild(2);
     String onClauseAsText = getMatchedText(onClause);
 
+    int whenClauseBegins = 3;
+    boolean hasHint = false;
+    // query hint
+    ASTNode qHint = (ASTNode) tree.getChild(3);
+    if (qHint.getType() == HiveParser.QUERY_HINT) {
+      hasHint = true;
+      whenClauseBegins++;
+    }
     Table targetTable = getTargetTable(target);
     validateTargetTable(targetTable);
-    List<ASTNode> whenClauses = findWhenClauses(tree);
+    List<ASTNode> whenClauses = findWhenClauses(tree, whenClauseBegins);
 
     StringBuilder rewrittenQueryStr = new StringBuilder("FROM\n");
+    rewrittenQueryStr.append(Indent).append(getFullTableNameForSQL(target));
     if(isAliased(target)) {
       rewrittenQueryStr.append(" ").append(targetName);
     }
@@ -912,6 +921,12 @@
     rewrittenQueryStr.append('\n');
     rewrittenQueryStr.append(Indent).append("ON ").append(onClauseAsText).append('\n');
 
+    // Add the hint if any
+    String hintStr = null;
+    if (hasHint) {
+      hintStr = " /*+ " + qHint.getText() + " */ ";
+    }
+
     /**
      * We allow at most 2 WHEN MATCHED clause, in which case 1 must be Update the other Delete
      * If we have both update and delete, the 1st one (in SQL code) must have "AND <extra predicate>"
@@ -921,22 +936,29 @@
     String extraPredicate = null;
     int numWhenMatchedUpdateClauses = 0, numWhenMatchedDeleteClauses = 0;
     int numInsertClauses = 0;
+    boolean hintProcessed = false;
     for(ASTNode whenClause : whenClauses) {
       switch (getWhenClauseOperation(whenClause).getType()) {
       case HiveParser.TOK_INSERT:
         numInsertClauses++;
-        handleInsert(whenClause, rewrittenQueryStr, target, onClause, targetTable, targetName, onClauseAsText);
+        handleInsert(whenClause, rewrittenQueryStr, target, onClause,
+            targetTable, targetName, onClauseAsText, hintProcessed ? null : hintStr);
+        hintProcessed = true;
         break;
       case HiveParser.TOK_UPDATE:
         numWhenMatchedUpdateClauses++;
-        String s = handleUpdate(whenClause, rewrittenQueryStr, target, onClauseAsText, targetTable, extraPredicate);
+        String s = handleUpdate(whenClause, rewrittenQueryStr, target,
+            onClauseAsText, targetTable, extraPredicate, hintProcessed ? null : hintStr);
+        hintProcessed = true;
         if(numWhenMatchedUpdateClauses + numWhenMatchedDeleteClauses == 1) {
           extraPredicate = s;//i.e. it's the 1st WHEN MATCHED
         }
         break;
       case HiveParser.TOK_DELETE:
         numWhenMatchedDeleteClauses++;
-        String s1 = handleDelete(whenClause, rewrittenQueryStr, target, onClauseAsText, targetTable, extraPredicate);
+        String s1 = handleDelete(whenClause, rewrittenQueryStr, target,
+            onClauseAsText, targetTable, extraPredicate, hintProcessed ? null : hintStr);
+        hintProcessed = true;
         if(numWhenMatchedUpdateClauses + numWhenMatchedDeleteClauses == 1) {
           extraPredicate = s1;//i.e. it's the 1st WHEN MATCHED
         }
@@ -956,6 +978,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     if(numWhenMatchedDeleteClauses + numWhenMatchedUpdateClauses == 2 && extraPredicate == null) {
       throw new SemanticException(ErrorMsg.MERGE_PREDIACTE_REQUIRED, ctx.getCmd());
     }
+
     boolean validating = handleCardinalityViolation(rewrittenQueryStr, target, onClauseAsText,
       targetTable, numWhenMatchedDeleteClauses == 0 && numWhenMatchedUpdateClauses == 0);
     ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
@@ -987,6 +1010,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
       //here means the last branch of the multi-insert is Cardinality Validation
       rewrittenCtx.addDestNamePrefix(rewrittenTree.getChildCount() - 1, Context.DestClausePrefix.INSERT);
     }
+
     try {
       useSuper = true;
       super.analyze(rewrittenTree, rewrittenCtx);
@@ -1153,13 +1177,17 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
    */
   private String handleUpdate(ASTNode whenMatchedUpdateClause, StringBuilder rewrittenQueryStr,
       ASTNode target, String onClauseAsString, Table targetTable,
-      String deleteExtraPredicate) throws SemanticException {
+      String deleteExtraPredicate, String hintStr) throws SemanticException {
     assert whenMatchedUpdateClause.getType() == HiveParser.TOK_MATCHED;
     assert getWhenClauseOperation(whenMatchedUpdateClause).getType() == HiveParser.TOK_UPDATE;
     String targetName = getSimpleTableName(target);
     rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
     addPartitionColsToInsert(targetTable.getPartCols(), rewrittenQueryStr);
-    rewrittenQueryStr.append(" -- update clause\n select ").append(targetName).append(".ROW__ID");
+    rewrittenQueryStr.append(" -- update clause\n select ");
+    if (hintStr != null) {
+      rewrittenQueryStr.append(hintStr);
+    }
+    rewrittenQueryStr.append(targetName).append(".ROW__ID");
 
     ASTNode setClause = (ASTNode)getWhenClauseOperation(whenMatchedUpdateClause).getChild(0);
     //columns being updated -> update expressions; "setRCols" (last param) is null because we use actual expressions
@@ -1211,7 +1239,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
    * @param updateExtraPredicate - see notes at caller
    */
   private String handleDelete(ASTNode whenMatchedDeleteClause, StringBuilder rewrittenQueryStr, ASTNode target,
-      String onClauseAsString, Table targetTable, String updateExtraPredicate) throws SemanticException {
+      String onClauseAsString, Table targetTable, String updateExtraPredicate, String hintStr) throws SemanticException {
     assert whenMatchedDeleteClause.getType() == HiveParser.TOK_MATCHED;
     assert getWhenClauseOperation(whenMatchedDeleteClause).getType() == HiveParser.TOK_DELETE;
     List<FieldSchema> partCols = targetTable.getPartCols();
@@ -1219,7 +1247,11 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
     addPartitionColsToInsert(partCols, rewrittenQueryStr);
 
-    rewrittenQueryStr.append(" -- delete clause\n select ").append(targetName).append(".ROW__ID ");
+    rewrittenQueryStr.append(" -- delete clause\n select ");
+    if (hintStr != null) {
+      rewrittenQueryStr.append(hintStr);
+    }
+    rewrittenQueryStr.append(targetName).append(".ROW__ID ");
     addPartitionColsToSelect(partCols, rewrittenQueryStr, target);
     rewrittenQueryStr.append("\n WHERE ").append(onClauseAsString);
     String extraPredicate = getWhenClausePredicate(whenMatchedDeleteClause);
@@ -1291,10 +1323,10 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
   /**
    * Collect WHEN clauses from Merge statement AST
   */
-  private List<ASTNode> findWhenClauses(ASTNode tree) throws SemanticException {
+  private List<ASTNode> findWhenClauses(ASTNode tree, int start) throws SemanticException {
    assert tree.getType() == HiveParser.TOK_MERGE;
    List<ASTNode> whenClauses = new ArrayList<>();
-    for(int idx = 3; idx < tree.getChildCount(); idx++) {
+    for(int idx = start; idx < tree.getChildCount(); idx++) {
      ASTNode whenClause = (ASTNode)tree.getChild(idx);
      assert whenClause.getType() == HiveParser.TOK_MATCHED ||
        whenClause.getType() == HiveParser.TOK_NOT_MATCHED :
@@ -1333,7 +1365,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
    */
   private void handleInsert(ASTNode whenNotMatchedClause, StringBuilder rewrittenQueryStr, ASTNode target,
       ASTNode onClause, Table targetTable,
-      String targetTableNameInSourceQuery, String onClauseAsString) throws SemanticException {
+      String targetTableNameInSourceQuery, String onClauseAsString, String hintStr) throws SemanticException {
     assert whenNotMatchedClause.getType() == HiveParser.TOK_NOT_MATCHED;
     assert getWhenClauseOperation(whenNotMatchedClause).getType() == HiveParser.TOK_INSERT;
     List<FieldSchema> partCols = targetTable.getPartCols();
@@ -1347,8 +1379,11 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     OnClauseAnalyzer oca = new OnClauseAnalyzer(onClause, targetTable, targetTableNameInSourceQuery,
       conf, onClauseAsString);
     oca.analyze();
-    rewrittenQueryStr.append(" -- insert clause\n select ")
-      .append(valuesClause).append("\n WHERE ").append(oca.getPredicate());
+    rewrittenQueryStr.append(" -- insert clause\n select ");
+    if (hintStr != null) {
+      rewrittenQueryStr.append(hintStr);
+    }
+    rewrittenQueryStr.append(valuesClause).append("\n WHERE ").append(oca.getPredicate());
     String extraPredicate = getWhenClausePredicate(whenNotMatchedClause);
     if(extraPredicate != null) {
       //we have WHEN NOT MATCHED AND <boolean expr> THEN INSERT

http://git-wip-us.apache.org/repos/asf/hive/blob/4a30574d/ql/src/test/queries/clientpositive/semijoin_hint.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/semijoin_hint.q b/ql/src/test/queries/clientpositive/semijoin_hint.q
index de176af..cdf23b7 100644
--- a/ql/src/test/queries/clientpositive/semijoin_hint.q
+++ b/ql/src/test/queries/clientpositive/semijoin_hint.q
@@ -100,3 +100,24 @@ explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join src
 
 -- This should NOT create a semijoin
 explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1);
+
+
+-- Make sure hints work with merge
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.explain.user=false;
+set hive.merge.cardinality.check=true;
+
+create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table nonAcidOrcTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false');
+
+-- without a hint, the semijoin is still made; note the difference in bloom filter entries.
+explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a
+WHEN MATCHED AND s.a > 8 THEN DELETE
+WHEN MATCHED THEN UPDATE SET b = 7
+WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b);
+-- with the hint, the bloom filter entries become 1000.
+explain merge /*+ semi(s, a, t, 1000)*/ into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a +WHEN MATCHED AND s.a > 8 THEN DELETE +WHEN MATCHED THEN UPDATE SET b = 7 +WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/4a30574d/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index 679916d..9ee70ed 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -2836,3 +2836,717 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acidTbl +POSTHOOK: query: create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acidTbl +PREHOOK: query: create table nonAcidOrcTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nonAcidOrcTbl +POSTHOOK: query: create table nonAcidOrcTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nonAcidOrcTbl +PREHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a +WHEN MATCHED AND s.a > 8 THEN DELETE +WHEN MATCHED THEN UPDATE SET b = 7 +WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) +PREHOOK: type: QUERY +POSTHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a +WHEN MATCHED AND s.a > 8 THEN DELETE +WHEN MATCHED THEN UPDATE SET b = 7 +WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-5 + Stage-9 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-4 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 9 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (a BETWEEN DynamicValue(RS_3_s_a_min) AND DynamicValue(RS_3_s_a_max) and in_bloom_filter(a, DynamicValue(RS_3_s_a_bloom_filter))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce 
partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 8 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: b (type: int) + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 a (type: int) + 1 a (type: int) + outputColumnNames: _col0, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 = _col5) and (_col5 > 8)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 = _col5) and (_col5 <= 8)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col0 = _col5) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col4 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: DELETE + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 7 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: UPDATE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + + Stage: Stage-5 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: DELETE + + Stage: Stage-6 + Stats Work + Basic Stats Work: + + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: UPDATE + + Stage: Stage-7 + Stats Work + Basic Stats Work: + + Stage: Stage-3 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-8 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: INSERT + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl + +PREHOOK: query: explain merge /*+ semi(s, a, t, 1000)*/ into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a +WHEN MATCHED AND s.a > 8 THEN DELETE +WHEN MATCHED THEN UPDATE SET b = 7 +WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) +PREHOOK: type: QUERY +POSTHOOK: query: explain merge /*+ semi(s, a, t, 1000)*/ into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a +WHEN MATCHED AND s.a > 8 THEN DELETE +WHEN MATCHED THEN UPDATE SET b = 7 +WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-5 + Stage-9 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-4 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 9 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (a BETWEEN DynamicValue(RS_3_s_a_min) AND DynamicValue(RS_3_s_a_max) and in_bloom_filter(a, DynamicValue(RS_3_s_a_bloom_filter))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 8 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE + value expressions: b (type: int) + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 a (type: int) + 1 a (type: int) + outputColumnNames: _col0, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 = _col5) and (_col5 > 8)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 = _col5) and (_col5 <= 8)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Filter Operator + predicate: (_col0 = _col5) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col4 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: DELETE + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 7 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: UPDATE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cardinality_violation(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + + Stage: Stage-5 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: DELETE + + Stage: Stage-6 + Stats Work + Basic Stats Work: + + Stage: Stage-2 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: UPDATE + + Stage: Stage-7 + Stats Work + Basic Stats Work: + + Stage: Stage-3 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.merge_tmp_table + + Stage: Stage-8 + Stats Work 
+ Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + + Stage: Stage-1 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + Write Type: INSERT + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl +
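
[Editor's note] The effect of the hint is visible in the two plans above only in the source-side aggregation that builds the semijoin bloom filter (Map 8 and Reducer 9). Without the hint the plan contains

    bloom_filter(_col0, expectedEntries=1)

and with /*+ semi(s, a, t, 1000) */ it becomes

    bloom_filter(_col0, expectedEntries=1000)

In both cases the semijoin reduction itself (the DynamicValue filter on the ACID table scan in Map 1) is still created; the hint only overrides the expected number of bloom filter entries, exactly as the test comments note.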