Author: namit
Date: Fri Jan 18 15:53:30 2013
New Revision: 1435203

URL: http://svn.apache.org/viewvc?rev=1435203&view=rev
Log:
HIVE-3915 Union with map-only query on one side and two MR job query on the other
produces wrong results (Kevin Wilfong via namit)


Added:
    hive/trunk/ql/src/test/queries/clientpositive/union33.q
    hive/trunk/ql/src/test/results/clientpositive/union33.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1435203&r1=1435202&r2=1435203&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Fri Jan 18 15:53:30 2013
@@ -1108,6 +1108,9 @@ public class Driver implements CommandPr
 
       // Add root Tasks to runnable
       for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
+        // This should never happen, if it does, it's a bug with the potential to produce
+        // incorrect results.
+        assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
         driverCxt.addToRunnable(tsk);
       }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java?rev=1435203&r1=1435202&r2=1435203&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java Fri Jan 18 15:53:30 2013
@@ -177,7 +177,8 @@ public class GenMRUnion1 implements Node
     currTask.addDependentTask(uTask);
     if (ctx.getRootTasks().contains(uTask)) {
       ctx.getRootTasks().remove(uTask);
-      if (!ctx.getRootTasks().contains(currTask)) {
+      if (!ctx.getRootTasks().contains(currTask) &&
+          shouldBeRootTask(currTask)) {
         ctx.getRootTasks().add(currTask);
       }
     }
@@ -290,7 +291,7 @@ public class GenMRUnion1 implements Node
     // If it a map-reduce job, create a temporary file
     else {
       // is the current task a root task
-      if (shouldBeRootTask(currTask, parseCtx)
+      if (shouldBeRootTask(currTask)
           && (!ctx.getRootTasks().contains(currTask))) {
         ctx.getRootTasks().add(currTask);
       }
@@ -315,7 +316,7 @@ public class GenMRUnion1 implements Node
   }
 
   private boolean shouldBeRootTask(
-      Task<? extends Serializable> currTask, ParseContext parseContext) {
+      Task<? extends Serializable> currTask) {
     return currTask.getParentTasks() == null
         || (currTask.getParentTasks().size() == 0);
   }

Added: hive/trunk/ql/src/test/queries/clientpositive/union33.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union33.q?rev=1435203&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union33.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union33.q Fri Jan 18 15:53:30 2013
@@ -0,0 +1,47 @@
+set hive.groupby.skewindata=true;
+
+-- This tests that a union all with a map only subquery on one side and a 
+-- subquery involving two map reduce jobs on the other runs correctly.
+
+CREATE TABLE test_src (key STRING, value STRING);
+
+EXPLAIN INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, value FROM src 
+       WHERE key = 0
+UNION ALL
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+)a;
+ 
+INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, value FROM src 
+       WHERE key = 0
+UNION ALL
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+)a;
+ 
+SELECT COUNT(*) FROM test_src;
+ 
+EXPLAIN INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+UNION ALL
+       SELECT key, value FROM src 
+       WHERE key = 0
+)a;
+ 
+INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+UNION ALL
+       SELECT key, value FROM src 
+       WHERE key = 0
+)a;
+ 
+SELECT COUNT(*) FROM test_src;
+ 
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/union33.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union33.q.out?rev=1435203&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union33.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union33.q.out Fri Jan 18 15:53:30 2013
@@ -0,0 +1,561 @@
+PREHOOK: query: -- This tests that a union all with a map only subquery on one side and a 
+-- subquery involving two map reduce jobs on the other runs correctly.
+
+CREATE TABLE test_src (key STRING, value STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests that a union all with a map only subquery on one side and a 
+-- subquery involving two map reduce jobs on the other runs correctly.
+
+CREATE TABLE test_src (key STRING, value STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_src
+PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, value FROM src 
+       WHERE key = 0
+UNION ALL
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+)a
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, value FROM src 
+       WHERE key = 0
+UNION ALL
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+)a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM 
(TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR 
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 
(TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 0)))) 
(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT 
(TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 
(TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT) value)) 
(TOK_GROUPBY (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION 
(TOK_TAB (TOK_TABNAME test_src))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL 
key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+  Stage-9 is a root stage
+  Stage-10 depends on stages: Stage-9
+  Stage-2 depends on stages: Stage-10
+  Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
+  Stage-5
+  Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
+  Stage-3 depends on stages: Stage-0
+  Stage-4
+  Stage-6
+  Stage-7 depends on stages: Stage-6
+
+STAGE PLANS:
+  Stage: Stage-9
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:a-subquery2:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: rand()
+                        type: double
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: partials
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-10
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: UDFToString(_col1)
+                    type: string
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Union
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.test_src
+        null-subquery1:a-subquery1:src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key = 0.0)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                Union
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.test_src
+
+  Stage: Stage-8
+    Conditional Operator
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test_src
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.test_src
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.test_src
+
+  Stage: Stage-7
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, value FROM src 
+       WHERE key = 0
+UNION ALL
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+)a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_src
+POSTHOOK: query: INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, value FROM src 
+       WHERE key = 0
+UNION ALL
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+)a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_src
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), (src)src.FieldSchema(name:key, type:string, 
comment:default), ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), (src)src.null, ]
+PREHOOK: query: SELECT COUNT(*) FROM test_src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*) FROM test_src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_src
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), (src)src.FieldSchema(name:key, type:string, 
comment:default), ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), (src)src.null, ]
+312
+PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+UNION ALL
+       SELECT key, value FROM src 
+       WHERE key = 0
+)a
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+UNION ALL
+       SELECT key, value FROM src 
+       WHERE key = 0
+)a
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), (src)src.FieldSchema(name:key, type:string, 
comment:default), ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), (src)src.null, ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM 
(TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR 
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 
(TOK_FUNCTIONSTAR COUNT) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) 
(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT 
(TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 
(TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= 
(TOK_TABLE_OR_COL key) 0))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB 
(TOK_TABNAME test_src))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) 
(TOK_SELEXPR (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7
+  Stage-6
+  Stage-0 depends on stages: Stage-6, Stage-5, Stage-8
+  Stage-4 depends on stages: Stage-0
+  Stage-5
+  Stage-7
+  Stage-8 depends on stages: Stage-7
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:a-subquery1:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: rand()
+                        type: double
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: partials
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: UDFToString(_col1)
+                    type: string
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Union
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.test_src
+        null-subquery2:a-subquery2:src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key = 0.0)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                Union
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    outputColumnNames: _col0, _col1
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.test_src
+
+  Stage: Stage-9
+    Conditional Operator
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test_src
+
+  Stage: Stage-4
+    Stats-Aggr Operator
+
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.test_src
+
+  Stage: Stage-7
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.test_src
+
+  Stage: Stage-8
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+UNION ALL
+       SELECT key, value FROM src 
+       WHERE key = 0
+)a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_src
+POSTHOOK: query: INSERT OVERWRITE TABLE test_src 
+SELECT key, value FROM (
+       SELECT key, COUNT(*) AS value FROM src
+       GROUP BY key
+UNION ALL
+       SELECT key, value FROM src 
+       WHERE key = 0
+)a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_src
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), (src)src.FieldSchema(name:key, type:string, 
comment:default), ]
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), (src)src.FieldSchema(name:key, type:string, 
comment:default), ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.null, 
(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT COUNT(*) FROM test_src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*) FROM test_src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_src
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), (src)src.FieldSchema(name:key, type:string, 
comment:default), ]
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), (src)src.FieldSchema(name:key, type:string, 
comment:default), ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.null, 
(src)src.FieldSchema(name:value, type:string, comment:default), ]
+312


Reply via email to