hive git commit: HIVE-9217: UnionProcessor misses results for multi-insert when hive.optimize.union.remove=true (Pengcheng Xiong via Laljo John Pullokkaran)

pxiong Tue, 11 Aug 2015 17:29:55 -0700

Repository: hive
Updated Branches:
  refs/heads/branch-1.0 fbcef73cc -> 02a08237d



HIVE-9217: UnionProcessor misses results for multi-insert when 
hive.optimize.union.remove=true (Pengcheng Xiong via Laljo John Pullokkaran)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1650409 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/02a08237
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/02a08237
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/02a08237

Branch: refs/heads/branch-1.0
Commit: 02a08237df27bd26584a4604f7d2fff5fd840475
Parents: fbcef73
Author: John Pullokkaran <jpull...@apache.org>
Authored: Thu Jan 8 22:41:21 2015 +0000
Committer: Pengcheng Xiong <pxi...@apache.org>
Committed: Tue Aug 11 17:29:21 2015 -0700

----------------------------------------------------------------------
 .../optimizer/unionproc/UnionProcFactory.java   |   15 +-
 .../clientpositive/union_remove_6_subq.q        |   76 ++
 .../clientpositive/union_remove_6_subq.q.out    | 1242 ++++++++++++++++++
 3 files changed, 1329 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/02a08237/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
index a985c4f..94947d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
@@ -265,13 +265,20 @@ public final class UnionProcFactory {
         Operator<? extends OperatorDesc> operator =
           (Operator<? extends OperatorDesc>)stack.get(pos);
 
+        // (1) Because operator.supportUnionRemoveOptimization() returns true
+        // only for SEL and FIL operators, this rule actually matches only
+        // UNION%(SEL%|FIL%)*FS%.
+        // (2) The assumption here is that if
+        // operator.getChildOperators().size() > 1, there are multiple FS
+        // operators, i.e., multiple inserts.
+        // The current implementation does not support this. For more details,
+        // see HIVE-9217.
+        if (operator.getChildOperators() != null && operator.getChildOperators().size() > 1) {
+          return null;
+        }
         // Break if it encountered a union
         if (operator instanceof UnionOperator) {
           union = (UnionOperator)operator;
-          // No need for this optimization in case of multi-table inserts
-          if (union.getChildOperators().size() > 1) {
-            return null;
-          }
           break;
         }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/02a08237/ql/src/test/queries/clientpositive/union_remove_6_subq.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_remove_6_subq.q b/ql/src/test/queries/clientpositive/union_remove_6_subq.q
new file mode 100644
index 0000000..8bcac6f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_remove_6_subq.q
@@ -0,0 +1,76 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (all of which are mapred queries)
+-- followed by select star and a file sink in 2 output tables.
+-- The optimiaztion does not take affect since it is a multi-table insert.
+-- It does not matter, whether the output is merged or not. In this case,
+-- merging is turned off
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as textfile;
+create table outputTbl2(key string, values bigint) stored as textfile;
+
+load data local inpath '../../data/files/T1.txt' into table inputTbl1;
+
+explain
+FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *;
+
+FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;
+select * from outputTbl2 order by key, values;
+
+-- The following queries guarantee the correctness.
+explain
+select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq;
+
+select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq;
+
+explain
+select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING);
+
+select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING);
+
+

http://git-wip-us.apache.org/repos/asf/hive/blob/02a08237/ql/src/test/results/clientpositive/union_remove_6_subq.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out
new file mode 100644
index 0000000..63598f9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out
@@ -0,0 +1,1242 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (all of which are mapred queries)
+-- followed by select star and a file sink in 2 output tables.
+-- The optimiaztion does not take affect since it is a multi-table insert.
+-- It does not matter, whether the output is merged or not. In this case,
+-- merging is turned off
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink 
optimization
+-- Union of 2 subqueries is performed (all of which are mapred queries)
+-- followed by select star and a file sink in 2 output tables.
+-- The optimiaztion does not take affect since it is a multi-table insert.
+-- It does not matter, whether the output is merged or not. In this case,
+-- merging is turned off
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as 
textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as 
textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: create table outputTbl2(key string, values bigint) stored as 
textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl2
+POSTHOOK: query: create table outputTbl2(key string, values bigint) stored as 
textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl2
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table 
inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table 
inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-3 depends on stages: Stage-2, Stage-4
+  Stage-0 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-3
+  Stage-4 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl1
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column 
stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: key
+              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL 
Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl1
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl2
+          TableScan
+            Union
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl1
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.outputtbl2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl2
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: inputtbl1
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column 
stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: key
+              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL 
Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+PREHOOK: query: FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+PREHOOK: Output: default@outputtbl2
+POSTHOOK: query: FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Output: default@outputtbl2
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION 
[(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), 
(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, 
(inputtbl1)inputtbl1.null, ]
+POSTHOOK: Lineage: outputtbl2.key EXPRESSION 
[(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), 
(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, 
(inputtbl1)inputtbl1.null, ]
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1      1
+1      1
+2      1
+2      1
+3      1
+3      1
+7      1
+7      1
+8      2
+8      2
+PREHOOK: query: select * from outputTbl2 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl2 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl2
+#### A masked pattern was here ####
+1      1
+1      1
+2      1
+2      1
+3      1
+3      1
+7      1
+7      1
+8      2
+8      2
+PREHOOK: query: -- The following queries guarantee the correctness.
+explain
+select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The following queries guarantee the correctness.
+explain
+select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Group By Operator
+                aggregations: count(1)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Select Operator
+            expressions: (_col0 - 200) (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: COMPLETE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Group By Operator
+                aggregations: avg(_col0)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
+                  value expressions: _col0 (type: 
struct<count:bigint,sum:double,input:bigint>)
+          TableScan
+            Union
+              Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Group By Operator
+                aggregations: avg(_col0)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: COMPLETE
+                  value expressions: _col0 (type: 
struct<count:bigint,sum:double,input:bigint>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: double)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Group By Operator
+                aggregations: count(1)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select avg(c) from(
+  SELECT count(1)-200 as c from src
+  UNION ALL
+  SELECT count(1) as c from src
+)subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+400.0
+PREHOOK: query: explain
+select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-4
+  Stage-3 depends on stages: Stage-2
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), (_col1 * 2) (type: bigint)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                keys: _col0 (type: string), _col1 (type: bigint)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: bigint)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+          TableScan
+            Union
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                keys: _col0 (type: string), _col1 (type: bigint)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: bigint)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: bigint)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: bigint)
+      Reduce Operator Tree:
+        Extract
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          PTF Operator
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _wcol0 (type: double)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, avg(c) over w from(
+  SELECT key, count(1)*2 as c from src group by key
+  UNION ALL
+  SELECT key, count(1) as c from src group by key
+)subq group by key, c
+WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0      3.0
+0      4.5
+10     1.0
+10     1.5
+100    2.0
+100    3.0
+103    2.0
+103    3.0
+104    2.0
+104    3.0
+105    1.0
+105    1.5
+11     1.0
+11     1.5
+111    1.0
+111    1.5
+113    2.0
+113    3.0
+114    1.0
+114    1.5
+116    1.0
+116    1.5
+118    2.0
+118    3.0
+119    3.0
+119    4.5
+12     2.0
+12     3.0
+120    2.0
+120    3.0
+125    2.0
+125    3.0
+126    1.0
+126    1.5
+128    3.0
+128    4.5
+129    2.0
+129    3.0
+131    1.0
+131    1.5
+133    1.0
+133    1.5
+134    2.0
+134    3.0
+136    1.0
+136    1.5
+137    2.0
+137    3.0
+138    4.0
+138    6.0
+143    1.0
+143    1.5
+145    1.0
+145    1.5
+146    2.0
+146    3.0
+149    2.0
+149    3.0
+15     2.0
+15     3.0
+150    1.0
+150    1.5
+152    2.0
+152    3.0
+153    1.0
+153    1.5
+155    1.0
+155    1.5
+156    1.0
+156    1.5
+157    1.0
+157    1.5
+158    1.0
+158    1.5
+160    1.0
+160    1.5
+162    1.0
+162    1.5
+163    1.0
+163    1.5
+164    2.0
+164    3.0
+165    2.0
+165    3.0
+166    1.0
+166    1.5
+167    3.0
+167    4.5
+168    1.0
+168    1.5
+169    4.0
+169    6.0
+17     1.0
+17     1.5
+170    1.0
+170    1.5
+172    2.0
+172    3.0
+174    2.0
+174    3.0
+175    2.0
+175    3.0
+176    2.0
+176    3.0
+177    1.0
+177    1.5
+178    1.0
+178    1.5
+179    2.0
+179    3.0
+18     2.0
+18     3.0
+180    1.0
+180    1.5
+181    1.0
+181    1.5
+183    1.0
+183    1.5
+186    1.0
+186    1.5
+187    3.0
+187    4.5
+189    1.0
+189    1.5
+19     1.0
+19     1.5
+190    1.0
+190    1.5
+191    2.0
+191    3.0
+192    1.0
+192    1.5
+193    3.0
+193    4.5
+194    1.0
+194    1.5
+195    2.0
+195    3.0
+196    1.0
+196    1.5
+197    2.0
+197    3.0
+199    3.0
+199    4.5
+2      1.0
+2      1.5
+20     1.0
+20     1.5
+200    2.0
+200    3.0
+201    1.0
+201    1.5
+202    1.0
+202    1.5
+203    2.0
+203    3.0
+205    2.0
+205    3.0
+207    2.0
+207    3.0
+208    3.0
+208    4.5
+209    2.0
+209    3.0
+213    2.0
+213    3.0
+214    1.0
+214    1.5
+216    2.0
+216    3.0
+217    2.0
+217    3.0
+218    1.0
+218    1.5
+219    2.0
+219    3.0
+221    2.0
+221    3.0
+222    1.0
+222    1.5
+223    2.0
+223    3.0
+224    2.0
+224    3.0
+226    1.0
+226    1.5
+228    1.0
+228    1.5
+229    2.0
+229    3.0
+230    5.0
+230    7.5
+233    2.0
+233    3.0
+235    1.0
+235    1.5
+237    2.0
+237    3.0
+238    2.0
+238    3.0
+239    2.0
+239    3.0
+24     2.0
+24     3.0
+241    1.0
+241    1.5
+242    2.0
+242    3.0
+244    1.0
+244    1.5
+247    1.0
+247    1.5
+248    1.0
+248    1.5
+249    1.0
+249    1.5
+252    1.0
+252    1.5
+255    2.0
+255    3.0
+256    2.0
+256    3.0
+257    1.0
+257    1.5
+258    1.0
+258    1.5
+26     2.0
+26     3.0
+260    1.0
+260    1.5
+262    1.0
+262    1.5
+263    1.0
+263    1.5
+265    2.0
+265    3.0
+266    1.0
+266    1.5
+27     1.0
+27     1.5
+272    2.0
+272    3.0
+273    3.0
+273    4.5
+274    1.0
+274    1.5
+275    1.0
+275    1.5
+277    4.0
+277    6.0
+278    2.0
+278    3.0
+28     1.0
+28     1.5
+280    2.0
+280    3.0
+281    2.0
+281    3.0
+282    2.0
+282    3.0
+283    1.0
+283    1.5
+284    1.0
+284    1.5
+285    1.0
+285    1.5
+286    1.0
+286    1.5
+287    1.0
+287    1.5
+288    2.0
+288    3.0
+289    1.0
+289    1.5
+291    1.0
+291    1.5
+292    1.0
+292    1.5
+296    1.0
+296    1.5
+298    3.0
+298    4.5
+30     1.0
+30     1.5
+302    1.0
+302    1.5
+305    1.0
+305    1.5
+306    1.0
+306    1.5
+307    2.0
+307    3.0
+308    1.0
+308    1.5
+309    2.0
+309    3.0
+310    1.0
+310    1.5
+311    3.0
+311    4.5
+315    1.0
+315    1.5
+316    3.0
+316    4.5
+317    2.0
+317    3.0
+318    3.0
+318    4.5
+321    2.0
+321    3.0
+322    2.0
+322    3.0
+323    1.0
+323    1.5
+325    2.0
+325    3.0
+327    3.0
+327    4.5
+33     1.0
+33     1.5
+331    2.0
+331    3.0
+332    1.0
+332    1.5
+333    2.0
+333    3.0
+335    1.0
+335    1.5
+336    1.0
+336    1.5
+338    1.0
+338    1.5
+339    1.0
+339    1.5
+34     1.0
+34     1.5
+341    1.0
+341    1.5
+342    2.0
+342    3.0
+344    2.0
+344    3.0
+345    1.0
+345    1.5
+348    5.0
+348    7.5
+35     3.0
+35     4.5
+351    1.0
+351    1.5
+353    2.0
+353    3.0
+356    1.0
+356    1.5
+360    1.0
+360    1.5
+362    1.0
+362    1.5
+364    1.0
+364    1.5
+365    1.0
+365    1.5
+366    1.0
+366    1.5
+367    2.0
+367    3.0
+368    1.0
+368    1.5
+369    3.0
+369    4.5
+37     2.0
+37     3.0
+373    1.0
+373    1.5
+374    1.0
+374    1.5
+375    1.0
+375    1.5
+377    1.0
+377    1.5
+378    1.0
+378    1.5
+379    1.0
+379    1.5
+382    2.0
+382    3.0
+384    3.0
+384    4.5
+386    1.0
+386    1.5
+389    1.0
+389    1.5
+392    1.0
+392    1.5
+393    1.0
+393    1.5
+394    1.0
+394    1.5
+395    2.0
+395    3.0
+396    3.0
+396    4.5
+397    2.0
+397    3.0
+399    2.0
+399    3.0
+4      1.0
+4      1.5
+400    1.0
+400    1.5
+401    5.0
+401    7.5
+402    1.0
+402    1.5
+403    3.0
+403    4.5
+404    2.0
+404    3.0
+406    4.0
+406    6.0
+407    1.0
+407    1.5
+409    3.0
+409    4.5
+41     1.0
+41     1.5
+411    1.0
+411    1.5
+413    2.0
+413    3.0
+414    2.0
+414    3.0
+417    3.0
+417    4.5
+418    1.0
+418    1.5
+419    1.0
+419    1.5
+42     2.0
+42     3.0
+421    1.0
+421    1.5
+424    2.0
+424    3.0
+427    1.0
+427    1.5
+429    2.0
+429    3.0
+43     1.0
+43     1.5
+430    3.0
+430    4.5
+431    3.0
+431    4.5
+432    1.0
+432    1.5
+435    1.0
+435    1.5
+436    1.0
+436    1.5
+437    1.0
+437    1.5
+438    3.0
+438    4.5
+439    2.0
+439    3.0
+44     1.0
+44     1.5
+443    1.0
+443    1.5
+444    1.0
+444    1.5
+446    1.0
+446    1.5
+448    1.0
+448    1.5
+449    1.0
+449    1.5
+452    1.0
+452    1.5
+453    1.0
+453    1.5
+454    3.0
+454    4.5
+455    1.0
+455    1.5
+457    1.0
+457    1.5
+458    2.0
+458    3.0
+459    2.0
+459    3.0
+460    1.0
+460    1.5
+462    2.0
+462    3.0
+463    2.0
+463    3.0
+466    3.0
+466    4.5
+467    1.0
+467    1.5
+468    4.0
+468    6.0
+469    5.0
+469    7.5
+47     1.0
+47     1.5
+470    1.0
+470    1.5
+472    1.0
+472    1.5
+475    1.0
+475    1.5
+477    1.0
+477    1.5
+478    2.0
+478    3.0
+479    1.0
+479    1.5
+480    3.0
+480    4.5
+481    1.0
+481    1.5
+482    1.0
+482    1.5
+483    1.0
+483    1.5
+484    1.0
+484    1.5
+485    1.0
+485    1.5
+487    1.0
+487    1.5
+489    4.0
+489    6.0
+490    1.0
+490    1.5
+491    1.0
+491    1.5
+492    2.0
+492    3.0
+493    1.0
+493    1.5
+494    1.0
+494    1.5
+495    1.0
+495    1.5
+496    1.0
+496    1.5
+497    1.0
+497    1.5
+498    3.0
+498    4.5
+5      3.0
+5      4.5
+51     2.0
+51     3.0
+53     1.0
+53     1.5
+54     1.0
+54     1.5
+57     1.0
+57     1.5
+58     2.0
+58     3.0
+64     1.0
+64     1.5
+65     1.0
+65     1.5
+66     1.0
+66     1.5
+67     2.0
+67     3.0
+69     1.0
+69     1.5
+70     3.0
+70     4.5
+72     2.0
+72     3.0
+74     1.0
+74     1.5
+76     2.0
+76     3.0
+77     1.0
+77     1.5
+78     1.0
+78     1.5
+8      1.0
+8      1.5
+80     1.0
+80     1.5
+82     1.0
+82     1.5
+83     2.0
+83     3.0
+84     2.0
+84     3.0
+85     1.0
+85     1.5
+86     1.0
+86     1.5
+87     1.0
+87     1.5
+9      1.0
+9      1.5
+90     3.0
+90     4.5
+92     1.0
+92     1.5
+95     2.0
+95     3.0
+96     1.0
+96     1.5
+97     2.0
+97     3.0
+98     2.0
+98     3.0

hive git commit: HIVE-9217: UnionProcessor misses results for multi-insert when hive.optimize.union.remove=true (Pengcheng Xiong via Laljo John Pullokkaran)

Reply via email to