Repository: hive Updated Branches: refs/heads/branch-1.0 fbcef73cc -> 02a08237d
HIVE-9217: UnionProcessor misses results for multi-insert when hive.optimize.union.remove=true (Pengcheng Xiong via Laljo John Pullokkaran) git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1650409 13f79535-47bb-0310-9956-ffa450edef68 Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/02a08237 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/02a08237 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/02a08237 Branch: refs/heads/branch-1.0 Commit: 02a08237df27bd26584a4604f7d2fff5fd840475 Parents: fbcef73 Author: John Pullokkaran <jpull...@apache.org> Authored: Thu Jan 8 22:41:21 2015 +0000 Committer: Pengcheng Xiong <pxi...@apache.org> Committed: Tue Aug 11 17:29:21 2015 -0700 ---------------------------------------------------------------------- .../optimizer/unionproc/UnionProcFactory.java | 15 +- .../clientpositive/union_remove_6_subq.q | 76 ++ .../clientpositive/union_remove_6_subq.q.out | 1242 ++++++++++++++++++ 3 files changed, 1329 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/02a08237/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java index a985c4f..94947d6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java @@ -265,13 +265,20 @@ public final class UnionProcFactory { Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>)stack.get(pos); + // (1) Because we have operator.supportUnionRemoveOptimization() for + // true only in SEL and FIL operators, + // this rule will actually only match UNION%(SEL%|FIL%)*FS% + // (2) The assumption here is that, if + // operator.getChildOperators().size() > 1, we are going to have + // multiple FS operators, i.e., multiple inserts. + // Current implementation does not support this. More details, please + // see HIVE-9217. + if (operator.getChildOperators() != null && operator.getChildOperators().size() > 1) { + return null; + } // Break if it encountered a union if (operator instanceof UnionOperator) { union = (UnionOperator)operator; - // No need for this optimization in case of multi-table inserts - if (union.getChildOperators().size() > 1) { - return null; - } break; } http://git-wip-us.apache.org/repos/asf/hive/blob/02a08237/ql/src/test/queries/clientpositive/union_remove_6_subq.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union_remove_6_subq.q b/ql/src/test/queries/clientpositive/union_remove_6_subq.q new file mode 100644 index 0000000..8bcac6f --- /dev/null +++ b/ql/src/test/queries/clientpositive/union_remove_6_subq.q @@ -0,0 +1,76 @@ +set hive.stats.autogather=false; +set hive.optimize.union.remove=true; +set hive.mapred.supports.subdirectories=true; + +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; +set mapred.input.dir.recursive=true; + +-- This is to test the union->selectstar->filesink optimization +-- Union of 2 subqueries is performed (all of which are mapred queries) +-- followed by select star and a file sink in 2 output tables. +-- The optimiaztion does not take affect since it is a multi-table insert. +-- It does not matter, whether the output is merged or not. In this case, +-- merging is turned off + +create table inputTbl1(key string, val string) stored as textfile; +create table outputTbl1(key string, values bigint) stored as textfile; +create table outputTbl2(key string, values bigint) stored as textfile; + +load data local inpath '../../data/files/T1.txt' into table inputTbl1; + +explain +FROM ( + select * from( + SELECT key, count(1) as values from inputTbl1 group by key + UNION ALL + SELECT key, count(1) as values from inputTbl1 group by key + )subq +) a +insert overwrite table outputTbl1 select * +insert overwrite table outputTbl2 select *; + +FROM ( + select * from( + SELECT key, count(1) as values from inputTbl1 group by key + UNION ALL + SELECT key, count(1) as values from inputTbl1 group by key + )subq +) a +insert overwrite table outputTbl1 select * +insert overwrite table outputTbl2 select *; + +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +select * from outputTbl1 order by key, values; +select * from outputTbl2 order by key, values; + +-- The following queries guarantee the correctness. +explain +select avg(c) from( + SELECT count(1)-200 as c from src + UNION ALL + SELECT count(1) as c from src +)subq; + +select avg(c) from( + SELECT count(1)-200 as c from src + UNION ALL + SELECT count(1) as c from src +)subq; + +explain +select key, avg(c) over w from( + SELECT key, count(1)*2 as c from src group by key + UNION ALL + SELECT key, count(1) as c from src group by key +)subq group by key, c +WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING); + +select key, avg(c) over w from( + SELECT key, count(1)*2 as c from src group by key + UNION ALL + SELECT key, count(1) as c from src group by key +)subq group by key, c +WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING); + + http://git-wip-us.apache.org/repos/asf/hive/blob/02a08237/ql/src/test/results/clientpositive/union_remove_6_subq.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out new file mode 100644 index 0000000..63598f9 --- /dev/null +++ b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out @@ -0,0 +1,1242 @@ +PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +-- Union of 2 subqueries is performed (all of which are mapred queries) +-- followed by select star and a file sink in 2 output tables. +-- The optimiaztion does not take affect since it is a multi-table insert. +-- It does not matter, whether the output is merged or not. In this case, +-- merging is turned off + +create table inputTbl1(key string, val string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@inputTbl1 +POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +-- Union of 2 subqueries is performed (all of which are mapred queries) +-- followed by select star and a file sink in 2 output tables. +-- The optimiaztion does not take affect since it is a multi-table insert. +-- It does not matter, whether the output is merged or not. In this case, +-- merging is turned off + +create table inputTbl1(key string, val string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@inputTbl1 +PREHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@outputTbl1 +POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@outputTbl1 +PREHOOK: query: create table outputTbl2(key string, values bigint) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@outputTbl2 +POSTHOOK: query: create table outputTbl2(key string, values bigint) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@outputTbl2 +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@inputtbl1 +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@inputtbl1 +PREHOOK: query: explain +FROM ( + select * from( + SELECT key, count(1) as values from inputTbl1 group by key + UNION ALL + SELECT key, count(1) as values from inputTbl1 group by key + )subq +) a +insert overwrite table outputTbl1 select * +insert overwrite table outputTbl2 select * +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM ( + select * from( + SELECT key, count(1) as values from inputTbl1 group by key + UNION ALL + SELECT key, count(1) as values from inputTbl1 group by key + )subq +) a +insert overwrite table outputTbl1 select * +insert overwrite table outputTbl2 select * +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2, Stage-4 + Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: inputtbl1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl2 + TableScan + Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl2 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: inputtbl1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + +PREHOOK: query: FROM ( + select * from( + SELECT key, count(1) as values from inputTbl1 group by key + UNION ALL + SELECT key, count(1) as values from inputTbl1 group by key + )subq +) a +insert overwrite table outputTbl1 select * +insert overwrite table outputTbl2 select * +PREHOOK: type: QUERY +PREHOOK: Input: default@inputtbl1 +PREHOOK: Output: default@outputtbl1 +PREHOOK: Output: default@outputtbl2 +POSTHOOK: query: FROM ( + select * from( + SELECT key, count(1) as values from inputTbl1 group by key + UNION ALL + SELECT key, count(1) as values from inputTbl1 group by key + )subq +) a +insert overwrite table outputTbl1 select * +insert overwrite table outputTbl2 select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@inputtbl1 +POSTHOOK: Output: default@outputtbl1 +POSTHOOK: Output: default@outputtbl2 +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ] +POSTHOOK: Lineage: outputtbl2.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl2.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ] +PREHOOK: query: select * from outputTbl1 order by key, values +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl1 +#### A masked pattern was here #### +POSTHOOK: query: select * from outputTbl1 order by key, values +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl1 +#### A masked pattern was here #### +1 1 +1 1 +2 1 +2 1 +3 1 +3 1 +7 1 +7 1 +8 2 +8 2 +PREHOOK: query: select * from outputTbl2 order by key, values +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl2 +#### A masked pattern was here #### +POSTHOOK: query: select * from outputTbl2 order by key, values +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl2 +#### A masked pattern was here #### +1 1 +1 1 +2 1 +2 1 +3 1 +3 1 +7 1 +7 1 +8 2 +8 2 +PREHOOK: query: -- The following queries guarantee the correctness. +explain +select avg(c) from( + SELECT count(1)-200 as c from src + UNION ALL + SELECT count(1) as c from src +)subq +PREHOOK: type: QUERY +POSTHOOK: query: -- The following queries guarantee the correctness. +explain +select avg(c) from( + SELECT count(1)-200 as c from src + UNION ALL + SELECT count(1) as c from src +)subq +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 - 200) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: avg(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>) + TableScan + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: avg(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select avg(c) from( + SELECT count(1)-200 as c from src + UNION ALL + SELECT count(1) as c from src +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select avg(c) from( + SELECT count(1)-200 as c from src + UNION ALL + SELECT count(1) as c from src +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +400.0 +PREHOOK: query: explain +select key, avg(c) over w from( + SELECT key, count(1)*2 as c from src group by key + UNION ALL + SELECT key, count(1) as c from src group by key +)subq group by key, c +WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, avg(c) over w from( + SELECT key, count(1)*2 as c from src group by key + UNION ALL + SELECT key, count(1) as c from src group by key +)subq group by key, c +WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 * 2) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Union + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reduce Operator Tree: + Extract + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _wcol0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, avg(c) over w from( + SELECT key, count(1)*2 as c from src group by key + UNION ALL + SELECT key, count(1) as c from src group by key +)subq group by key, c +WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, avg(c) over w from( + SELECT key, count(1)*2 as c from src group by key + UNION ALL + SELECT key, count(1) as c from src group by key +)subq group by key, c +WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3.0 +0 4.5 +10 1.0 +10 1.5 +100 2.0 +100 3.0 +103 2.0 +103 3.0 +104 2.0 +104 3.0 +105 1.0 +105 1.5 +11 1.0 +11 1.5 +111 1.0 +111 1.5 +113 2.0 +113 3.0 +114 1.0 +114 1.5 +116 1.0 +116 1.5 +118 2.0 +118 3.0 +119 3.0 +119 4.5 +12 2.0 +12 3.0 +120 2.0 +120 3.0 +125 2.0 +125 3.0 +126 1.0 +126 1.5 +128 3.0 +128 4.5 +129 2.0 +129 3.0 +131 1.0 +131 1.5 +133 1.0 +133 1.5 +134 2.0 +134 3.0 +136 1.0 +136 1.5 +137 2.0 +137 3.0 +138 4.0 +138 6.0 +143 1.0 +143 1.5 +145 1.0 +145 1.5 +146 2.0 +146 3.0 +149 2.0 +149 3.0 +15 2.0 +15 3.0 +150 1.0 +150 1.5 +152 2.0 +152 3.0 +153 1.0 +153 1.5 +155 1.0 +155 1.5 +156 1.0 +156 1.5 +157 1.0 +157 1.5 +158 1.0 +158 1.5 +160 1.0 +160 1.5 +162 1.0 +162 1.5 +163 1.0 +163 1.5 +164 2.0 +164 3.0 +165 2.0 +165 3.0 +166 1.0 +166 1.5 +167 3.0 +167 4.5 +168 1.0 +168 1.5 +169 4.0 +169 6.0 +17 1.0 +17 1.5 +170 1.0 +170 1.5 +172 2.0 +172 3.0 +174 2.0 +174 3.0 +175 2.0 +175 3.0 +176 2.0 +176 3.0 +177 1.0 +177 1.5 +178 1.0 +178 1.5 +179 2.0 +179 3.0 +18 2.0 +18 3.0 +180 1.0 +180 1.5 +181 1.0 +181 1.5 +183 1.0 +183 1.5 +186 1.0 +186 1.5 +187 3.0 +187 4.5 +189 1.0 +189 1.5 +19 1.0 +19 1.5 +190 1.0 +190 1.5 +191 2.0 +191 3.0 +192 1.0 +192 1.5 +193 3.0 +193 4.5 +194 1.0 +194 1.5 +195 2.0 +195 3.0 +196 1.0 +196 1.5 +197 2.0 +197 3.0 +199 3.0 +199 4.5 +2 1.0 +2 1.5 +20 1.0 +20 1.5 +200 2.0 +200 3.0 +201 1.0 +201 1.5 +202 1.0 +202 1.5 +203 2.0 +203 3.0 +205 2.0 +205 3.0 +207 2.0 +207 3.0 +208 3.0 +208 4.5 +209 2.0 +209 3.0 +213 2.0 +213 3.0 +214 1.0 +214 1.5 +216 2.0 +216 3.0 +217 2.0 +217 3.0 +218 1.0 +218 1.5 +219 2.0 +219 3.0 +221 2.0 +221 3.0 +222 1.0 +222 1.5 +223 2.0 +223 3.0 +224 2.0 +224 3.0 +226 1.0 +226 1.5 +228 1.0 +228 1.5 +229 2.0 +229 3.0 +230 5.0 +230 7.5 +233 2.0 +233 3.0 +235 1.0 +235 1.5 +237 2.0 +237 3.0 +238 2.0 +238 3.0 +239 2.0 +239 3.0 +24 2.0 +24 3.0 +241 1.0 +241 1.5 +242 2.0 +242 3.0 +244 1.0 +244 1.5 +247 1.0 +247 1.5 +248 1.0 +248 1.5 +249 1.0 +249 1.5 +252 1.0 +252 1.5 +255 2.0 +255 3.0 +256 2.0 +256 3.0 +257 1.0 +257 1.5 +258 1.0 +258 1.5 +26 2.0 +26 3.0 +260 1.0 +260 1.5 +262 1.0 +262 1.5 +263 1.0 +263 1.5 +265 2.0 +265 3.0 +266 1.0 +266 1.5 +27 1.0 +27 1.5 +272 2.0 +272 3.0 +273 3.0 +273 4.5 +274 1.0 +274 1.5 +275 1.0 +275 1.5 +277 4.0 +277 6.0 +278 2.0 +278 3.0 +28 1.0 +28 1.5 +280 2.0 +280 3.0 +281 2.0 +281 3.0 +282 2.0 +282 3.0 +283 1.0 +283 1.5 +284 1.0 +284 1.5 +285 1.0 +285 1.5 +286 1.0 +286 1.5 +287 1.0 +287 1.5 +288 2.0 +288 3.0 +289 1.0 +289 1.5 +291 1.0 +291 1.5 +292 1.0 +292 1.5 +296 1.0 +296 1.5 +298 3.0 +298 4.5 +30 1.0 +30 1.5 +302 1.0 +302 1.5 +305 1.0 +305 1.5 +306 1.0 +306 1.5 +307 2.0 +307 3.0 +308 1.0 +308 1.5 +309 2.0 +309 3.0 +310 1.0 +310 1.5 +311 3.0 +311 4.5 +315 1.0 +315 1.5 +316 3.0 +316 4.5 +317 2.0 +317 3.0 +318 3.0 +318 4.5 +321 2.0 +321 3.0 +322 2.0 +322 3.0 +323 1.0 +323 1.5 +325 2.0 +325 3.0 +327 3.0 +327 4.5 +33 1.0 +33 1.5 +331 2.0 +331 3.0 +332 1.0 +332 1.5 +333 2.0 +333 3.0 +335 1.0 +335 1.5 +336 1.0 +336 1.5 +338 1.0 +338 1.5 +339 1.0 +339 1.5 +34 1.0 +34 1.5 +341 1.0 +341 1.5 +342 2.0 +342 3.0 +344 2.0 +344 3.0 +345 1.0 +345 1.5 +348 5.0 +348 7.5 +35 3.0 +35 4.5 +351 1.0 +351 1.5 +353 2.0 +353 3.0 +356 1.0 +356 1.5 +360 1.0 +360 1.5 +362 1.0 +362 1.5 +364 1.0 +364 1.5 +365 1.0 +365 1.5 +366 1.0 +366 1.5 +367 2.0 +367 3.0 +368 1.0 +368 1.5 +369 3.0 +369 4.5 +37 2.0 +37 3.0 +373 1.0 +373 1.5 +374 1.0 +374 1.5 +375 1.0 +375 1.5 +377 1.0 +377 1.5 +378 1.0 +378 1.5 +379 1.0 +379 1.5 +382 2.0 +382 3.0 +384 3.0 +384 4.5 +386 1.0 +386 1.5 +389 1.0 +389 1.5 +392 1.0 +392 1.5 +393 1.0 +393 1.5 +394 1.0 +394 1.5 +395 2.0 +395 3.0 +396 3.0 +396 4.5 +397 2.0 +397 3.0 +399 2.0 +399 3.0 +4 1.0 +4 1.5 +400 1.0 +400 1.5 +401 5.0 +401 7.5 +402 1.0 +402 1.5 +403 3.0 +403 4.5 +404 2.0 +404 3.0 +406 4.0 +406 6.0 +407 1.0 +407 1.5 +409 3.0 +409 4.5 +41 1.0 +41 1.5 +411 1.0 +411 1.5 +413 2.0 +413 3.0 +414 2.0 +414 3.0 +417 3.0 +417 4.5 +418 1.0 +418 1.5 +419 1.0 +419 1.5 +42 2.0 +42 3.0 +421 1.0 +421 1.5 +424 2.0 +424 3.0 +427 1.0 +427 1.5 +429 2.0 +429 3.0 +43 1.0 +43 1.5 +430 3.0 +430 4.5 +431 3.0 +431 4.5 +432 1.0 +432 1.5 +435 1.0 +435 1.5 +436 1.0 +436 1.5 +437 1.0 +437 1.5 +438 3.0 +438 4.5 +439 2.0 +439 3.0 +44 1.0 +44 1.5 +443 1.0 +443 1.5 +444 1.0 +444 1.5 +446 1.0 +446 1.5 +448 1.0 +448 1.5 +449 1.0 +449 1.5 +452 1.0 +452 1.5 +453 1.0 +453 1.5 +454 3.0 +454 4.5 +455 1.0 +455 1.5 +457 1.0 +457 1.5 +458 2.0 +458 3.0 +459 2.0 +459 3.0 +460 1.0 +460 1.5 +462 2.0 +462 3.0 +463 2.0 +463 3.0 +466 3.0 +466 4.5 +467 1.0 +467 1.5 +468 4.0 +468 6.0 +469 5.0 +469 7.5 +47 1.0 +47 1.5 +470 1.0 +470 1.5 +472 1.0 +472 1.5 +475 1.0 +475 1.5 +477 1.0 +477 1.5 +478 2.0 +478 3.0 +479 1.0 +479 1.5 +480 3.0 +480 4.5 +481 1.0 +481 1.5 +482 1.0 +482 1.5 +483 1.0 +483 1.5 +484 1.0 +484 1.5 +485 1.0 +485 1.5 +487 1.0 +487 1.5 +489 4.0 +489 6.0 +490 1.0 +490 1.5 +491 1.0 +491 1.5 +492 2.0 +492 3.0 +493 1.0 +493 1.5 +494 1.0 +494 1.5 +495 1.0 +495 1.5 +496 1.0 +496 1.5 +497 1.0 +497 1.5 +498 3.0 +498 4.5 +5 3.0 +5 4.5 +51 2.0 +51 3.0 +53 1.0 +53 1.5 +54 1.0 +54 1.5 +57 1.0 +57 1.5 +58 2.0 +58 3.0 +64 1.0 +64 1.5 +65 1.0 +65 1.5 +66 1.0 +66 1.5 +67 2.0 +67 3.0 +69 1.0 +69 1.5 +70 3.0 +70 4.5 +72 2.0 +72 3.0 +74 1.0 +74 1.5 +76 2.0 +76 3.0 +77 1.0 +77 1.5 +78 1.0 +78 1.5 +8 1.0 +8 1.5 +80 1.0 +80 1.5 +82 1.0 +82 1.5 +83 2.0 +83 3.0 +84 2.0 +84 3.0 +85 1.0 +85 1.5 +86 1.0 +86 1.5 +87 1.0 +87 1.5 +9 1.0 +9 1.5 +90 3.0 +90 4.5 +92 1.0 +92 1.5 +95 2.0 +95 3.0 +96 1.0 +96 1.5 +97 2.0 +97 3.0 +98 2.0 +98 3.0