Repository: hive Updated Branches: refs/heads/master ce36c439c -> 657995de1
HIVE-20570: Fix plan for query with hive.optimize.union.remove set to true (Janaki Lahorani, reviewed by Andrew Sherman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/657995de Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/657995de Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/657995de Branch: refs/heads/master Commit: 657995de162248f66faa458b837c1312eed3b0c1 Parents: ce36c43 Author: Janaki Lahorani <[email protected]> Authored: Wed Sep 19 09:41:24 2018 -0700 Committer: Andrew Sherman <[email protected]> Committed: Wed Sep 19 09:42:55 2018 -0700 ---------------------------------------------------------------------- .../hive/ql/optimizer/GenMRFileSink1.java | 12 +- .../queries/clientpositive/union_remove_plan.q | 18 +++ .../skewjoin_union_remove_1.q.out | 4 +- .../skewjoin_union_remove_2.q.out | 2 +- .../clientpositive/union_remove_plan.q.out | 134 +++++++++++++++++++ 5 files changed, 160 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/657995de/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java index 2f00c22..25c6b24 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java @@ -116,7 +116,9 @@ public class GenMRFileSink1 implements NodeProcessor { } FileSinkDesc fileSinkDesc = fsOp.getConf(); - if (fileSinkDesc.isLinkedFileSink()) { + // There are linked file sink operators and child tasks are present + if (fileSinkDesc.isLinkedFileSink() && (currTask.getChildTasks() != null) && + (currTask.getChildTasks().size() == 1)) { Map<FileSinkDesc, Task<? extends Serializable>> linkedFileDescTasks = ctx.getLinkedFileDescTasks(); if (linkedFileDescTasks == null) { @@ -124,12 +126,8 @@ public class GenMRFileSink1 implements NodeProcessor { ctx.setLinkedFileDescTasks(linkedFileDescTasks); } - // The child tasks may be null in case of a select - if ((currTask.getChildTasks() != null) && - (currTask.getChildTasks().size() == 1)) { - for (FileSinkDesc fileDesc : fileSinkDesc.getLinkedFileSinkDesc()) { - linkedFileDescTasks.put(fileDesc, currTask.getChildTasks().get(0)); - } + for (FileSinkDesc fileDesc : fileSinkDesc.getLinkedFileSinkDesc()) { + linkedFileDescTasks.put(fileDesc, currTask.getChildTasks().get(0)); } } http://git-wip-us.apache.org/repos/asf/hive/blob/657995de/ql/src/test/queries/clientpositive/union_remove_plan.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union_remove_plan.q b/ql/src/test/queries/clientpositive/union_remove_plan.q new file mode 100644 index 0000000..e4ad41f --- /dev/null +++ b/ql/src/test/queries/clientpositive/union_remove_plan.q @@ -0,0 +1,18 @@ +-- Tests a query with union all that can be optimized by removing the union operator + +create table if not exists test_table(column1 string, column2 int); +insert into test_table values('a',1),('b',2); + +set hive.optimize.union.remove=true; +set mapred.input.dir.recursive=true; + +explain +select column1 from test_table group by column1 +union all +select column1 from test_table group by column1; + +select column1 from test_table group by column1 +union all +select column1 from test_table group by column1; + +drop table test_table; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/657995de/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out b/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out index a7351df..5d0a6bb 100644 --- a/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out @@ -43,7 +43,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-3 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-1, Stage-3 STAGE PLANS: Stage: Stage-1 @@ -185,7 +185,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-3 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-1, Stage-3 STAGE PLANS: Stage: Stage-1 http://git-wip-us.apache.org/repos/asf/hive/blob/657995de/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out b/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out index 31df384..0d467fe 100644 --- a/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out @@ -59,7 +59,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-4 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-1, Stage-4 STAGE PLANS: Stage: Stage-1 http://git-wip-us.apache.org/repos/asf/hive/blob/657995de/ql/src/test/results/clientpositive/union_remove_plan.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/union_remove_plan.q.out b/ql/src/test/results/clientpositive/union_remove_plan.q.out new file mode 100644 index 0000000..b7f5efb --- /dev/null +++ b/ql/src/test/results/clientpositive/union_remove_plan.q.out @@ -0,0 +1,134 @@ +PREHOOK: query: create table if not exists test_table(column1 string, column2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table +POSTHOOK: query: create table if not exists test_table(column1 string, column2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table +PREHOOK: query: insert into test_table values('a',1),('b',2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_table +POSTHOOK: query: insert into test_table values('a',1),('b',2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_table +POSTHOOK: Lineage: test_table.column1 SCRIPT [] +POSTHOOK: Lineage: test_table.column2 SCRIPT [] +PREHOOK: query: explain +select column1 from test_table group by column1 +union all +select column1 from test_table group by column1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select column1 from test_table group by column1 +union all +select column1 from test_table group by column1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 is a root stage + Stage-0 depends on stages: Stage-1, Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_table + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: column1 (type: string) + outputColumnNames: column1 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: column1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: test_table + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: column1 (type: string) + outputColumnNames: column1 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: column1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select column1 from test_table group by column1 +union all +select column1 from test_table group by column1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table +#### A masked pattern was here #### +POSTHOOK: query: select column1 from test_table group by column1 +union all +select column1 from test_table group by column1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table +#### A masked pattern was here #### +a +b +a +b +PREHOOK: query: drop table test_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table +PREHOOK: Output: default@test_table +POSTHOOK: query: drop table test_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table +POSTHOOK: Output: default@test_table
