Repository: hive
Updated Branches:
  refs/heads/master ce36c439c -> 657995de1


HIVE-20570: Fix plan for query with hive.optimize.union.remove set to true 
(Janaki Lahorani, reviewed by Andrew Sherman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/657995de
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/657995de
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/657995de

Branch: refs/heads/master
Commit: 657995de162248f66faa458b837c1312eed3b0c1
Parents: ce36c43
Author: Janaki Lahorani <[email protected]>
Authored: Wed Sep 19 09:41:24 2018 -0700
Committer: Andrew Sherman <[email protected]>
Committed: Wed Sep 19 09:42:55 2018 -0700

----------------------------------------------------------------------
 .../hive/ql/optimizer/GenMRFileSink1.java       |  12 +-
 .../queries/clientpositive/union_remove_plan.q  |  18 +++
 .../skewjoin_union_remove_1.q.out               |   4 +-
 .../skewjoin_union_remove_2.q.out               |   2 +-
 .../clientpositive/union_remove_plan.q.out      | 134 +++++++++++++++++++
 5 files changed, 160 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/657995de/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
index 2f00c22..25c6b24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
@@ -116,7 +116,9 @@ public class GenMRFileSink1 implements NodeProcessor {
     }
 
     FileSinkDesc fileSinkDesc = fsOp.getConf();
-    if (fileSinkDesc.isLinkedFileSink()) {
+    // There are linked file sink operators and child tasks are present
+    if (fileSinkDesc.isLinkedFileSink() && (currTask.getChildTasks() != null) &&
+        (currTask.getChildTasks().size() == 1)) {
       Map<FileSinkDesc, Task<? extends Serializable>> linkedFileDescTasks =
         ctx.getLinkedFileDescTasks();
       if (linkedFileDescTasks == null) {
@@ -124,12 +126,8 @@ public class GenMRFileSink1 implements NodeProcessor {
         ctx.setLinkedFileDescTasks(linkedFileDescTasks);
       }
 
-      // The child tasks may be null in case of a select
-      if ((currTask.getChildTasks() != null) &&
-        (currTask.getChildTasks().size() == 1)) {
-        for (FileSinkDesc fileDesc : fileSinkDesc.getLinkedFileSinkDesc()) {
-          linkedFileDescTasks.put(fileDesc, currTask.getChildTasks().get(0));
-        }
+      for (FileSinkDesc fileDesc : fileSinkDesc.getLinkedFileSinkDesc()) {
+        linkedFileDescTasks.put(fileDesc, currTask.getChildTasks().get(0));
       }
     }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/657995de/ql/src/test/queries/clientpositive/union_remove_plan.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_remove_plan.q b/ql/src/test/queries/clientpositive/union_remove_plan.q
new file mode 100644
index 0000000..e4ad41f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_remove_plan.q
@@ -0,0 +1,18 @@
+-- Tests a query with union all that can be optimized by removing the union operator
+
+create table if not exists test_table(column1 string, column2 int);
+insert into test_table values('a',1),('b',2);
+
+set hive.optimize.union.remove=true;
+set mapred.input.dir.recursive=true;
+
+explain
+select column1 from test_table group by column1
+union all
+select column1 from test_table group by column1;
+
+select column1 from test_table group by column1
+union all
+select column1 from test_table group by column1;
+
+drop table test_table;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/657995de/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out b/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out
index a7351df..5d0a6bb 100644
--- a/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out
+++ b/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out
@@ -43,7 +43,7 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-3 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-1, Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -185,7 +185,7 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-3 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-1, Stage-3
 
 STAGE PLANS:
   Stage: Stage-1

http://git-wip-us.apache.org/repos/asf/hive/blob/657995de/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out b/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out
index 31df384..0d467fe 100644
--- a/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out
+++ b/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out
@@ -59,7 +59,7 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-4 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-1, Stage-4
 
 STAGE PLANS:
   Stage: Stage-1

http://git-wip-us.apache.org/repos/asf/hive/blob/657995de/ql/src/test/results/clientpositive/union_remove_plan.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_remove_plan.q.out b/ql/src/test/results/clientpositive/union_remove_plan.q.out
new file mode 100644
index 0000000..b7f5efb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/union_remove_plan.q.out
@@ -0,0 +1,134 @@
+PREHOOK: query: create table if not exists test_table(column1 string, column2 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_table
+POSTHOOK: query: create table if not exists test_table(column1 string, column2 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_table
+PREHOOK: query: insert into test_table values('a',1),('b',2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_table
+POSTHOOK: query: insert into test_table values('a',1),('b',2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_table
+POSTHOOK: Lineage: test_table.column1 SCRIPT []
+POSTHOOK: Lineage: test_table.column2 SCRIPT []
+PREHOOK: query: explain
+select column1 from test_table group by column1
+union all
+select column1 from test_table group by column1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select column1 from test_table group by column1
+union all
+select column1 from test_table group by column1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-1, Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test_table
+            Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: column1 (type: string)
+              outputColumnNames: column1
+              Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: column1 (type: string)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test_table
+            Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: column1 (type: string)
+              outputColumnNames: column1
+              Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: column1 (type: string)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select column1 from test_table group by column1
+union all
+select column1 from test_table group by column1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table
+#### A masked pattern was here ####
+POSTHOOK: query: select column1 from test_table group by column1
+union all
+select column1 from test_table group by column1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table
+#### A masked pattern was here ####
+a
+b
+a
+b
+PREHOOK: query: drop table test_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_table
+PREHOOK: Output: default@test_table
+POSTHOOK: query: drop table test_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_table
+POSTHOOK: Output: default@test_table

Reply via email to