[6/6] hive git commit: HIVE-16100: Dynamic Sorted Partition optimizer loses sibling operators (Vineet Garg, Gopal V reviewed by Ashutosh Chauhan)

vgarg Tue, 11 Dec 2018 16:30:12 -0800

HIVE-16100: Dynamic Sorted Partition optimizer loses sibling operators (Vineet Garg, Gopal V reviewed by Ashutosh Chauhan)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b650083f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b650083f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b650083f

Branch: refs/heads/master
Commit: b650083f13daeffba063511ad991a5ac8af9f3cc
Parents: 5971e69
Author: Vineet Garg <vg...@apache.org>
Authored: Tue Dec 11 16:29:04 2018 -0800
Committer: Vineet Garg <vg...@apache.org>
Committed: Tue Dec 11 16:29:04 2018 -0800

----------------------------------------------------------------------
 .../optimizer/SortedDynPartitionOptimizer.java  |   5 +-
 .../clientpositive/dynpart_sort_optimization.q  |  32 +
 .../clientpositive/autoColumnStats_1.q.out      |  20 +-
 .../clientpositive/autoColumnStats_2.q.out      |  20 +-
 .../clientpositive/autoColumnStats_6.q.out      |  62 +-
 .../clientpositive/autoColumnStats_8.q.out      | 265 ++++-
 .../extrapolate_part_stats_partial.q.out        |  20 +-
 .../infer_bucket_sort_dyn_part.q.out            |  85 +-
 .../results/clientpositive/insert_into6.q.out   |  62 +-
 .../llap/dynpart_sort_opt_vectorization.q.out   | 117 ++-
 .../llap/dynpart_sort_optimization.q.out        | 984 ++++++++++++++++++-
 .../clientpositive/llap/llap_partitioned.q.out  |   8 +-
 .../clientpositive/llap/llap_stats.q.out        |   4 +-
 .../clientpositive/llap/load_dyn_part5.q.out    |  39 +-
 .../clientpositive/llap/orc_merge10.q.out       | 111 +++
 .../clientpositive/llap/orc_merge2.q.out        |  37 +
 .../clientpositive/llap/orc_merge7.q.out        |  74 ++
 .../clientpositive/llap/orc_merge_diff_fs.q.out | 111 +++
 .../llap/orc_merge_incompat2.q.out              |  37 +
 .../results/clientpositive/llap/tez_dml.q.out   |  39 +-
 .../llap/vector_count_distinct.q.out            |  18 +-
 .../llap/vector_partitioned_date_time.q.out     |  96 +-
 .../results/clientpositive/load_dyn_part1.q.out | 133 ++-
 .../clientpositive/load_dyn_part10.q.out        |  62 +-
 .../results/clientpositive/load_dyn_part3.q.out |  62 +-
 .../results/clientpositive/load_dyn_part4.q.out |  62 +-
 .../results/clientpositive/load_dyn_part8.q.out | 265 ++++-
 .../results/clientpositive/load_dyn_part9.q.out |  62 +-
 ql/src/test/results/clientpositive/merge3.q.out | 151 ++-
 ql/src/test/results/clientpositive/merge4.q.out |  80 +-
 .../merge_dynamic_partition.q.out               | 142 ++-
 .../merge_dynamic_partition2.q.out              |  80 +-
 .../merge_dynamic_partition3.q.out              |  80 +-
 .../merge_dynamic_partition4.q.out              |  80 +-
 .../merge_dynamic_partition5.q.out              |  80 +-
 .../results/clientpositive/orc_merge10.q.out    | 222 ++++-
 .../results/clientpositive/orc_merge2.q.out     |  62 +-
 .../clientpositive/orc_merge_diff_fs.q.out      | 222 ++++-
 .../clientpositive/orc_merge_incompat2.q.out    |  62 +-
 .../spark/dynpart_sort_optimization.q.out       | 368 +++++++
 ql/src/test/results/clientpositive/stats4.q.out | 133 ++-
 .../clientpositive/stats_empty_dyn_part.q.out   |  62 +-
 42 files changed, 4203 insertions(+), 513 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b650083f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index 498877a..6fd1093 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -188,8 +188,7 @@ public class SortedDynPartitionOptimizer extends Transform {
       // unlink connection between FS and its parent
       fsParent = fsOp.getParentOperators().get(0);
 
-      fsParent.getChildOperators().clear();
-
+      fsParent.getChildOperators().remove(fsOp);
 
       // if enforce bucketing/sorting is disabled numBuckets will not be set.
       // set the number of buckets here to ensure creation of empty buckets
@@ -399,7 +398,7 @@ public class SortedDynPartitionOptimizer extends Transform {
               return false;
             }
           }
-          rsParent.getChildOperators().clear();
+          rsParent.getChildOperators().remove(rsToRemove);
           rsParent.getChildOperators().add(rsGrandChild);
           rsGrandChild.getParentOperators().clear();
           rsGrandChild.getParentOperators().add(rsParent);

http://git-wip-us.apache.org/repos/asf/hive/blob/b650083f/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q
index dbeb874..03bf10b 100644
--- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q
+++ b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q
@@ -245,4 +245,36 @@ explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,
 set hive.optimize.sort.dynamic.partition.threshold=1;
 explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10;
 
+
+create table over1k_part4_0(i int) partitioned by (s string);
+create table over1k_part4_1(i int) partitioned by (s string);
+
+EXPLAIN
+WITH CTE AS (
+select i, s from over1k_n3 where s like 'bob%'
+)
+FROM (
+select * from CTE where i > 1 ORDER BY s
+) src1k
+insert overwrite table over1k_part4_0 partition(s)
+select i+1, s
+insert overwrite table over1k_part4_1 partition(s)
+select i+0, s
+;
+
+WITH CTE AS (
+select i, s from over1k_n3 where s like 'bob%'
+)
+FROM (
+select * from CTE where i > 1 ORDER BY s
+) src1k
+insert overwrite table over1k_part4_0 partition(s)
+select i+1, s
+insert overwrite table over1k_part4_1 partition(s)
+select i+0, s
+;
+
+select count(1) from over1k_part4_0;
+select count(1) from over1k_part4_1;
+
 drop table over1k_n3;

http://git-wip-us.apache.org/repos/asf/hive/blob/b650083f/ql/src/test/results/clientpositive/autoColumnStats_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_1.q.out b/ql/src/test/results/clientpositive/autoColumnStats_1.q.out
index bcabc02..4131535 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_1.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_1.q.out
@@ -1389,11 +1389,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: a_n12
-          Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: key (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain select value from b_n9
@@ -1416,11 +1416,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: b_n9
-          Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: value (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain select key from b_n9
@@ -1443,11 +1443,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: b_n9
-          Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: key (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain select value from c_n2
@@ -1472,11 +1472,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: c_n2
-          Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: value (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain select key from c_n2
@@ -1501,10 +1501,10 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: c_n2
-          Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: key (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b650083f/ql/src/test/results/clientpositive/autoColumnStats_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_2.q.out b/ql/src/test/results/clientpositive/autoColumnStats_2.q.out
index 3618b02..a837123 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_2.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_2.q.out
@@ -1519,11 +1519,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: a_n3
-          Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: key (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain select value from b_n3
@@ -1546,11 +1546,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: b_n3
-          Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: value (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain select key from b_n3
@@ -1573,11 +1573,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: b_n3
-          Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: key (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain select value from c_n1
@@ -1602,11 +1602,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: c_n1
-          Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: value (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain select key from c_n1
@@ -1631,10 +1631,10 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: c_n1
-          Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: key (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b650083f/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
index 2c2baf1..ad18a80 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
@@ -29,8 +29,9 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -43,11 +44,56 @@ STAGE PLANS:
               expressions: UDFToInteger(key) (type: int), value (type: 
string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int)
               outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col2 (type: int), _col3 (type: int)
-                sort order: ++
-                Map-reduce partition columns: _col2 (type: int), _col3 (type: 
int)
-                value expressions: _col0 (type: int), _col1 (type: string)
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), '1' 
(type: string), CAST( _col2 AS STRING) (type: string), CAST( _col3 AS STRING) 
(type: string)
+                outputColumnNames: key, value, one, two, three
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                  keys: one (type: string), two (type: string), three (type: 
string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string)
+                    sort order: +++
+                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 
(type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col4 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: int), _col3 (type: int)
+              sort order: ++
+              Map-reduce partition columns: _col2 (type: int), _col3 (type: 
int)
+              value expressions: _col0 (type: int), _col1 (type: string)
       Execution mode: vectorized
       Reduce Operator Tree:
         Select Operator
@@ -77,7 +123,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
               name: default.orcfile_merge2a
 
-  Stage: Stage-2
+  Stage: Stage-3
     Stats Work
       Basic Stats Work:
       Column Stats Desc:

http://git-wip-us.apache.org/repos/asf/hive/blob/b650083f/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_8.q.out b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
index d0c6602..3c1fb82 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
@@ -57,11 +57,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
-  Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+  Stage-4 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-1, Stage-5
+  Stage-6 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-6
 
 STAGE PLANS:
   Stage: Stage-2
@@ -79,14 +81,43 @@ STAGE PLANS:
                 expressions: key (type: string), value (type: string), ds 
(type: string), hr (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col2 (type: string), _col3 (type: string)
-                  null sort order: aa
-                  sort order: ++
-                  Map-reduce partition columns: _col2 (type: string), _col3 
(type: string)
-                  tag: -1
-                  value expressions: _col0 (type: string), _col1 (type: string)
-                  auto parallelism: false
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string), _col3 (type: string)
+                  outputColumnNames: key, value, ds, hr
+                  Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    keys: ds (type: string), hr (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: 
string)
+                      null sort order: aa
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                      Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
+                      tag: -1
+                      value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                      auto parallelism: false
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        column.name.delimiter ,
+                        columns _col0,_col1,_col2,_col3
+                        columns.types string,string,string,string
+                        escape.delim \
+                        serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
             Filter Operator
               isSamplingPred: false
               predicate: (ds > '2008-04-08') (type: boolean)
@@ -95,6 +126,34 @@ STAGE PLANS:
                 expressions: key (type: string), value (type: string), hr 
(type: string)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
+                  outputColumnNames: key, value, hr
+                  Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    keys: '2008-12-31' (type: string), hr (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 666 Data size: 7075 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          properties:
+                            column.name.delimiter ,
+                            columns _col0,_col1,_col2,_col3
+                            columns.types 
string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>
+                            escape.delim \
+                            serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                          serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
@@ -113,7 +172,6 @@ STAGE PLANS:
                   TotalFiles: 1
                   GatherStats: false
                   MultiFileSpray: false
-      Execution mode: vectorized
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -320,6 +378,82 @@ STAGE PLANS:
         /srcpart/ds=2008-04-09/hr=12 [srcpart]
       Needs Tagging: false
       Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: _col2 (type: string), _col3 (type: string)
+              null sort order: aa
+              sort order: ++
+              Map-reduce partition columns: _col2 (type: string), _col3 (type: 
string)
+              tag: -1
+              value expressions: _col0 (type: string), _col1 (type: string)
+              auto parallelism: false
+      Execution mode: vectorized
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10004
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2,_col3
+              columns.types string,string,string,string
+              escape.delim \
+              serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2,_col3
+                columns.types string,string,string,string
+                escape.delim \
+                serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
         Select Operator
           expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), 
KEY._col2 (type: string), KEY._col3 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
@@ -382,12 +516,99 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.nzhang_part8
 
-  Stage: Stage-3
+  Stage: Stage-4
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
 
-  Stage: Stage-4
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: '2008-12-31' (type: string), _col1 (type: 
string)
+              null sort order: aa
+              sort order: ++
+              Map-reduce partition columns: '2008-12-31' (type: string), _col1 
(type: string)
+              Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE 
Column stats: NONE
+              tag: -1
+              value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Execution mode: vectorized
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10005
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2,_col3
+              columns.types 
string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2,_col3
+                columns.types 
string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: '2008-12-31' (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 '2008-12-31' (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE 
Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-7
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part8
+          Is Table Level Stats: false
+
+  Stage: Stage-6
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -406,7 +627,7 @@ STAGE PLANS:
       Path -> Partition:
 #### A masked pattern was here ####
           Partition
-            base file name: -mr-10004
+            base file name: -mr-10006
             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
             output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
             properties:
@@ -493,16 +714,6 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.nzhang_part8
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
-#### A masked pattern was here ####
-      Column Stats Desc:
-          Columns: key, value
-          Column Types: string, string
-          Table: default.nzhang_part8
-          Is Table Level Stats: false
-
 PREHOOK: query: from srcpart
 insert overwrite table nzhang_part8 partition (ds, hr) select key, value, ds, 
hr where ds <= '2008-04-08'
 insert overwrite table nzhang_part8 partition(ds='2008-12-31', hr) select key, 
value, hr where ds > '2008-04-08'

http://git-wip-us.apache.org/repos/asf/hive/blob/b650083f/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out 
b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
index a9b927b..9ddbb46 100644
--- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
@@ -340,12 +340,12 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc_1d_n1
-          Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE 
Column stats: COMPLETE
           GatherStats: false
           Select Operator
             expressions: state (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain extended select state,locid from loc_orc_1d_n1
@@ -561,12 +561,12 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc_1d_n1
-          Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
           GatherStats: false
           Select Operator
             expressions: state (type: string), locid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: analyze table loc_orc_1d_n1 partition(year='2000') compute 
statistics for columns state
@@ -1031,12 +1031,12 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc_1d_n1
-          Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
           GatherStats: false
           Select Operator
             expressions: state (type: string), locid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: create table if not exists loc_orc_2d_n1 (
@@ -1681,12 +1681,12 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc_2d_n1
-          Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE 
Column stats: COMPLETE
           GatherStats: false
           Select Operator
             expressions: state (type: string)
             outputColumnNames: _col0
-            Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 
 PREHOOK: query: explain extended select state,locid from loc_orc_2d_n1
@@ -2249,11 +2249,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc_2d_n1
-          Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE 
Column stats: PARTIAL
+          Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
           GatherStats: false
           Select Operator
             expressions: state (type: string), locid (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE 
Column stats: COMPLETE
             ListSink
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b650083f/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out 
b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
index f865eb9..93099a1 100644
--- a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
+++ b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
@@ -420,13 +420,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
-  Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
-  Stage-5
-  Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
-  Stage-3 depends on stages: Stage-0
-  Stage-4
+  Stage-4 depends on stages: Stage-0, Stage-2
+  Stage-3 depends on stages: Stage-1
+  Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7
   Stage-6
-  Stage-7 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-6, Stage-5, Stage-8
+  Stage-5
+  Stage-7
+  Stage-8 depends on stages: Stage-7
 
 STAGE PLANS:
   Stage: Stage-1
@@ -464,6 +465,22 @@ STAGE PLANS:
             expressions: _col0 (type: string), CAST( _col1 AS STRING) (type: 
string), if(((UDFToDouble(_col0) % 100.0D) = 0.0D), '11', '12') (type: string)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 
'2008-04-08' (type: string), _col2 (type: string)
+              outputColumnNames: key, value, ds, hr
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll')
+                keys: ds (type: string), hr (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             File Output Operator
               compressed: false
               table:
@@ -476,6 +493,44 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-4
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.test_table_n8
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col2 (type: string)
               sort order: +
               Map-reduce partition columns: _col2 (type: string)
@@ -495,10 +550,10 @@ STAGE PLANS:
                 serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
                 name: default.test_table_n8
 
-  Stage: Stage-8
+  Stage: Stage-9
     Conditional Operator
 
-  Stage: Stage-5
+  Stage: Stage-6
     Move Operator
       files:
           hdfs directory: true
@@ -517,29 +572,21 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: default.test_table_n8
 
-  Stage: Stage-3
-    Stats Work
-      Basic Stats Work:
-      Column Stats Desc:
-          Columns: key, value
-          Column Types: string, string
-          Table: default.test_table_n8
-
-  Stage: Stage-4
+  Stage: Stage-5
     Merge File Operator
       Map Operator Tree:
           RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
-  Stage: Stage-6
+  Stage: Stage-7
     Merge File Operator
       Map Operator Tree:
           RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
-  Stage: Stage-7
+  Stage: Stage-8
     Move Operator
       files:
           hdfs directory: true

http://git-wip-us.apache.org/repos/asf/hive/blob/b650083f/ql/src/test/results/clientpositive/insert_into6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insert_into6.q.out 
b/ql/src/test/results/clientpositive/insert_into6.q.out
index f13f764..035a29f 100644
--- a/ql/src/test/results/clientpositive/insert_into6.q.out
+++ b/ql/src/test/results/clientpositive/insert_into6.q.out
@@ -196,8 +196,9 @@ POSTHOOK: Input: default@insert_into6a@ds=1
 POSTHOOK: Input: default@insert_into6a@ds=2
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -210,11 +211,56 @@ STAGE PLANS:
               expressions: key (type: int), value (type: string), ds (type: 
string)
               outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE 
Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col2 (type: string)
-                sort order: +
-                Map-reduce partition columns: _col2 (type: string)
-                value expressions: _col0 (type: int), _col1 (type: string)
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), _col2 
(type: string)
+                outputColumnNames: key, value, ds
+                Statistics: Num rows: 250 Data size: 2680 Basic stats: 
COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                  keys: ds (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2680 Basic stats: 
COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 250 Data size: 2680 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col2 (type: string)
+              value expressions: _col0 (type: int), _col1 (type: string)
       Execution mode: vectorized
       Reduce Operator Tree:
         Select Operator
@@ -242,7 +288,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.insert_into6b
 
-  Stage: Stage-2
+  Stage: Stage-3
     Stats Work
       Basic Stats Work:
       Column Stats Desc:

http://git-wip-us.apache.org/repos/asf/hive/blob/b650083f/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out 
b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
index bd3c776..cf6c335 100644
--- 
a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
@@ -252,6 +252,7 @@ STAGE PLANS:
       Edges:
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -277,7 +278,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2 
-            Execution mode: vectorized, llap
+            Execution mode: llap
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: 
int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: 
tinyint)
@@ -286,12 +287,48 @@ STAGE PLANS:
                 Limit
                   Number of rows: 10
                   Statistics: Num rows: 10 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col0 (type: smallint), _col1 (type: int), 
_col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: 
tinyint)
+                    outputColumnNames: si, i, b, f, ds, t
+                    Statistics: Num rows: 10 Data size: 1110 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(si, 'hll'), compute_stats(i, 
'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+                      keys: ds (type: string), t (type: tinyint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
+                      Statistics: Num rows: 5 Data size: 8935 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
tinyint)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: tinyint)
+                        Statistics: Num rows: 5 Data size: 8935 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
                   Reduce Output Operator
                     key expressions: _col4 (type: tinyint)
                     sort order: +
                     Map-reduce partition columns: _col4 (type: tinyint)
                     value expressions: _col0 (type: smallint), _col1 (type: 
int), _col2 (type: bigint), _col3 (type: float)
         Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+                keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 5 Data size: 9255 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 9255 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator
@@ -660,6 +697,7 @@ STAGE PLANS:
       Edges:
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -685,7 +723,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2 
-            Execution mode: vectorized, llap
+            Execution mode: llap
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: 
int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: 
tinyint)
@@ -694,12 +732,48 @@ STAGE PLANS:
                 Limit
                   Number of rows: 10
                   Statistics: Num rows: 10 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col0 (type: smallint), _col1 (type: int), 
_col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: 
tinyint)
+                    outputColumnNames: si, i, b, f, ds, t
+                    Statistics: Num rows: 10 Data size: 1110 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(si, 'hll'), compute_stats(i, 
'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+                      keys: ds (type: string), t (type: tinyint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
+                      Statistics: Num rows: 5 Data size: 8935 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
tinyint)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: tinyint)
+                        Statistics: Num rows: 5 Data size: 8935 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
                   Reduce Output Operator
                     key expressions: _col4 (type: tinyint)
                     sort order: +
                     Map-reduce partition columns: _col4 (type: tinyint)
                     value expressions: _col0 (type: smallint), _col1 (type: 
int), _col2 (type: bigint), _col3 (type: float)
         Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+                keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 5 Data size: 9255 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 9255 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator
@@ -1572,6 +1646,7 @@ STAGE PLANS:
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1591,7 +1666,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2 
-            Execution mode: vectorized, llap
+            Execution mode: llap
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: 
smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), 
VALUE._col3 (type: float)
@@ -1607,12 +1682,48 @@ STAGE PLANS:
                       expressions: _col1 (type: smallint), _col2 (type: int), 
_col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: smallint), _col1 (type: 
int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 
(type: tinyint)
+                        outputColumnNames: si, i, b, f, ds, t
+                        Statistics: Num rows: 1 Data size: 111 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: compute_stats(si, 'hll'), 
compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+                          keys: ds (type: string), t (type: tinyint)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5
+                          Statistics: Num rows: 1 Data size: 1787 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: string), _col1 
(type: tinyint)
+                            sort order: ++
+                            Map-reduce partition columns: _col0 (type: 
string), _col1 (type: tinyint)
+                            Statistics: Num rows: 1 Data size: 1787 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
                       Reduce Output Operator
                         key expressions: _col4 (type: tinyint)
                         sort order: +
                         Map-reduce partition columns: _col4 (type: tinyint)
                         value expressions: _col0 (type: smallint), _col1 
(type: int), _col2 (type: bigint), _col3 (type: float)
         Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3)
+                keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 1851 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col4 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col5 (type: 
struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 1 Data size: 1851 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 1851 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator

[6/6] hive git commit: HIVE-16100: Dynamic Sorted Partition optimizer loses sibling operators (Vineet Garg, Gopal V reviewed by Ashutosh Chauhan)

Reply via email to