[37/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather column stats by default (Zoltan Haindrich, Pengcheng Xiong via Ashutosh Chauhan)

hashutosh Mon, 11 Dec 2017 16:08:34 -0800

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out 
b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
index 1723e75..ffe6515 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
@@ -81,6 +81,32 @@ STAGE PLANS:
                         output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.outputtbl1
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: key, cnt
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), 
compute_stats(cnt, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 848 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 848 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -104,6 +130,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
 
   Stage: Stage-3
     Map Reduce


http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out 
b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out
index 1fa9e94..a515847 100644
--- a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out
+++ b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out
@@ -56,6 +56,27 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                 serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                 name: default.implicit_cast_during_insert
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col2 (type: 
string)
+            outputColumnNames: c1, c2, p1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll')
+              keys: p1 (type: string)
+              mode: complete
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -72,6 +93,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2
+          Column Types: int, string
+          Table: default.implicit_cast_during_insert
 
 PREHOOK: query: insert overwrite table implicit_cast_during_insert partition 
(p1)
   select key, value, key key1 from (select * from src where key in (0,1)) q

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/index_auto_update.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/index_auto_update.q.out 
b/ql/src/test/results/clientpositive/index_auto_update.q.out
index bc444cc..e48b657 100644
--- a/ql/src/test/results/clientpositive/index_auto_update.q.out
+++ b/ql/src/test/results/clientpositive/index_auto_update.q.out
@@ -45,10 +45,10 @@ STAGE DEPENDENCIES:
   Stage-0 depends on stages: Stage-9, Stage-8, Stage-11
   Stage-2 depends on stages: Stage-0
   Stage-1 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-1, Stage-4, Stage-5
   Stage-4 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-0, Stage-4, Stage-5
   Stage-5 depends on stages: Stage-2
-  Stage-7 depends on stages: Stage-0
   Stage-8
   Stage-10
   Stage-11 depends on stages: Stage-10
@@ -72,6 +72,32 @@ STAGE PLANS:
                     output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.temp
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string)
+                outputColumnNames: key, val
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), compute_stats(val, 
'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-12
     Conditional Operator
@@ -140,11 +166,15 @@ STAGE PLANS:
 
   Stage: Stage-4
 
-  Stage: Stage-5
-
   Stage: Stage-7
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val
+          Column Types: string, string
+          Table: default.temp
+
+  Stage: Stage-5
 
   Stage: Stage-8
     Map Reduce

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/infer_bucket_sort.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out 
b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out
index 8ed1054..fee672b 100644
--- a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out
+++ b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out
@@ -38,7 +38,7 @@ Database:             default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1482                
@@ -87,7 +87,7 @@ Database:             default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1482                
@@ -136,7 +136,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 1028                
        rawDataSize             10968               
@@ -185,7 +185,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 1028                
        rawDataSize             10968               
@@ -234,7 +234,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 1028                
        rawDataSize             4970                
@@ -283,7 +283,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 2654                
        rawDataSize             28466               
@@ -332,7 +332,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 2654                
        rawDataSize             28466               
@@ -381,7 +381,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 500                 
        rawDataSize             5312                
@@ -430,7 +430,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 500                 
        rawDataSize             5312                
@@ -479,7 +479,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 500                 
        rawDataSize             5312                
@@ -528,7 +528,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 500                 
        rawDataSize             5312                
@@ -577,7 +577,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 500                 
        rawDataSize             5312                
@@ -626,7 +626,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 1028                
        rawDataSize             10968               
@@ -675,7 +675,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 1028                
        rawDataSize             10968               
@@ -724,7 +724,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1482                
@@ -773,7 +773,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1482                
@@ -822,7 +822,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 6                   
        rawDataSize             18                  
@@ -871,7 +871,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 6                   
        rawDataSize             18                  
@@ -920,7 +920,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 618                 
        rawDataSize             2964                
@@ -969,7 +969,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 5                   
        rawDataSize             19                  
@@ -1018,7 +1018,7 @@ Database:                 default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1482                
@@ -1067,7 +1067,7 @@ Database:                 default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1482                
@@ -1116,7 +1116,7 @@ Database:                 default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1482                
@@ -1165,7 +1165,7 @@ Database:                 default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1482                
@@ -1214,7 +1214,7 @@ Database:                 default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             3582                

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out 
b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
index cba50a7..32e4513 100644
--- a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
+++ b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
@@ -56,7 +56,7 @@ Database:             default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 500                 
        rawDataSize             4812                
@@ -94,7 +94,7 @@ Database:             default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 500                 
        rawDataSize             4812                
@@ -155,7 +155,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1173                
@@ -193,7 +193,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1173                
@@ -256,7 +256,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 155                 
        rawDataSize             586                 
@@ -294,7 +294,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 154                 
        rawDataSize             591                 
@@ -415,10 +415,11 @@ STAGE DEPENDENCIES:
   Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-8
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
+  Stage-8 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -462,6 +463,22 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat
                   serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
                   name: default.test_table
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 
'2008-04-08' (type: string), _col2 (type: string)
+              outputColumnNames: key, value, ds, hr
+              Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll')
+                keys: ds (type: string), hr (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -488,6 +505,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.test_table
 
   Stage: Stage-3
     Merge File Operator
@@ -509,6 +530,35 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
+  Stage: Stage-8
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string)
+              Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = 
'2008-04-08', hr)
 SELECT key, value, IF (key % 100 == 0, '11', '12') FROM
 (SELECT key, COUNT(*) AS value FROM srcpart
@@ -555,7 +605,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 4                   
        rawDataSize             14                  
@@ -593,7 +643,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                2                   
        numRows                 305                 
        rawDataSize             1163                

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out 
b/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out
index 8ae7320..5f1d264 100644
--- 
a/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out
+++ 
b/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out
@@ -23,7 +23,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -68,6 +69,22 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.test_table_out
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 
(type: string), '1' (type: string)
+              outputColumnNames: key, value, agg, part
+              Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll'), compute_stats(agg, 'hll')
+                keys: part (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 750 Data size: 7968 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -84,6 +101,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value, agg
+          Column Types: string, string, string
+          Table: default.test_table_out
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: SELECT key, value, count(1) FROM src GROUP BY ROLLUP (key, 
value)
 PREHOOK: type: QUERY
@@ -746,7 +796,7 @@ Database:                   default
 Table:                 test_table_out           
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 619                 
        rawDataSize             6309                
@@ -1426,7 +1476,7 @@ Database:                 default
 Table:                 test_table_out_2         
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 619                 
        rawDataSize             7547                
@@ -1452,7 +1502,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -1497,6 +1548,22 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.test_table_out
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 
(type: string), '1' (type: string)
+              outputColumnNames: key, value, agg, part
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll'), compute_stats(agg, 'hll')
+                keys: part (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -1513,6 +1580,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value, agg
+          Column Types: string, string, string
+          Table: default.test_table_out
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+              value expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') 
 SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE
@@ -1548,7 +1648,7 @@ Database:                 default
 Table:                 test_table_out           
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 928                 
        rawDataSize             9954                
@@ -1601,7 +1701,7 @@ Database:                 default
 Table:                 test_table_out_2         
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 928                 
        rawDataSize             11810               
@@ -1627,7 +1727,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -1672,6 +1773,22 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.test_table_out
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 
(type: string), '1' (type: string)
+              outputColumnNames: key, value, agg, part
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll'), compute_stats(agg, 'hll')
+                keys: part (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -1688,6 +1805,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value, agg
+          Column Types: string, string, string
+          Table: default.test_table_out
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') 
 SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, 
value)
@@ -1723,7 +1873,7 @@ Database:                 default
 Table:                 test_table_out           
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 618                 
        rawDataSize             6054                
@@ -1776,7 +1926,7 @@ Database:                 default
 Table:                 test_table_out_2         
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 618                 
        rawDataSize             7290                

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out 
b/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out
index 98c062b..c701de9 100644
--- a/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out
+++ b/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out
@@ -44,7 +44,7 @@ Database:             default
 Table:                 list_bucketing_table     
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                2                   
        numRows                 309                 
        rawDataSize             1482                
@@ -112,7 +112,7 @@ Database:                   default
 Table:                 list_bucketing_table2    
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1482                

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out 
b/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out
index f2218e3..0711eb5 100644
--- a/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out
+++ b/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out
@@ -91,6 +91,40 @@ STAGE PLANS:
                         output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.test_table_out
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), 
'1' (type: string)
+                    outputColumnNames: key, value, part
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                      keys: part (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -116,6 +150,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.test_table_out
 
   Stage: Stage-3
     Map Reduce
@@ -179,7 +217,7 @@ Database:                   default
 Table:                 test_table_out           
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             1482                
@@ -216,7 +254,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -280,6 +319,22 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.test_table_out
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), '1' 
(type: string)
+              outputColumnNames: key, value, part
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll')
+                keys: part (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -296,6 +351,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.test_table_out
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') 
 SELECT a.key, a.value FROM (
@@ -341,7 +429,7 @@ Database:                   default
 Table:                 test_table_out           
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 0                   
        rawDataSize             0                   
@@ -401,6 +489,34 @@ STAGE PLANS:
                         output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.test_table_out
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                      keys: '1' (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Reduce Output Operator
+                        key expressions: '1' (type: string)
+                        sort order: +
+                        Map-reduce partition columns: '1' (type: string)
+                        value expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: '1' (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 '1' (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -426,6 +542,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.test_table_out
 
   Stage: Stage-4
     Map Reduce
@@ -491,7 +611,7 @@ Database:                   default
 Table:                 test_table_out           
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 1028                
        rawDataSize             10968               
@@ -519,7 +639,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -564,6 +685,20 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.test_table_out
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll')
+                keys: '1' (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -580,6 +715,35 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.test_table_out
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: '1' (type: string)
+              sort order: +
+              Map-reduce partition columns: '1' (type: string)
+              value expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: '1' (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions: _col1 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col2 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 '1' (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
 SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 
b ON a.key = b.key
@@ -617,7 +781,7 @@ Database:                   default
 Table:                 test_table_out           
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             2728                

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out 
b/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out
index 44a9435..694d15c 100644
--- a/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out
+++ b/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out
@@ -38,7 +38,7 @@ Database:             default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 1028                
        rawDataSize             10968               
@@ -87,7 +87,7 @@ Database:             default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                2                   
        numRows                 1028                
        rawDataSize             10968               

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out 
b/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out
index d915c30..a10c06e 100644
--- a/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out
+++ b/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out
@@ -81,7 +81,7 @@ Database:             default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 500                 
        rawDataSize             5312                
@@ -173,7 +173,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             2718                
@@ -265,7 +265,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 500                 
        rawDataSize             5312                
@@ -357,7 +357,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 309                 
        rawDataSize             2690                

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out 
b/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out
index 5c7659b..cf332e2 100644
--- a/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out
+++ b/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out
@@ -25,7 +25,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -56,6 +57,22 @@ STAGE PLANS:
                 output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.test_table
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), 
UDFToString(_col2) (type: string)
+            outputColumnNames: key, value, hr
+            Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE 
Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll')
+              keys: '2008-04-08' (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -73,6 +90,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: '2008-04-08' (type: string), _col1 (type: 
string)
+              sort order: ++
+              Map-reduce partition columns: '2008-04-08' (type: string), _col1 
(type: string)
+              Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: '2008-04-08' (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 _col3 (type: 
struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 '2008-04-08' (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = 
'2008-04-08', hr)
 SELECT key2, value, cast(hr as int) FROM
@@ -122,7 +172,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                2                   
        numRows                 670                 
        rawDataSize             5888                
@@ -160,7 +210,7 @@ Database:                   default
 Table:                 test_table               
 #### A masked pattern was here ####
 Partition Parameters:           
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                1                   
        numRows                 330                 
        rawDataSize             2924                

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/innerjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/innerjoin.q.out 
b/ql/src/test/results/clientpositive/innerjoin.q.out
index 741c9fb..138eb93 100644
--- a/ql/src/test/results/clientpositive/innerjoin.q.out
+++ b/ql/src/test/results/clientpositive/innerjoin.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -75,6 +76,21 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest_j1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 
'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -89,6 +105,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest_j1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/input11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input11.q.out 
b/ql/src/test/results/clientpositive/input11.q.out
index 356cef1..bb59b4f 100644
--- a/ql/src/test/results/clientpositive/input11.q.out
+++ b/ql/src/test/results/clientpositive/input11.q.out
@@ -46,6 +46,32 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest1
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -69,6 +95,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/input12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input12.q.out 
b/ql/src/test/results/clientpositive/input12.q.out
index 8bb52c0..8096c71 100644
--- a/ql/src/test/results/clientpositive/input12.q.out
+++ b/ql/src/test/results/clientpositive/input12.q.out
@@ -39,24 +39,16 @@ STAGE DEPENDENCIES:
   Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7
   Stage-6
   Stage-0 depends on stages: Stage-6, Stage-5, Stage-8
-  Stage-4 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0, Stage-11, Stage-13
   Stage-5
   Stage-7
   Stage-8 depends on stages: Stage-7
-  Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, 
Stage-13
-  Stage-12
-  Stage-1 depends on stages: Stage-12, Stage-11, Stage-14
-  Stage-10 depends on stages: Stage-1
-  Stage-11
-  Stage-13
-  Stage-14 depends on stages: Stage-13
-  Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, 
Stage-19
-  Stage-18
-  Stage-2 depends on stages: Stage-18, Stage-17, Stage-20
-  Stage-16 depends on stages: Stage-2
-  Stage-17
-  Stage-19
-  Stage-20 depends on stages: Stage-19
+  Stage-1 depends on stages: Stage-3
+  Stage-10 depends on stages: Stage-1, Stage-11, Stage-13
+  Stage-11 depends on stages: Stage-3
+  Stage-12 depends on stages: Stage-2, Stage-11, Stage-13
+  Stage-2 depends on stages: Stage-3
+  Stage-13 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-3
@@ -80,6 +72,19 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest1
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
             Filter Operator
               predicate: ((key < 200) and (key >= 100)) (type: boolean)
               Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE 
Column stats: NONE
@@ -95,6 +100,21 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest2
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 55 Data size: 584 Basic stats: 
COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
               predicate: (key >= 200) (type: boolean)
               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE
@@ -110,6 +130,35 @@ STAGE PLANS:
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest3
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: key
+                  Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll')
+                    keys: '2008-04-08' (type: string), '12' (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-9
     Conditional Operator
@@ -133,6 +182,10 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
@@ -164,15 +217,6 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-15
-    Conditional Operator
-
-  Stage: Stage-12
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
   Stage: Stage-1
     Move Operator
       tables:
@@ -186,45 +230,40 @@ STAGE PLANS:
   Stage: Stage-10
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
 
   Stage: Stage-11
     Map Reduce
       Map Operator Tree:
           TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest2
-
-  Stage: Stage-13
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest2
-
-  Stage: Stage-14
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
-  Stage: Stage-21
-    Conditional Operator
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-18
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
+  Stage: Stage-12
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.dest3
 
   Stage: Stage-2
     Move Operator
@@ -239,39 +278,34 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest3
 
-  Stage: Stage-16
-    Stats Work
-      Basic Stats Work:
-
-  Stage: Stage-17
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest3
-
-  Stage: Stage-19
+  Stage: Stage-13
     Map Reduce
       Map Operator Tree:
           TableScan
+            Reduce Output Operator
+              key expressions: '2008-04-08' (type: string), '12' (type: string)
+              sort order: ++
+              Map-reduce partition columns: '2008-04-08' (type: string), '12' 
(type: string)
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          keys: '2008-04-08' (type: string), '12' (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column 
stats: NONE
+          Select Operator
+            expressions: _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>),
 '2008-04-08' (type: string), '12' (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE 
Column stats: NONE
               table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest3
-
-  Stage: Stage-20
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
 
 PREHOOK: query: FROM src 
 INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100

[37/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather column stats by default (Zoltan Haindrich, Pengcheng Xiong via Ashutosh Chauhan)

Reply via email to