http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
index 3b3e227..f15c0f9 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
@@ -106,6 +106,22 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -161,6 +177,35 @@ STAGE PLANS:
                     name: default.t1
       Truncated Path -> Alias:
         /t1 [t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -203,6 +248,11 @@
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
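The hunks above show the two pieces that column-statistics autogather adds to a plain group-by insert: a map-side Select/Group By branch that runs compute_stats(..., 'hll') over each target column, and a Column Stats Desc entry in the Stats Work stage. As a minimal sketch, assuming the test's T1(key string, val string) fixture and the standard autogather and skew settings (this query text is illustrative and not part of the patch):

    -- Hypothetical repro; hive.stats.column.autogather and
    -- hive.groupby.skewindata are standard Hive settings.
    SET hive.stats.column.autogather=true;
    SET hive.groupby.skewindata=true;
    EXPLAIN EXTENDED
    INSERT OVERWRITE TABLE outputTbl1
    SELECT key, count(1) FROM T1 GROUP BY key;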
@@ -429,7 +479,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -625,6 +676,33 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int)
+            outputColumnNames: key1, key2, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1,_col2
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -658,6 +736,80 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, string, int
+          Table: default.outputtbl2
+          Is Table Level Stats: true
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2
 SELECT key, val, count(1) FROM T1 GROUP BY key, val
@@ -737,7 +889,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -758,6 +910,22 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -813,6 +981,35 @@ STAGE PLANS:
                     name: default.t1
       Truncated Path -> Alias:
         /t1 [t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -832,7 +1029,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -855,6 +1052,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -870,7 +1072,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -900,7 +1102,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -921,7 +1123,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -957,7 +1159,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -987,7 +1189,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1008,7 +1210,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
{"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1112,7 +1314,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1133,6 +1335,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1188,6 +1406,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary> + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1207,7 +1454,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1230,6 +1477,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -1245,7 +1497,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1275,7 +1527,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1296,7 +1548,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1332,7 +1584,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1362,7 +1614,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1383,7 +1635,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -1516,6 +1768,22 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+            outputColumnNames: key1, key2, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -1571,6 +1839,35 @@ STAGE PLANS:
                     name: default.t1
       Truncated Path -> Alias:
         /t1 [t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -1613,6 +1910,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, int, int
+          Table: default.outputtbl3
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -1840,7 +2142,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -2036,6 +2339,33 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int)
+            outputColumnNames: key1, key2, key3, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1,_col2,_col3
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -2069,33 +2399,107 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, key3, cnt
+          Column Types: int, int, string, int
+          Table: default.outputtbl4
+          Is Table Level Stats: true

-PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4
-SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Output: default@outputtbl4
-POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4
-SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Output: default@outputtbl4
-POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ]
-POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
-POSTHOOK: Lineage: outputtbl4.key2 SIMPLE []
-POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
-PREHOOK: query: SELECT * FROM outputTbl4
-PREHOOK: type: QUERY
-PREHOOK: Input: default@outputtbl4
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM outputTbl4
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@outputtbl4
+      Path -> Partition:
 #### A masked pattern was here ####
-1 1 11 1
-2 1 12 1
-3 1 13 1
-7 1 17 1
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2,_col3
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2,_col3
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl4
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl4
+POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl4.key2 SIMPLE []
+POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl4
+#### A masked pattern was here ####
+1 1 11 1
+2 1 12 1
+3 1 13 1
+7 1 17 1
 8 1 18 1
 8 1 28 1
 PREHOOK: query: EXPLAIN EXTENDED
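Once the rewritten plan runs, the outputTbl4 columns carry column statistics in the metastore, which is what the new COLUMN_STATS entries inside COLUMN_STATS_ACCURATE record. A hedged way to spot-check the gathered values (plain HiveQL, not part of this diff):

    -- Column-level DESCRIBE prints min/max/num_nulls/distinct_count for one column.
    DESCRIBE FORMATTED outputTbl4 key1;
    DESCRIBE FORMATTED outputTbl4 cnt;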
@@ -2110,7 +2514,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -2285,7 +2690,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key1,key2,cnt
@@ -2306,6 +2711,33 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+            outputColumnNames: key1, key2, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1,_col2
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -2316,7 +2748,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key1,key2,cnt
@@ -2339,6 +2771,80 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, int, int
+          Table: default.outputtbl3
+          Is Table Level Stats: true
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3
 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
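The aggregation these stats stages run is the compute_stats UDAF with the 'hll' estimator argument visible throughout the new plans; each call folds one column into one of the struct<columntype,...> values listed in the columns.types properties. A standalone sketch of the same primitive (illustrative only; the patch itself only changes generated plans):

    -- One row out, one stats struct per aggregated column.
    SELECT compute_stats(key, 'hll'), compute_stats(val, 'hll') FROM T1;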
@@ -2382,7 +2888,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -2567,7 +3074,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2588,6 +3095,33 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -2598,7 +3132,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2621,6 +3155,80 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT cast(key + key as string), sum(cnt) from
@@ -2712,7 +3320,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2733,6 +3341,22 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+                auto parallelism: false
           TableScan
             alias: t1
             Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -2764,7 +3388,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2785,6 +3409,22 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -2840,6 +3480,35 @@ STAGE PLANS:
                     name: default.t1
       Truncated Path -> Alias:
         /t1 [null-subquery1:$hdt$_0-subquery1:t1, null-subquery2:$hdt$_0-subquery2:t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -2859,7 +3528,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2882,6 +3551,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -2897,7 +3571,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2927,7 +3601,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2948,7 +3622,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -2984,7 +3658,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3014,7 +3688,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3035,7 +3709,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
{"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3344,7 +4018,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3365,6 +4039,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false TableScan GatherStats: false Union @@ -3384,7 +4074,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3405,6 +4095,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3483,6 +4189,35 @@ STAGE PLANS: Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns 
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-8
     Conditional Operator
@@ -3502,7 +4237,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3525,6 +4260,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true

   Stage: Stage-4
     Map Reduce
@@ -3540,7 +4280,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3570,7 +4310,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3591,7 +4331,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3627,7 +4367,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3657,7 +4397,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3678,7 +4418,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3763,7 +4503,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

 STAGE PLANS:
   Stage: Stage-1
@@ -3895,7 +4636,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3916,6 +4657,33 @@ STAGE PLANS:
                 TotalFiles: 1
                 GatherStats: true
                 MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -3926,7 +4694,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                       bucket_count -1
                       column.name.delimiter ,
                       columns key,cnt
@@ -3949,6 +4717,80 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -3949,6 +4717,80 @@ STAGE PLANS:
    Stats Work
      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT subq1.key, subq1.cnt+subq2.cnt FROM
@@ -4366,7 +5208,8 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
@@ -4405,7 +5248,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4428,7 +5271,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4542,7 +5385,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
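The Stage-3 hunk above is classic two-phase aggregation: the map-side hash-mode Group By emits one partial struct per column (columntype, min, max, countnulls, bitvector), a single reducer merges them with mode: final, and the merged struct gains numdistinctvalues and ndvbitvector. The 'hll' argument selects HyperLogLog for the distinct-value sketch, which is why the partial state is a fixed-size binary bitvector rather than a set of values; for example, an HLL sketch with 2^14 registers carries a relative standard error of roughly 1.04/sqrt(2^14), about 0.8%. If the underlying UDAF is callable directly — an assumption; it is normally driven by the planner rather than user queries — the partial-to-final shape can be observed with:

    -- assumed direct invocation of the compute_stats UDAF seen in the plans
    SELECT compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
    FROM default.outputtbl1;
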
@@ -4563,6 +5406,33 @@ STAGE PLANS:
            TotalFiles: 1
            GatherStats: true
            MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -4573,7 +5443,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
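The hunk that follows rewrites the Stage-3 Stats Work block (the removed and re-added lines appear to differ only in indentation) and appends Stage-4, the same final compute_stats merge job shown earlier. The manual counterpart of this autogathering is standard HiveQL and records equivalent column statistics in the metastore:

    -- one-off alternative: compute column statistics for every column
    ANALYZE TABLE default.outputtbl1 COMPUTE STATISTICS FOR COLUMNS;
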
@@ -4589,13 +5459,87 @@ STAGE PLANS:
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 20
#### A masked pattern was here ####
-          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-          name: default.outputtbl1
-
-  Stage: Stage-3
-    Stats Work
-      Basic Stats Work:
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.outputtbl1
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM T2 GROUP BY key
@@ -4673,7 +5617,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnore
<TRUNCATED>
