anno...

prasanthj Sun, 28 Sep 2014 22:39:00 -0700

Modified: 
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out?rev=1628118&r1=1628117&r2=1628118&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out 
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out 
Mon Sep 29 05:38:05 2014
@@ -1,4 +1,24 @@
-PREHOOK: query: create table if not exists loc_staging (
+PREHOOK: query: -- hash aggregation is disabled
+
+-- There are different cases for Group By depending on map/reduce side, hash 
aggregation,
+-- grouping sets and column stats. If we don't have column stats, we just 
assume hash
+-- aggregation is disabled. The following are the possible cases and rules 
for cardinality
+-- estimation
+
+-- MAP SIDE:
+-- Case 1: NO column stats, NO hash aggregation, NO grouping sets - numRows
+-- Case 2: NO column stats, NO hash aggregation, grouping sets - numRows * 
sizeOfGroupingSet
+-- Case 3: column stats, hash aggregation, NO grouping sets - Min(numRows / 
2, ndvProduct * parallelism)
+-- Case 4: column stats, hash aggregation, grouping sets - Min((numRows * 
sizeOfGroupingSet) / 2, ndvProduct * parallelism * sizeOfGroupingSet)
+-- Case 5: column stats, NO hash aggregation, NO grouping sets - numRows
+-- Case 6: column stats, NO hash aggregation, grouping sets - numRows * 
sizeOfGroupingSet
+
+-- REDUCE SIDE:
+-- Case 7: NO column stats - numRows / 2
+-- Case 8: column stats, grouping sets - Min(numRows, ndvProduct * 
sizeOfGroupingSet)
+-- Case 9: column stats, NO grouping sets - Min(numRows, ndvProduct)
+
+create table if not exists loc_staging (
   state string,
   locid int,
   zip bigint,
@@ -7,7 +27,27 @@ PREHOOK: query: create table if not exis
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@loc_staging
-POSTHOOK: query: create table if not exists loc_staging (
+POSTHOOK: query: -- hash aggregation is disabled
+
+-- There are different cases for Group By depending on map/reduce side, hash 
aggregation,
+-- grouping sets and column stats. If we don't have column stats, we just 
assume hash
+-- aggregation is disabled. The following are the possible cases and rules 
for cardinality
+-- estimation
+
+-- MAP SIDE:
+-- Case 1: NO column stats, NO hash aggregation, NO grouping sets - numRows
+-- Case 2: NO column stats, NO hash aggregation, grouping sets - numRows * 
sizeOfGroupingSet
+-- Case 3: column stats, hash aggregation, NO grouping sets - Min(numRows / 
2, ndvProduct * parallelism)
+-- Case 4: column stats, hash aggregation, grouping sets - Min((numRows * 
sizeOfGroupingSet) / 2, ndvProduct * parallelism * sizeOfGroupingSet)
+-- Case 5: column stats, NO hash aggregation, NO grouping sets - numRows
+-- Case 6: column stats, NO hash aggregation, grouping sets - numRows * 
sizeOfGroupingSet
+
+-- REDUCE SIDE:
+-- Case 7: NO column stats - numRows / 2
+-- Case 8: column stats, grouping sets - Min(numRows, ndvProduct * 
sizeOfGroupingSet)
+-- Case 9: column stats, NO grouping sets - Min(numRows, ndvProduct)
+
+create table if not exists loc_staging (
   state string,
   locid int,
   zip bigint,
@@ -190,22 +230,20 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: analyze table loc_orc compute statistics for columns 
state,locid,zip,year
+PREHOOK: query: analyze table loc_orc compute statistics for columns 
state,locid,year
 PREHOOK: type: QUERY
 PREHOOK: Input: default@loc_orc
 #### A masked pattern was here ####
-POSTHOOK: query: analyze table loc_orc compute statistics for columns 
state,locid,zip,year
+POSTHOOK: query: analyze table loc_orc compute statistics for columns 
state,locid,year
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@loc_orc
 #### A masked pattern was here ####
-PREHOOK: query: -- only one distinct value in year column + 1 NULL value
--- map-side GBY: numRows: 8 (map-side will not do any reduction)
--- reduce-side GBY: numRows: 2
+PREHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets 
- cardinality = 8
+-- Case 9: column stats, NO grouping sets - cardinality = 2
 explain select year from loc_orc group by year
 PREHOOK: type: QUERY
-POSTHOOK: query: -- only one distinct value in year column + 1 NULL value
--- map-side GBY: numRows: 8 (map-side will not do any reduction)
--- reduce-side GBY: numRows: 2
+POSTHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping 
sets - cardinality = 8
+-- Case 9: column stats, NO grouping sets - cardinality = 2
 explain select year from loc_orc group by year
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -257,12 +295,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY: numRows: 8
--- reduce-side GBY: numRows: 4
+PREHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets 
- cardinality = 8
+-- Case 9: column stats, NO grouping sets - cardinality = 8
 explain select state,locid from loc_orc group by state,locid
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY: numRows: 8
--- reduce-side GBY: numRows: 4
+POSTHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping 
sets - cardinality = 8
+-- Case 9: column stats, NO grouping sets - cardinality = 8
 explain select state,locid from loc_orc group by state,locid
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -295,14 +333,14 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column 
stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -314,10 +352,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 32
+-- Case 8: column stats, grouping sets - cardinality = 32
 explain select state,locid from loc_orc group by state,locid with cube
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 32
+-- Case 8: column stats, grouping sets - cardinality = 32
 explain select state,locid from loc_orc group by state,locid with cube
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -339,25 +379,25 @@ STAGE PLANS:
                 keys: state (type: string), locid (type: int), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: int), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: int), _col2 (type: string)
-                  Statistics: Num rows: 32 Data size: 3184 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 32 Data size: 5600 Basic stats: 
COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 
(type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -369,10 +409,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 24
+-- Case 8: column stats, grouping sets - cardinality = 24
 explain select state,locid from loc_orc group by state,locid with rollup
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 24
+-- Case 8: column stats, grouping sets - cardinality = 24
 explain select state,locid from loc_orc group by state,locid with rollup
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -394,25 +436,25 @@ STAGE PLANS:
                 keys: state (type: string), locid (type: int), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: int), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: int), _col2 (type: string)
-                  Statistics: Num rows: 24 Data size: 2388 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 24 Data size: 4200 Basic stats: 
COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 
(type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -424,10 +466,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 8
+-- Case 8: column stats, grouping sets - cardinality = 8
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state))
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 8
+-- Case 8: column stats, grouping sets - cardinality = 8
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state))
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -449,25 +493,25 @@ STAGE PLANS:
                 keys: state (type: string), locid (type: int), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: int), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: int), _col2 (type: string)
-                  Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Statistics: Num rows: 8 Data size: 1400 Basic stats: 
COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 
(type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 4 Data size: 700 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column 
stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -479,10 +523,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 16
+-- Case 8: column stats, grouping sets - cardinality = 16
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state),(locid))
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 16
+-- Case 8: column stats, grouping sets - cardinality = 16
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state),(locid))
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -504,25 +550,25 @@ STAGE PLANS:
                 keys: state (type: string), locid (type: int), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: int), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: int), _col2 (type: string)
-                  Statistics: Num rows: 16 Data size: 1592 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 16 Data size: 2800 Basic stats: 
COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 
(type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -534,10 +580,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 24
+-- Case 8: column stats, grouping sets - cardinality = 24
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state),(locid),())
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 24
+-- Case 8: column stats, grouping sets - cardinality = 24
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state),(locid),())
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -559,25 +607,25 @@ STAGE PLANS:
                 keys: state (type: string), locid (type: int), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: int), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: int), _col2 (type: string)
-                  Statistics: Num rows: 24 Data size: 2388 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 24 Data size: 4200 Basic stats: 
COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 
(type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -589,10 +637,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 32
+-- Case 8: column stats, grouping sets - cardinality = 32
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state,locid),(state),(locid),())
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 32
+-- Case 8: column stats, grouping sets - cardinality = 32
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state,locid),(state),(locid),())
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -614,25 +664,25 @@ STAGE PLANS:
                 keys: state (type: string), locid (type: int), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: int), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: int), _col2 (type: string)
-                  Statistics: Num rows: 32 Data size: 3184 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 32 Data size: 5600 Basic stats: 
COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 
(type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -644,12 +694,16 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any 
reduction)
--- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = 
min(80/2, 2)
+PREHOOK: query: -- map-side parallelism will be 10
+
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
+-- Case 9: column stats, NO grouping sets - cardinality = 2
 explain select year from loc_orc group by year
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any 
reduction)
--- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = 
min(80/2, 2)
+POSTHOOK: query: -- map-side parallelism will be 10
+
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
+-- Case 9: column stats, NO grouping sets - cardinality = 2
 explain select year from loc_orc group by year
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -671,25 +725,25 @@ STAGE PLANS:
                 keys: year (type: int)
                 mode: hash
                 outputColumnNames: _col0
-                Statistics: Num rows: 80 Data size: 280 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 80 Data size: 280 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE 
Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: int)
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column 
stats: COMPLETE
           Select Operator
             expressions: _col0 (type: int)
             outputColumnNames: _col0
-            Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column 
stats: COMPLETE
+            Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column 
stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -701,10 +755,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 
Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
+PREHOOK: query: -- Case 4: column stats, hash aggregation, grouping sets - 
cardinality = 16
+-- Case 8: column stats, grouping sets - cardinality = 16
 explain select state,locid from loc_orc group by state,locid with cube
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 
Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
+POSTHOOK: query: -- Case 4: column stats, hash aggregation, grouping sets - 
cardinality = 16
+-- Case 8: column stats, grouping sets - cardinality = 16
 explain select state,locid from loc_orc group by state,locid with cube
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -726,25 +782,84 @@ STAGE PLANS:
                 keys: state (type: string), locid (type: int), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 320 Data size: 31840 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: int), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: int), _col2 (type: string)
-                  Statistics: Num rows: 320 Data size: 31840 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 16 Data size: 2800 Basic stats: 
COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 
(type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 35 Data size: 6125 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE 
Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE 
Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- ndvProduct becomes 0 as zip does not have column stats
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
+-- Case 9: column stats, NO grouping sets - cardinality = 2
+explain select state,zip from loc_orc group by state,zip
+PREHOOK: type: QUERY
+POSTHOOK: query: -- ndvProduct becomes 0 as zip does not have column stats
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
+-- Case 9: column stats, NO grouping sets - cardinality = 2
+explain select state,zip from loc_orc group by state,zip
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc
+            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Select Operator
+              expressions: state (type: string), zip (type: bigint)
+              outputColumnNames: state, zip
+              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: PARTIAL
+              Group By Operator
+                keys: state (type: string), zip (type: bigint)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE 
Column stats: PARTIAL
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: bigint)
+                  Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE 
Column stats: PARTIAL
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: bigint)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE 
Column stats: PARTIAL
             File Output Operator
               compressed: false
-              Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE 
Column stats: PARTIAL
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -756,10 +871,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
 explain select state,locid from loc_orc group by state,locid with cube
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
 explain select state,locid from loc_orc group by state,locid with cube
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -811,10 +928,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 24
+-- Case 7: NO column stats - cardinality = 12
 explain select state,locid from loc_orc group by state,locid with rollup
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 24
+-- Case 7: NO column stats - cardinality = 12
 explain select state,locid from loc_orc group by state,locid with rollup
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -866,10 +985,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 8
+-- Case 7: NO column stats - cardinality = 4
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state))
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 8
+-- Case 7: NO column stats - cardinality = 4
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state))
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -921,10 +1042,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 16
+-- Case 7: NO column stats - cardinality = 8
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state),(locid))
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 16
+-- Case 7: NO column stats - cardinality = 8
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state),(locid))
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -976,10 +1099,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 24
+-- Case 7: NO column stats - cardinality = 12
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state),(locid),())
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 24
+-- Case 7: NO column stats - cardinality = 12
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state),(locid),())
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -1031,10 +1156,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state,locid),(state),(locid),())
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
 explain select state,locid from loc_orc group by state,locid grouping 
sets((state,locid),(state),(locid),())
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -1086,12 +1213,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any 
reduction)
--- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = 
min(80/2, 2)
+PREHOOK: query: -- Case 1: NO column stats, NO hash aggregation, NO grouping 
sets - cardinality = 8
+-- Case 7: NO column stats - cardinality = 4
 explain select year from loc_orc group by year
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any 
reduction)
--- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = 
min(80/2, 2)
+POSTHOOK: query: -- Case 1: NO column stats, NO hash aggregation, NO grouping 
sets - cardinality = 8
+-- Case 7: NO column stats - cardinality = 4
 explain select year from loc_orc group by year
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -1113,25 +1240,25 @@ STAGE PLANS:
                 keys: year (type: int)
                 mode: hash
                 outputColumnNames: _col0
-                Statistics: Num rows: 80 Data size: 7960 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 80 Data size: 7960 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE 
Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: int)
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: _col0 (type: int)
             outputColumnNames: _col0
-            Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE 
Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1143,10 +1270,12 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 
Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
 explain select state,locid from loc_orc group by state,locid with cube
 PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 
Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
 explain select state,locid from loc_orc group by state,locid with cube
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -1168,25 +1297,25 @@ STAGE PLANS:
                 keys: state (type: string), locid (type: int), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 320 Data size: 31840 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: int), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: int), _col2 (type: string)
-                  Statistics: Num rows: 320 Data size: 31840 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 32 Data size: 3184 Basic stats: 
COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 
(type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE 
Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE 
Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat


Added: 
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out?rev=1628118&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out 
(added)
+++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out 
Mon Sep 29 05:38:05 2014
@@ -0,0 +1,485 @@
+PREHOOK: query: drop table location
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table location
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: -- There are different cases for Group By depending on 
map/reduce side, hash aggregation,
+-- grouping sets and column stats. If we don't have column stats, we just 
assume hash
+-- aggregation is disabled. Following are the possible cases and rule for 
cardinality
+-- estimation
+
+-- MAP SIDE:
+-- Case 1: NO column stats, NO hash aggregation, NO grouping sets → numRows
+-- Case 2: NO column stats, NO hash aggregation, grouping sets → numRows * 
sizeOfGroupingSet
+-- Case 3: column stats, hash aggregation, NO grouping sets → Min(numRows / 
2, ndvProduct * parallelism)
+-- Case 4: column stats, hash aggregation, grouping sets → Min((numRows * 
sizeOfGroupingSet) / 2, ndvProduct * parallelism * sizeOfGroupingSet)
+-- Case 5: column stats, NO hash aggregation, NO grouping sets → numRows
+-- Case 6: column stats, NO hash aggregation, grouping sets → numRows * 
sizeOfGroupingSet
+
+-- REDUCE SIDE:
+-- Case 7: NO column stats → numRows / 2
+-- Case 8: column stats, grouping sets → Min(numRows, ndvProduct * 
sizeOfGroupingSet)
+-- Case 9: column stats, NO grouping sets → Min(numRows, ndvProduct)
+
+create table location (state string, country string, votes bigint)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@location
+POSTHOOK: query: -- There are different cases for Group By depending on 
map/reduce side, hash aggregation,
+-- grouping sets and column stats. If we don't have column stats, we just 
assume hash
+-- aggregation is disabled. Following are the possible cases and rule for 
cardinality
+-- estimation
+
+-- MAP SIDE:
+-- Case 1: NO column stats, NO hash aggregation, NO grouping sets → numRows
+-- Case 2: NO column stats, NO hash aggregation, grouping sets → numRows * 
sizeOfGroupingSet
+-- Case 3: column stats, hash aggregation, NO grouping sets → Min(numRows / 
2, ndvProduct * parallelism)
+-- Case 4: column stats, hash aggregation, grouping sets → Min((numRows * 
sizeOfGroupingSet) / 2, ndvProduct * parallelism * sizeOfGroupingSet)
+-- Case 5: column stats, NO hash aggregation, NO grouping sets → numRows
+-- Case 6: column stats, NO hash aggregation, grouping sets → numRows * 
sizeOfGroupingSet
+
+-- REDUCE SIDE:
+-- Case 7: NO column stats → numRows / 2
+-- Case 8: column stats, grouping sets → Min(numRows, ndvProduct * 
sizeOfGroupingSet)
+-- Case 9: column stats, NO grouping sets → Min(numRows, ndvProduct)
+
+create table location (state string, country string, votes bigint)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@location
+PREHOOK: query: load data local inpath "../../data/files/location.txt" 
overwrite into table location
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@location
+POSTHOOK: query: load data local inpath "../../data/files/location.txt" 
overwrite into table location
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@location
+PREHOOK: query: analyze table location compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@location
+PREHOOK: Output: default@location
+POSTHOOK: query: analyze table location compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@location
+POSTHOOK: Output: default@location
+PREHOOK: query: analyze table location compute statistics for columns state, 
country
+PREHOOK: type: QUERY
+PREHOOK: Input: default@location
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table location compute statistics for columns state, 
country
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@location
+#### A masked pattern was here ####
+PREHOOK: query: -- Case 1: NO column stats, NO hash aggregation, NO grouping 
sets - cardinality = 20
+-- Case 7: NO column stats - cardinality = 10
+explain select state, country from location group by state, country
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Case 1: NO column stats, NO hash aggregation, NO grouping 
sets - cardinality = 20
+-- Case 7: NO column stats - cardinality = 10
+explain select state, country from location group by state, country
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: location
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: state (type: string), country (type: string)
+              outputColumnNames: state, country
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                keys: state (type: string), country (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                  Statistics: Num rows: 20 Data size: 200 Basic stats: 
COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column 
stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 80
+-- Case 7: NO column stats - cardinality = 40
+explain select state, country from location group by state, country with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping 
sets - cardinality = 80
+-- Case 7: NO column stats - cardinality = 40
+explain select state, country from location group by state, country with cube
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: location
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: state (type: string), country (type: string)
+              outputColumnNames: state, country
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                keys: state (type: string), country (type: string), '0' (type: 
string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: string)
+                  Statistics: Num rows: 80 Data size: 800 Basic stats: 
COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 
(type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE Column 
stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- parallelism = 4
+
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 8
+-- Case 9: column stats, NO grouping sets - cardinality = 2
+explain select state, country from location group by state, country
+PREHOOK: type: QUERY
+POSTHOOK: query: -- parallelism = 4
+
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 8
+-- Case 9: column stats, NO grouping sets - cardinality = 2
+explain select state, country from location group by state, country
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: location
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Select Operator
+              expressions: state (type: string), country (type: string)
+              outputColumnNames: state, country
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Group By Operator
+                keys: state (type: string), country (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                  Statistics: Num rows: 8 Data size: 1384 Basic stats: 
COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE 
Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE 
Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- column stats for votes are missing, so ndvProduct becomes 0 
and will be set to numRows / 2
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 10
+-- Case 9: column stats, NO grouping sets - cardinality = 5
+explain select state, votes from location group by state, votes
+PREHOOK: type: QUERY
+POSTHOOK: query: -- column stats for votes are missing, so ndvProduct becomes 0 
and will be set to numRows / 2
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 10
+-- Case 9: column stats, NO grouping sets - cardinality = 5
+explain select state, votes from location group by state, votes
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: location
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: PARTIAL
+            Select Operator
+              expressions: state (type: string), votes (type: bigint)
+              outputColumnNames: state, votes
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: PARTIAL
+              Group By Operator
+                keys: state (type: string), votes (type: bigint)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 10 Data size: 860 Basic stats: COMPLETE 
Column stats: PARTIAL
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: bigint)
+                  Statistics: Num rows: 10 Data size: 860 Basic stats: 
COMPLETE Column stats: PARTIAL
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE Column 
stats: PARTIAL
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: bigint)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE 
Column stats: PARTIAL
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 430 Basic stats: COMPLETE 
Column stats: PARTIAL
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- Case 4: column stats, hash aggregation, grouping sets - 
cardinality = 32
+-- Case 8: column stats, grouping sets - cardinality = 8
+explain select state, country from location group by state, country with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Case 4: column stats, hash aggregation, grouping sets - 
cardinality = 32
+-- Case 8: column stats, grouping sets - cardinality = 8
+explain select state, country from location group by state, country with cube
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: location
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Select Operator
+              expressions: state (type: string), country (type: string)
+              outputColumnNames: state, country
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Group By Operator
+                keys: state (type: string), country (type: string), '0' (type: 
string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 32 Data size: 8256 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: string)
+                  Statistics: Num rows: 32 Data size: 8256 Basic stats: 
COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 
(type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE 
Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE 
Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets 
- cardinality = 20
+-- Case 9: column stats, NO grouping sets - cardinality = 2
+explain select state, country from location group by state, country
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping 
sets - cardinality = 20
+-- Case 9: column stats, NO grouping sets - cardinality = 2
+explain select state, country from location group by state, country
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: location
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Select Operator
+              expressions: state (type: string), country (type: string)
+              outputColumnNames: state, country
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Group By Operator
+                keys: state (type: string), country (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                  Statistics: Num rows: 20 Data size: 3460 Basic stats: 
COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE 
Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE 
Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 80
+-- Case 8: column stats, grouping sets - cardinality = 8
+explain select state, country from location group by state, country with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - 
cardinality = 80
+-- Case 8: column stats, grouping sets - cardinality = 8
+explain select state, country from location group by state, country with cube
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: location
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Select Operator
+              expressions: state (type: string), country (type: string)
+              outputColumnNames: state, country
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Group By Operator
+                keys: state (type: string), country (type: string), '0' (type: 
string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 80 Data size: 20640 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: string)
+                  Statistics: Num rows: 80 Data size: 20640 Basic stats: 
COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 
(type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column 
stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE 
Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 8 Data size: 1384 Basic stats: COMPLETE 
Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table location
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@location
+PREHOOK: Output: default@location
+POSTHOOK: query: drop table location
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@location
+POSTHOOK: Output: default@location

Modified: hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out?rev=1628118&r1=1628117&r2=1628118&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out 
(original) and 
hive/trunk/ql/src/test/results/clientpositive/binarysortable_1.q.out Mon Sep 29 
05:38:05 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/combine2.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/combine2.q.out?rev=1628118&r1=1628117&r2=1628118&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/combine2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/combine2.q.out Mon Sep 29 
05:38:05 2014
@@ -662,12 +662,12 @@ STAGE PLANS:
                 keys: ds (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 2000 Data size: 384000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1000 Data size: 192000 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 2000 Data size: 384000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1000 Data size: 192000 Basic stats: 
COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out?rev=1628118&r1=1628117&r2=1628118&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out Mon Sep 
29 05:38:05 2014
@@ -44,12 +44,12 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: string)
-                  Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL 
Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
                   value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -128,12 +128,12 @@ STAGE PLANS:
                 keys: key (type: string), '0' (type: string), val (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
-                  Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL 
Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -200,12 +200,12 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: rand() (type: double)
-                  Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL 
Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
                   value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -213,7 +213,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 
(type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column 
stats: NONE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE
           File Output Operator
             compressed: false
             table:
@@ -229,7 +229,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string)
-              Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL 
Column stats: NONE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -308,12 +308,12 @@ STAGE PLANS:
                 keys: key (type: string), '0' (type: string), val (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL 
Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -405,12 +405,12 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: rand() (type: double)
-                  Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL 
Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
                   value expressions: _col3 (type: bigint)
             Select Operator
               expressions: key (type: string), val (type: string)
@@ -421,7 +421,7 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: 
string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 File Output Operator
                   compressed: false
                   table:
@@ -434,7 +434,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 
(type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column 
stats: NONE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE
           File Output Operator
             compressed: false
             table:
@@ -450,7 +450,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string)
-              Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL 
Column stats: NONE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -493,7 +493,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
               sort order: +++
               Map-reduce partition columns: rand() (type: double)
-              Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL 
Column stats: NONE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -501,7 +501,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 
(type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column 
stats: NONE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE
           File Output Operator
             compressed: false
             table:
@@ -517,7 +517,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string)
-              Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL 
Column stats: NONE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator

Modified: 
hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out?rev=1628118&r1=1628117&r2=1628118&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out 
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out 
Mon Sep 29 05:38:05 2014
@@ -43,12 +43,12 @@ STAGE PLANS:
                 keys: a (type: string), b (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
-                  Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL 
Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
                   value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -56,7 +56,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: 
string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column 
stats: NONE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE
           File Output Operator
             compressed: false
             table:
@@ -72,7 +72,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string), _col2 (type: string)
-              Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL 
Column stats: NONE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -149,12 +149,12 @@ STAGE PLANS:
                 keys: a (type: string), b (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
-                  Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL 
Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
                   value expressions: _col2 (type: double)
       Reduce Operator Tree:
         Group By Operator
@@ -162,7 +162,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: 
string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column 
stats: NONE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE
           File Output Operator
             compressed: false
             table:
@@ -178,7 +178,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string), _col2 (type: string)
-              Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL 
Column stats: NONE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
               value expressions: _col3 (type: double)
       Reduce Operator Tree:
         Group By Operator

Modified: 
hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out?rev=1628118&r1=1628117&r2=1628118&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out 
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out 
Mon Sep 29 05:38:05 2014
@@ -62,12 +62,12 @@ STAGE PLANS:
                 keys: a (type: string), b (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: string)
-                  Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL 
Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
                   value expressions: _col3 (type: 
struct<count:bigint,sum:double,input:string>), _col4 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -149,12 +149,12 @@ STAGE PLANS:
                 keys: a (type: string), b (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
-                  Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL 
Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
                   value expressions: _col2 (type: 
struct<count:bigint,sum:double,input:string>), _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -162,7 +162,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: 
string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3, _col4
-          Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column 
stats: NONE
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE
           File Output Operator
             compressed: false
             table:
@@ -178,7 +178,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string), _col2 (type: string)
-              Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL 
Column stats: NONE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
               value expressions: _col3 (type: 
struct<count:bigint,sum:double,input:string>), _col4 (type: bigint)
       Reduce Operator Tree:
         Group By Operator

Modified: 
hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out?rev=1628118&r1=1628117&r2=1628118&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out 
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out 
Mon Sep 29 05:38:05 2014
@@ -49,12 +49,12 @@ STAGE PLANS:
                 keys: a (type: string), b (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
-                  Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL 
Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
                   value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -172,12 +172,12 @@ STAGE PLANS:
                 keys: a (type: string), b (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL 
Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
-                  Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL 
Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE
                   value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator

svn commit: r1628118 [2/4] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ data/files/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/anno...

Reply via email to