Modified: hive/branches/llap/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out (original) +++ hive/branches/llap/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out Tue Oct 14 19:06:45 2014 @@ -1,4 +1,24 @@ -PREHOOK: query: create table if not exists loc_staging ( +PREHOOK: query: -- hash aggregation is disabled + +-- There are different cases for Group By depending on map/reduce side, hash aggregation, +-- grouping sets and column stats. If we don't have column stats, we just assume hash +-- aggregation is disabled. Following are the possible cases and rule for cardinality +-- estimation + +-- MAP SIDE: +-- Case 1: NO column stats, NO hash aggregation, NO grouping sets - numRows +-- Case 2: NO column stats, NO hash aggregation, grouping sets - numRows * sizeOfGroupingSet +-- Case 3: column stats, hash aggregation, NO grouping sets - Min(numRows / 2, ndvProduct * parallelism) +-- Case 4: column stats, hash aggregation, grouping sets - Min((numRows * sizeOfGroupingSet) / 2, ndvProduct * parallelism * sizeOfGroupingSet) +-- Case 5: column stats, NO hash aggregation, NO grouping sets - numRows +-- Case 6: column stats, NO hash aggregation, grouping sets - numRows * sizeOfGroupingSet + +-- REDUCE SIDE: +-- Case 7: NO column stats - numRows / 2 +-- Case 8: column stats, grouping sets - Min(numRows, ndvProduct * sizeOfGroupingSet) +-- Case 9: column stats, NO grouping sets - Min(numRows, ndvProduct) + +create table if not exists loc_staging ( state string, locid int, zip bigint, @@ -7,7 +27,27 @@ PREHOOK: query: create table if not exis PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output:
default@loc_staging -POSTHOOK: query: create table if not exists loc_staging ( +POSTHOOK: query: -- hash aggregation is disabled + +-- There are different cases for Group By depending on map/reduce side, hash aggregation, +-- grouping sets and column stats. If we don't have column stats, we just assume hash +-- aggregation is disabled. Following are the possible cases and rule for cardinality +-- estimation + +-- MAP SIDE: +-- Case 1: NO column stats, NO hash aggregation, NO grouping sets - numRows +-- Case 2: NO column stats, NO hash aggregation, grouping sets - numRows * sizeOfGroupingSet +-- Case 3: column stats, hash aggregation, NO grouping sets - Min(numRows / 2, ndvProduct * parallelism) +-- Case 4: column stats, hash aggregation, grouping sets - Min((numRows * sizeOfGroupingSet) / 2, ndvProduct * parallelism * sizeOfGroupingSet) +-- Case 5: column stats, NO hash aggregation, NO grouping sets - numRows +-- Case 6: column stats, NO hash aggregation, grouping sets - numRows * sizeOfGroupingSet + +-- REDUCE SIDE: +-- Case 7: NO column stats - numRows / 2 +-- Case 8: column stats, grouping sets - Min(numRows, ndvProduct * sizeOfGroupingSet) +-- Case 9: column stats, NO grouping sets - Min(numRows, ndvProduct) + +create table if not exists loc_staging ( state string, locid int, zip bigint, @@ -190,22 +230,20 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc #### A masked pattern was here #### -POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc #### A masked pattern was here #### -PREHOOK: query: -- only one distinct value in year column + 1 NULL value --- map-side
GBY: numRows: 8 (map-side will not do any reduction) --- reduce-side GBY: numRows: 2 +PREHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8 +-- Case 9: column stats, NO grouping sets - cardinality = 2 explain select year from loc_orc group by year PREHOOK: type: QUERY -POSTHOOK: query: -- only one distinct value in year column + 1 NULL value --- map-side GBY: numRows: 8 (map-side will not do any reduction) --- reduce-side GBY: numRows: 2 +POSTHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8 +-- Case 9: column stats, NO grouping sets - cardinality = 2 explain select year from loc_orc group by year POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -257,12 +295,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY: numRows: 8 --- reduce-side GBY: numRows: 4 +PREHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8 +-- Case 9: column stats, NO grouping sets - cardinality = 8 explain select state,locid from loc_orc group by state,locid PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY: numRows: 8 --- reduce-side GBY: numRows: 4 +POSTHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8 +-- Case 9: column stats, NO grouping sets - cardinality = 8 explain select state,locid from loc_orc group by state,locid POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -295,14 +333,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic
stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -314,10 +352,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32 +-- Case 8: column stats, grouping sets - cardinality = 32 explain select state,locid from loc_orc group by state,locid with cube PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32 +-- Case 8: column stats, grouping sets - cardinality = 32 explain select state,locid from loc_orc group by state,locid with cube POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -339,25 +379,25 @@ STAGE PLANS: keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 
- Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -369,10 +409,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24 +-- Case 8: column stats, grouping sets - cardinality = 24 explain select state,locid from loc_orc group by state,locid with rollup PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24 +-- Case 8: column stats, grouping sets - cardinality = 24 explain select state,locid from loc_orc group by state,locid with rollup POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -394,25 +436,25 @@ STAGE PLANS: keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition 
columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -424,10 +466,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 8 +-- Case 8: column stats, grouping sets - cardinality = 8 explain select state,locid from loc_orc group by state,locid grouping sets((state)) PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 8 +-- Case 8: column stats, grouping sets - cardinality = 8 explain select state,locid from loc_orc group by state,locid grouping sets((state)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -449,25 +493,25 @@ STAGE 
PLANS: keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -479,10 +523,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 16 +-- Case 8: column stats, grouping sets - cardinality = 16 explain select state,locid from loc_orc group by state,locid 
grouping sets((state),(locid)) PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 16 +-- Case 8: column stats, grouping sets - cardinality = 16 explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -504,25 +550,25 @@ STAGE PLANS: keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -534,10 +580,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24 +-- Case 8: column stats, grouping sets - cardinality = 24 explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24 +-- Case 8: column stats, grouping sets - cardinality = 24 explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -559,25 +607,25 @@ STAGE PLANS: keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: 
string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -589,10 +637,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32 +-- Case 8: column stats, grouping sets - cardinality = 32 explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32 +-- Case 8: column stats, grouping sets - cardinality = 32 explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -614,25 +664,25 @@ STAGE PLANS: keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -644,12 +694,16 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) --- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2) +PREHOOK: query: -- map-side parallelism will be 10 + +-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4 +-- Case 9: column stats, NO grouping sets - cardinality = 2 explain select year from loc_orc group by year PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) --- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2.
numRows = min(80/2, 2) +POSTHOOK: query: -- map-side parallelism will be 10 + +-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4 +-- Case 9: column stats, NO grouping sets - cardinality = 2 explain select year from loc_orc group by year POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -671,25 +725,25 @@ STAGE PLANS: keys: year (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 80 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -701,10 +755,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp.
numRows = min(320/2, 6*7) +PREHOOK: query: -- Case 4: column stats, hash aggregation, grouping sets - cardinality = 16 +-- Case 8: column stats, grouping sets - cardinality = 16 explain select state,locid from loc_orc group by state,locid with cube PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7) +POSTHOOK: query: -- Case 4: column stats, hash aggregation, grouping sets - cardinality = 16 +-- Case 8: column stats, grouping sets - cardinality = 16 explain select state,locid from loc_orc group by state,locid with cube POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -726,25 +782,84 @@ STAGE PLANS: keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 35 Data size: 6125 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1440 Basic 
stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- ndvProduct becomes 0 as zip does not have column stats +-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4 +-- Case 9: column stats, NO grouping sets - cardinality = 2 +explain select state,zip from loc_orc group by state,zip +PREHOOK: type: QUERY +POSTHOOK: query: -- ndvProduct becomes 0 as zip does not have column stats +-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4 +-- Case 9: column stats, NO grouping sets - cardinality = 2 +explain select state,zip from loc_orc group by state,zip +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: state (type: string), zip (type: bigint) + outputColumnNames: state, zip + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + keys: state (type: string), zip (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Operator Tree: + Group By
Operator + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -756,10 +871,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 32 +-- Case 7: NO column stats - cardinality = 16 explain select state,locid from loc_orc group by state,locid with cube PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 32 +-- Case 7: NO column stats - cardinality = 16 explain select state,locid from loc_orc group by state,locid with cube POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -811,10 +928,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 24 +-- Case 7: NO column stats - cardinality = 12 explain select state,locid from loc_orc group by state,locid with rollup PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets -
cardinality = 24 +-- Case 7: NO column stats - cardinality = 12 explain select state,locid from loc_orc group by state,locid with rollup POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -866,10 +985,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 8 +-- Case 7: NO column stats - cardinality = 4 explain select state,locid from loc_orc group by state,locid grouping sets((state)) PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 8 +-- Case 7: NO column stats - cardinality = 4 explain select state,locid from loc_orc group by state,locid grouping sets((state)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -921,10 +1042,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 16 +-- Case 7: NO column stats - cardinality = 8 explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 16 +-- Case 7: NO column stats - cardinality = 8 explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -976,10 +1099,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 24 +-- Case 7: NO column stats - cardinality = 12 explain select state,locid from loc_orc group by
state,locid grouping sets((state),(locid),()) PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 24 +-- Case 7: NO column stats - cardinality = 12 explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -1031,10 +1156,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 32 +-- Case 7: NO column stats - cardinality = 16 explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = 32 +-- Case 7: NO column stats - cardinality = 16 explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -1086,12 +1213,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) --- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2) +PREHOOK: query: -- Case 1: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8 +-- Case 7: NO column stats - cardinality = 4 explain select year from loc_orc group by year PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) --- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2.
numRows = min(80/2, 2) +POSTHOOK: query: -- Case 1: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8 +-- Case 7: NO column stats - cardinality = 4 explain select year from loc_orc group by year POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -1113,25 +1240,25 @@ STAGE PLANS: keys: year (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 80 Data size: 7960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80 Data size: 7960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1143,10 +1270,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. 
numRows = min(320/2, 6*7) +PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 32 +-- Case 7: NO column stats - cardinality = 16 explain select state,locid from loc_orc group by state,locid with cube PREHOOK: type: QUERY -POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7) +POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 32 +-- Case 7: NO column stats - cardinality = 16 explain select state,locid from loc_orc group by state,locid with cube POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -1168,25 +1297,25 @@ STAGE PLANS: keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column 
stats: NONE File Output Operator compressed: false - Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/branches/llap/ql/src/test/results/clientpositive/annotate_stats_part.q.out URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/test/results/clientpositive/annotate_stats_part.q.out?rev=1631841&r1=1631840&r2=1631841&view=diff ============================================================================== --- hive/branches/llap/ql/src/test/results/clientpositive/annotate_stats_part.q.out (original) +++ hive/branches/llap/ql/src/test/results/clientpositive/annotate_stats_part.q.out Tue Oct 14 19:06:45 2014 @@ -56,11 +56,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL ListSink PREHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging @@ -98,11 +98,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 5 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- partition level analyze statistics for specific parition @@ -135,11 +135,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
2 Data size: 323 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) + expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 323 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE @@ -158,11 +158,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 9 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 9 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -181,11 +181,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) + expressions: state (type: string), locid (type: int), zip (type: bigint), '2001' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- partition level analyze statistics for all partitions @@ -222,11 +222,11 @@ STAGE PLANS: Processor 
Tree: TableScan alias: loc_orc - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) + expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -245,11 +245,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE @@ -268,11 +268,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- both partitions will be pruned @@ -293,14 +293,14 @@ STAGE 
PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL Filter Operator predicate: ((year = '2001') and (year = '__HIVE_DEFAULT_PARTITION__')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL ListSink PREHOOK: query: -- partition level partial column statistics @@ -322,33 +322,21 @@ POSTHOOK: query: -- basicStatState: COMP explain select zip from loc_orc POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: zip (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: zip (type: bigint) + 
outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL explain select state from loc_orc @@ -357,33 +345,44 @@ POSTHOOK: query: -- basicStatState: COMP explain select state from loc_orc POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: state (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: state (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + ListSink +PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain select year from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain select year from loc_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + 
expressions: year (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL -- basicStatState: COMPLETE colStatState: PARTIAL @@ -394,33 +393,21 @@ POSTHOOK: query: -- column statistics fo explain select state,locid from loc_orc POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL + ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE explain select state,locid from loc_orc where year='2001' @@ -429,33 +416,21 @@ POSTHOOK: query: -- basicStatState: COMP explain select state,locid from loc_orc where year='2001' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: 
Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: loc_orc - Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: loc_orc + Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE explain select state,locid from loc_orc where year!='2001' @@ -464,33 +439,21 @@ POSTHOOK: query: -- basicStatState: COMP explain select state,locid from loc_orc where year!='2001' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: loc_orc - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE - table: - 
input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: loc_orc + Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL explain select * from loc_orc @@ -508,11 +471,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: -- This is to test filter expression evaluation on partition column @@ -533,7 +496,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid > 0) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -569,17 +532,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: 
(locid > 0) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: locid (type: int), year (type: string) + expressions: locid (type: int), '2001' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -605,12 +568,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid > 0) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: locid (type: int), year (type: string) + expressions: locid (type: int), '2001' (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator
