Author: gunther
Date: Wed Feb  5 21:48:14 2014
New Revision: 1564947

URL: http://svn.apache.org/r1564947
Log:
HIVE-6354: Some index test golden files produce non-deterministic stats in explain
(Patch by Gunther Hagleitner, reviewed by Vikram Dixit K)

Modified:
    hive/trunk/itests/qtest/pom.xml
    hive/trunk/ql/src/test/queries/clientpositive/index_bitmap3.q
    hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_auto.q
    hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q
    hive/trunk/ql/src/test/results/clientpositive/index_bitmap3.q.out
    hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto.q.out
    hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out

Modified: hive/trunk/itests/qtest/pom.xml
URL: 
http://svn.apache.org/viewvc/hive/trunk/itests/qtest/pom.xml?rev=1564947&r1=1564946&r2=1564947&view=diff
==============================================================================
--- hive/trunk/itests/qtest/pom.xml (original)
+++ hive/trunk/itests/qtest/pom.xml Wed Feb  5 21:48:14 2014
@@ -36,7 +36,7 @@
     <run_disabled>false</run_disabled>
     <clustermode></clustermode>
     <execute.beeline.tests>false</execute.beeline.tests>
-    
<minimr.query.files>stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q</minimr.query.files>
+    
<minimr.query.files>stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q</minimr.query.files>
     
<minimr.query.negative.files>cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q</minimr.query.negative.files>
     
<minitez.query.files>tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q</minitez.query.files>
     
<minitez.query.files.shared>join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q</minitez.query.files.shared>

Modified: hive/trunk/ql/src/test/queries/clientpositive/index_bitmap3.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_bitmap3.q?rev=1564947&r1=1564946&r2=1564947&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_bitmap3.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_bitmap3.q Wed Feb  5 
21:48:14 2014
@@ -1,3 +1,6 @@
+set hive.stats.dbclass=counter;
+set hive.stats.autogather=true;
+
 EXPLAIN
 CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
 EXPLAIN
@@ -12,16 +15,16 @@ SELECT * FROM default__src_src2_index__ 
 
 SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
 
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` 
AS bitmaps FROM default__src_src1_index__
---         WHERE key = 0) a
---   JOIN 
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
---        WHERE value = "val_0") b
---  ON
---    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY 
a.bucketname;
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src1_index__
+        WHERE key = 0) a
+  JOIN 
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
+       WHERE value = "val_0") b
+ ON
+   a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
 
 INSERT OVERWRITE DIRECTORY "${system:test.tmp.dir}/index_result" 
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`

Modified: hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_auto.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_auto.q?rev=1564947&r1=1564946&r2=1564947&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_auto.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_bitmap_auto.q Wed Feb  
5 21:48:14 2014
@@ -1,3 +1,6 @@
+set hive.stats.dbclass=counter;
+set hive.stats.autogather=true;
+
 -- try the query without indexing, with manual indexing, and with automatic 
indexing
 -- without indexing
 SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key;
@@ -18,16 +21,16 @@ SELECT * FROM default__src_src2_index__ 
 
 
 -- manual indexing
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` 
AS bitmaps FROM default__src_src1_index__
---        WHERE key = 0) a
---  JOIN 
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
---        WHERE value = "val_0") b
---  ON
---    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY 
a.bucketname;
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src1_index__
+       WHERE key = 0) a
+ JOIN 
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
+       WHERE value = "val_0") b
+ ON
+   a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
 
 INSERT OVERWRITE DIRECTORY "${system:test.tmp.dir}/index_result" 
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`

Modified: hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q?rev=1564947&r1=1564946&r2=1564947&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q 
(original)
+++ hive/trunk/ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q Wed Feb  
5 21:48:14 2014
@@ -1,4 +1,5 @@
--- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S, 0.23)
+set hive.stats.dbclass=counter;
+set hive.stats.autogather=true;
 
 DROP TABLE lineitem;
 CREATE TABLE lineitem (L_ORDERKEY      INT,

Modified: hive/trunk/ql/src/test/results/clientpositive/index_bitmap3.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_bitmap3.q.out?rev=1564947&r1=1564946&r2=1564947&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_bitmap3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/index_bitmap3.q.out Wed Feb  
5 21:48:14 2014
@@ -94,16 +94,138 @@ POSTHOOK: Lineage: default__src_src2_ind
 POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE 
[(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), 
]
 POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
 #### A masked pattern was here ####
-PREHOOK: query: -- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` 
AS bitmaps FROM default__src_src1_index__
---         WHERE key = 0) a
---   JOIN 
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
---        WHERE value = "val_0") b
---  ON
---    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY 
a.bucketname;
+PREHOOK: query: EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src1_index__
+        WHERE key = 0) a
+  JOIN 
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
+       WHERE value = "val_0") b
+ ON
+   a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src1_index__
+        WHERE key = 0) a
+  JOIN 
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
+       WHERE value = "val_0") b
+ ON
+   a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src1_index__._bitmaps EXPRESSION 
[(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._bucketname SIMPLE 
[(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._offset SIMPLE 
[(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), 
]
+POSTHOOK: Lineage: default__src_src1_index__.key SIMPLE 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__src_src2_index__._bitmaps EXPRESSION 
[(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE 
[(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE 
[(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), 
]
+POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default__src_src1_index__
+            Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: (key = 0) (type: boolean)
+              Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: _bucketname (type: string), _offset (type: 
bigint), _bitmaps (type: array<bigint>)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 23155 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: bigint)
+                  Statistics: Num rows: 250 Data size: 23155 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string), _col1 (type: 
bigint), _col2 (type: array<bigint>)
+          TableScan
+            alias: default__src_src2_index__
+            Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: (value = 'val_0') (type: boolean)
+              Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: _bucketname (type: string), _offset (type: 
bigint), _bitmaps (type: array<bigint>)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 24155 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: bigint)
+                  Statistics: Num rows: 250 Data size: 24155 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: array<bigint>)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+            1 {VALUE._col2}
+          outputColumnNames: _col0, _col1, _col2, _col5
+          Statistics: Num rows: 275 Data size: 25470 Basic stats: COMPLETE 
Column stats: NONE
+          Filter Operator
+            predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) 
(type: boolean)
+            Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: bigint)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: collect_set(_col1)
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 138 Data size: 12781 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: array<bigint>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: collect_set(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: array<bigint>)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
 
 #### A masked pattern was here ####
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
@@ -119,18 +241,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@default__src_src1_index__
 PREHOOK: Input: default@default__src_src2_index__
 #### A masked pattern was here ####
-POSTHOOK: query: -- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` 
AS bitmaps FROM default__src_src1_index__
---         WHERE key = 0) a
---   JOIN 
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
---        WHERE value = "val_0") b
---  ON
---    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY 
a.bucketname;
-
-#### A masked pattern was here ####
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
 FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src1_index__
         WHERE key = 0) a

Modified: hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto.q.out?rev=1564947&r1=1564946&r2=1564947&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto.q.out 
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto.q.out Wed 
Feb  5 21:48:14 2014
@@ -112,16 +112,139 @@ POSTHOOK: Lineage: default__src_src2_ind
 POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
 #### A masked pattern was here ####
 PREHOOK: query: -- manual indexing
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` 
AS bitmaps FROM default__src_src1_index__
---        WHERE key = 0) a
---  JOIN 
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
---        WHERE value = "val_0") b
---  ON
---    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY 
a.bucketname;
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src1_index__
+       WHERE key = 0) a
+ JOIN 
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
+       WHERE value = "val_0") b
+ ON
+   a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- manual indexing
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src1_index__
+       WHERE key = 0) a
+ JOIN 
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
+       WHERE value = "val_0") b
+ ON
+   a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src1_index__._bitmaps EXPRESSION 
[(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._bucketname SIMPLE 
[(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._offset SIMPLE 
[(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), 
]
+POSTHOOK: Lineage: default__src_src1_index__.key SIMPLE 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__src_src2_index__._bitmaps EXPRESSION 
[(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE 
[(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE 
[(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), 
]
+POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default__src_src1_index__
+            Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: (key = 0) (type: boolean)
+              Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: _bucketname (type: string), _offset (type: 
bigint), _bitmaps (type: array<bigint>)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 23155 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: bigint)
+                  Statistics: Num rows: 250 Data size: 23155 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string), _col1 (type: 
bigint), _col2 (type: array<bigint>)
+          TableScan
+            alias: default__src_src2_index__
+            Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: (value = 'val_0') (type: boolean)
+              Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: _bucketname (type: string), _offset (type: 
bigint), _bitmaps (type: array<bigint>)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 24155 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: bigint)
+                  Statistics: Num rows: 250 Data size: 24155 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: array<bigint>)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+            1 {VALUE._col2}
+          outputColumnNames: _col0, _col1, _col2, _col5
+          Statistics: Num rows: 275 Data size: 25470 Basic stats: COMPLETE 
Column stats: NONE
+          Filter Operator
+            predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) 
(type: boolean)
+            Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: bigint)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: collect_set(_col1)
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 138 Data size: 12781 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: array<bigint>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: collect_set(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: array<bigint>)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
 
 #### A masked pattern was here ####
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
@@ -137,19 +260,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@default__src_src1_index__
 PREHOOK: Input: default@default__src_src2_index__
 #### A masked pattern was here ####
-POSTHOOK: query: -- manual indexing
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` 
AS bitmaps FROM default__src_src1_index__
---        WHERE key = 0) a
---  JOIN 
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src2_index__
---        WHERE value = "val_0") b
---  ON
---    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY 
a.bucketname;
-
-#### A masked pattern was here ####
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
 FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS 
bitmaps FROM default__src_src1_index__
         WHERE key = 0) a

Modified: hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out?rev=1564947&r1=1564946&r2=1564947&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out 
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out Wed 
Feb  5 21:48:14 2014
@@ -261,23 +261,23 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: default__lineitem_lineitem_lshipdate_idx__
-            Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE 
Column stats: NONE
             Select Operator
               expressions: l_shipdate (type: string), _count_of_l_shipdate 
(type: bigint)
               outputColumnNames: l_shipdate, _count_of_l_shipdate
-              Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
                 aggregations: sum(_count_of_l_shipdate)
                 bucketGroup: true
                 keys: l_shipdate (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 95 Data size: 11145 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 95 Data size: 11145 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 95 Data size: 8675 Basic stats: 
COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -285,14 +285,14 @@ STAGE PLANS:
           keys: KEY._col0 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: bigint)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -597,22 +597,22 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: default__lineitem_lineitem_lshipdate_idx__
-            Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE 
Column stats: NONE
             Select Operator
               expressions: l_shipdate (type: string), _count_of_l_shipdate 
(type: bigint)
               outputColumnNames: l_shipdate, _count_of_l_shipdate
-              Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
                 aggregations: sum(_count_of_l_shipdate)
                 keys: year(l_shipdate) (type: int), month(l_shipdate) (type: 
int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 95 Data size: 11145 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: int)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: int), _col1 
(type: int)
-                  Statistics: Num rows: 95 Data size: 11145 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 95 Data size: 8675 Basic stats: 
COMPLETE Column stats: NONE
                   value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -620,11 +620,11 @@ STAGE PLANS:
           keys: KEY._col0 (type: int), KEY._col1 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
           Select Operator
             expressions: _col0 (type: int), _col1 (type: int), _col2 (type: 
bigint)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
               table:
@@ -639,14 +639,14 @@ STAGE PLANS:
             Reduce Output Operator
               key expressions: _col0 (type: int), _col1 (type: int)
               sort order: ++
-              Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
               value expressions: _col0 (type: int), _col1 (type: int), _col2 
(type: bigint)
       Reduce Operator Tree:
         Extract
-          Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -777,25 +777,25 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: lastyear:default__lineitem_lineitem_lshipdate_idx__
-            Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: (year(l_shipdate) = 1997) (type: boolean)
-              Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: l_shipdate (type: string), _count_of_l_shipdate 
(type: bigint)
                 outputColumnNames: l_shipdate, _count_of_l_shipdate
-                Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
                 Group By Operator
                   aggregations: sum(_count_of_l_shipdate)
                   keys: year(l_shipdate) (type: int), month(l_shipdate) (type: 
int)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 47 Data size: 5513 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 47 Data size: 4291 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int), _col1 (type: int)
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: int), _col1 
(type: int)
-                    Statistics: Num rows: 47 Data size: 5513 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 47 Data size: 4291 Basic stats: 
COMPLETE Column stats: NONE
                     value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -803,11 +803,11 @@ STAGE PLANS:
           keys: KEY._col0 (type: int), KEY._col1 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE 
Column stats: NONE
           Select Operator
             expressions: _col1 (type: int), _col2 (type: bigint)
             outputColumnNames: _col1, _col2
-            Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
               table:
@@ -823,14 +823,14 @@ STAGE PLANS:
               key expressions: _col1 (type: int)
               sort order: +
               Map-reduce partition columns: _col1 (type: int)
-              Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE 
Column stats: NONE
               value expressions: _col1 (type: int), _col2 (type: bigint)
           TableScan
             Reduce Output Operator
               key expressions: _col1 (type: int)
               sort order: +
               Map-reduce partition columns: _col1 (type: int)
-              Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE 
Column stats: NONE
               value expressions: _col1 (type: int), _col2 (type: bigint)
       Reduce Operator Tree:
         Join Operator
@@ -840,14 +840,14 @@ STAGE PLANS:
             0 {VALUE._col1} {VALUE._col2}
             1 {VALUE._col1} {VALUE._col2}
           outputColumnNames: _col1, _col2, _col4, _col5
-          Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 25 Data size: 2308 Basic stats: COMPLETE 
Column stats: NONE
           Select Operator
             expressions: _col1 (type: int), _col4 (type: int), ((_col5 - 
_col2) / _col2) (type: decimal(38,19))
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 25 Data size: 2308 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 25 Data size: 2308 Basic stats: COMPLETE 
Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -858,25 +858,25 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: thisyear:default__lineitem_lineitem_lshipdate_idx__
-            Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: (year(l_shipdate) = 1998) (type: boolean)
-              Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: l_shipdate (type: string), _count_of_l_shipdate 
(type: bigint)
                 outputColumnNames: l_shipdate, _count_of_l_shipdate
-                Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
                 Group By Operator
                   aggregations: sum(_count_of_l_shipdate)
                   keys: year(l_shipdate) (type: int), month(l_shipdate) (type: 
int)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 47 Data size: 5513 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 47 Data size: 4291 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int), _col1 (type: int)
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: int), _col1 
(type: int)
-                    Statistics: Num rows: 47 Data size: 5513 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 47 Data size: 4291 Basic stats: 
COMPLETE Column stats: NONE
                     value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -884,11 +884,11 @@ STAGE PLANS:
           keys: KEY._col0 (type: int), KEY._col1 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE 
Column stats: NONE
           Select Operator
             expressions: _col1 (type: int), _col2 (type: bigint)
             outputColumnNames: _col1, _col2
-            Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
               table:
@@ -927,23 +927,23 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: null-subquery1:default__lineitem_lineitem_lshipdate_idx__
-            Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE 
Column stats: NONE
             Select Operator
               expressions: l_shipdate (type: string), _count_of_l_shipdate 
(type: bigint)
               outputColumnNames: l_shipdate, _count_of_l_shipdate
-              Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
                 aggregations: sum(_count_of_l_shipdate)
                 bucketGroup: true
                 keys: l_shipdate (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 95 Data size: 11145 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 95 Data size: 11145 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 95 Data size: 8675 Basic stats: 
COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -951,11 +951,11 @@ STAGE PLANS:
           keys: KEY._col0 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: bigint)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
               table:
@@ -968,14 +968,14 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             Union
-              Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: _col0 (type: string), _col1 (type: bigint)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 163 Data size: 17612 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 163 Data size: 16390 Basic stats: 
COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 163 Data size: 17612 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 163 Data size: 16390 Basic stats: 
COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -988,14 +988,14 @@ STAGE PLANS:
               outputColumnNames: _col0, _col1
               Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE 
Column stats: NONE
               Union
-                Statistics: Num rows: 163 Data size: 17612 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 163 Data size: 16390 Basic stats: 
COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: bigint)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 163 Data size: 17612 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 163 Data size: 16390 Basic stats: 
COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 163 Data size: 17612 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 163 Data size: 16390 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3204,23 +3204,23 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: default__tbl_tbl_key_idx__
-            Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE 
Column stats: NONE
             Select Operator
               expressions: key (type: int), _count_of_key (type: bigint)
               outputColumnNames: key, _count_of_key
-              Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
                 aggregations: sum(_count_of_key)
                 bucketGroup: true
                 keys: key (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE 
Column stats: NONE
                   value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -3228,11 +3228,11 @@ STAGE PLANS:
           keys: KEY._col0 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: _col0 (type: int), _col1 (type: bigint)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE 
Column stats: NONE
             File Output Operator
               compressed: false
               table:
@@ -3247,14 +3247,14 @@ STAGE PLANS:
             Reduce Output Operator
               key expressions: _col0 (type: int)
               sort order: +
-              Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE 
Column stats: NONE
               value expressions: _col0 (type: int), _col1 (type: bigint)
       Reduce Operator Tree:
         Extract
-          Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE 
Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat


Reply via email to