[09/36] hive git commit: HIVE-16654: Optimize a combination of avg(), sum(), count(distinct) etc (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

weiz Tue, 06 Jun 2017 11:35:49 -0700

http://git-wip-us.apache.org/repos/asf/hive/blob/b560f492/ql/src/test/results/clientpositive/perf/query94.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query94.q.out 
b/ql/src/test/results/clientpositive/perf/query94.q.out
index 6e24345..c5fc9e7 100644
--- a/ql/src/test/results/clientpositive/perf/query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/query94.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join MERGEJOIN[107][tables = [$hdt$_2, $hdt$_3, $hdt$_1, 
$hdt$_4]] in Stage 'Reducer 17' is a cross product
+Warning: Shuffle Join MERGEJOIN[113][tables = [$hdt$_2, $hdt$_3, $hdt$_1, 
$hdt$_4]] in Stage 'Reducer 18' is a cross product
 PREHOOK: query: explain
 select  
    count(distinct ws_order_number) as `order count`
@@ -58,174 +58,182 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 13 <- Map 12 (SIMPLE_EDGE)
-Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE)
-Reducer 16 <- Reducer 15 (SIMPLE_EDGE)
-Reducer 17 <- Map 14 (CUSTOM_SIMPLE_EDGE), Map 19 (CUSTOM_SIMPLE_EDGE), Map 20 
(CUSTOM_SIMPLE_EDGE), Map 21 (CUSTOM_SIMPLE_EDGE)
-Reducer 18 <- Reducer 17 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 14 <- Map 13 (SIMPLE_EDGE)
+Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE)
+Reducer 17 <- Reducer 16 (SIMPLE_EDGE)
+Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE), Map 20 (CUSTOM_SIMPLE_EDGE), Map 21 
(CUSTOM_SIMPLE_EDGE), Map 22 (CUSTOM_SIMPLE_EDGE)
+Reducer 19 <- Reducer 18 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
 Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
-Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
+Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
-      Reducer 8
+      Reducer 9
       File Output Operator [FS_74]
         Limit [LIM_72] (rows=1 width=344)
           Number of rows:100
           Select Operator [SEL_71] (rows=1 width=344)
             Output:["_col0","_col1","_col2"]
-          <-Reducer 7 [SIMPLE_EDGE]
+          <-Reducer 8 [SIMPLE_EDGE]
             SHUFFLE [RS_70]
               Select Operator [SEL_69] (rows=1 width=344)
                 Output:["_col1","_col2","_col3"]
-                Group By Operator [GBY_68] (rows=1 width=344)
-                  
Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT 
KEY._col0:0._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
-                <-Reducer 6 [SIMPLE_EDGE]
-                  SHUFFLE [RS_67]
-                    Group By Operator [GBY_66] (rows=1395035081047425024 
width=1)
-                      
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT 
_col4)","sum(_col5)","sum(_col6)"],keys:_col4
-                      Select Operator [SEL_65] (rows=1395035081047425024 
width=1)
-                        Output:["_col4","_col5","_col6"]
-                        Filter Operator [FIL_64] (rows=1395035081047425024 
width=1)
-                          predicate:_col16 is null
-                          Select Operator [SEL_63] (rows=2790070162094850048 
width=1)
-                            Output:["_col4","_col5","_col6","_col16"]
-                            Merge Join Operator [MERGEJOIN_113] 
(rows=2790070162094850048 width=1)
-                              Conds:RS_60._col3, _col4=RS_61._col0, 
_col1(Inner),Output:["_col4","_col5","_col6","_col14"]
-                            <-Reducer 16 [SIMPLE_EDGE]
-                              SHUFFLE [RS_61]
-                                PartitionCols:_col0, _col1
-                                Group By Operator [GBY_46] 
(rows=2536427365110644736 width=1)
-                                  Output:["_col0","_col1"],keys:KEY._col0, 
KEY._col1
-                                <-Reducer 15 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_45]
-                                    PartitionCols:_col0, _col1
-                                    Group By Operator [GBY_44] 
(rows=5072854730221289472 width=1)
-                                      Output:["_col0","_col1"],keys:_col2, 
_col3
-                                      Select Operator [SEL_43] 
(rows=5072854730221289472 width=1)
-                                        Output:["_col2","_col3"]
-                                        Filter Operator [FIL_42] 
(rows=5072854730221289472 width=1)
-                                          predicate:(_col2 <> _col0)
-                                          Merge Join Operator [MERGEJOIN_111] 
(rows=5072854730221289472 width=1)
-                                            
Conds:RS_39._col1=RS_40._col1(Inner),Output:["_col0","_col2","_col3"]
-                                          <-Map 14 [SIMPLE_EDGE]
-                                            PARTITION_ONLY_SHUFFLE [RS_39]
-                                              PartitionCols:_col1
-                                              Select Operator [SEL_20] 
(rows=144002668 width=135)
-                                                Output:["_col0","_col1"]
-                                                TableScan [TS_19] 
(rows=144002668 width=135)
-                                                  
default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"]
-                                          <-Reducer 18 [SIMPLE_EDGE]
-                                            SHUFFLE [RS_40]
-                                              PartitionCols:_col1
-                                              Select Operator [SEL_38] 
(rows=4611686018427387903 width=1)
-                                                Output:["_col0","_col1"]
-                                                Group By Operator [GBY_37] 
(rows=4611686018427387903 width=1)
-                                                  
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
-                                                <-Reducer 17 [SIMPLE_EDGE]
-                                                  SHUFFLE [RS_36]
-                                                    PartitionCols:_col0, _col1
-                                                    Group By Operator [GBY_35] 
(rows=9223372036854775807 width=1)
-                                                      
Output:["_col0","_col1"],keys:_col4, _col3
-                                                      Merge Join Operator 
[MERGEJOIN_107] (rows=9223372036854775807 width=1)
-                                                        
Conds:(Inner),(Inner),(Inner),Output:["_col3","_col4"]
-                                                      <-Map 14 
[CUSTOM_SIMPLE_EDGE]
-                                                        PARTITION_ONLY_SHUFFLE 
[RS_32]
-                                                          Select Operator 
[SEL_28] (rows=144002668 width=135)
-                                                            
Output:["_col0","_col1"]
-                                                             Please refer to 
the previous TableScan [TS_19]
-                                                      <-Map 19 
[CUSTOM_SIMPLE_EDGE]
-                                                        PARTITION_ONLY_SHUFFLE 
[RS_29]
-                                                          Select Operator 
[SEL_22] (rows=73049 width=4)
-                                                            TableScan [TS_21] 
(rows=73049 width=1119)
-                                                              
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE
-                                                      <-Map 20 
[CUSTOM_SIMPLE_EDGE]
-                                                        PARTITION_ONLY_SHUFFLE 
[RS_30]
-                                                          Select Operator 
[SEL_24] (rows=84 width=4)
-                                                            TableScan [TS_23] 
(rows=84 width=1850)
-                                                              
default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE
-                                                      <-Map 21 
[CUSTOM_SIMPLE_EDGE]
-                                                        PARTITION_ONLY_SHUFFLE 
[RS_31]
-                                                          Select Operator 
[SEL_26] (rows=40000000 width=4)
-                                                            TableScan [TS_25] 
(rows=40000000 width=1014)
-                                                              
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE
-                            <-Reducer 5 [SIMPLE_EDGE]
-                              SHUFFLE [RS_60]
-                                PartitionCols:_col3, _col4
-                                Merge Join Operator [MERGEJOIN_112] 
(rows=210834322 width=135)
-                                  Conds:RS_57._col4=RS_58._col0(Left 
Outer),Output:["_col3","_col4","_col5","_col6","_col14"]
-                                <-Reducer 13 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_58]
-                                    PartitionCols:_col0
-                                    Select Operator [SEL_18] (rows=7199233 
width=92)
-                                      Output:["_col0","_col1"]
-                                      Group By Operator [GBY_17] (rows=7199233 
width=92)
-                                        Output:["_col0"],keys:KEY._col0
-                                      <-Map 12 [SIMPLE_EDGE]
-                                        SHUFFLE [RS_16]
+                Group By Operator [GBY_112] (rows=1 width=344)
+                  
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
+                <-Reducer 7 [CUSTOM_SIMPLE_EDGE]
+                  PARTITION_ONLY_SHUFFLE [RS_111]
+                    Group By Operator [GBY_110] (rows=1 width=344)
+                      
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
+                      Group By Operator [GBY_109] (rows=1395035081047425024 
width=1)
+                        
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
+                      <-Reducer 6 [SIMPLE_EDGE]
+                        SHUFFLE [RS_108]
+                          PartitionCols:_col0
+                          Group By Operator [GBY_107] 
(rows=1395035081047425024 width=1)
+                            
Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4
+                            Select Operator [SEL_65] (rows=1395035081047425024 
width=1)
+                              Output:["_col4","_col5","_col6"]
+                              Filter Operator [FIL_64] 
(rows=1395035081047425024 width=1)
+                                predicate:_col16 is null
+                                Select Operator [SEL_63] 
(rows=2790070162094850048 width=1)
+                                  Output:["_col4","_col5","_col6","_col16"]
+                                  Merge Join Operator [MERGEJOIN_119] 
(rows=2790070162094850048 width=1)
+                                    Conds:RS_60._col3, _col4=RS_61._col0, 
_col1(Inner),Output:["_col4","_col5","_col6","_col14"]
+                                  <-Reducer 17 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_61]
+                                      PartitionCols:_col0, _col1
+                                      Group By Operator [GBY_46] 
(rows=2536427365110644736 width=1)
+                                        
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+                                      <-Reducer 16 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_45]
+                                          PartitionCols:_col0, _col1
+                                          Group By Operator [GBY_44] 
(rows=5072854730221289472 width=1)
+                                            
Output:["_col0","_col1"],keys:_col2, _col3
+                                            Select Operator [SEL_43] 
(rows=5072854730221289472 width=1)
+                                              Output:["_col2","_col3"]
+                                              Filter Operator [FIL_42] 
(rows=5072854730221289472 width=1)
+                                                predicate:(_col2 <> _col0)
+                                                Merge Join Operator 
[MERGEJOIN_117] (rows=5072854730221289472 width=1)
+                                                  
Conds:RS_39._col1=RS_40._col1(Inner),Output:["_col0","_col2","_col3"]
+                                                <-Map 15 [SIMPLE_EDGE]
+                                                  PARTITION_ONLY_SHUFFLE 
[RS_39]
+                                                    PartitionCols:_col1
+                                                    Select Operator [SEL_20] 
(rows=144002668 width=135)
+                                                      Output:["_col0","_col1"]
+                                                      TableScan [TS_19] 
(rows=144002668 width=135)
+                                                        
default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"]
+                                                <-Reducer 19 [SIMPLE_EDGE]
+                                                  SHUFFLE [RS_40]
+                                                    PartitionCols:_col1
+                                                    Select Operator [SEL_38] 
(rows=4611686018427387903 width=1)
+                                                      Output:["_col0","_col1"]
+                                                      Group By Operator 
[GBY_37] (rows=4611686018427387903 width=1)
+                                                        
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+                                                      <-Reducer 18 
[SIMPLE_EDGE]
+                                                        SHUFFLE [RS_36]
+                                                          PartitionCols:_col0, 
_col1
+                                                          Group By Operator 
[GBY_35] (rows=9223372036854775807 width=1)
+                                                            
Output:["_col0","_col1"],keys:_col4, _col3
+                                                            Merge Join 
Operator [MERGEJOIN_113] (rows=9223372036854775807 width=1)
+                                                              
Conds:(Inner),(Inner),(Inner),Output:["_col3","_col4"]
+                                                            <-Map 15 
[CUSTOM_SIMPLE_EDGE]
+                                                              
PARTITION_ONLY_SHUFFLE [RS_32]
+                                                                Select 
Operator [SEL_28] (rows=144002668 width=135)
+                                                                  
Output:["_col0","_col1"]
+                                                                   Please 
refer to the previous TableScan [TS_19]
+                                                            <-Map 20 
[CUSTOM_SIMPLE_EDGE]
+                                                              
PARTITION_ONLY_SHUFFLE [RS_29]
+                                                                Select 
Operator [SEL_22] (rows=73049 width=4)
+                                                                  TableScan 
[TS_21] (rows=73049 width=1119)
+                                                                    
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE
+                                                            <-Map 21 
[CUSTOM_SIMPLE_EDGE]
+                                                              
PARTITION_ONLY_SHUFFLE [RS_30]
+                                                                Select 
Operator [SEL_24] (rows=84 width=4)
+                                                                  TableScan 
[TS_23] (rows=84 width=1850)
+                                                                    
default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE
+                                                            <-Map 22 
[CUSTOM_SIMPLE_EDGE]
+                                                              
PARTITION_ONLY_SHUFFLE [RS_31]
+                                                                Select 
Operator [SEL_26] (rows=40000000 width=4)
+                                                                  TableScan 
[TS_25] (rows=40000000 width=1014)
+                                                                    
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE
+                                  <-Reducer 5 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_60]
+                                      PartitionCols:_col3, _col4
+                                      Merge Join Operator [MERGEJOIN_118] 
(rows=210834322 width=135)
+                                        Conds:RS_57._col4=RS_58._col0(Left 
Outer),Output:["_col3","_col4","_col5","_col6","_col14"]
+                                      <-Reducer 14 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_58]
                                           PartitionCols:_col0
-                                          Group By Operator [GBY_15] 
(rows=14398467 width=92)
-                                            
Output:["_col0"],keys:wr_order_number
-                                            Filter Operator [FIL_104] 
(rows=14398467 width=92)
-                                              predicate:wr_order_number is not 
null
-                                              TableScan [TS_12] (rows=14398467 
width=92)
-                                                
default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"]
-                                <-Reducer 4 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_57]
-                                    PartitionCols:_col4
-                                    Merge Join Operator [MERGEJOIN_110] 
(rows=191667562 width=135)
-                                      
Conds:RS_54._col2=RS_55._col0(Inner),Output:["_col3","_col4","_col5","_col6"]
-                                    <-Map 11 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_55]
-                                        PartitionCols:_col0
-                                        Select Operator [SEL_11] (rows=42 
width=1850)
-                                          Output:["_col0"]
-                                          Filter Operator [FIL_103] (rows=42 
width=1850)
-                                            predicate:((web_company_name = 
'pri') and web_site_sk is not null)
-                                            TableScan [TS_9] (rows=84 
width=1850)
-                                              
default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"]
-                                    <-Reducer 3 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_54]
-                                        PartitionCols:_col2
-                                        Merge Join Operator [MERGEJOIN_109] 
(rows=174243235 width=135)
-                                          
Conds:RS_51._col1=RS_52._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"]
-                                        <-Map 10 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_52]
-                                            PartitionCols:_col0
-                                            Select Operator [SEL_8] 
(rows=20000000 width=1014)
-                                              Output:["_col0"]
-                                              Filter Operator [FIL_102] 
(rows=20000000 width=1014)
-                                                predicate:((ca_state = 'TX') 
and ca_address_sk is not null)
-                                                TableScan [TS_6] 
(rows=40000000 width=1014)
-                                                  
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
-                                        <-Reducer 2 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_51]
-                                            PartitionCols:_col1
-                                            Merge Join Operator 
[MERGEJOIN_108] (rows=158402938 width=135)
-                                              
Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"]
-                                            <-Map 1 [SIMPLE_EDGE]
-                                              SHUFFLE [RS_48]
+                                          Select Operator [SEL_18] 
(rows=7199233 width=92)
+                                            Output:["_col0","_col1"]
+                                            Group By Operator [GBY_17] 
(rows=7199233 width=92)
+                                              Output:["_col0"],keys:KEY._col0
+                                            <-Map 13 [SIMPLE_EDGE]
+                                              SHUFFLE [RS_16]
                                                 PartitionCols:_col0
-                                                Select Operator [SEL_2] 
(rows=144002668 width=135)
-                                                  
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
-                                                  Filter Operator [FIL_100] 
(rows=144002668 width=135)
-                                                    predicate:(ws_ship_date_sk 
is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null)
-                                                    TableScan [TS_0] 
(rows=144002668 width=135)
-                                                      
default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"]
-                                            <-Map 9 [SIMPLE_EDGE]
-                                              SHUFFLE [RS_49]
-                                                PartitionCols:_col0
-                                                Select Operator [SEL_5] 
(rows=8116 width=1119)
-                                                  Output:["_col0"]
-                                                  Filter Operator [FIL_101] 
(rows=8116 width=1119)
-                                                    predicate:(CAST( d_date AS 
TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and 
d_date_sk is not null)
-                                                    TableScan [TS_3] 
(rows=73049 width=1119)
-                                                      
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+                                                Group By Operator [GBY_15] 
(rows=14398467 width=92)
+                                                  
Output:["_col0"],keys:wr_order_number
+                                                  Filter Operator [FIL_104] 
(rows=14398467 width=92)
+                                                    predicate:wr_order_number 
is not null
+                                                    TableScan [TS_12] 
(rows=14398467 width=92)
+                                                      
default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"]
+                                      <-Reducer 4 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_57]
+                                          PartitionCols:_col4
+                                          Merge Join Operator [MERGEJOIN_116] 
(rows=191667562 width=135)
+                                            
Conds:RS_54._col2=RS_55._col0(Inner),Output:["_col3","_col4","_col5","_col6"]
+                                          <-Map 12 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_55]
+                                              PartitionCols:_col0
+                                              Select Operator [SEL_11] 
(rows=42 width=1850)
+                                                Output:["_col0"]
+                                                Filter Operator [FIL_103] 
(rows=42 width=1850)
+                                                  predicate:((web_company_name 
= 'pri') and web_site_sk is not null)
+                                                  TableScan [TS_9] (rows=84 
width=1850)
+                                                    
default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"]
+                                          <-Reducer 3 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_54]
+                                              PartitionCols:_col2
+                                              Merge Join Operator 
[MERGEJOIN_115] (rows=174243235 width=135)
+                                                
Conds:RS_51._col1=RS_52._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"]
+                                              <-Map 11 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_52]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_8] 
(rows=20000000 width=1014)
+                                                    Output:["_col0"]
+                                                    Filter Operator [FIL_102] 
(rows=20000000 width=1014)
+                                                      predicate:((ca_state = 
'TX') and ca_address_sk is not null)
+                                                      TableScan [TS_6] 
(rows=40000000 width=1014)
+                                                        
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+                                              <-Reducer 2 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_51]
+                                                  PartitionCols:_col1
+                                                  Merge Join Operator 
[MERGEJOIN_114] (rows=158402938 width=135)
+                                                    
Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"]
+                                                  <-Map 1 [SIMPLE_EDGE]
+                                                    SHUFFLE [RS_48]
+                                                      PartitionCols:_col0
+                                                      Select Operator [SEL_2] 
(rows=144002668 width=135)
+                                                        
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+                                                        Filter Operator 
[FIL_100] (rows=144002668 width=135)
+                                                          
predicate:(ws_ship_date_sk is not null and ws_ship_addr_sk is not null and 
ws_web_site_sk is not null)
+                                                          TableScan [TS_0] 
(rows=144002668 width=135)
+                                                            
default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"]
+                                                  <-Map 10 [SIMPLE_EDGE]
+                                                    SHUFFLE [RS_49]
+                                                      PartitionCols:_col0
+                                                      Select Operator [SEL_5] 
(rows=8116 width=1119)
+                                                        Output:["_col0"]
+                                                        Filter Operator 
[FIL_101] (rows=8116 width=1119)
+                                                          predicate:(CAST( 
d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 
and d_date_sk is not null)
+                                                          TableScan [TS_3] 
(rows=73049 width=1119)
+                                                            
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]


http://git-wip-us.apache.org/repos/asf/hive/blob/b560f492/ql/src/test/results/clientpositive/perf/query95.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query95.q.out 
b/ql/src/test/results/clientpositive/perf/query95.q.out
index 91d874e..332bef8 100644
--- a/ql/src/test/results/clientpositive/perf/query95.q.out
+++ b/ql/src/test/results/clientpositive/perf/query95.q.out
@@ -63,169 +63,177 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE)
-Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
-Reducer 14 <- Map 11 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE)
-Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE)
-Reducer 16 <- Reducer 15 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
-Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE), Reducer 4 
(SIMPLE_EDGE)
+Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE)
+Reducer 14 <- Reducer 13 (SIMPLE_EDGE)
+Reducer 15 <- Map 12 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE)
+Reducer 16 <- Map 19 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
+Reducer 17 <- Reducer 16 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Reducer 4 
(SIMPLE_EDGE)
 Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
-      Reducer 7
+      Reducer 8
       File Output Operator [FS_71]
         Limit [LIM_69] (rows=1 width=344)
           Number of rows:100
           Select Operator [SEL_68] (rows=1 width=344)
             Output:["_col0","_col1","_col2"]
-          <-Reducer 6 [SIMPLE_EDGE]
+          <-Reducer 7 [SIMPLE_EDGE]
             SHUFFLE [RS_67]
               Select Operator [SEL_66] (rows=1 width=344)
                 Output:["_col1","_col2","_col3"]
-                Group By Operator [GBY_65] (rows=1 width=344)
-                  
Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT 
KEY._col0:0._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
-                <-Reducer 5 [SIMPLE_EDGE]
-                  SHUFFLE [RS_64]
-                    Group By Operator [GBY_63] (rows=421668645 width=135)
-                      
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT 
_col3)","sum(_col4)","sum(_col5)"],keys:_col3
-                      Merge Join Operator [MERGEJOIN_121] (rows=421668645 
width=135)
-                        
Conds:RS_58._col3=RS_59._col0(Inner),RS_58._col3=RS_60._col0(Inner),Output:["_col3","_col4","_col5"]
-                      <-Reducer 13 [SIMPLE_EDGE]
-                        SHUFFLE [RS_59]
+                Group By Operator [GBY_120] (rows=1 width=344)
+                  
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
+                <-Reducer 6 [CUSTOM_SIMPLE_EDGE]
+                  PARTITION_ONLY_SHUFFLE [RS_119]
+                    Group By Operator [GBY_118] (rows=1 width=344)
+                      
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
+                      Group By Operator [GBY_117] (rows=421668645 width=135)
+                        
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
+                      <-Reducer 5 [SIMPLE_EDGE]
+                        SHUFFLE [RS_116]
                           PartitionCols:_col0
-                          Group By Operator [GBY_25] (rows=79201469 width=135)
-                            Output:["_col0"],keys:KEY._col0
-                          <-Reducer 12 [SIMPLE_EDGE]
-                            SHUFFLE [RS_24]
-                              PartitionCols:_col0
-                              Group By Operator [GBY_23] (rows=158402938 
width=135)
-                                Output:["_col0"],keys:_col1
-                                Select Operator [SEL_22] (rows=158402938 
width=135)
-                                  Output:["_col1"]
-                                  Filter Operator [FIL_21] (rows=158402938 
width=135)
-                                    predicate:(_col0 <> _col2)
-                                    Merge Join Operator [MERGEJOIN_118] 
(rows=158402938 width=135)
-                                      
Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col0","_col1","_col2"]
-                                    <-Map 11 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_18]
-                                        PartitionCols:_col1
-                                        Select Operator [SEL_14] 
(rows=144002668 width=135)
-                                          Output:["_col0","_col1"]
-                                          Filter Operator [FIL_110] 
(rows=144002668 width=135)
-                                            predicate:ws_order_number is not 
null
-                                            TableScan [TS_12] (rows=144002668 
width=135)
-                                              
default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"]
-                                    <-Map 17 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_19]
-                                        PartitionCols:_col1
-                                        Select Operator [SEL_17] 
(rows=144002668 width=135)
-                                          Output:["_col0","_col1"]
-                                          Filter Operator [FIL_111] 
(rows=144002668 width=135)
-                                            predicate:ws_order_number is not 
null
-                                            TableScan [TS_15] (rows=144002668 
width=135)
-                                              
default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"]
-                      <-Reducer 16 [SIMPLE_EDGE]
-                        SHUFFLE [RS_60]
-                          PartitionCols:_col0
-                          Group By Operator [GBY_47] (rows=87121617 width=135)
-                            Output:["_col0"],keys:KEY._col0
-                          <-Reducer 15 [SIMPLE_EDGE]
-                            SHUFFLE [RS_46]
-                              PartitionCols:_col0
-                              Group By Operator [GBY_45] (rows=174243235 
width=135)
-                                Output:["_col0"],keys:_col1
-                                Merge Join Operator [MERGEJOIN_120] 
(rows=174243235 width=135)
-                                  
Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1"]
-                                <-Map 18 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_42]
+                          Group By Operator [GBY_115] (rows=421668645 
width=135)
+                            
Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3
+                            Merge Join Operator [MERGEJOIN_127] 
(rows=421668645 width=135)
+                              
Conds:RS_58._col3=RS_59._col0(Inner),RS_58._col3=RS_60._col0(Inner),Output:["_col3","_col4","_col5"]
+                            <-Reducer 14 [SIMPLE_EDGE]
+                              SHUFFLE [RS_59]
+                                PartitionCols:_col0
+                                Group By Operator [GBY_25] (rows=79201469 
width=135)
+                                  Output:["_col0"],keys:KEY._col0
+                                <-Reducer 13 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_24]
                                     PartitionCols:_col0
-                                    Select Operator [SEL_40] (rows=14398467 
width=92)
-                                      Output:["_col0"]
-                                      Filter Operator [FIL_114] (rows=14398467 
width=92)
-                                        predicate:wr_order_number is not null
-                                        TableScan [TS_38] (rows=14398467 
width=92)
-                                          
default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"]
-                                <-Reducer 14 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_41]
+                                    Group By Operator [GBY_23] (rows=158402938 
width=135)
+                                      Output:["_col0"],keys:_col1
+                                      Select Operator [SEL_22] (rows=158402938 
width=135)
+                                        Output:["_col1"]
+                                        Filter Operator [FIL_21] 
(rows=158402938 width=135)
+                                          predicate:(_col0 <> _col2)
+                                          Merge Join Operator [MERGEJOIN_124] 
(rows=158402938 width=135)
+                                            
Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col0","_col1","_col2"]
+                                          <-Map 12 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_18]
+                                              PartitionCols:_col1
+                                              Select Operator [SEL_14] 
(rows=144002668 width=135)
+                                                Output:["_col0","_col1"]
+                                                Filter Operator [FIL_110] 
(rows=144002668 width=135)
+                                                  predicate:ws_order_number is 
not null
+                                                  TableScan [TS_12] 
(rows=144002668 width=135)
+                                                    
default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"]
+                                          <-Map 18 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_19]
+                                              PartitionCols:_col1
+                                              Select Operator [SEL_17] 
(rows=144002668 width=135)
+                                                Output:["_col0","_col1"]
+                                                Filter Operator [FIL_111] 
(rows=144002668 width=135)
+                                                  predicate:ws_order_number is 
not null
+                                                  TableScan [TS_15] 
(rows=144002668 width=135)
+                                                    
default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"]
+                            <-Reducer 17 [SIMPLE_EDGE]
+                              SHUFFLE [RS_60]
+                                PartitionCols:_col0
+                                Group By Operator [GBY_47] (rows=87121617 
width=135)
+                                  Output:["_col0"],keys:KEY._col0
+                                <-Reducer 16 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_46]
+                                    PartitionCols:_col0
+                                    Group By Operator [GBY_45] (rows=174243235 
width=135)
+                                      Output:["_col0"],keys:_col1
+                                      Merge Join Operator [MERGEJOIN_126] 
(rows=174243235 width=135)
+                                        
Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1"]
+                                      <-Map 19 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_42]
+                                          PartitionCols:_col0
+                                          Select Operator [SEL_40] 
(rows=14398467 width=92)
+                                            Output:["_col0"]
+                                            Filter Operator [FIL_114] 
(rows=14398467 width=92)
+                                              predicate:wr_order_number is not 
null
+                                              TableScan [TS_38] (rows=14398467 
width=92)
+                                                
default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"]
+                                      <-Reducer 15 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_41]
+                                          PartitionCols:_col0
+                                          Select Operator [SEL_37] 
(rows=158402938 width=135)
+                                            Output:["_col0"]
+                                            Filter Operator [FIL_36] 
(rows=158402938 width=135)
+                                              predicate:(_col0 <> _col2)
+                                              Merge Join Operator 
[MERGEJOIN_125] (rows=158402938 width=135)
+                                                
Conds:RS_33._col1=RS_34._col1(Inner),Output:["_col0","_col1","_col2"]
+                                              <-Map 12 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_33]
+                                                  PartitionCols:_col1
+                                                  Select Operator [SEL_29] 
(rows=144002668 width=135)
+                                                    Output:["_col0","_col1"]
+                                                    Filter Operator [FIL_112] 
(rows=144002668 width=135)
+                                                      
predicate:ws_order_number is not null
+                                                       Please refer to the 
previous TableScan [TS_12]
+                                              <-Map 18 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_34]
+                                                  PartitionCols:_col1
+                                                  Select Operator [SEL_32] 
(rows=144002668 width=135)
+                                                    Output:["_col0","_col1"]
+                                                    Filter Operator [FIL_113] 
(rows=144002668 width=135)
+                                                      
predicate:ws_order_number is not null
+                                                       Please refer to the 
previous TableScan [TS_15]
+                            <-Reducer 4 [SIMPLE_EDGE]
+                              SHUFFLE [RS_58]
+                                PartitionCols:_col3
+                                Merge Join Operator [MERGEJOIN_123] 
(rows=191667562 width=135)
+                                  
Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col3","_col4","_col5"]
+                                <-Map 11 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_56]
                                     PartitionCols:_col0
-                                    Select Operator [SEL_37] (rows=158402938 
width=135)
+                                    Select Operator [SEL_11] (rows=42 
width=1850)
                                       Output:["_col0"]
-                                      Filter Operator [FIL_36] (rows=158402938 
width=135)
-                                        predicate:(_col0 <> _col2)
-                                        Merge Join Operator [MERGEJOIN_119] 
(rows=158402938 width=135)
-                                          
Conds:RS_33._col1=RS_34._col1(Inner),Output:["_col0","_col1","_col2"]
-                                        <-Map 11 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_33]
-                                            PartitionCols:_col1
-                                            Select Operator [SEL_29] 
(rows=144002668 width=135)
-                                              Output:["_col0","_col1"]
-                                              Filter Operator [FIL_112] 
(rows=144002668 width=135)
-                                                predicate:ws_order_number is 
not null
-                                                 Please refer to the previous 
TableScan [TS_12]
-                                        <-Map 17 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_34]
-                                            PartitionCols:_col1
-                                            Select Operator [SEL_32] 
(rows=144002668 width=135)
-                                              Output:["_col0","_col1"]
-                                              Filter Operator [FIL_113] 
(rows=144002668 width=135)
-                                                predicate:ws_order_number is 
not null
-                                                 Please refer to the previous 
TableScan [TS_15]
-                      <-Reducer 4 [SIMPLE_EDGE]
-                        SHUFFLE [RS_58]
-                          PartitionCols:_col3
-                          Merge Join Operator [MERGEJOIN_117] (rows=191667562 
width=135)
-                            
Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col3","_col4","_col5"]
-                          <-Map 10 [SIMPLE_EDGE]
-                            SHUFFLE [RS_56]
-                              PartitionCols:_col0
-                              Select Operator [SEL_11] (rows=42 width=1850)
-                                Output:["_col0"]
-                                Filter Operator [FIL_109] (rows=42 width=1850)
-                                  predicate:((web_company_name = 'pri') and 
web_site_sk is not null)
-                                  TableScan [TS_9] (rows=84 width=1850)
-                                    
default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"]
-                          <-Reducer 3 [SIMPLE_EDGE]
-                            SHUFFLE [RS_55]
-                              PartitionCols:_col2
-                              Merge Join Operator [MERGEJOIN_116] 
(rows=174243235 width=135)
-                                
Conds:RS_52._col1=RS_53._col0(Inner),Output:["_col2","_col3","_col4","_col5"]
-                              <-Map 9 [SIMPLE_EDGE]
-                                SHUFFLE [RS_53]
-                                  PartitionCols:_col0
-                                  Select Operator [SEL_8] (rows=20000000 
width=1014)
-                                    Output:["_col0"]
-                                    Filter Operator [FIL_108] (rows=20000000 
width=1014)
-                                      predicate:((ca_state = 'TX') and 
ca_address_sk is not null)
-                                      TableScan [TS_6] (rows=40000000 
width=1014)
-                                        
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
-                              <-Reducer 2 [SIMPLE_EDGE]
-                                SHUFFLE [RS_52]
-                                  PartitionCols:_col1
-                                  Merge Join Operator [MERGEJOIN_115] 
(rows=158402938 width=135)
-                                    
Conds:RS_49._col0=RS_50._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"]
-                                  <-Map 1 [SIMPLE_EDGE]
-                                    SHUFFLE [RS_49]
-                                      PartitionCols:_col0
-                                      Select Operator [SEL_2] (rows=144002668 
width=135)
-                                        
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
-                                        Filter Operator [FIL_106] 
(rows=144002668 width=135)
-                                          predicate:(ws_order_number is not 
null and ws_ship_date_sk is not null and ws_ship_addr_sk is not null and 
ws_web_site_sk is not null)
-                                          TableScan [TS_0] (rows=144002668 
width=135)
-                                            
default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"]
-                                  <-Map 8 [SIMPLE_EDGE]
-                                    SHUFFLE [RS_50]
-                                      PartitionCols:_col0
-                                      Select Operator [SEL_5] (rows=8116 
width=1119)
-                                        Output:["_col0"]
-                                        Filter Operator [FIL_107] (rows=8116 
width=1119)
-                                          predicate:(CAST( d_date AS 
TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and 
d_date_sk is not null)
-                                          TableScan [TS_3] (rows=73049 
width=1119)
-                                            
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+                                      Filter Operator [FIL_109] (rows=42 
width=1850)
+                                        predicate:((web_company_name = 'pri') 
and web_site_sk is not null)
+                                        TableScan [TS_9] (rows=84 width=1850)
+                                          
default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"]
+                                <-Reducer 3 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_55]
+                                    PartitionCols:_col2
+                                    Merge Join Operator [MERGEJOIN_122] 
(rows=174243235 width=135)
+                                      
Conds:RS_52._col1=RS_53._col0(Inner),Output:["_col2","_col3","_col4","_col5"]
+                                    <-Map 10 [SIMPLE_EDGE]
+                                      SHUFFLE [RS_53]
+                                        PartitionCols:_col0
+                                        Select Operator [SEL_8] (rows=20000000 
width=1014)
+                                          Output:["_col0"]
+                                          Filter Operator [FIL_108] 
(rows=20000000 width=1014)
+                                            predicate:((ca_state = 'TX') and 
ca_address_sk is not null)
+                                            TableScan [TS_6] (rows=40000000 
width=1014)
+                                              
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+                                    <-Reducer 2 [SIMPLE_EDGE]
+                                      SHUFFLE [RS_52]
+                                        PartitionCols:_col1
+                                        Merge Join Operator [MERGEJOIN_121] 
(rows=158402938 width=135)
+                                          
Conds:RS_49._col0=RS_50._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"]
+                                        <-Map 1 [SIMPLE_EDGE]
+                                          SHUFFLE [RS_49]
+                                            PartitionCols:_col0
+                                            Select Operator [SEL_2] 
(rows=144002668 width=135)
+                                              
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+                                              Filter Operator [FIL_106] 
(rows=144002668 width=135)
+                                                predicate:(ws_order_number is 
not null and ws_ship_date_sk is not null and ws_ship_addr_sk is not null and 
ws_web_site_sk is not null)
+                                                TableScan [TS_0] 
(rows=144002668 width=135)
+                                                  
default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"]
+                                        <-Map 9 [SIMPLE_EDGE]
+                                          SHUFFLE [RS_50]
+                                            PartitionCols:_col0
+                                            Select Operator [SEL_5] (rows=8116 
width=1119)
+                                              Output:["_col0"]
+                                              Filter Operator [FIL_107] 
(rows=8116 width=1119)
+                                                predicate:(CAST( d_date AS 
TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and 
d_date_sk is not null)
+                                                TableScan [TS_3] (rows=73049 
width=1119)
+                                                  
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b560f492/ql/src/test/results/clientpositive/spark/nullgroup4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/nullgroup4.q.out 
b/ql/src/test/results/clientpositive/spark/nullgroup4.q.out
index 24f0291..63afe9b 100644
--- a/ql/src/test/results/clientpositive/spark/nullgroup4.q.out
+++ b/ql/src/test/results/clientpositive/spark/nullgroup4.q.out
@@ -95,7 +95,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -111,26 +112,44 @@ STAGE PLANS:
                       outputColumnNames: _col1
                       Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: count(1), count(DISTINCT _col1)
+                        aggregations: count(1)
                         keys: _col1 (type: string)
                         mode: hash
-                        outputColumnNames: _col0, _col1, _col2
+                        outputColumnNames: _col0, _col1
                         Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: string)
                           sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
                           Statistics: Num rows: 250 Data size: 2656 Basic 
stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: bigint)
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
-                aggregations: count(VALUE._col0), count(DISTINCT 
KEY._col0:0._col0)
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: partial2
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col1), count(_col0)
+                  mode: partial2
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint), _col1 (type: 
bigint)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(VALUE._col1)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b560f492/ql/src/test/results/clientpositive/udf_count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_count.q.out 
b/ql/src/test/results/clientpositive/udf_count.q.out
index f60ad04..c3903b2 100644
--- a/ql/src/test/results/clientpositive/udf_count.q.out
+++ b/ql/src/test/results/clientpositive/udf_count.q.out
@@ -43,7 +43,8 @@ POSTHOOK: query: EXPLAIN SELECT count(DISTINCT key) FROM src
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -57,24 +58,50 @@ STAGE PLANS:
               outputColumnNames: key
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: count(DISTINCT key)
                 keys: key (type: string)
                 mode: hash
-                outputColumnNames: _col0, _col1
+                outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(DISTINCT KEY._col0:0._col0)
+          keys: KEY._col0 (type: string)
+          mode: partial2
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+          Group By Operator
+            aggregations: count(_col0)
+            mode: partial2
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b560f492/ql/src/test/results/clientpositive/vector_empty_where.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_empty_where.q.out 
b/ql/src/test/results/clientpositive/vector_empty_where.q.out
index b2dec6d..b7580f3 100644
--- a/ql/src/test/results/clientpositive/vector_empty_where.q.out
+++ b/ql/src/test/results/clientpositive/vector_empty_where.q.out
@@ -10,7 +10,8 @@ PLAN VECTORIZATION:
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -38,26 +39,25 @@ STAGE PLANS:
                     projectedOutputColumns: [2]
                 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: count(DISTINCT cint)
                   Group By Vectorization:
-                      aggregators: VectorUDAFCount(col 2) -> bigint
                       className: VectorGroupByOperator
                       vectorOutput: true
                       keyExpressions: col 2
                       native: false
-                      projectedOutputColumns: [0]
+                      projectedOutputColumns: []
                   keys: cint (type: int)
                   mode: hash
-                  outputColumnNames: _col0, _col1
+                  outputColumnNames: _col0
                   Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int)
                     sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkOperator
                         native: false
-                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false, No DISTINCT columns IS false
+                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false
                     Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
       Execution mode: vectorized
       Map Vectorization:
@@ -74,17 +74,72 @@ STAGE PLANS:
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(DISTINCT KEY._col0:0._col0)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
+          keys: KEY._col0 (type: int)
+          mode: partial2
+          outputColumnNames: _col0
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          Group By Operator
+            aggregations: count(_col0)
+            Group By Vectorization:
+                vectorOutput: false
+                native: false
+                projectedOutputColumns: null
+            mode: partial2
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
+            Reduce Output Operator
+              sort order: 
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
           Group By Vectorization:
               vectorOutput: false
               native: false
               projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -117,7 +172,8 @@ PLAN VECTORIZATION:
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -137,26 +193,25 @@ STAGE PLANS:
               predicate: cint (type: int)
               Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: count(DISTINCT cint)
                 Group By Vectorization:
-                    aggregators: VectorUDAFCount(col 2) -> bigint
                     className: VectorGroupByOperator
                     vectorOutput: true
                     keyExpressions: col 2
                     native: false
-                    projectedOutputColumns: [0]
+                    projectedOutputColumns: []
                 keys: cint (type: int)
                 mode: hash
-                outputColumnNames: _col0, _col1
+                outputColumnNames: _col0
                 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkOperator
                       native: false
-                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false, No DISTINCT columns IS false
+                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false
                   Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
       Execution mode: vectorized
       Map Vectorization:
@@ -173,17 +228,72 @@ STAGE PLANS:
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(DISTINCT KEY._col0:0._col0)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
+          keys: KEY._col0 (type: int)
+          mode: partial2
+          outputColumnNames: _col0
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          Group By Operator
+            aggregations: count(_col0)
+            Group By Vectorization:
+                vectorOutput: false
+                native: false
+                projectedOutputColumns: null
+            mode: partial2
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
+            Reduce Output Operator
+              sort order: 
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
           Group By Vectorization:
               vectorOutput: false
               native: false
               projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -216,7 +326,8 @@ PLAN VECTORIZATION:
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -244,26 +355,25 @@ STAGE PLANS:
                     projectedOutputColumns: [2]
                 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: count(DISTINCT cint)
                   Group By Vectorization:
-                      aggregators: VectorUDAFCount(col 2) -> bigint
                       className: VectorGroupByOperator
                       vectorOutput: true
                       keyExpressions: col 2
                       native: false
-                      projectedOutputColumns: [0]
+                      projectedOutputColumns: []
                   keys: cint (type: int)
                   mode: hash
-                  outputColumnNames: _col0, _col1
+                  outputColumnNames: _col0
                   Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int)
                     sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkOperator
                         native: false
-                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false, No DISTINCT columns IS false
+                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false
                     Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
       Execution mode: vectorized
       Map Vectorization:
@@ -280,17 +390,72 @@ STAGE PLANS:
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(DISTINCT KEY._col0:0._col0)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
+          keys: KEY._col0 (type: int)
+          mode: partial2
+          outputColumnNames: _col0
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          Group By Operator
+            aggregations: count(_col0)
+            Group By Vectorization:
+                vectorOutput: false
+                native: false
+                projectedOutputColumns: null
+            mode: partial2
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
+            Reduce Output Operator
+              sort order: 
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
           Group By Vectorization:
               vectorOutput: false
               native: false
               projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -323,7 +488,8 @@ PLAN VECTORIZATION:
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -351,26 +517,25 @@ STAGE PLANS:
                     projectedOutputColumns: [2]
                 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: count(DISTINCT cint)
                   Group By Vectorization:
-                      aggregators: VectorUDAFCount(col 2) -> bigint
                       className: VectorGroupByOperator
                       vectorOutput: true
                       keyExpressions: col 2
                       native: false
-                      projectedOutputColumns: [0]
+                      projectedOutputColumns: []
                   keys: cint (type: int)
                   mode: hash
-                  outputColumnNames: _col0, _col1
+                  outputColumnNames: _col0
                   Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int)
                     sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkOperator
                         native: false
-                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false, No DISTINCT columns IS false
+                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false
                     Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
       Execution mode: vectorized
       Map Vectorization:
@@ -387,17 +552,72 @@ STAGE PLANS:
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(DISTINCT KEY._col0:0._col0)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
+          keys: KEY._col0 (type: int)
+          mode: partial2
+          outputColumnNames: _col0
+          Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE
+          Group By Operator
+            aggregations: count(_col0)
+            Group By Vectorization:
+                vectorOutput: false
+                native: false
+                projectedOutputColumns: null
+            mode: partial2
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
+            Reduce Output Operator
+              sort order: 
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
           Group By Vectorization:
               vectorOutput: false
               native: false
               projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

[09/36] hive git commit: HIVE-16654: Optimize a combination of avg(), sum(), count(distinct) etc (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

Reply via email to