[05/15] hive git commit: HIVE-15905 : Inefficient plan for correlated subqueries (Vineet Garg via Ashutosh Chauhan)

hashutosh Tue, 14 Feb 2017 17:26:41 -0800

http://git-wip-us.apache.org/repos/asf/hive/blob/bddf5a7a/ql/src/test/results/clientpositive/perf/query81.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query81.q.out 
b/ql/src/test/results/clientpositive/perf/query81.q.out
index 8ca0068..25bd68e 100644
--- a/ql/src/test/results/clientpositive/perf/query81.q.out
+++ b/ql/src/test/results/clientpositive/perf/query81.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[154][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in 
Stage 'Reducer 22' is a cross product
 PREHOOK: query: explain with customer_total_return as
  (select cr_returning_customer_sk as ctr_customer_sk
         ,ca_state as ctr_state, 
@@ -60,249 +59,163 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
-Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE)
-Reducer 15 <- Map 20 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE)
+Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE)
+Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE)
 Reducer 16 <- Reducer 15 (SIMPLE_EDGE)
-Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE)
-Reducer 18 <- Reducer 17 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE), Map 30 (CUSTOM_SIMPLE_EDGE), 
Reducer 27 (CUSTOM_SIMPLE_EDGE)
-Reducer 23 <- Reducer 22 (SIMPLE_EDGE)
-Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE)
-Reducer 26 <- Map 29 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE)
-Reducer 27 <- Reducer 26 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
 Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 18 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 9 <- Map 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+Reducer 7 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:100
     Stage-1
-      Reducer 5
-      File Output Operator [FS_102]
-        Limit [LIM_101] (rows=100 width=1)
+      Reducer 4
+      File Output Operator [FS_67]
+        Limit [LIM_66] (rows=100 width=860)
           Number of rows:100
-          Select Operator [SEL_100] (rows=930023387364950016 width=1)
+          Select Operator [SEL_65] (rows=32266667 width=860)
             
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"]
-          <-Reducer 4 [SIMPLE_EDGE]
-            SHUFFLE [RS_99]
-              Select Operator [SEL_98] (rows=930023387364950016 width=1)
+          <-Reducer 3 [SIMPLE_EDGE]
+            SHUFFLE [RS_64]
+              Select Operator [SEL_63] (rows=32266667 width=860)
                 
Output:["_col0","_col1","_col11","_col12","_col13","_col14","_col15","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
-                Filter Operator [FIL_97] (rows=930023387364950016 width=1)
+                Filter Operator [FIL_62] (rows=32266667 width=860)
                   predicate:(_col2 > CASE WHEN (_col22 is null) THEN (null) 
ELSE (_col21) END)
-                  Select Operator [SEL_96] (rows=2790070162094850048 width=1)
+                  Select Operator [SEL_61] (rows=96800003 width=860)
                     
Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col14","_col16","_col18","_col19","_col20","_col21","_col22"]
-                    Merge Join Operator [MERGEJOIN_162] 
(rows=2790070162094850048 width=1)
-                      Conds:RS_93._col19=RS_94._col2(Left 
Outer),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col20","_col21","_col22"]
-                    <-Reducer 18 [SIMPLE_EDGE]
-                      SHUFFLE [RS_94]
-                        PartitionCols:_col2
-                        Select Operator [SEL_86] (rows=2536427365110644736 
width=1)
-                          Output:["_col0","_col1","_col2"]
-                          Group By Operator [GBY_85] (rows=2536427365110644736 
width=1)
-                            
Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0
-                          <-Reducer 17 [SIMPLE_EDGE]
-                            SHUFFLE [RS_84]
-                              PartitionCols:_col0
-                              Group By Operator [GBY_83] 
(rows=5072854730221289472 width=1)
-                                
Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col3
-                                Select Operator [SEL_82] 
(rows=5072854730221289472 width=1)
-                                  Output:["_col3","_col2"]
-                                  Merge Join Operator [MERGEJOIN_161] 
(rows=5072854730221289472 width=1)
-                                    
Conds:RS_79._col1=RS_80._col0(Inner),Output:["_col2","_col3"]
-                                  <-Reducer 16 [SIMPLE_EDGE]
-                                    SHUFFLE [RS_79]
-                                      PartitionCols:_col1
-                                      Select Operator [SEL_45] (rows=22000000 
width=1014)
-                                        Output:["_col1","_col2"]
-                                        Group By Operator [GBY_44] 
(rows=22000000 width=1014)
-                                          
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1
-                                        <-Reducer 15 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_43]
-                                            PartitionCols:_col0, _col1
-                                            Group By Operator [GBY_42] 
(rows=44000000 width=1014)
-                                              
Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1
-                                              Select Operator [SEL_41] 
(rows=44000000 width=1014)
-                                                
Output:["_col7","_col1","_col3"]
-                                                Merge Join Operator 
[MERGEJOIN_159] (rows=44000000 width=1014)
-                                                  
Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"]
-                                                <-Map 20 [SIMPLE_EDGE]
-                                                  SHUFFLE [RS_39]
-                                                    PartitionCols:_col0
-                                                    Select Operator [SEL_34] 
(rows=40000000 width=1014)
-                                                      Output:["_col0","_col1"]
-                                                      Filter Operator 
[FIL_148] (rows=40000000 width=1014)
-                                                        
predicate:ca_address_sk is not null
-                                                        TableScan [TS_32] 
(rows=40000000 width=1014)
-                                                          
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
-                                                <-Reducer 14 [SIMPLE_EDGE]
-                                                  SHUFFLE [RS_38]
-                                                    PartitionCols:_col2
-                                                    Merge Join Operator 
[MERGEJOIN_158] (rows=31678769 width=106)
-                                                      
Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3"]
-                                                    <-Map 13 [SIMPLE_EDGE]
-                                                      SHUFFLE [RS_35]
-                                                        PartitionCols:_col0
-                                                        Select Operator 
[SEL_28] (rows=28798881 width=106)
-                                                          
Output:["_col0","_col1","_col2","_col3"]
-                                                          Filter Operator 
[FIL_146] (rows=28798881 width=106)
-                                                            
predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null)
-                                                            TableScan [TS_26] 
(rows=28798881 width=106)
-                                                              
default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"]
-                                                    <-Map 19 [SIMPLE_EDGE]
-                                                      SHUFFLE [RS_36]
-                                                        PartitionCols:_col0
-                                                        Select Operator 
[SEL_31] (rows=36524 width=1119)
-                                                          Output:["_col0"]
-                                                          Filter Operator 
[FIL_147] (rows=36524 width=1119)
-                                                            predicate:((d_year 
= 1998) and d_date_sk is not null)
-                                                            TableScan [TS_29] 
(rows=73049 width=1119)
-                                                              
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
-                                  <-Reducer 23 [SIMPLE_EDGE]
-                                    SHUFFLE [RS_80]
+                    Merge Join Operator [MERGEJOIN_105] (rows=96800003 
width=860)
+                      
Conds:RS_58._col0=RS_59._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col20","_col21","_col22"]
+                    <-Reducer 10 [SIMPLE_EDGE]
+                      SHUFFLE [RS_59]
+                        PartitionCols:_col0
+                        Merge Join Operator [MERGEJOIN_104] (rows=24200000 
width=1014)
+                          Conds:RS_51._col1=RS_52._col2(Left 
Outer),Output:["_col0","_col2","_col3","_col4"]
+                        <-Reducer 16 [SIMPLE_EDGE]
+                          SHUFFLE [RS_52]
+                            PartitionCols:_col2
+                            Select Operator [SEL_50] (rows=8711661 width=106)
+                              Output:["_col0","_col1","_col2"]
+                              Group By Operator [GBY_49] (rows=8711661 
width=106)
+                                
Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0
+                                Select Operator [SEL_45] (rows=17423323 
width=106)
+                                  Output:["_col0","_col2"]
+                                  Group By Operator [GBY_44] (rows=17423323 
width=106)
+                                    
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1
+                                  <-Reducer 15 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_43]
                                       PartitionCols:_col0
-                                      Group By Operator [GBY_77] 
(rows=4611686018427387903 width=1)
-                                        Output:["_col0"],keys:KEY._col0
-                                      <-Reducer 22 [SIMPLE_EDGE]
-                                        SHUFFLE [RS_76]
+                                      Group By Operator [GBY_42] 
(rows=34846646 width=106)
+                                        
Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1
+                                        Select Operator [SEL_41] 
(rows=34846646 width=106)
+                                          Output:["_col7","_col1","_col3"]
+                                          Merge Join Operator [MERGEJOIN_103] 
(rows=34846646 width=106)
+                                            
Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"]
+                                          <-Map 18 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_39]
+                                              PartitionCols:_col0
+                                              Select Operator [SEL_34] 
(rows=20000000 width=1014)
+                                                Output:["_col0","_col1"]
+                                                Filter Operator [FIL_98] 
(rows=20000000 width=1014)
+                                                  predicate:((ca_state = 
ca_state) and ca_address_sk is not null)
+                                                  TableScan [TS_32] 
(rows=40000000 width=1014)
+                                                    
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+                                          <-Reducer 14 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_38]
+                                              PartitionCols:_col2
+                                              Merge Join Operator 
[MERGEJOIN_102] (rows=31678769 width=106)
+                                                
Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3"]
+                                              <-Map 13 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_35]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_28] 
(rows=28798881 width=106)
+                                                    
Output:["_col0","_col1","_col2","_col3"]
+                                                    Filter Operator [FIL_96] 
(rows=28798881 width=106)
+                                                      
predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null)
+                                                      TableScan [TS_26] 
(rows=28798881 width=106)
+                                                        
default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"]
+                                              <-Map 17 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_36]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_31] 
(rows=36524 width=1119)
+                                                    Output:["_col0"]
+                                                    Filter Operator [FIL_97] 
(rows=36524 width=1119)
+                                                      predicate:((d_year = 
1998) and d_date_sk is not null)
+                                                      TableScan [TS_29] 
(rows=73049 width=1119)
+                                                        
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+                        <-Reducer 9 [SIMPLE_EDGE]
+                          SHUFFLE [RS_51]
+                            PartitionCols:_col1
+                            Select Operator [SEL_25] (rows=22000000 width=1014)
+                              Output:["_col0","_col1","_col2"]
+                              Group By Operator [GBY_24] (rows=22000000 
width=1014)
+                                
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1
+                              <-Reducer 8 [SIMPLE_EDGE]
+                                SHUFFLE [RS_23]
+                                  PartitionCols:_col0, _col1
+                                  Group By Operator [GBY_22] (rows=44000000 
width=1014)
+                                    
Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1
+                                    Select Operator [SEL_21] (rows=44000000 
width=1014)
+                                      Output:["_col7","_col1","_col3"]
+                                      Merge Join Operator [MERGEJOIN_101] 
(rows=44000000 width=1014)
+                                        
Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col7"]
+                                      <-Map 12 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_19]
                                           PartitionCols:_col0
-                                          Group By Operator [GBY_75] 
(rows=9223372036854775807 width=1)
-                                            Output:["_col0"],keys:_col2
-                                            Merge Join Operator 
[MERGEJOIN_154] (rows=9223372036854775807 width=1)
-                                              
Conds:(Inner),(Inner),Output:["_col2"]
-                                            <-Map 21 [CUSTOM_SIMPLE_EDGE]
-                                              PARTITION_ONLY_SHUFFLE [RS_70]
-                                                Select Operator [SEL_47] 
(rows=80000000 width=4)
-                                                  TableScan [TS_46] 
(rows=80000000 width=860)
-                                                    
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE
-                                            <-Map 30 [CUSTOM_SIMPLE_EDGE]
-                                              PARTITION_ONLY_SHUFFLE [RS_72]
-                                                Select Operator [SEL_69] 
(rows=40000000 width=4)
-                                                  TableScan [TS_68] 
(rows=40000000 width=1014)
-                                                    
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE
-                                            <-Reducer 27 [CUSTOM_SIMPLE_EDGE]
-                                              PARTITION_ONLY_SHUFFLE [RS_71]
-                                                Select Operator [SEL_67] 
(rows=22000000 width=1014)
-                                                  Output:["_col1"]
-                                                  Group By Operator [GBY_66] 
(rows=22000000 width=1014)
-                                                    
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
-                                                  <-Reducer 26 [SIMPLE_EDGE]
-                                                    SHUFFLE [RS_65]
-                                                      PartitionCols:_col0, 
_col1
-                                                      Group By Operator 
[GBY_64] (rows=44000000 width=1014)
-                                                        
Output:["_col0","_col1"],keys:_col7, _col1
-                                                        Select Operator 
[SEL_63] (rows=44000000 width=1014)
-                                                          
Output:["_col7","_col1"]
-                                                          Merge Join Operator 
[MERGEJOIN_153] (rows=44000000 width=1014)
-                                                            
Conds:RS_60._col2=RS_61._col0(Inner),Output:["_col1","_col7"]
-                                                          <-Map 29 
[SIMPLE_EDGE]
-                                                            SHUFFLE [RS_61]
-                                                              
PartitionCols:_col0
-                                                              Select Operator 
[SEL_56] (rows=40000000 width=1014)
-                                                                
Output:["_col0","_col1"]
-                                                                Filter 
Operator [FIL_151] (rows=40000000 width=1014)
-                                                                  
predicate:ca_address_sk is not null
-                                                                  TableScan 
[TS_54] (rows=40000000 width=1014)
-                                                                    
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
-                                                          <-Reducer 25 
[SIMPLE_EDGE]
-                                                            SHUFFLE [RS_60]
-                                                              
PartitionCols:_col2
-                                                              Merge Join 
Operator [MERGEJOIN_152] (rows=31678769 width=106)
-                                                                
Conds:RS_57._col0=RS_58._col0(Inner),Output:["_col1","_col2"]
-                                                              <-Map 24 
[SIMPLE_EDGE]
-                                                                SHUFFLE [RS_57]
-                                                                  
PartitionCols:_col0
-                                                                  Select 
Operator [SEL_50] (rows=28798881 width=106)
-                                                                    
Output:["_col0","_col1","_col2"]
-                                                                    Filter 
Operator [FIL_149] (rows=28798881 width=106)
-                                                                      
predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null)
-                                                                      
TableScan [TS_48] (rows=28798881 width=106)
-                                                                        
default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk"]
-                                                              <-Map 28 
[SIMPLE_EDGE]
-                                                                SHUFFLE [RS_58]
-                                                                  
PartitionCols:_col0
-                                                                  Select 
Operator [SEL_53] (rows=36524 width=1119)
-                                                                    
Output:["_col0"]
-                                                                    Filter 
Operator [FIL_150] (rows=36524 width=1119)
-                                                                      
predicate:((d_year = 1998) and d_date_sk is not null)
-                                                                      
TableScan [TS_51] (rows=73049 width=1119)
-                                                                        
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
-                    <-Reducer 3 [SIMPLE_EDGE]
-                      SHUFFLE [RS_93]
-                        PartitionCols:_col19
-                        Merge Join Operator [MERGEJOIN_160] (rows=96800003 
width=860)
-                          
Conds:RS_90._col0=RS_91._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col19","_col20"]
-                        <-Reducer 10 [SIMPLE_EDGE]
-                          SHUFFLE [RS_91]
+                                          Select Operator [SEL_14] 
(rows=40000000 width=1014)
+                                            Output:["_col0","_col1"]
+                                            Filter Operator [FIL_95] 
(rows=40000000 width=1014)
+                                              predicate:ca_address_sk is not 
null
+                                              TableScan [TS_12] (rows=40000000 
width=1014)
+                                                
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+                                      <-Reducer 7 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_18]
+                                          PartitionCols:_col2
+                                          Merge Join Operator [MERGEJOIN_100] 
(rows=31678769 width=106)
+                                            
Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3"]
+                                          <-Map 11 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_16]
+                                              PartitionCols:_col0
+                                              Select Operator [SEL_11] 
(rows=36524 width=1119)
+                                                Output:["_col0"]
+                                                Filter Operator [FIL_94] 
(rows=36524 width=1119)
+                                                  predicate:((d_year = 1998) 
and d_date_sk is not null)
+                                                  TableScan [TS_9] (rows=73049 
width=1119)
+                                                    
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+                                          <-Map 6 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_15]
+                                              PartitionCols:_col0
+                                              Select Operator [SEL_8] 
(rows=28798881 width=106)
+                                                
Output:["_col0","_col1","_col2","_col3"]
+                                                Filter Operator [FIL_93] 
(rows=28798881 width=106)
+                                                  
predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null 
and cr_returning_customer_sk is not null)
+                                                  TableScan [TS_6] 
(rows=28798881 width=106)
+                                                    
default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"]
+                    <-Reducer 2 [SIMPLE_EDGE]
+                      SHUFFLE [RS_58]
+                        PartitionCols:_col0
+                        Merge Join Operator [MERGEJOIN_99] (rows=88000001 
width=860)
+                          
Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"]
+                        <-Map 1 [SIMPLE_EDGE]
+                          SHUFFLE [RS_55]
+                            PartitionCols:_col2
+                            Select Operator [SEL_2] (rows=80000000 width=860)
+                              
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+                              Filter Operator [FIL_91] (rows=80000000 
width=860)
+                                predicate:(c_customer_sk is not null and 
c_current_addr_sk is not null)
+                                TableScan [TS_0] (rows=80000000 width=860)
+                                  
default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"]
+                        <-Map 5 [SIMPLE_EDGE]
+                          SHUFFLE [RS_56]
                             PartitionCols:_col0
-                            Group By Operator [GBY_24] (rows=22000000 
width=1014)
-                              
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1
-                            <-Reducer 9 [SIMPLE_EDGE]
-                              SHUFFLE [RS_23]
-                                PartitionCols:_col0, _col1
-                                Group By Operator [GBY_22] (rows=44000000 
width=1014)
-                                  
Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col1, _col7
-                                  Select Operator [SEL_21] (rows=44000000 
width=1014)
-                                    Output:["_col1","_col7","_col3"]
-                                    Merge Join Operator [MERGEJOIN_157] 
(rows=44000000 width=1014)
-                                      
Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col7"]
-                                    <-Map 12 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_19]
-                                        PartitionCols:_col0
-                                        Select Operator [SEL_14] 
(rows=40000000 width=1014)
-                                          Output:["_col0","_col1"]
-                                          Filter Operator [FIL_145] 
(rows=40000000 width=1014)
-                                            predicate:ca_address_sk is not null
-                                            TableScan [TS_12] (rows=40000000 
width=1014)
-                                              
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
-                                    <-Reducer 8 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_18]
-                                        PartitionCols:_col2
-                                        Merge Join Operator [MERGEJOIN_156] 
(rows=31678769 width=106)
-                                          
Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3"]
-                                        <-Map 11 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_16]
-                                            PartitionCols:_col0
-                                            Select Operator [SEL_11] 
(rows=36524 width=1119)
-                                              Output:["_col0"]
-                                              Filter Operator [FIL_144] 
(rows=36524 width=1119)
-                                                predicate:((d_year = 1998) and 
d_date_sk is not null)
-                                                TableScan [TS_9] (rows=73049 
width=1119)
-                                                  
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
-                                        <-Map 7 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_15]
-                                            PartitionCols:_col0
-                                            Select Operator [SEL_8] 
(rows=28798881 width=106)
-                                              
Output:["_col0","_col1","_col2","_col3"]
-                                              Filter Operator [FIL_143] 
(rows=28798881 width=106)
-                                                predicate:(cr_returned_date_sk 
is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk 
is not null)
-                                                TableScan [TS_6] 
(rows=28798881 width=106)
-                                                  
default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"]
-                        <-Reducer 2 [SIMPLE_EDGE]
-                          SHUFFLE [RS_90]
-                            PartitionCols:_col0
-                            Merge Join Operator [MERGEJOIN_155] (rows=88000001 
width=860)
-                              
Conds:RS_87._col2=RS_88._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"]
-                            <-Map 1 [SIMPLE_EDGE]
-                              SHUFFLE [RS_87]
-                                PartitionCols:_col2
-                                Select Operator [SEL_2] (rows=80000000 
width=860)
-                                  
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
-                                  Filter Operator [FIL_141] (rows=80000000 
width=860)
-                                    predicate:(c_customer_sk is not null and 
c_current_addr_sk is not null)
-                                    TableScan [TS_0] (rows=80000000 width=860)
-                                      
default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"]
-                            <-Map 6 [SIMPLE_EDGE]
-                              SHUFFLE [RS_88]
-                                PartitionCols:_col0
-                                Select Operator [SEL_5] (rows=20000000 
width=1014)
-                                  
Output:["_col0","_col1","_col10","_col11","_col2","_col3","_col4","_col5","_col6","_col8","_col9"]
-                                  Filter Operator [FIL_142] (rows=20000000 
width=1014)
-                                    predicate:((ca_state = 'IL') and 
ca_address_sk is not null)
-                                    TableScan [TS_3] (rows=40000000 width=1014)
-                                      
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"]
+                            Select Operator [SEL_5] (rows=20000000 width=1014)
+                              
Output:["_col0","_col1","_col10","_col11","_col2","_col3","_col4","_col5","_col6","_col8","_col9"]
+                              Filter Operator [FIL_92] (rows=20000000 
width=1014)
+                                predicate:((ca_state = 'IL') and ca_address_sk 
is not null)
+                                TableScan [TS_3] (rows=40000000 width=1014)
+                                  
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"]


http://git-wip-us.apache.org/repos/asf/hive/blob/bddf5a7a/ql/src/test/results/clientpositive/semijoin5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/semijoin5.q.out 
b/ql/src/test/results/clientpositive/semijoin5.q.out
index 07b7470..fd8e372 100644
--- a/ql/src/test/results/clientpositive/semijoin5.q.out
+++ b/ql/src/test/results/clientpositive/semijoin5.q.out
@@ -48,14 +48,11 @@ WHERE (t2.smallint_col_19) IN (SELECT
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1, Stage-8
+  Stage-2 depends on stages: Stage-1, Stage-7
   Stage-3 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-3
-  Stage-9 is a root stage
-  Stage-10 depends on stages: Stage-9
-  Stage-6 depends on stages: Stage-10
+  Stage-6 is a root stage
   Stage-7 depends on stages: Stage-6
-  Stage-8 depends on stages: Stage-7
   Stage-0 depends on stages: Stage-4
 
 STAGE PLANS:
@@ -236,149 +233,40 @@ STAGE PLANS:
                     output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-9
+  Stage: Stage-6
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: t1
+            alias: tt1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
             Filter Operator
-              predicate: (tinyint_col_3 is not null and bigint_col_7 is not 
null and decimal2016_col_26 is not null) (type: boolean)
+              predicate: decimal2612_col_77 is not null (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
               Select Operator
-                expressions: tinyint_col_3 (type: tinyint), bigint_col_7 
(type: bigint), timestamp_col_9 (type: timestamp), decimal2016_col_26 (type: 
decimal(20,16))
-                outputColumnNames: _col0, _col1, _col2, _col3
+                expressions: decimal2612_col_77 (type: decimal(26,12))
+                outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: tinyint), _col3 (type: 
decimal(34,16)), _col1 (type: bigint)
-                  sort order: +++
-                  Map-reduce partition columns: _col0 (type: tinyint), _col3 
(type: decimal(34,16)), _col1 (type: bigint)
+                  key expressions: _col0 (type: decimal(26,12))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: decimal(26,12))
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-                  value expressions: _col2 (type: timestamp)
-          TableScan
-            alias: t2
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-            Filter Operator
-              predicate: (tinyint_col_20 is not null and decimal2709_col_9 is 
not null and tinyint_col_15 is not null) (type: boolean)
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-              Select Operator
-                expressions: decimal2709_col_9 (type: decimal(27,9)), 
tinyint_col_15 (type: tinyint), tinyint_col_20 (type: tinyint)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col2 (type: tinyint), _col0 (type: 
decimal(34,16)), UDFToLong(_col1) (type: bigint)
-                  sort order: +++
-                  Map-reduce partition columns: _col2 (type: tinyint), _col0 
(type: decimal(34,16)), UDFToLong(_col1) (type: bigint)
-                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Inner Join 0 to 1
-          keys:
-            0 _col0 (type: tinyint), _col3 (type: decimal(34,16)), _col1 
(type: bigint)
-            1 _col2 (type: tinyint), _col0 (type: decimal(34,16)), 
UDFToLong(_col1) (type: bigint)
-          outputColumnNames: _col2
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-          Group By Operator
-            keys: _col2 (type: timestamp)
-            mode: hash
-            outputColumnNames: _col0
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-10
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: timestamp)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: timestamp)
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-      Reduce Operator Tree:
-        Group By Operator
-          keys: KEY._col0 (type: timestamp)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-6
-    Map Reduce
-      Map Operator Tree:
           TableScan
             alias: tt2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
             Filter Operator
-              predicate: decimal1911_col_16 is not null (type: boolean)
+              predicate: ((timestamp_col_18 = timestamp_col_18) and 
decimal1911_col_16 is not null) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
               Select Operator
                 expressions: decimal1911_col_16 (type: decimal(19,11)), 
timestamp_col_18 (type: timestamp)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col1 (type: timestamp)
-                  sort order: +
-                  Map-reduce partition columns: _col1 (type: timestamp)
-                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-                  value expressions: _col0 (type: decimal(19,11))
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: timestamp)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: timestamp)
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Inner Join 0 to 1
-          keys:
-            0 _col1 (type: timestamp)
-            1 _col0 (type: timestamp)
-          outputColumnNames: _col0, _col2
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-7
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: decimal(26,12))
-              sort order: +
-              Map-reduce partition columns: _col0 (type: decimal(26,12))
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-              value expressions: _col2 (type: timestamp)
-          TableScan
-            alias: tt1
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-            Filter Operator
-              predicate: decimal2612_col_77 is not null (type: boolean)
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-              Select Operator
-                expressions: decimal2612_col_77 (type: decimal(26,12))
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-                Reduce Output Operator
                   key expressions: _col0 (type: decimal(26,12))
                   sort order: +
                   Map-reduce partition columns: _col0 (type: decimal(26,12))
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+                  value expressions: _col1 (type: timestamp)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -400,7 +288,7 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-8
+  Stage: Stage-7
     Map Reduce
       Map Operator Tree:
           TableScan

http://git-wip-us.apache.org/repos/asf/hive/blob/bddf5a7a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
index 5313e9b..3467215 100644
--- a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
@@ -95,9 +95,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 4 (PARTITION-LEVEL SORT, 4)
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 4), Reducer 6 (PARTITION-LEVEL SORT, 4)
-        Reducer 6 <- Map 5 (GROUP, 4)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -124,37 +122,22 @@ STAGE PLANS:
                   alias: lineitem
                   Statistics: Num rows: 100 Data size: 11999 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (l_shipmode = 'AIR') (type: boolean)
-                    Statistics: Num rows: 50 Data size: 5999 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: ((l_shipmode = 'AIR') and (l_linenumber = 
l_linenumber)) (type: boolean)
+                    Statistics: Num rows: 25 Data size: 2999 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: l_orderkey (type: int), l_linenumber (type: 
int)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 50 Data size: 5999 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col1 (type: int)
-                        Statistics: Num rows: 50 Data size: 5999 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: li
-                  Statistics: Num rows: 100 Data size: 11999 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: l_linenumber (type: int)
-                    outputColumnNames: l_linenumber
-                    Statistics: Num rows: 100 Data size: 11999 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: l_linenumber (type: int)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 100 Data size: 11999 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 100 Data size: 11999 Basic 
stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 25 Data size: 2999 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int), _col1 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 25 Data size: 2999 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int), _col1 (type: int)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: int), 
_col1 (type: int)
+                          Statistics: Num rows: 25 Data size: 2999 Basic 
stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
@@ -164,54 +147,18 @@ STAGE PLANS:
                   0 _col0 (type: int), 1 (type: int)
                   1 _col0 (type: int), _col1 (type: int)
                 outputColumnNames: _col1, _col2
-                Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: int), _col2 (type: int)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 60 Data size: 7257 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 55 Data size: 6598 Basic stats: 
COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 60 Data size: 7257 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 55 Data size: 6598 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col3
-                Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col3 (type: int)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 55 Data size: 6598 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: int), _col1 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 55 Data size: 6598 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int), _col1 (type: int)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: int), _col1 
(type: int)
-                      Statistics: Num rows: 55 Data size: 6598 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 6 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 50 Data size: 5999 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/bddf5a7a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
index bc25efe..1901dba 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
@@ -24,9 +24,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
-        Reducer 6 <- Map 5 (GROUP, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -49,36 +47,22 @@ STAGE PLANS:
                   alias: a
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (value > 'val_9') (type: boolean)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: ((value = value) and (key = key) and (value > 
'val_9')) (type: boolean)
+                    Statistics: Num rows: 41 Data size: 435 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string), _col1 (type: 
string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
-                        Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: key, value
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: key (type: string), value (type: string)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string), _col1 (type: 
string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
-                        Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 41 Data size: 435 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string), _col1 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 41 Data size: 435 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: 
string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                          Statistics: Num rows: 41 Data size: 435 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
@@ -96,42 +80,6 @@ STAGE PLANS:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string), _col1 (type: string)
-                  1 _col0 (type: string), _col1 (type: string)
-                outputColumnNames: _col2, _col3
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col2 (type: string), _col3 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string), _col1 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: 
string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
-                      Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 6 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string), KEY._col1 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -274,9 +222,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
-        Reducer 6 <- Map 5 (GROUP, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -299,34 +245,23 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: value (type: string)
-                      mode: hash
+                  Filter Operator
+                    predicate: (value = value) (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: value (type: string)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 250 Data size: 2656 Basic 
stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
@@ -336,50 +271,14 @@ STAGE PLANS:
                   0 _col1 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col1
-                Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 6 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator

[05/15] hive git commit: HIVE-15905 : Inefficient plan for correlated subqueries (Vineet Garg via Ashutosh Chauhan)

Reply via email to