[16/16] hive git commit: HIVE-20778: Join reordering may not be triggered if all joins in plan are created by decorrelation logic (Vineet Garg via Jesus Camacho Rodriguez)

jcamacho Sat, 03 Nov 2018 09:35:40 -0700

HIVE-20778: Join reordering may not be triggered if all joins in plan are 
created by decorrelation logic (Vineet Garg via Jesus Camacho Rodriguez)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ae1eb15d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ae1eb15d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ae1eb15d

Branch: refs/heads/master
Commit: ae1eb15d4347484537b85a8074d8a8619991fc3f
Parents: 6dd0136
Author: Vineet Garg <vg...@hortonworks.com>
Authored: Sat Nov 3 09:34:43 2018 -0700
Committer: Jesus Camacho Rodriguez <jcama...@apache.org>
Committed: Sat Nov 3 09:34:43 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |   13 +-
 .../queries/clientpositive/subquery_notin.q     |    2 +
 .../queries/clientpositive/subquery_select.q    |    2 +
 .../clientpositive/constant_prop_3.q.out        |  226 +-
 .../llap/dynamic_semijoin_reduction_2.q.out     |   88 +-
 .../clientpositive/llap/explainuser_1.q.out     |  400 +--
 .../results/clientpositive/llap/lineage3.q.out  |    2 +-
 .../clientpositive/llap/mapjoin_hint.q.out      |  146 +-
 .../clientpositive/llap/optimize_join_ptp.q.out |   88 +-
 .../clientpositive/llap/subquery_in.q.out       |  292 +-
 .../llap/subquery_in_having.q.out               |  246 +-
 .../clientpositive/llap/subquery_multi.q.out    | 1005 +++---
 .../clientpositive/llap/subquery_notin.q.out    | 1960 +++++------
 .../clientpositive/llap/subquery_null_agg.q.out |   94 +-
 .../clientpositive/llap/subquery_scalar.q.out   | 1400 ++++----
 .../clientpositive/llap/subquery_select.q.out   | 1641 +++++-----
 .../clientpositive/llap/subquery_views.q.out    |  124 +-
 .../llap/vector_groupby_mapjoin.q.out           |  184 +-
 .../llap/vector_mapjoin_reduce.q.out            |  371 ++-
 .../test/results/clientpositive/semijoin5.q.out |   26 +-
 .../spark_dynamic_partition_pruning_3.q.out     |   52 +-
 .../spark/spark_explainuser_1.q.out             |  366 ++-
 .../clientpositive/spark/subquery_in.q.out      |  290 +-
 .../clientpositive/spark/subquery_multi.q.out   |  944 +++---
 .../clientpositive/spark/subquery_notin.q.out   | 3053 +++++++++---------
 .../spark/subquery_null_agg.q.out               |   86 +-
 .../clientpositive/spark/subquery_scalar.q.out  | 1441 +++++----
 .../clientpositive/spark/subquery_select.q.out  | 1688 +++++-----
 .../clientpositive/spark/subquery_views.q.out   |  194 +-
 .../spark/vector_mapjoin_reduce.q.out           |  491 +--
 .../clientpositive/subquery_notin_having.q.out  |  465 +--
 .../clientpositive/vector_groupby_mapjoin.q.out |  425 ++-
 32 files changed, 9207 insertions(+), 8598 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index d92cfca..ab63ce2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1798,16 +1798,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
         perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Removing sq_count_check UDF ");
       }
 
-      //  Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin
-      calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider,
-          HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFT_PROJECT_BTW_JOIN,
-          HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN, HiveProjectMergeRule.INSTANCE);
 
       // 4. Apply join order optimizations: reordering MST algorithm
       //    If join optimizations failed because of missing stats, we continue with
       //    the rest of optimizations
       if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) {
         perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
+
+        //  Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin
+        calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider,
+                                    HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFT_PROJECT_BTW_JOIN,
+                                    HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN, HiveProjectMergeRule.INSTANCE);
         try {
           List<RelMetadataProvider> list = Lists.newArrayList();
           list.add(mdProvider.getMetadataProvider());
@@ -3346,6 +3347,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
       } catch (SemanticException e) {
         throw new CalciteSubquerySemanticException(e.getMessage());
       }
+      if(isSubQuery) {
+        // since subqueries will later be rewritten into JOINs we want join reordering logic to trigger
+        profilesCBO.add(ExtendedCBOProfile.JOIN_REORDERING);
+      }
       return isSubQuery;
     }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/queries/clientpositive/subquery_notin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q b/ql/src/test/queries/clientpositive/subquery_notin.q
index a2d93df..f863645 100644
--- a/ql/src/test/queries/clientpositive/subquery_notin.q
+++ b/ql/src/test/queries/clientpositive/subquery_notin.q
@@ -3,6 +3,8 @@
 --! qt:dataset:lineitem
 set hive.mapred.mode=nonstrict;
 
+-- SORT_QUERY_RESULTS
+
 -- non agg, non corr
 explain
 select * 

http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/queries/clientpositive/subquery_select.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_select.q b/ql/src/test/queries/clientpositive/subquery_select.q
index 80618c7..52fcdb2 100644
--- a/ql/src/test/queries/clientpositive/subquery_select.q
+++ b/ql/src/test/queries/clientpositive/subquery_select.q
@@ -4,6 +4,8 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 
+-- SORT_QUERY_RESULTS
+
 -- IN, non corr
 explain SELECT p_size, p_size IN (
         SELECT MAX(p_size) FROM part)

http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/constant_prop_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constant_prop_3.q.out b/ql/src/test/results/clientpositive/constant_prop_3.q.out
index d106cd3..2b314d7 100644
--- a/ql/src/test/results/clientpositive/constant_prop_3.q.out
+++ b/ql/src/test/results/clientpositive/constant_prop_3.q.out
@@ -94,7 +94,7 @@ POSTHOOK: type: ANALYZE_TABLE
 POSTHOOK: Input: default@supplier_hive
 POSTHOOK: Output: default@supplier_hive
 #### A masked pattern was here ####
-Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 
'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select
        p_brand,
        p_type,
@@ -166,36 +166,56 @@ POSTHOOK: Input: default@partsupp_hive
 POSTHOOK: Input: default@supplier_hive
 #### A masked pattern was here ####
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1, Stage-7
-  Stage-3 depends on stages: Stage-2, Stage-8
+  Stage-5 is a root stage
+  Stage-1 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-1, Stage-6
+  Stage-3 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-4
   Stage-7 is a root stage
-  Stage-8 is a root stage
-  Stage-0 depends on stages: Stage-5
+  Stage-6 depends on stages: Stage-7
+  Stage-0 depends on stages: Stage-4
 
 STAGE PLANS:
-  Stage: Stage-1
+  Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: partsupp_hive
-            filterExpr: ps_partkey is not null (type: boolean)
+            alias: supplier_hive
+            filterExpr: (s_comment like '%Customer%Complaints%') (type: 
boolean)
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
             Filter Operator
-              predicate: ps_partkey is not null (type: boolean)
+              predicate: (s_comment like '%Customer%Complaints%') (type: 
boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
               Select Operator
-                expressions: ps_partkey (type: int), ps_suppkey (type: int)
-                outputColumnNames: _col0, _col1
+                expressions: s_suppkey (type: int)
+                outputColumnNames: s_suppkey
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-                  value expressions: _col1 (type: int)
+                Group By Operator
+                  aggregations: count(), count(s_suppkey)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL 
Column stats: NONE
+                    value expressions: _col0 (type: bigint), _col1 (type: 
bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), count(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
           TableScan
             alias: part_hive
             filterExpr: ((p_size) IN (22, 14, 27, 49, 21, 33, 35, 28) and 
(p_brand <> 'Brand#34') and p_partkey is not null and (not (p_type like 
'ECONOMY BRUSHED%'))) (type: boolean)
@@ -208,35 +228,9 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
+                  sort order: 
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-                  value expressions: _col1 (type: string), _col2 (type: 
string), _col3 (type: int)
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Inner Join 0 to 1
-          keys:
-            0 _col0 (type: int)
-            1 _col0 (type: int)
-          outputColumnNames: _col1, _col3, _col4, _col5
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-              value expressions: _col1 (type: int), _col3 (type: string), 
_col4 (type: string), _col5 (type: int)
+                  value expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), _col3 (type: int)
           TableScan
             Reduce Output Operator
               sort order: 
@@ -249,7 +243,7 @@ STAGE PLANS:
           keys:
             0 
             1 
-          outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
           Statistics: Num rows: 1 Data size: 17 Basic stats: PARTIAL Column 
stats: NONE
           File Output Operator
             compressed: false
@@ -258,53 +252,57 @@ STAGE PLANS:
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-3
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col1 (type: int)
+              key expressions: _col0 (type: int)
               sort order: +
-              Map-reduce partition columns: _col1 (type: int)
+              Map-reduce partition columns: _col0 (type: int)
               Statistics: Num rows: 1 Data size: 17 Basic stats: PARTIAL 
Column stats: NONE
-              value expressions: _col3 (type: string), _col4 (type: string), 
_col5 (type: int), _col6 (type: bigint), _col7 (type: bigint)
+              value expressions: _col1 (type: string), _col2 (type: string), 
_col3 (type: int), _col4 (type: bigint), _col5 (type: bigint)
           TableScan
             Reduce Output Operator
               key expressions: _col0 (type: int)
               sort order: +
               Map-reduce partition columns: _col0 (type: int)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-              value expressions: _col1 (type: boolean)
+              value expressions: _col1 (type: int), _col3 (type: boolean)
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Left Outer Join 0 to 1
+               Inner Join 0 to 1
           keys:
-            0 _col1 (type: int)
+            0 _col0 (type: int)
             1 _col0 (type: int)
-          outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9
+          outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7, _col9
           Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column 
stats: NONE
-          Filter Operator
-            predicate: ((_col6 = 0L) or (_col9 is null and _col1 is not null 
and (_col7 >= _col6))) (type: boolean)
+          Select Operator
+            expressions: _col7 (type: int), _col1 (type: string), _col2 (type: 
string), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col9 
(type: boolean)
+            outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9
             Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL Column 
stats: NONE
-            Select Operator
-              expressions: _col1 (type: int), _col3 (type: string), _col4 
(type: string), _col5 (type: int)
-              outputColumnNames: _col1, _col3, _col4, _col5
+            Filter Operator
+              predicate: ((_col6 = 0L) or (_col9 is null and _col1 is not null 
and (_col7 >= _col6))) (type: boolean)
               Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL 
Column stats: NONE
-              Group By Operator
-                aggregations: count(DISTINCT _col1)
-                keys: _col3 (type: string), _col4 (type: string), _col5 (type: 
int), _col1 (type: int)
-                mode: hash
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+              Select Operator
+                expressions: _col1 (type: int), _col3 (type: string), _col4 
(type: string), _col5 (type: int)
+                outputColumnNames: _col1, _col3, _col4, _col5
                 Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL 
Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: count(DISTINCT _col1)
+                  keys: _col3 (type: string), _col4 (type: string), _col5 
(type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 1 Data size: 18 Basic stats: PARTIAL 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-4
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -328,7 +326,7 @@ STAGE PLANS:
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-5
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -365,43 +363,6 @@ STAGE PLANS:
                 outputColumnNames: s_suppkey
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
                 Group By Operator
-                  aggregations: count(), count(s_suppkey)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL 
Column stats: NONE
-                  Reduce Output Operator
-                    sort order: 
-                    Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL 
Column stats: NONE
-                    value expressions: _col0 (type: bigint), _col1 (type: 
bigint)
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0), count(VALUE._col1)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 16 Basic stats: PARTIAL Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-8
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: supplier_hive
-            filterExpr: (s_comment like '%Customer%Complaints%') (type: 
boolean)
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-            Filter Operator
-              predicate: (s_comment like '%Customer%Complaints%') (type: 
boolean)
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
-              Select Operator
-                expressions: s_suppkey (type: int)
-                outputColumnNames: s_suppkey
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
-                Group By Operator
                   keys: s_suppkey (type: int)
                   mode: hash
                   outputColumnNames: _col0
@@ -429,6 +390,49 @@ STAGE PLANS:
                   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: partsupp_hive
+            filterExpr: ps_partkey is not null (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+            Filter Operator
+              predicate: ps_partkey is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+              Select Operator
+                expressions: ps_partkey (type: int), ps_suppkey (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: int)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL 
Column stats: NONE
+                  value expressions: _col0 (type: int)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: int)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+              value expressions: _col1 (type: boolean)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join 0 to 1
+          keys:
+            0 _col1 (type: int)
+            1 _col0 (type: int)
+          outputColumnNames: _col0, _col1, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1

http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
index 801416d..2810fdd 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
@@ -80,10 +80,10 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), 
Reducer 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
-        Map 13 <- Reducer 12 (BROADCAST_EDGE)
-        Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+        Map 9 <- Reducer 13 (BROADCAST_EDGE)
+        Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
         Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE)
-        Reducer 12 <- Map 9 (CUSTOM_SIMPLE_EDGE)
+        Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
@@ -113,25 +113,37 @@ STAGE PLANS:
                         value expressions: _col3 (type: timestamp)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 13 
+        Map 12 
             Map Operator Tree:
                 TableScan
-                  alias: tt2
-                  filterExpr: (timestamp_col_18 is not null and 
decimal1911_col_16 is not null and (decimal1911_col_16 BETWEEN 
DynamicValue(RS_12_tt1_decimal2612_col_77_min) AND 
DynamicValue(RS_12_tt1_decimal2612_col_77_max) and 
in_bloom_filter(decimal1911_col_16, 
DynamicValue(RS_12_tt1_decimal2612_col_77_bloom_filter)))) (type: boolean)
-                  Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE 
Column stats: NONE
+                  alias: tt1
+                  filterExpr: decimal2612_col_77 is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE 
Column stats: NONE
                   Filter Operator
-                    predicate: ((decimal1911_col_16 BETWEEN 
DynamicValue(RS_12_tt1_decimal2612_col_77_min) AND 
DynamicValue(RS_12_tt1_decimal2612_col_77_max) and 
in_bloom_filter(decimal1911_col_16, 
DynamicValue(RS_12_tt1_decimal2612_col_77_bloom_filter))) and 
decimal1911_col_16 is not null and timestamp_col_18 is not null) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 152 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: decimal2612_col_77 is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: decimal1911_col_16 (type: decimal(19,11)), 
timestamp_col_18 (type: timestamp)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 152 Basic stats: 
COMPLETE Column stats: NONE
+                      expressions: decimal2612_col_77 (type: decimal(26,12))
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: decimal(26,12))
                         sort order: +
                         Map-reduce partition columns: _col0 (type: 
decimal(26,12))
-                        Statistics: Num rows: 1 Data size: 152 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: timestamp)
+                        Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: decimal(26,12))
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=1)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 448 Basic stats: 
COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 448 Basic 
stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: decimal(26,12)), 
_col1 (type: decimal(26,12)), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Map 5 
@@ -197,34 +209,22 @@ STAGE PLANS:
         Map 9 
             Map Operator Tree:
                 TableScan
-                  alias: tt1
-                  filterExpr: decimal2612_col_77 is not null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE 
Column stats: NONE
+                  alias: tt2
+                  filterExpr: (timestamp_col_18 is not null and 
decimal1911_col_16 is not null and (decimal1911_col_16 BETWEEN 
DynamicValue(RS_13_tt1_decimal2612_col_77_min) AND 
DynamicValue(RS_13_tt1_decimal2612_col_77_max) and 
in_bloom_filter(decimal1911_col_16, 
DynamicValue(RS_13_tt1_decimal2612_col_77_bloom_filter)))) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE 
Column stats: NONE
                   Filter Operator
-                    predicate: decimal2612_col_77 is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: ((decimal1911_col_16 BETWEEN 
DynamicValue(RS_13_tt1_decimal2612_col_77_min) AND 
DynamicValue(RS_13_tt1_decimal2612_col_77_max) and 
in_bloom_filter(decimal1911_col_16, 
DynamicValue(RS_13_tt1_decimal2612_col_77_bloom_filter))) and 
decimal1911_col_16 is not null and timestamp_col_18 is not null) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 152 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: decimal2612_col_77 (type: decimal(26,12))
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
+                      expressions: decimal1911_col_16 (type: decimal(19,11)), 
timestamp_col_18 (type: timestamp)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 152 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: decimal(26,12))
                         sort order: +
                         Map-reduce partition columns: _col0 (type: 
decimal(26,12))
-                        Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: decimal(26,12))
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=1)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 448 Basic stats: 
COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 448 Basic 
stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: decimal(26,12)), 
_col1 (type: decimal(26,12)), _col2 (type: binary)
+                        Statistics: Num rows: 1 Data size: 152 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: timestamp)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 10 
@@ -236,26 +236,26 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: decimal(26,12))
                   1 _col0 (type: decimal(26,12))
-                outputColumnNames: _col2
-                Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE 
Column stats: NONE
+                outputColumnNames: _col1
+                Statistics: Num rows: 1 Data size: 167 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
-                  expressions: _col2 (type: timestamp), -92 (type: int)
+                  expressions: _col1 (type: timestamp), -92 (type: int)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 167 Basic stats: COMPLETE 
Column stats: NONE
                   Group By Operator
                     keys: _col0 (type: timestamp), _col1 (type: int)
                     mode: hash
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 123 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 167 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: timestamp), _col1 (type: 
int)
                       sort order: ++
                       Map-reduce partition columns: _col0 (type: timestamp), 
_col1 (type: int)
-                      Statistics: Num rows: 1 Data size: 123 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 167 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: _col0 (type: timestamp)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 123 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 167 Basic stats: 
COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: min(_col0), max(_col0), 
bloom_filter(_col0, expectedEntries=1)
                         mode: hash
@@ -277,7 +277,7 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE 
Column stats: NONE
                   value expressions: _col0 (type: timestamp), _col1 (type: 
timestamp), _col2 (type: binary)
-        Reducer 12 
+        Reducer 13 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out 
b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index 71838ee..a87890f 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -2251,36 +2251,42 @@ POSTHOOK: Input: default@src_cbo
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
-      Reducer 2 llap
-      File Output Operator [FS_12]
-        Merge Join Operator [MERGEJOIN_27] (rows=83 width=178)
-          Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"]
-        <-Map 1 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_8]
-            PartitionCols:_col0
-            Select Operator [SEL_2] (rows=166 width=178)
-              Output:["_col0","_col1"]
-              Filter Operator [FIL_15] (rows=166 width=178)
-                predicate:(key > '9')
-                TableScan [TS_0] (rows=500 width=178)
-                  
default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-        <-Map 3 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_9]
-            PartitionCols:_col0
-            Group By Operator [GBY_7] (rows=83 width=87)
-              Output:["_col0"],keys:_col0
-              Select Operator [SEL_5] (rows=166 width=87)
-                Output:["_col0"]
-                Filter Operator [FIL_16] (rows=166 width=87)
+      Reducer 3 llap
+      File Output Operator [FS_14]
+        Select Operator [SEL_13] (rows=83 width=178)
+          Output:["_col0","_col1"]
+          Merge Join Operator [MERGEJOIN_29] (rows=83 width=178)
+            Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col1","_col2"]
+          <-Map 4 [SIMPLE_EDGE] llap
+            SHUFFLE [RS_11]
+              PartitionCols:_col0
+              Select Operator [SEL_9] (rows=166 width=178)
+                Output:["_col0","_col1"]
+                Filter Operator [FIL_18] (rows=166 width=178)
                   predicate:(key > '9')
-                  TableScan [TS_3] (rows=500 width=87)
-                    default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                  TableScan [TS_7] (rows=500 width=178)
+                    
default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+          <-Reducer 2 [ONE_TO_ONE_EDGE] llap
+            FORWARD [RS_10]
+              PartitionCols:_col0
+              Group By Operator [GBY_5] (rows=83 width=87)
+                Output:["_col0"],keys:KEY._col0
+              <-Map 1 [SIMPLE_EDGE] llap
+                SHUFFLE [RS_4]
+                  PartitionCols:_col0
+                  Group By Operator [GBY_3] (rows=83 width=87)
+                    Output:["_col0"],keys:key
+                    Filter Operator [FIL_17] (rows=166 width=87)
+                      predicate:(key > '9')
+                      TableScan [TS_0] (rows=500 width=87)
+                        
default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
 
 PREHOOK: query: explain select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li 
on p.p_partkey = li.l_partkey 
@@ -2299,58 +2305,56 @@ POSTHOOK: Input: default@lineitem
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
-Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
-      Reducer 4 llap
-      File Output Operator [FS_22]
-        Select Operator [SEL_21] (rows=1 width=8)
+      Reducer 3 llap
+      File Output Operator [FS_21]
+        Select Operator [SEL_20] (rows=1 width=8)
           Output:["_col0","_col1"]
-          Merge Join Operator [MERGEJOIN_52] (rows=1 width=8)
-            Conds:RS_18._col1, _col4=RS_19._col0, _col1(Left 
Semi),Output:["_col0","_col3"]
-          <-Map 6 [SIMPLE_EDGE] llap
-            SHUFFLE [RS_19]
+          Merge Join Operator [MERGEJOIN_51] (rows=1 width=8)
+            Conds:RS_17._col1, _col4=RS_18._col0, _col1(Left 
Semi),Output:["_col0","_col3"]
+          <-Map 5 [SIMPLE_EDGE] llap
+            SHUFFLE [RS_18]
               PartitionCols:_col0, _col1
-              Group By Operator [GBY_17] (rows=1 width=8)
+              Group By Operator [GBY_16] (rows=1 width=8)
                 Output:["_col0","_col1"],keys:_col0, _col1
-                Select Operator [SEL_12] (rows=2 width=8)
+                Select Operator [SEL_14] (rows=2 width=8)
                   Output:["_col0","_col1"]
-                  Filter Operator [FIL_30] (rows=2 width=96)
+                  Filter Operator [FIL_29] (rows=2 width=96)
                     predicate:((l_linenumber = 1) and (l_shipmode = 'AIR') and 
l_orderkey is not null)
-                    TableScan [TS_10] (rows=100 width=96)
+                    TableScan [TS_12] (rows=100 width=96)
                       
default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"]
-          <-Reducer 3 [SIMPLE_EDGE] llap
-            SHUFFLE [RS_18]
+          <-Reducer 2 [SIMPLE_EDGE] llap
+            SHUFFLE [RS_17]
               PartitionCols:_col1, _col4
-              Merge Join Operator [MERGEJOIN_51] (rows=14 width=16)
-                
Conds:RS_13._col0=RS_14._col1(Inner),Output:["_col0","_col1","_col3","_col4"]
-              <-Map 5 [SIMPLE_EDGE] llap
-                SHUFFLE [RS_14]
-                  PartitionCols:_col1
-                  Select Operator [SEL_9] (rows=14 width=16)
-                    Output:["_col0","_col1","_col2","_col3"]
-                    Filter Operator [FIL_29] (rows=14 width=16)
-                      predicate:((l_linenumber = 1) and l_orderkey is not null 
and l_partkey is not null)
-                      TableScan [TS_7] (rows=100 width=16)
-                        
default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"]
-              <-Reducer 2 [ONE_TO_ONE_EDGE] llap
-                FORWARD [RS_13]
-                  PartitionCols:_col0
-                  Group By Operator [GBY_5] (rows=50 width=4)
-                    Output:["_col0"],keys:KEY._col0
-                  <-Map 1 [SIMPLE_EDGE] llap
-                    SHUFFLE [RS_4]
-                      PartitionCols:_col0
-                      Group By Operator [GBY_3] (rows=50 width=4)
-                        Output:["_col0"],keys:l_partkey
+              Select Operator [SEL_11] (rows=14 width=16)
+                Output:["_col0","_col1","_col3","_col4"]
+                Merge Join Operator [MERGEJOIN_50] (rows=14 width=16)
+                  Conds:RS_8._col1=RS_9._col0(Left 
Semi),Output:["_col0","_col1","_col2","_col3"]
+                <-Map 1 [SIMPLE_EDGE] llap
+                  SHUFFLE [RS_8]
+                    PartitionCols:_col1
+                    Select Operator [SEL_2] (rows=14 width=16)
+                      Output:["_col0","_col1","_col2","_col3"]
+                      Filter Operator [FIL_27] (rows=14 width=16)
+                        predicate:((l_linenumber = 1) and l_orderkey is not 
null and l_partkey is not null)
+                        TableScan [TS_0] (rows=100 width=16)
+                          
default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"]
+                <-Map 4 [SIMPLE_EDGE] llap
+                  SHUFFLE [RS_9]
+                    PartitionCols:_col0
+                    Group By Operator [GBY_7] (rows=50 width=4)
+                      Output:["_col0"],keys:_col0
+                      Select Operator [SEL_5] (rows=100 width=4)
+                        Output:["_col0"]
                         Filter Operator [FIL_28] (rows=100 width=4)
                           predicate:l_partkey is not null
-                          TableScan [TS_0] (rows=100 width=4)
+                          TableScan [TS_3] (rows=100 width=4)
                             
default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"]
 
 PREHOOK: query: explain select key, value, count(*) 
@@ -2372,74 +2376,78 @@ POSTHOOK: Input: default@src_cbo
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
-Reducer 6 <- Map 5 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 6 <- Map 1 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
-      Reducer 4 llap
-      File Output Operator [FS_31]
-        Merge Join Operator [MERGEJOIN_54] (rows=41 width=186)
-          Conds:RS_27._col2=RS_28._col0(Left 
Semi),Output:["_col0","_col1","_col2"]
-        <-Reducer 3 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_27]
+      Reducer 5 llap
+      File Output Operator [FS_33]
+        Merge Join Operator [MERGEJOIN_56] (rows=41 width=186)
+          Conds:RS_29._col2=RS_30._col0(Left 
Semi),Output:["_col0","_col1","_col2"]
+        <-Reducer 4 [SIMPLE_EDGE] llap
+          SHUFFLE [RS_29]
             PartitionCols:_col2
-            Filter Operator [FIL_37] (rows=41 width=186)
+            Filter Operator [FIL_39] (rows=41 width=186)
               predicate:_col2 is not null
-              Group By Operator [GBY_14] (rows=41 width=186)
+              Group By Operator [GBY_16] (rows=41 width=186)
                 
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0,
 KEY._col1
-              <-Reducer 2 [SIMPLE_EDGE] llap
-                SHUFFLE [RS_13]
+              <-Reducer 3 [SIMPLE_EDGE] llap
+                SHUFFLE [RS_15]
                   PartitionCols:_col0, _col1
-                  Group By Operator [GBY_12] (rows=41 width=186)
-                    
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1
-                    Merge Join Operator [MERGEJOIN_53] (rows=83 width=178)
-                      Conds:RS_8._col0=RS_9._col0(Left 
Semi),Output:["_col0","_col1"]
-                    <-Map 5 [SIMPLE_EDGE] llap
-                      SHUFFLE [RS_9]
+                  Group By Operator [GBY_14] (rows=41 width=186)
+                    
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col2
+                    Merge Join Operator [MERGEJOIN_55] (rows=83 width=178)
+                      
Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col1","_col2"]
+                    <-Map 7 [SIMPLE_EDGE] llap
+                      SHUFFLE [RS_11]
                         PartitionCols:_col0
-                        Group By Operator [GBY_7] (rows=83 width=87)
-                          Output:["_col0"],keys:_col0
-                          Select Operator [SEL_5] (rows=166 width=87)
-                            Output:["_col0"]
-                            Filter Operator [FIL_39] (rows=166 width=87)
-                              predicate:(key > '8')
-                              TableScan [TS_3] (rows=500 width=87)
-                                
default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-                    <-Map 1 [SIMPLE_EDGE] llap
-                      SHUFFLE [RS_8]
-                        PartitionCols:_col0
-                        Select Operator [SEL_2] (rows=166 width=178)
+                        Select Operator [SEL_9] (rows=166 width=178)
                           Output:["_col0","_col1"]
-                          Filter Operator [FIL_38] (rows=166 width=178)
+                          Filter Operator [FIL_41] (rows=166 width=178)
                             predicate:(key > '8')
-                            TableScan [TS_0] (rows=500 width=178)
+                            TableScan [TS_7] (rows=500 width=178)
                               
default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                    <-Reducer 2 [ONE_TO_ONE_EDGE] llap
+                      FORWARD [RS_10]
+                        PartitionCols:_col0
+                        Group By Operator [GBY_5] (rows=83 width=87)
+                          Output:["_col0"],keys:KEY._col0
+                        <-Map 1 [SIMPLE_EDGE] llap
+                          SHUFFLE [RS_4]
+                            PartitionCols:_col0
+                            Group By Operator [GBY_3] (rows=83 width=87)
+                              Output:["_col0"],keys:key
+                              Filter Operator [FIL_40] (rows=166 width=87)
+                                predicate:(key > '8')
+                                TableScan [TS_0] (rows=500 width=87)
+                                  
default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
         <-Reducer 6 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_28]
+          SHUFFLE [RS_30]
             PartitionCols:_col0
-            Group By Operator [GBY_26] (rows=41 width=8)
+            Group By Operator [GBY_28] (rows=41 width=8)
               Output:["_col0"],keys:_col0
-              Select Operator [SEL_24] (rows=83 width=8)
+              Select Operator [SEL_26] (rows=83 width=8)
                 Output:["_col0"]
-                Filter Operator [FIL_40] (rows=83 width=8)
+                Filter Operator [FIL_42] (rows=83 width=8)
                   predicate:_col1 is not null
-                  Select Operator [SEL_42] (rows=83 width=8)
+                  Select Operator [SEL_44] (rows=83 width=8)
                     Output:["_col1"]
-                    Group By Operator [GBY_22] (rows=83 width=95)
+                    Group By Operator [GBY_24] (rows=83 width=95)
                       
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
-                    <-Map 5 [SIMPLE_EDGE] llap
-                      SHUFFLE [RS_21]
+                    <-Map 1 [SIMPLE_EDGE] llap
+                      SHUFFLE [RS_23]
                         PartitionCols:_col0
-                        Group By Operator [GBY_20] (rows=83 width=95)
+                        Group By Operator [GBY_22] (rows=83 width=95)
                           
Output:["_col0","_col1"],aggregations:["count()"],keys:key
-                          Filter Operator [FIL_41] (rows=166 width=87)
+                          Filter Operator [FIL_43] (rows=166 width=87)
                             predicate:(key > '9')
-                             Please refer to the previous TableScan [TS_3]
+                             Please refer to the previous TableScan [TS_0]
 
 PREHOOK: query: explain select p_mfgr, p_name, avg(p_size) 
 from part 
@@ -2528,64 +2536,66 @@ POSTHOOK: Input: default@src_cbo
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE)
 Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
-Reducer 7 <- Map 5 (SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE)
+Reducer 7 <- Map 5 (CUSTOM_SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
       Reducer 4 llap
-      File Output Operator [FS_26]
-        Select Operator [SEL_25] (rows=631 width=178)
+      File Output Operator [FS_27]
+        Select Operator [SEL_26] (rows=631 width=178)
           Output:["_col0","_col1"]
         <-Reducer 3 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_24]
-            Select Operator [SEL_23] (rows=631 width=178)
+          SHUFFLE [RS_25]
+            Select Operator [SEL_24] (rows=631 width=178)
               Output:["_col0","_col1"]
-              Filter Operator [FIL_22] (rows=631 width=194)
+              Filter Operator [FIL_23] (rows=631 width=194)
                 predicate:((_col2 = 0L) or (_col5 is null and _col0 is not 
null and (_col3 >= _col2)))
-                Merge Join Operator [MERGEJOIN_36] (rows=631 width=194)
-                  Conds:RS_19._col0=RS_20._col0(Left 
Outer),Output:["_col0","_col1","_col2","_col3","_col5"]
-                <-Reducer 2 [SIMPLE_EDGE] llap
-                  SHUFFLE [RS_19]
-                    PartitionCols:_col0
-                    Merge Join Operator [MERGEJOIN_35] (rows=500 width=194)
-                      Conds:(Inner),Output:["_col0","_col1","_col2","_col3"]
-                    <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
-                      PARTITION_ONLY_SHUFFLE [RS_16]
-                        Select Operator [SEL_1] (rows=500 width=178)
-                          Output:["_col0","_col1"]
-                          TableScan [TS_0] (rows=500 width=178)
-                            
default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-                    <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap
-                      PARTITION_ONLY_SHUFFLE [RS_17]
-                        Group By Operator [GBY_7] (rows=1 width=16)
-                          
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"]
-                        <-Map 5 [CUSTOM_SIMPLE_EDGE] llap
-                          SHUFFLE [RS_6]
-                            Group By Operator [GBY_5] (rows=1 width=16)
-                              
Output:["_col0","_col1"],aggregations:["count()","count(key)"]
-                              Filter Operator [FIL_28] (rows=166 width=87)
-                                predicate:(key > '2')
-                                TableScan [TS_2] (rows=500 width=87)
-                                  
default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-                <-Reducer 7 [ONE_TO_ONE_EDGE] llap
-                  FORWARD [RS_20]
-                    PartitionCols:_col0
-                    Select Operator [SEL_15] (rows=83 width=91)
-                      Output:["_col0","_col1"]
-                      Group By Operator [GBY_14] (rows=83 width=87)
-                        Output:["_col0"],keys:KEY._col0
-                      <-Map 5 [SIMPLE_EDGE] llap
-                        SHUFFLE [RS_13]
+                Select Operator [SEL_22] (rows=631 width=194)
+                  Output:["_col0","_col1","_col2","_col3","_col5"]
+                  Merge Join Operator [MERGEJOIN_37] (rows=631 width=194)
+                    
Conds:(Inner),Output:["_col0","_col1","_col3","_col4","_col5"]
+                  <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+                    PARTITION_ONLY_SHUFFLE [RS_19]
+                      Merge Join Operator [MERGEJOIN_36] (rows=631 width=178)
+                        Conds:RS_16._col0=RS_17._col0(Left 
Outer),Output:["_col0","_col1","_col3"]
+                      <-Map 1 [SIMPLE_EDGE] llap
+                        SHUFFLE [RS_16]
                           PartitionCols:_col0
-                          Group By Operator [GBY_12] (rows=83 width=87)
-                            Output:["_col0"],keys:key
-                            Filter Operator [FIL_29] (rows=166 width=87)
+                          Select Operator [SEL_1] (rows=500 width=178)
+                            Output:["_col0","_col1"]
+                            TableScan [TS_0] (rows=500 width=178)
+                              
default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                      <-Reducer 6 [ONE_TO_ONE_EDGE] llap
+                        FORWARD [RS_17]
+                          PartitionCols:_col0
+                          Select Operator [SEL_8] (rows=83 width=91)
+                            Output:["_col0","_col1"]
+                            Group By Operator [GBY_7] (rows=83 width=87)
+                              Output:["_col0"],keys:KEY._col0
+                            <-Map 5 [SIMPLE_EDGE] llap
+                              PARTITION_ONLY_SHUFFLE [RS_6]
+                                PartitionCols:_col0
+                                Group By Operator [GBY_5] (rows=83 width=87)
+                                  Output:["_col0"],keys:key
+                                  Filter Operator [FIL_29] (rows=166 width=87)
+                                    predicate:(key > '2')
+                                    TableScan [TS_2] (rows=500 width=87)
+                                      
default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                  <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap
+                    PARTITION_ONLY_SHUFFLE [RS_20]
+                      Group By Operator [GBY_14] (rows=1 width=16)
+                        
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"]
+                      <-Map 5 [CUSTOM_SIMPLE_EDGE] llap
+                        PARTITION_ONLY_SHUFFLE [RS_13]
+                          Group By Operator [GBY_12] (rows=1 width=16)
+                            
Output:["_col0","_col1"],aggregations:["count()","count(key)"]
+                            Filter Operator [FIL_30] (rows=166 width=87)
                               predicate:(key > '2')
                                Please refer to the previous TableScan [TS_2]
 
@@ -2697,8 +2707,8 @@ POSTHOOK: Input: default@part
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE)
 Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
 
@@ -2707,50 +2717,52 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 4 llap
-      File Output Operator [FS_31]
-        Select Operator [SEL_30] (rows=27 width=125)
+      File Output Operator [FS_32]
+        Select Operator [SEL_31] (rows=27 width=125)
           Output:["_col0","_col1"]
         <-Reducer 3 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_29]
-            Select Operator [SEL_28] (rows=27 width=125)
+          SHUFFLE [RS_30]
+            Select Operator [SEL_29] (rows=27 width=125)
               Output:["_col0","_col1"]
-              Filter Operator [FIL_27] (rows=27 width=141)
+              Filter Operator [FIL_28] (rows=27 width=141)
                 predicate:((_col2 = 0L) or (_col5 is null and _col1 is not 
null and (_col3 >= _col2)))
-                Merge Join Operator [MERGEJOIN_37] (rows=27 width=141)
-                  Conds:RS_24.UDFToDouble(_col1)=RS_25._col0(Left 
Outer),Output:["_col0","_col1","_col2","_col3","_col5"]
-                <-Reducer 6 [SIMPLE_EDGE] llap
-                  SHUFFLE [RS_25]
-                    PartitionCols:_col0
-                    Select Operator [SEL_20] (rows=1 width=12)
-                      Output:["_col0","_col1"]
-                      Group By Operator [GBY_7] (rows=1 width=16)
-                        
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"]
-                      <-Map 5 [CUSTOM_SIMPLE_EDGE] llap
-                        PARTITION_ONLY_SHUFFLE [RS_6]
-                          Group By Operator [GBY_5] (rows=1 width=16)
-                            
Output:["_col0","_col1"],aggregations:["sum(p_size)","count(p_size)"]
-                            Filter Operator [FIL_33] (rows=8 width=4)
-                              predicate:(p_size < 10)
-                              TableScan [TS_2] (rows=26 width=4)
-                                
default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"]
-                <-Reducer 2 [SIMPLE_EDGE] llap
-                  SHUFFLE [RS_24]
-                    PartitionCols:UDFToDouble(_col1)
-                    Merge Join Operator [MERGEJOIN_36] (rows=26 width=141)
-                      Conds:(Inner),Output:["_col0","_col1","_col2","_col3"]
-                    <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap
-                      SHUFFLE [RS_22]
-                        Group By Operator [GBY_12] (rows=1 width=16)
-                          
Output:["_col0","_col1"],aggregations:["count()","count(_col0)"]
-                          Select Operator [SEL_8] (rows=1 width=16)
-                            Output:["_col0"]
+                Select Operator [SEL_27] (rows=27 width=141)
+                  Output:["_col0","_col1","_col2","_col3","_col5"]
+                  Merge Join Operator [MERGEJOIN_38] (rows=27 width=141)
+                    
Conds:(Inner),Output:["_col0","_col1","_col3","_col4","_col5"]
+                  <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap
+                    PARTITION_ONLY_SHUFFLE [RS_25]
+                      Group By Operator [GBY_19] (rows=1 width=16)
+                        
Output:["_col0","_col1"],aggregations:["count()","count(_col0)"]
+                        Select Operator [SEL_15] (rows=1 width=16)
+                          Output:["_col0"]
+                          Group By Operator [GBY_7] (rows=1 width=16)
+                            
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"]
+                          <-Map 5 [CUSTOM_SIMPLE_EDGE] llap
+                            PARTITION_ONLY_SHUFFLE [RS_6]
+                              Group By Operator [GBY_5] (rows=1 width=16)
+                                
Output:["_col0","_col1"],aggregations:["sum(p_size)","count(p_size)"]
+                                Filter Operator [FIL_35] (rows=8 width=4)
+                                  predicate:(p_size < 10)
+                                  TableScan [TS_2] (rows=26 width=4)
+                                    
default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"]
+                  <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+                    PARTITION_ONLY_SHUFFLE [RS_24]
+                      Merge Join Operator [MERGEJOIN_37] (rows=27 width=125)
+                        Conds:RS_21.UDFToDouble(_col1)=RS_22._col0(Left 
Outer),Output:["_col0","_col1","_col3"]
+                      <-Reducer 6 [SIMPLE_EDGE] llap
+                        PARTITION_ONLY_SHUFFLE [RS_22]
+                          PartitionCols:_col0
+                          Select Operator [SEL_8] (rows=1 width=12)
+                            Output:["_col0","_col1"]
                              Please refer to the previous Group By Operator 
[GBY_7]
-                    <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
-                      PARTITION_ONLY_SHUFFLE [RS_21]
-                        Select Operator [SEL_1] (rows=26 width=125)
-                          Output:["_col0","_col1"]
-                          TableScan [TS_0] (rows=26 width=125)
-                            
default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_size"]
+                      <-Map 1 [SIMPLE_EDGE] llap
+                        SHUFFLE [RS_21]
+                          PartitionCols:UDFToDouble(_col1)
+                          Select Operator [SEL_1] (rows=26 width=125)
+                            Output:["_col0","_col1"]
+                            TableScan [TS_0] (rows=26 width=125)
+                              
default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_size"]
 
 PREHOOK: query: explain select b.p_mfgr, min(p_retailprice) 
 from part b 

http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/llap/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out 
b/ql/src/test/results/clientpositive/llap/lineage3.q.out
index 94c6a13..22d1cdb 100644
--- a/ql/src/test/results/clientpositive/llap/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out
@@ -180,7 +180,7 @@ PREHOOK: Input: default@src1
 #### A masked pattern was here ####
 
{"version":"1.0","engine":"tez","database":"default","hash":"94e9cc0a67801fe1503a3cb0c5029d59","queryText":"select
 * from src1 a\nwhere exists\n  (select cint from alltypesorc b\n   where a.key 
= b.ctinyint + 300)\nand key > 
300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key)
 > 
300.0D)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key
 = 
a.key)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.ctinyint
 is not 
null","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint)
 + 300)) = 
UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1
 
.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
 311    val_311
-Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in 
Stage 'Reducer 3' is a cross product
 PREHOOK: query: select key, value from src1
 where key not in (select key+18 from src1) order by key
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out 
b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
index ac505a5..e62eb0e 100644
--- a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
+++ b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
@@ -446,7 +446,7 @@ POSTHOOK: Lineage: part_null_n1.p_partkey SCRIPT []
 POSTHOOK: Lineage: part_null_n1.p_retailprice SCRIPT []
 POSTHOOK: Lineage: part_null_n1.p_size SCRIPT []
 POSTHOOK: Lineage: part_null_n1.p_type SCRIPT []
-Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in 
Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 3' is a cross product
 PREHOOK: query: explain select /*+ mapjoin(None)*/ * from part where p_name = 
(select p_name from part_null_n1 where p_name is null)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@part
@@ -466,32 +466,13 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Reducer 4 
(XPROD_EDGE)
-        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE), Reducer 2 
(XPROD_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: part
-                  filterExpr: (p_name = null) (type: boolean)
-                  Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: (p_name = null) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 619 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: p_partkey (type: int), p_mfgr (type: 
string), p_brand (type: string), p_type (type: string), p_size (type: int), 
p_container (type: string), p_retailprice (type: double), p_comment (type: 
string)
-                      outputColumnNames: _col0, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
-                      Statistics: Num rows: 1 Data size: 582 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 582 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: int), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 
(type: string), _col7 (type: double), _col8 (type: string)
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
                   alias: part_null_n1
                   filterExpr: p_name is null (type: boolean)
                   Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
@@ -514,7 +495,42 @@ STAGE PLANS:
                         Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  filterExpr: (p_name = null) (type: boolean)
+                  Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (p_name = null) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 619 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: p_partkey (type: int), p_mfgr (type: 
string), p_brand (type: string), p_type (type: string), p_size (type: int), 
p_container (type: string), p_retailprice (type: double), p_comment (type: 
string)
+                      outputColumnNames: _col0, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
+                      Statistics: Num rows: 1 Data size: 582 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 582 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: int), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 
(type: string), _col7 (type: double), _col8 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
         Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE 
Column stats: NONE
+                Filter Operator
+                  predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    Statistics: Num rows: 1 Data size: 192 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 192 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
               Merge Join Operator
@@ -525,10 +541,10 @@ STAGE PLANS:
                   0 
                   1 
                   2 
-                outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8
+                outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8, 
_col9, _col10
                 Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), null (type: string), _col2 
(type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), 
_col6 (type: string), _col7 (type: double), _col8 (type: string)
+                  expressions: _col2 (type: int), null (type: string), _col4 
(type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), 
_col8 (type: string), _col9 (type: double), _col10 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
                   Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
@@ -538,22 +554,6 @@ STAGE PLANS:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE 
Column stats: NONE
-                Filter Operator
-                  predicate: (sq_count_check(_col0) <= 1) (type: boolean)
-                  Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE 
Column stats: NONE
-                  Select Operator
-                    Statistics: Num rows: 1 Data size: 192 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 192 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -561,7 +561,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Map 1' is a cross product
+Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Map 3' is a cross product
 PREHOOK: query: explain select * from part where p_name = (select p_name from 
part_null_n1 where p_name is null)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@part
@@ -581,13 +581,38 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
-        Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+        Map 3 <- Map 1 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
+                  alias: part_null_n1
+                  filterExpr: p_name is null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: p_name is null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 192 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 192 Basic stats: 
COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
                   alias: part
                   filterExpr: (p_name = null) (type: boolean)
                   Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
@@ -606,13 +631,13 @@ STAGE PLANS:
                           0 
                           1 
                           2 
-                        outputColumnNames: _col0, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
+                        outputColumnNames: _col2, _col4, _col5, _col6, _col7, 
_col8, _col9, _col10
                         input vertices:
-                          1 Reducer 3
-                          2 Map 2
+                          0 Reducer 2
+                          1 Map 1
                         Statistics: Num rows: 1 Data size: 959 Basic stats: 
COMPLETE Column stats: NONE
                         Select Operator
-                          expressions: _col0 (type: int), null (type: string), 
_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: 
int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+                          expressions: _col2 (type: int), null (type: string), 
_col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: 
int), _col8 (type: string), _col9 (type: double), _col10 (type: string)
                           outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8
                           Statistics: Num rows: 1 Data size: 959 Basic stats: 
COMPLETE Column stats: NONE
                           File Output Operator
@@ -624,32 +649,7 @@ STAGE PLANS:
                                 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: part_null_n1
-                  filterExpr: p_name is null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: p_name is null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 192 Basic stats: 
COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order: 
-                          Statistics: Num rows: 1 Data size: 192 Basic stats: 
COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-        Reducer 3 
+        Reducer 2 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out 
b/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out
index a68275b..af55d4d 100644
--- a/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out
+++ b/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out
@@ -55,33 +55,13 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: t1_n97
-                  filterExpr: (k < 15) (type: boolean)
-                  Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  Filter Operator
-                    predicate: (k < 15) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 95 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: v (type: string), k (type: int)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 95 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: UDFToDouble(_col1) (type: double)
-                        sort order: +
-                        Map-reduce partition columns: UDFToDouble(_col1) 
(type: double)
-                        Statistics: Num rows: 1 Data size: 95 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: string), _col1 (type: 
int)
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
                   alias: t2_n60
                   filterExpr: ((v = 'people') and k is not null) (type: 
boolean)
                   Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE 
Column stats: COMPLETE
@@ -90,10 +70,10 @@ STAGE PLANS:
                     Statistics: Num rows: 1 Data size: 99 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: k (type: double)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      outputColumnNames: k
+                      Statistics: Num rows: 1 Data size: 99 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Group By Operator
-                        keys: _col0 (type: double)
+                        keys: k (type: double)
                         mode: hash
                         outputColumnNames: _col0
                         Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
@@ -104,24 +84,62 @@ STAGE PLANS:
                           Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: t1_n97
+                  filterExpr: (k < 15) (type: boolean)
+                  Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (k < 15) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 95 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: v (type: string), k (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 95 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: UDFToDouble(_col1) (type: double)
+                        sort order: +
+                        Map-reduce partition columns: UDFToDouble(_col1) 
(type: double)
+                        Statistics: Num rows: 1 Data size: 95 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: string), _col1 (type: 
int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
         Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: double)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: double)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: double)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+        Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Left Semi Join 0 to 1
+                     Inner Join 0 to 1
                 keys:
-                  0 UDFToDouble(_col1) (type: double)
-                  1 _col0 (type: double)
-                outputColumnNames: _col0, _col1
+                  0 _col0 (type: double)
+                  1 UDFToDouble(_col1) (type: double)
+                outputColumnNames: _col1, _col2
                 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE 
Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
+                Select Operator
+                  expressions: _col1 (type: string), _col2 (type: int)
+                  outputColumnNames: _col0, _col1
                   Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  table:
-                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 95 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

[16/16] hive git commit: HIVE-20778: Join reordering may not be triggered if all joins in plan are created by decorrelation logic (Vineet Garg via Jesus Camacho Rodriguez)

Reply via email to