[2/2] hive git commit: HIVE-18201 : Disable XPROD_EDGE for sq_count_check() created for scalar subqueries (Ashutosh Chauhan via Jesus Camacho Rodriguez)

hashutosh Tue, 31 Jul 2018 21:38:49 -0700

HIVE-18201 : Disable XPROD_EDGE for sq_count_check() created for scalar 
subqueries (Ashutosh Chauhan via Jesus Camacho Rodriguez)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4d436953
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4d436953
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4d436953

Branch: refs/heads/master
Commit: 4d436953e6be1302a0867aa16a8c5ecd2804eed7
Parents: e9e1f8f
Author: Ashutosh Chauhan <[email protected]>
Authored: Tue Dec 12 15:15:00 2017 -0800
Committer: Ashutosh Chauhan <[email protected]>
Committed: Tue Jul 31 21:36:09 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 +
 .../hive/ql/optimizer/ConvertJoinMapJoin.java   |  25 +-
 .../test/queries/clientpositive/perf/query6.q   |   2 +
 .../queries/clientpositive/subquery_in_having.q |   4 +-
 .../clientpositive/llap/auto_join_filters.q.out |   4 +-
 .../clientpositive/llap/auto_join_nulls.q.out   |   2 +-
 .../results/clientpositive/llap/mapjoin2.q.out  |   2 +-
 .../clientpositive/llap/mapjoin_hint.q.out      |  62 ++-
 .../llap/subquery_in_having.q.out               | 427 ++++++++-----------
 .../llap/tez_fixed_bucket_pruning.q.out         | 252 +++++------
 .../llap/vector_complex_all.q.out               |  94 ++--
 .../llap/vector_groupby_mapjoin.q.out           | 113 ++---
 .../llap/vector_join_filters.q.out              |   2 +-
 .../llap/vectorized_multi_output_select.q.out   |  58 ++-
 .../clientpositive/perf/tez/query6.q.out        | 189 ++++----
 15 files changed, 592 insertions(+), 646 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index cce908f..093b4a7 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2027,6 +2027,8 @@ public class HiveConf extends Configuration {
         "However, if it is on, and the predicted number of entries in 
hashtable for a given join \n" +
         "input is larger than this number, the join will not be converted to a 
mapjoin. \n" +
         "The value \"-1\" means no limit."),
+    XPRODSMALLTABLEROWSTHRESHOLD("hive.xprod.mapjoin.small.table.rows", 
1,"Maximum number of rows on build side"
+        + " of map join before it switches over to cross product edge"),
     HIVECONVERTJOINMAXSHUFFLESIZE("hive.auto.convert.join.shuffle.max.size", 
10000000000L,
        "If hive.auto.convert.join.noconditionaltask is off, this parameter 
does not take affect. \n" +
        "However, if it is on, and the predicted size of the larger input for a 
given join is greater \n" +

http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 011dadf..4145baf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -113,14 +113,6 @@ public class ConvertJoinMapJoin implements NodeProcessor {
     MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(maxSize, 
context.conf, llapInfo);
     joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo);
 
-    // not use map join in case of cross product
-    boolean cartesianProductEdgeEnabled =
-      HiveConf.getBoolVar(context.conf, 
HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED);
-    if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && 
isCrossProduct(joinOp)) {
-      fallbackToMergeJoin(joinOp, context);
-      return null;
-    }
-
     TezBucketJoinProcCtx tezBucketJoinProcCtx = new 
TezBucketJoinProcCtx(context.conf);
     boolean hiveConvertJoin = 
context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) &
             !context.parseContext.getDisableMapJoin();
@@ -988,6 +980,23 @@ public class ConvertJoinMapJoin implements NodeProcessor {
       return -1;
     }
 
+    // only allow cross product in map joins if build side is 'small'
+    boolean cartesianProductEdgeEnabled =
+      HiveConf.getBoolVar(context.conf, 
HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED);
+    if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && 
isCrossProduct(joinOp)) {
+      for (int i = 0 ; i < joinOp.getParentOperators().size(); i ++) {
+        if (i != bigTablePosition) {
+          Statistics parentStats = 
joinOp.getParentOperators().get(i).getStatistics();
+          if (parentStats.getNumRows() >
+            HiveConf.getIntVar(context.conf, 
HiveConf.ConfVars.XPRODSMALLTABLEROWSTHRESHOLD)) {
+            // if any of the smaller sides is estimated to generate more than
+            // the threshold number of rows, we disable mapjoin
+            return -1;
+          }
+        }
+      }
+    }
+
     // We store the total memory that this MapJoin is going to use,
     // which is calculated as totalSize/buckets, with totalSize
     // equal to sum of small tables size.

http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/queries/clientpositive/perf/query6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query6.q 
b/ql/src/test/queries/clientpositive/perf/query6.q
index d45045d..aabce52 100644
--- a/ql/src/test/queries/clientpositive/perf/query6.q
+++ b/ql/src/test/queries/clientpositive/perf/query6.q
@@ -1,3 +1,5 @@
+set hive.auto.convert.join=true;
+set hive.tez.cartesian-product.enabled=true;
 set hive.mapred.mode=nonstrict;
 -- start query 1 in stream 0 using template query6.tpl and seed 1819994127
 explain

http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/queries/clientpositive/subquery_in_having.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in_having.q 
b/ql/src/test/queries/clientpositive/subquery_in_having.q
index ec6981b..8b6d1a7 100644
--- a/ql/src/test/queries/clientpositive/subquery_in_having.q
+++ b/ql/src/test/queries/clientpositive/subquery_in_having.q
@@ -1,5 +1,6 @@
 --! qt:dataset:src
 set hive.mapred.mode=nonstrict;
+set hive.optimize.shared.work.extended=false;
 -- SORT_QUERY_RESULTS
 
 -- data setup
@@ -154,4 +155,5 @@ group by key, value
 having count(*) not in (select count(*) from src_null_n4 s1 where s1.key > '9' 
and s1.value <> b.value group by s1.key );
 
 DROP TABLE src_null_n4;
-DROP TABLE part_subq;
\ No newline at end of file
+DROP TABLE part_subq;
+reset hive.optimize.shared.work.extended;

http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out 
b/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out
index 7a271fc..a639792 100644
--- a/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out
@@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/in3.txt' INTO TABLE my
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@myinput1_n5
-Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM 
myinput1_n5 a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = 
a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1_n5
@@ -300,7 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/in/000001_0' into tabl
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@smb_input2_n0
-Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 
a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND 
b.key > 40 AND b.value > 50 AND b.key = b.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1_n5

http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out 
b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
index c7bb127..194fc5d 100644
--- a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
@@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/in1.txt' INTO TABLE my
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@myinput1_n2
-Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 
a JOIN myinput1_n2 b
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1_n2

http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/mapjoin2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin2.q.out 
b/ql/src/test/results/clientpositive/llap/mapjoin2.q.out
index 4638fce..872f918 100644
--- a/ql/src/test/results/clientpositive/llap/mapjoin2.q.out
+++ b/ql/src/test/results/clientpositive/llap/mapjoin2.q.out
@@ -57,7 +57,7 @@ POSTHOOK: Input: default@tbl_n1
 #### A masked pattern was here ####
 false  false   true    true
 true   true    false   false
-Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( 
SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 
confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
 PREHOOK: type: QUERY
 PREHOOK: Input: _dummy_database@_dummy_table

http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out 
b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
index 5cccce9..3c6270a 100644
--- a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
+++ b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
@@ -527,7 +527,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in 
Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: explain select * from part where p_name = (select p_name from 
part_null_n1 where p_name is null)
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from part where p_name = (select p_name from 
part_null_n1 where p_name is null)
@@ -541,8 +541,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Reducer 4 
(XPROD_EDGE)
-        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+        Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
+        Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -558,13 +558,33 @@ STAGE PLANS:
                       expressions: p_partkey (type: int), p_mfgr (type: 
string), p_brand (type: string), p_type (type: string), p_size (type: int), 
p_container (type: string), p_retailprice (type: double), p_comment (type: 
string)
                       outputColumnNames: _col0, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
                       Statistics: Num rows: 1 Data size: 582 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 582 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: int), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 
(type: string), _col7 (type: double), _col8 (type: string)
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                             Inner Join 0 to 2
+                        keys:
+                          0 
+                          1 
+                          2 
+                        outputColumnNames: _col0, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
+                        input vertices:
+                          1 Reducer 3
+                          2 Map 2
+                        Statistics: Num rows: 1 Data size: 959 Basic stats: 
COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: _col0 (type: int), null (type: string), 
_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: 
int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+                          outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8
+                          Statistics: Num rows: 1 Data size: 959 Basic stats: 
COMPLETE Column stats: NONE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 1 Data size: 959 Basic 
stats: COMPLETE Column stats: NONE
+                            table:
+                                input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 3 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: part_null_n1
@@ -589,31 +609,7 @@ STAGE PLANS:
                         Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Inner Join 0 to 2
-                keys:
-                  0 
-                  1 
-                  2 
-                outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8
-                Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), null (type: string), _col2 
(type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), 
_col6 (type: string), _col7 (type: double), _col8 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
-                  Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE 
Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 1 Data size: 959 Basic stats: 
COMPLETE Column stats: NONE
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
+        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out 
b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
index 20428e1..af8e23a 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
@@ -1570,10 +1570,9 @@ POSTHOOK: Output: default@src_null_n4
 POSTHOOK: Lineage: src_null_n4.key SCRIPT []
 POSTHOOK: Lineage: src_null_n4.value EXPRESSION []
 Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[134][bigTable=?] in task 'Map 1' is a cross product
-Warning: Shuffle Join MERGEJOIN[133][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 3' is a cross product
-Warning: Shuffle Join MERGEJOIN[135][tables = [$hdt$_2, $hdt$_3]] in Stage 
'Reducer 6' is a cross product
+Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product
+Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross 
product
+Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross 
product
 PREHOOK: query: explain
 select key, value, count(*)
 from src_null_n4 b
@@ -1597,18 +1596,16 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), 
Reducer 13 (BROADCAST_EDGE)
-        Reducer 11 <- Map 10 (SIMPLE_EDGE)
-        Reducer 12 <- Map 10 (SIMPLE_EDGE)
-        Reducer 13 <- Map 10 (SIMPLE_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE), Reducer 
8 (BROADCAST_EDGE)
-        Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-        Reducer 5 <- Map 1 (SIMPLE_EDGE)
-        Reducer 6 <- Map 1 (XPROD_EDGE), Reducer 9 (XPROD_EDGE)
-        Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+        Map 1 <- Reducer 4 (BROADCAST_EDGE)
+        Map 6 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
+        Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 
8 (BROADCAST_EDGE)
+        Reducer 4 <- Map 3 (SIMPLE_EDGE)
+        Reducer 5 <- Map 3 (SIMPLE_EDGE)
+        Reducer 7 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE)
         Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
-        Reducer 9 <- Map 1 (SIMPLE_EDGE)
+        Reducer 9 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1628,7 +1625,7 @@ STAGE PLANS:
                         1 
                       outputColumnNames: _col0, _col1, _col2, _col3
                       input vertices:
-                        1 Reducer 11
+                        1 Reducer 4
                       residual filter predicates: {(_col2 <> _col1)}
                       Statistics: Num rows: 1 Data size: 553 Basic stats: 
COMPLETE Column stats: NONE
                       Filter Operator
@@ -1661,38 +1658,6 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: string), _col1 (type: 
string)
-                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join 0 to 1
-                      keys:
-                        0 
-                        1 
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      input vertices:
-                        1 Reducer 12
-                      residual filter predicates: {(_col2 <> _col1)}
-                      Statistics: Num rows: 1 Data size: 553 Basic stats: 
COMPLETE Column stats: NONE
-                      Filter Operator
-                        predicate: _col3 is null (type: boolean)
-                        Statistics: Num rows: 1 Data size: 553 Basic stats: 
COMPLETE Column stats: NONE
-                        Select Operator
-                          expressions: _col0 (type: string), _col1 (type: 
string)
-                          outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 1 Data size: 553 Basic stats: 
COMPLETE Column stats: NONE
-                          Group By Operator
-                            keys: _col1 (type: string), _col0 (type: string)
-                            mode: hash
-                            outputColumnNames: _col0, _col1
-                            Statistics: Num rows: 1 Data size: 553 Basic 
stats: COMPLETE Column stats: NONE
-                            Reduce Output Operator
-                              key expressions: _col0 (type: string), _col1 
(type: string)
-                              sort order: ++
-                              Map-reduce partition columns: _col0 (type: 
string)
-                              Statistics: Num rows: 1 Data size: 553 Basic 
stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (key > '9') (type: boolean)
                     Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
@@ -1704,6 +1669,39 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 368 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: string), _col1 (type: 
string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: src_null_n4
+                  filterExpr: value is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: value (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
@@ -1716,7 +1714,7 @@ STAGE PLANS:
                         1 
                       outputColumnNames: _col0, _col1, _col2, _col3
                       input vertices:
-                        1 Reducer 13
+                        1 Reducer 5
                       residual filter predicates: {(_col2 <> _col1)}
                       Statistics: Num rows: 1 Data size: 553 Basic stats: 
COMPLETE Column stats: NONE
                       Filter Operator
@@ -1736,87 +1734,61 @@ STAGE PLANS:
                               sort order: ++
                               Map-reduce partition columns: _col0 (type: 
string)
                               Statistics: Num rows: 1 Data size: 553 Basic 
stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              key expressions: _col0 (type: string), _col1 
(type: string)
+                              sort order: ++
+                              Map-reduce partition columns: _col0 (type: 
string)
+                              Statistics: Num rows: 1 Data size: 553 Basic 
stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: no inputs
-        Map 10 
-            Map Operator Tree:
-                TableScan
-                  alias: src_null_n4
-                  filterExpr: value is not null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: value (type: string)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-        Reducer 11 
+        Reducer 10 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), true (type: boolean)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
-                  Reduce Output Operator
-                    sort order: 
-                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: string), _col1 (type: 
boolean)
-        Reducer 12 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string), true (type: boolean)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
-                  Reduce Output Operator
-                    sort order: 
-                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: string), _col1 (type: 
boolean)
-        Reducer 13 
+                  expressions: _col0 (type: string), _col2 (type: bigint)
+                  outputColumnNames: _col0, _col2
+                  Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: _col2 is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col2 (type: bigint)
+                      outputColumnNames: _col1, _col2
+                      Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col1 (type: string), _col2 (type: bigint)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: 
bigint)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), 
_col1 (type: bigint)
+                          Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 11 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                keys: KEY._col0 (type: string)
+                keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
                 mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string), true (type: boolean)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  expressions: _col1 (type: bigint), _col0 (type: string), 
true (type: boolean)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
                   Reduce Output Operator
-                    sort order: 
-                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: string), _col1 (type: 
boolean)
+                    key expressions: _col1 (type: string), _col0 (type: bigint)
+                    sort order: ++
+                    Map-reduce partition columns: _col1 (type: string), _col0 
(type: bigint)
+                    Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: boolean)
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
@@ -1838,7 +1810,7 @@ STAGE PLANS:
                       1 _col0 (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col4, _col5
                     input vertices:
-                      1 Reducer 4
+                      1 Reducer 8
                     Statistics: Num rows: 1 Data size: 608 Basic stats: 
COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
@@ -1848,7 +1820,7 @@ STAGE PLANS:
                         1 _col1 (type: string), _col0 (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col4, _col5, 
_col8
                       input vertices:
-                        1 Reducer 8
+                        1 Reducer 11
                       Statistics: Num rows: 1 Data size: 668 Basic stats: 
COMPLETE Column stats: NONE
                       Filter Operator
                         predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN 
(_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col2 
is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: 
boolean)
@@ -1864,63 +1836,42 @@ STAGE PLANS:
                                 input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                                 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 
-                  1 
-                outputColumnNames: _col0, _col1, _col2
-                residual filter predicates: {(_col1 <> _col2)}
-                Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col2 (type: string)
-                  outputColumnNames: _col0, _col2
-                  Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col2 (type: string), _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: 
string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col2 (type: bigint)
         Reducer 4 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                keys: KEY._col0 (type: string)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string), _col2 (type: bigint)
-                  outputColumnNames: _col1, _col2
-                  Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
-                  Group By Operator
-                    aggregations: count(), count(_col2)
-                    keys: _col1 (type: string)
-                    mode: complete
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: bigint), _col2 (type: 
bigint)
+                  expressions: _col0 (type: string), true (type: boolean)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: string), _col1 (type: 
boolean)
         Reducer 5 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), true (type: boolean)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: string), _col1 (type: 
boolean)
+        Reducer 7 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
                 keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
@@ -1934,39 +1885,34 @@ STAGE PLANS:
                     mode: complete
                     outputColumnNames: _col0
                     Statistics: Num rows: 1 Data size: 553 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 553 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: string)
-        Reducer 6 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 
-                  1 
-                outputColumnNames: _col0, _col1, _col2
-                residual filter predicates: {(_col1 <> _col2)}
-                Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col2 (type: string)
-                  outputColumnNames: _col0, _col2
-                  Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col2 (type: string), _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: 
string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 
+                        1 
+                      outputColumnNames: _col0, _col1, _col2
+                      input vertices:
+                        0 Map 1
+                      residual filter predicates: {(_col1 <> _col2)}
                       Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col2 (type: bigint)
-        Reducer 7 
+                      Select Operator
+                        expressions: _col0 (type: string), _col2 (type: string)
+                        outputColumnNames: _col0, _col2
+                        Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: count()
+                          keys: _col2 (type: string), _col0 (type: string)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: string), _col1 
(type: string)
+                            sort order: ++
+                            Map-reduce partition columns: _col0 (type: string)
+                            Statistics: Num rows: 1 Data size: 922 Basic 
stats: COMPLETE Column stats: NONE
+                            value expressions: _col2 (type: bigint)
+        Reducer 8 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -1977,45 +1923,22 @@ STAGE PLANS:
                 Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: string), _col2 (type: bigint)
-                  outputColumnNames: _col0, _col2
+                  outputColumnNames: _col1, _col2
                   Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: _col2 is not null (type: boolean)
+                  Group By Operator
+                    aggregations: count(), count(_col2)
+                    keys: _col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: _col0 (type: string), _col2 (type: bigint)
-                      outputColumnNames: _col1, _col2
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col1 (type: string), _col2 (type: bigint)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: 
bigint)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: string), 
_col1 (type: bigint)
-                          Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 8 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: bigint), _col0 (type: string), 
true (type: boolean)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE 
Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: string), _col0 (type: bigint)
-                    sort order: ++
-                    Map-reduce partition columns: _col1 (type: string), _col0 
(type: bigint)
-                    Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
-                    value expressions: _col2 (type: boolean)
+                      value expressions: _col1 (type: bigint), _col2 (type: 
bigint)
         Reducer 9 
-            Execution mode: vectorized, llap
+            Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: string), KEY._col1 (type: string)
@@ -2031,10 +1954,33 @@ STAGE PLANS:
                     mode: complete
                     outputColumnNames: _col0
                     Statistics: Num rows: 1 Data size: 553 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 553 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: string)
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 
+                        1 
+                      outputColumnNames: _col0, _col1, _col2
+                      input vertices:
+                        0 Map 1
+                      residual filter predicates: {(_col1 <> _col2)}
+                      Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col2 (type: string)
+                        outputColumnNames: _col0, _col2
+                        Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: count()
+                          keys: _col2 (type: string), _col0 (type: string)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 922 Basic stats: 
COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: string), _col1 
(type: string)
+                            sort order: ++
+                            Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string)
+                            Statistics: Num rows: 1 Data size: 922 Basic 
stats: COMPLETE Column stats: NONE
+                            value expressions: _col2 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator
@@ -2043,10 +1989,9 @@ STAGE PLANS:
         ListSink
 
 Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[134][bigTable=?] in task 'Map 1' is a cross product
-Warning: Shuffle Join MERGEJOIN[133][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 3' is a cross product
-Warning: Shuffle Join MERGEJOIN[135][tables = [$hdt$_2, $hdt$_3]] in Stage 
'Reducer 6' is a cross product
+Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product
+Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross 
product
+Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross 
product
 PREHOOK: query: select key, value, count(*)
 from src_null_n4 b
 where NOT EXISTS (select key from src_null_n4 where src_null_n4.value <> 
b.value)

http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out 
b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
index 2c38d8c..98b2013 100644
--- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
@@ -424,7 +424,7 @@ POSTHOOK: type: ANALYZE_TABLE
 POSTHOOK: Input: default@l3_monthly_dw_dimplan
 POSTHOOK: Output: default@l3_monthly_dw_dimplan
 #### A masked pattern was here ####
-Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: EXPLAIN EXTENDED
 SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS 
PROJECT_KEY
 FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
@@ -478,8 +478,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 
(BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 
(BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -497,13 +497,59 @@ STAGE PLANS:
                       expressions: plan_detail_object_id (type: bigint)
                       outputColumnNames: _col0
                       Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        null sort order: 
-                        sort order: 
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        Estimated key counts: Map 3 => 1
+                        keys:
+                          0 
+                          1 
+                        outputColumnNames: _col0, _col2
+                        input vertices:
+                          1 Map 3
+                        Position of Big Table: 0
                         Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        tag: 0
-                        value expressions: _col0 (type: bigint)
-                        auto parallelism: false
+                        Map Join Operator
+                          condition map:
+                               Left Outer Join 0 to 1
+                          Estimated key counts: Map 4 => 90170
+                          keys:
+                            0 _col2 (type: bigint), _col0 (type: bigint)
+                            1 _col1 (type: bigint), _col3 (type: bigint)
+                          outputColumnNames: _col2, _col5
+                          input vertices:
+                            1 Map 4
+                          Position of Big Table: 0
+                          Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Select Operator
+                            expressions: _col2 (type: bigint), _col5 (type: 
bigint)
+                            outputColumnNames: _col0, _col5
+                            Statistics: Num rows: 15 Data size: 240 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            Map Join Operator
+                              condition map:
+                                   Left Outer Join 0 to 1
+                              Estimated key counts: Map 5 => 1
+                              keys:
+                                0 _col0 (type: bigint)
+                                1 _col1 (type: bigint)
+                              outputColumnNames: _col5, _col7
+                              input vertices:
+                                1 Map 5
+                              Position of Big Table: 0
+                              Statistics: Num rows: 15 Data size: 240 Basic 
stats: COMPLETE Column stats: COMPLETE
+                              Select Operator
+                                expressions: _col5 (type: bigint), _col7 
(type: bigint)
+                                outputColumnNames: _col0, _col1
+                                Statistics: Num rows: 15 Data size: 240 Basic 
stats: COMPLETE Column stats: COMPLETE
+                                Reduce Output Operator
+                                  key expressions: _col0 (type: bigint), _col1 
(type: bigint)
+                                  null sort order: aa
+                                  sort order: ++
+                                  Statistics: Num rows: 15 Data size: 240 
Basic stats: COMPLETE Column stats: COMPLETE
+                                  tag: -1
+                                  TopN: 5
+                                  TopN Hash Memory Usage: 0.1
+                                  auto parallelism: false
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Path -> Alias:
@@ -559,7 +605,7 @@ STAGE PLANS:
                   name: 
default.l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1
             Truncated Path -> Alias:
               /l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 [dw]
-        Map 4 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: snap
@@ -631,7 +677,7 @@ STAGE PLANS:
                   name: default.l3_clarity__l3_snap_number_2018022300104
             Truncated Path -> Alias:
               /l3_clarity__l3_snap_number_2018022300104 [snap]
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: s1
@@ -712,7 +758,7 @@ STAGE PLANS:
                   name: default.l3_monthly_dw_dimplan
             Truncated Path -> Alias:
               /l3_monthly_dw_dimplan [s1]
-        Map 6 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: s2
@@ -792,60 +838,6 @@ STAGE PLANS:
             Truncated Path -> Alias:
               
/l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1 [s2]
         Reducer 2 
-            Execution mode: llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 
-                  1 
-                outputColumnNames: _col0, _col2
-                Position of Big Table: 0
-                Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Map Join Operator
-                  condition map:
-                       Left Outer Join 0 to 1
-                  Estimated key counts: Map 5 => 90170
-                  keys:
-                    0 _col2 (type: bigint), _col0 (type: bigint)
-                    1 _col1 (type: bigint), _col3 (type: bigint)
-                  outputColumnNames: _col2, _col5
-                  input vertices:
-                    1 Map 5
-                  Position of Big Table: 0
-                  Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: _col2 (type: bigint), _col5 (type: bigint)
-                    outputColumnNames: _col0, _col5
-                    Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join 0 to 1
-                      Estimated key counts: Map 6 => 1
-                      keys:
-                        0 _col0 (type: bigint)
-                        1 _col1 (type: bigint)
-                      outputColumnNames: _col5, _col7
-                      input vertices:
-                        1 Map 6
-                      Position of Big Table: 0
-                      Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Select Operator
-                        expressions: _col5 (type: bigint), _col7 (type: bigint)
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: bigint), _col1 (type: 
bigint)
-                          null sort order: aa
-                          sort order: ++
-                          Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                          tag: -1
-                          TopN: 5
-                          TopN Hash Memory Usage: 0.1
-                          auto parallelism: false
-        Reducer 3 
             Execution mode: vectorized, llap
             Needs Tagging: false
             Reduce Operator Tree:
@@ -889,7 +881,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, 
S2.PROJECT_KEY AS PROJECT_KEY
 FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
 l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1
@@ -931,7 +923,7 @@ POSTHOOK: Input: default@l3_monthly_dw_dimplan
 7147200        NULL    27114
 7147200        NULL    27114
 7147200        NULL    27114
-Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: EXPLAIN EXTENDED
 SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS 
PROJECT_KEY
 FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
@@ -985,8 +977,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 
(BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 
(BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1004,13 +996,59 @@ STAGE PLANS:
                       expressions: plan_detail_object_id (type: bigint)
                       outputColumnNames: _col0
                       Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        null sort order: 
-                        sort order: 
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        Estimated key counts: Map 3 => 1
+                        keys:
+                          0 
+                          1 
+                        outputColumnNames: _col0, _col2
+                        input vertices:
+                          1 Map 3
+                        Position of Big Table: 0
                         Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        tag: 0
-                        value expressions: _col0 (type: bigint)
-                        auto parallelism: false
+                        Map Join Operator
+                          condition map:
+                               Left Outer Join 0 to 1
+                          Estimated key counts: Map 4 => 90170
+                          keys:
+                            0 _col2 (type: bigint), _col0 (type: bigint)
+                            1 _col1 (type: bigint), _col3 (type: bigint)
+                          outputColumnNames: _col2, _col5
+                          input vertices:
+                            1 Map 4
+                          Position of Big Table: 0
+                          Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Select Operator
+                            expressions: _col2 (type: bigint), _col5 (type: 
bigint)
+                            outputColumnNames: _col0, _col5
+                            Statistics: Num rows: 15 Data size: 240 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            Map Join Operator
+                              condition map:
+                                   Left Outer Join 0 to 1
+                              Estimated key counts: Map 5 => 1
+                              keys:
+                                0 _col0 (type: bigint)
+                                1 _col1 (type: bigint)
+                              outputColumnNames: _col5, _col7
+                              input vertices:
+                                1 Map 5
+                              Position of Big Table: 0
+                              Statistics: Num rows: 15 Data size: 240 Basic 
stats: COMPLETE Column stats: COMPLETE
+                              Select Operator
+                                expressions: _col5 (type: bigint), _col7 
(type: bigint)
+                                outputColumnNames: _col0, _col1
+                                Statistics: Num rows: 15 Data size: 240 Basic 
stats: COMPLETE Column stats: COMPLETE
+                                Reduce Output Operator
+                                  key expressions: _col0 (type: bigint), _col1 
(type: bigint)
+                                  null sort order: aa
+                                  sort order: ++
+                                  Statistics: Num rows: 15 Data size: 240 
Basic stats: COMPLETE Column stats: COMPLETE
+                                  tag: -1
+                                  TopN: 5
+                                  TopN Hash Memory Usage: 0.1
+                                  auto parallelism: false
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Path -> Alias:
@@ -1066,7 +1104,7 @@ STAGE PLANS:
                   name: 
default.l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1
             Truncated Path -> Alias:
               /l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 [dw]
-        Map 4 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: snap
@@ -1138,7 +1176,7 @@ STAGE PLANS:
                   name: default.l3_clarity__l3_snap_number_2018022300104
             Truncated Path -> Alias:
               /l3_clarity__l3_snap_number_2018022300104 [snap]
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: s1
@@ -1220,7 +1258,7 @@ STAGE PLANS:
                   name: default.l3_monthly_dw_dimplan
             Truncated Path -> Alias:
               /l3_monthly_dw_dimplan [s1]
-        Map 6 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: s2
@@ -1300,60 +1338,6 @@ STAGE PLANS:
             Truncated Path -> Alias:
               
/l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1 [s2]
         Reducer 2 
-            Execution mode: llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 
-                  1 
-                outputColumnNames: _col0, _col2
-                Position of Big Table: 0
-                Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Map Join Operator
-                  condition map:
-                       Left Outer Join 0 to 1
-                  Estimated key counts: Map 5 => 90170
-                  keys:
-                    0 _col2 (type: bigint), _col0 (type: bigint)
-                    1 _col1 (type: bigint), _col3 (type: bigint)
-                  outputColumnNames: _col2, _col5
-                  input vertices:
-                    1 Map 5
-                  Position of Big Table: 0
-                  Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: _col2 (type: bigint), _col5 (type: bigint)
-                    outputColumnNames: _col0, _col5
-                    Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join 0 to 1
-                      Estimated key counts: Map 6 => 1
-                      keys:
-                        0 _col0 (type: bigint)
-                        1 _col1 (type: bigint)
-                      outputColumnNames: _col5, _col7
-                      input vertices:
-                        1 Map 6
-                      Position of Big Table: 0
-                      Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Select Operator
-                        expressions: _col5 (type: bigint), _col7 (type: bigint)
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: bigint), _col1 (type: 
bigint)
-                          null sort order: aa
-                          sort order: ++
-                          Statistics: Num rows: 15 Data size: 240 Basic stats: 
COMPLETE Column stats: COMPLETE
-                          tag: -1
-                          TopN: 5
-                          TopN Hash Memory Usage: 0.1
-                          auto parallelism: false
-        Reducer 3 
             Execution mode: vectorized, llap
             Needs Tagging: false
             Reduce Operator Tree:
@@ -1397,7 +1381,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, 
S2.PROJECT_KEY AS PROJECT_KEY
 FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
 l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1

http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out 
b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
index d5ea64f..4e1698d 100644
--- a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
@@ -642,7 +642,7 @@ b   str
 two    line1
 four   line2
 six    line3
-Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, 
$hdt$_0]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product
 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 INSERT INTO TABLE orc_create_complex_n0
 SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join 
src src1 cross join orc_create_staging_n0 spam1 cross join 
orc_create_staging_n0 spam2
@@ -667,7 +667,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Map 4 
(XPROD_EDGE), Map 5 (XPROD_EDGE)
+        Map 4 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 
(BROADCAST_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -712,7 +712,7 @@ STAGE PLANS:
                     dataColumns: str:string, mp:map<string,string>, 
lst:array<string>, strct:struct<a:string,b:string>
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
-        Map 3 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: spam2
@@ -751,7 +751,7 @@ STAGE PLANS:
                     dataColumns: str:string, mp:map<string,string>, 
lst:array<string>, strct:struct<a:string,b:string>
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
-        Map 4 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: spam1
@@ -790,7 +790,7 @@ STAGE PLANS:
                     dataColumns: str:string, mp:map<string,string>, 
lst:array<string>, strct:struct<a:string,b:string>
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src1
@@ -806,16 +806,47 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [0]
                     Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
-                      Reduce Sink Vectorization:
-                          className: VectorReduceSinkEmptyKeyOperator
-                          keyColumnNums: []
-                          native: true
-                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [0]
-                      Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: string)
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                           Inner Join 0 to 2
+                           Inner Join 0 to 3
+                      keys:
+                        0 
+                        1 
+                        2 
+                        3 
+                      Map Join Vectorization:
+                          bigTableValueExpressions: col 0:string
+                          className: VectorMapJoinOperator
+                          native: false
+                          nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS 
true, Optimized Table and Supports Key Types IS true
+                          nativeConditionsNotMet: One MapJoin Condition IS 
false
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col6
+                      input vertices:
+                        0 Map 1
+                        1 Map 2
+                        2 Map 3
+                      Statistics: Num rows: 500 Data size: 1768000 Basic 
stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: 
map<string,string>), _col2 (type: array<string>), _col3 (type: 
struct<a:string,b:string>), _col6 (type: string)
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            projectedOutputColumnNums: [0, 1, 2, 3, 4]
+                        Statistics: Num rows: 500 Data size: 1768000 Basic 
stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
+                          Statistics: Num rows: 500 Data size: 1768000 Basic 
stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                              name: default.orc_create_complex_n0
             Execution mode: vectorized, llap
             Map Vectorization:
                 enabled: true
@@ -823,7 +854,7 @@ STAGE PLANS:
                 inputFormatFeatureSupport: [DECIMAL_64]
                 featureSupportInUse: [DECIMAL_64]
                 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
-                allNative: true
+                allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
                 rowBatchContext:
@@ -831,34 +862,7 @@ STAGE PLANS:
                     includeColumns: [0]
                     dataColumns: key:string, value:string
                     partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Inner Join 0 to 2
-                     Inner Join 0 to 3
-                keys:
-                  0 
-                  1 
-                  2 
-                  3 
-                outputColumnNames: _col0, _col1, _col2, _col3, _col6
-                Statistics: Num rows: 500 Data size: 1768000 Basic stats: 
COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: 
map<string,string>), _col2 (type: array<string>), _col3 (type: 
struct<a:string,b:string>), _col6 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 500 Data size: 1768000 Basic stats: 
COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 500 Data size: 1768000 Basic stats: 
COMPLETE Column stats: NONE
-                    table:
-                        input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
-                        serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-                        name: default.orc_create_complex_n0
+                    scratchColumnTypeNames: [string, map<string,string>, 
array<string>, struct<a:string,b:string>]
 
   Stage: Stage-2
     Dependency Collection
@@ -877,7 +881,7 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 
-Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, 
$hdt$_0]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product
 PREHOOK: query: INSERT INTO TABLE orc_create_complex_n0
 SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join 
src src1 cross join orc_create_staging_n0 spam1 cross join 
orc_create_staging_n0 spam2
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out 
b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
index 10abe77..6443678 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: explain vectorization expression
 select *
 from src
@@ -26,10 +26,10 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE), Reducer 6 
(BROADCAST_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
-        Reducer 6 <- Map 4 (SIMPLE_EDGE)
+        Map 1 <- Reducer 4 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -47,14 +47,58 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
-                      Reduce Sink Vectorization:
-                          className: VectorReduceSinkEmptyKeyOperator
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 
+                        1 
+                      Map Join Vectorization:
+                          className: VectorMapJoinInnerMultiKeyOperator
                           native: true
-                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: string), _col1 (type: 
string)
+                          nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      input vertices:
+                        1 Reducer 4
+                      Statistics: Num rows: 500 Data size: 97000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Left Outer Join 0 to 1
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col0 (type: string)
+                        Map Join Vectorization:
+                            className: VectorMapJoinOuterStringOperator
+                            native: true
+                            nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized 
Table and Supports Key Types IS true
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col5
+                        input vertices:
+                          1 Reducer 5
+                        Statistics: Num rows: 500 Data size: 98584 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Filter Operator
+                          Filter Vectorization:
+                              className: VectorFilterOperator
+                              native: true
+                              predicateExpression: FilterExprOrExpr(children: 
FilterLongColEqualLongScalar(col 3:bigint, val 0), FilterExprAndExpr(children: 
SelectColumnIsNull(col 5:boolean), SelectColumnIsNotNull(col 0:string), 
FilterLongColGreaterEqualLongColumn(col 4:bigint, col 3:bigint)))
+                          predicate: ((_col2 = 0L) or (_col5 is null and _col0 
is not null and (_col3 >= _col2))) (type: boolean)
+                          Statistics: Num rows: 500 Data size: 98584 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          Select Operator
+                            expressions: _col0 (type: string), _col1 (type: 
string)
+                            outputColumnNames: _col0, _col1
+                            Select Vectorization:
+                                className: VectorSelectOperator
+                                native: true
+                                projectedOutputColumnNums: [0, 1]
+                            Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            Reduce Output Operator
+                              key expressions: _col0 (type: string)
+                              sort order: +
+                              Reduce Sink Vectorization:
+                                  className: VectorReduceSinkObjectHashOperator
+                                  native: true
+                                  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE
+                              value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
@@ -66,7 +110,7 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-        Map 4 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -134,39 +178,6 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 
-                  1 
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 500 Data size: 97000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Map Join Operator
-                  condition map:
-                       Left Outer Join 0 to 1
-                  keys:
-                    0 _col0 (type: string)
-                    1 _col0 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col5
-                  input vertices:
-                    1 Reducer 6
-                  Statistics: Num rows: 500 Data size: 98584 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: ((_col2 = 0L) or (_col5 is null and _col0 is 
not null and (_col3 >= _col2))) (type: boolean)
-                    Statistics: Num rows: 500 Data size: 98584 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: _col0 (type: string), _col1 (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: string)
-        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
@@ -193,7 +204,7 @@ STAGE PLANS:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5 
+        Reducer 4 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
@@ -222,7 +233,7 @@ STAGE PLANS:
                       nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE
                   value expressions: _col0 (type: bigint), _col1 (type: bigint)
-        Reducer 6 
+        Reducer 5 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
@@ -269,7 +280,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: select *
 from src
 where not key in
@@ -298,7 +309,7 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orcsrc
 POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
-Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: select *
 from orcsrc
 where not key in
@@ -315,7 +326,7 @@ order by key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orcsrc
 #### A masked pattern was here ####
-Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: select *
 from orcsrc
 where not key in

[2/2] hive git commit: HIVE-18201 : Disable XPROD_EDGE for sq_count_check() created for scalar subqueries (Ashutosh Chauhan via Jesus Camacho Rodriguez)

Reply via email to