Repository: hive
Updated Branches:
  refs/heads/master c30dcbb4b -> 116d2393f


HIVE-20514: Query with outer join filter is failing with dynamic partition 
join (Vineet Garg, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/116d2393
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/116d2393
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/116d2393

Branch: refs/heads/master
Commit: 116d2393f4dd06eaac7ef905bfdae10ae4e7b2ea
Parents: c30dcbb
Author: Vineet Garg <[email protected]>
Authored: Mon Sep 10 11:18:45 2018 -0700
Committer: Vineet Garg <[email protected]>
Committed: Mon Sep 10 11:18:45 2018 -0700

----------------------------------------------------------------------
 .../hive/ql/optimizer/MapJoinProcessor.java     |  19 +--
 .../clientpositive/tez_dynpart_hashjoin_1.q     |  16 +++
 .../llap/tez_dynpart_hashjoin_1.q.out           | 140 +++++++++++++++++++
 .../llap/tez_dynpart_hashjoin_3.q.out           |   4 +-
 4 files changed, 169 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/116d2393/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
index bae80f3..019372b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
@@ -1158,15 +1158,18 @@ public class MapJoinProcessor extends Transform {
     }
 
     Map<Byte, List<ExprNodeDesc>> filters = desc.getFilters();
-    Map<Byte, List<ExprNodeDesc>> newFilters = new HashMap<Byte, List<ExprNodeDesc>>();
-    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : filters.entrySet()) {
-      byte srcTag = entry.getKey();
-      List<ExprNodeDesc> filter = entry.getValue();
-
-      Operator<?> terminal = oldReduceSinkParentOps.get(srcTag);
-      newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal));
+    if(adjustParentsChildren) {
+      // backtrack and update filter expressions only if RS is to be removed
+      Map<Byte, List<ExprNodeDesc>> newFilters = new HashMap<Byte, List<ExprNodeDesc>>();
+      for (Map.Entry<Byte, List<ExprNodeDesc>> entry : filters.entrySet()) {
+        byte srcTag = entry.getKey();
+        List<ExprNodeDesc> filter = entry.getValue();
+
+        Operator<?> terminal = oldReduceSinkParentOps.get(srcTag);
+        newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal));
+      }
+      desc.setFilters(filters = newFilters);
     }
-    desc.setFilters(filters = newFilters);
 
     // create dumpfile prefix needed to create descriptor
     String dumpFilePrefix = "";

http://git-wip-us.apache.org/repos/asf/hive/blob/116d2393/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q
index ea3dfce..47c0038 100644
--- a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q
+++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q
@@ -60,6 +60,22 @@ set hive.auto.convert.join.noconditionaltask.size=20000;
 set hive.exec.reducers.bytes.per.reducer=20000;
 set hive.stats.fetch.column.stats=false;
 -- Try with dynamically partitioned hashjoin
+
+-- hashjoin with filter
+explain select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint;
+
+select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint;
+
 explain
 select
   *

http://git-wip-us.apache.org/repos/asf/hive/blob/116d2393/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out 
b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
index cfa87a7..d204b47 100644
--- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
@@ -415,6 +415,146 @@ NULL      6
 -8915  1
 -3799  1
 10782  1
+PREHOOK: query: explain select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and 
a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and 
a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  filterExpr: cint BETWEEN 1000000 AND 3000000 (type: boolean)
+                  Statistics: Num rows: ###Masked### Data size: ###Masked### 
Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
+                    Statistics: Num rows: ###Masked### Data size: ###Masked### 
Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), 
cdouble (type: double), cstring1 (type: string), cstring2 (type: string), 
ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: 
boolean), cboolean2 (type: boolean)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11
+                      Statistics: Num rows: ###Masked### Data size: 
###Masked### Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col2 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: int)
+                        Statistics: Num rows: ###Masked### Data size: 
###Masked### Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), 
_col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 
(type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: cint BETWEEN 1000000 AND 3000000 (type: boolean)
+                  Statistics: Num rows: ###Masked### Data size: ###Masked### 
Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
+                    Statistics: Num rows: ###Masked### Data size: ###Masked### 
Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), 
cdouble (type: double), cstring1 (type: string), cstring2 (type: string), 
ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: 
boolean), cboolean2 (type: boolean)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11
+                      Statistics: Num rows: ###Masked### Data size: 
###Masked### Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col2 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: int)
+                        Statistics: Num rows: ###Masked### Data size: 
###Masked### Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), 
_col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 
(type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Map Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                filter predicates:
+                  0 {(UDFToInteger(VALUE._col1) <> KEY.reducesinkkey0)}
+                  1 
+                keys:
+                  0 KEY.reducesinkkey0 (type: int)
+                  1 KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
+                input vertices:
+                  1 Map 4
+                Statistics: Num rows: ###Masked### Data size: ###Masked### 
Basic stats: COMPLETE Column stats: NONE
+                HybridGraceHashJoin: true
+                Reduce Output Operator
+                  key expressions: _col2 (type: int)
+                  sort order: +
+                  Statistics: Num rows: ###Masked### Data size: ###Masked### 
Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), 
_col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 
(type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 
(type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: 
bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), 
_col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), 
_col22 (type: boolean), _col23 (type: boolean)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: 
smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), 
VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: 
string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 
(type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), 
VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 
(type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), 
VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: 
string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), 
VALUE._col21 (type: boolean), VALUE._col22 (type: boolean)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
+                Statistics: Num rows: ###Masked### Data size: ###Masked### 
Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: ###Masked### Data size: ###Masked### 
Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and 
a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and 
a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+11     NULL    1000828 1531084669      11.0    NULL    wM316f6NqGIkoP388j3F6   
poWQQo3Upvt3Wh  1969-12-31 16:00:02.351 NULL    false   true    NULL    NULL    
NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+NULL   -3799   1248059 1864027286      NULL    -3799.0 Uhps6mMh3IfHB3j7yH62K   
4KWs6gw7lv2WYd66P       NULL    1969-12-31 15:59:54.622 false   true    NULL    
-3799   1248059 1864027286      NULL    -3799.0 Uhps6mMh3IfHB3j7yH62K   
4KWs6gw7lv2WYd66P       NULL    1969-12-31 15:59:54.622 false   true
+NULL   10782   1286921 1864027286      NULL    10782.0 ODLrXI8882q8LS8 
4KWs6gw7lv2WYd66P       NULL    1969-12-31 15:59:52.138 true    true    NULL    
10782   1286921 1864027286      NULL    10782.0 ODLrXI8882q8LS8 
4KWs6gw7lv2WYd66P       NULL    1969-12-31 15:59:52.138 true    true
+NULL   -13036  1288927 -1645852809     NULL    -13036.0        yinBY725P7V2    
xH7445Rals48VOulSyR5F   NULL    1969-12-31 16:00:00.763 true    false   NULL    
-13036  1288927 -1645852809     NULL    -13036.0        yinBY725P7V2    
xH7445Rals48VOulSyR5F   NULL    1969-12-31 16:00:00.763 true    false
+11     NULL    1310786 -413875656      11.0    NULL    W0rvA4H1xn0xMG4uk0      
8yVVjG  1969-12-31 16:00:02.351 NULL    false   true    NULL    NULL    NULL    
NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+-51    NULL    2089466 -240556350      -51.0   NULL    cXX24dH7tblSj46j2g      
C31eea0wrHHqvj  1969-12-31 16:00:08.451 NULL    true    true    NULL    NULL    
NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+NULL   -8915   2101183 1864027286      NULL    -8915.0 x7By66525       
4KWs6gw7lv2WYd66P       NULL    1969-12-31 16:00:05.831 false   true    NULL    
-8915   2101183 1864027286      NULL    -8915.0 x7By66525       
4KWs6gw7lv2WYd66P       NULL    1969-12-31 16:00:05.831 false   true
+8      NULL    2229621 -381406148      8.0     NULL    q7onkS7QRPh5ghOK        
oKb0bi  1969-12-31 16:00:15.892 NULL    true    false   NULL    NULL    NULL    
NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+8      NULL    2433892 -1611863517     8.0     NULL    674ILv3V2TxFqXP6wSbL    
VLprkK2XfX      1969-12-31 16:00:15.892 NULL    false   true    NULL    NULL    
NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+-51    NULL    2949963 -1580871111     -51.0   NULL    0K68k3bdl7jO7   TPPAu   
1969-12-31 16:00:08.451 NULL    true    false   NULL    NULL    NULL    NULL    
NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
 PREHOOK: query: explain
 select
   *

http://git-wip-us.apache.org/repos/asf/hive/blob/116d2393/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out 
b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out
index 2a03d37..990e357 100644
--- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out
@@ -182,13 +182,13 @@ STAGE PLANS:
                     Statistics: Num rows: 1 Data size: 310 Basic stats: 
COMPLETE Column stats: COMPLETE
                     value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), 
_col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 
(type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
         Reducer 3 
-            Execution mode: llap
+            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Map Join Operator
                 condition map:
                      Left Outer Join 0 to 1
                 filter predicates:
-                  0 {(_col2 < 100)}
+                  0 {(KEY.reducesinkkey0 < 100)}
                   1 
                 keys:
                   0 KEY.reducesinkkey0 (type: int)

Reply via email to