Author: prasanthj
Date: Tue Dec 2 18:44:13 2014
New Revision: 1642987
URL: http://svn.apache.org/r1642987
Log:
HIVE-8888: Mapjoin with LateralViewJoin generates wrong plan in Tez (Gunther Hagleitner via Prasanth J)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
hive/trunk/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java?rev=1642987&r1=1642986&r2=1642987&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java Tue Dec 2 18:44:13 2014
@@ -111,9 +111,16 @@ public class GenTezWork implements NodeP
// will result into a vertex with multiple FS or RS operators.
if (context.childToWorkMap.containsKey(operator)) {
// if we've seen both root and child, we can bail.
+
+ // clear out the mapjoin set. we don't need it anymore.
+ context.currentMapJoinOperators.clear();
+
+ // clear out the union set. we don't need it anymore.
+ context.currentUnionOperators.clear();
+
return null;
} else {
- // At this point we don't have to do anything special in this case. Just
+ // At this point we don't have to do anything special. Just
// run through the regular paces w/o creating a new task.
work = context.rootToWorkMap.get(root);
}
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out?rev=1642987&r1=1642986&r2=1642987&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out Tue Dec 2 18:44:13 2014
@@ -110,59 +110,12 @@ STAGE PLANS:
Stage: Stage-1
Tez
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
+ Map 1 <- Map 2 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: expod2
- Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: bid is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 26 Basic stats:
COMPLETE Column stats: NONE
- Lateral View Forward
- Statistics: Num rows: 2 Data size: 26 Basic stats:
COMPLETE Column stats: NONE
- Select Operator
- expressions: bid (type: int)
- outputColumnNames: bid
- Statistics: Num rows: 2 Data size: 26 Basic stats:
COMPLETE Column stats: NONE
- Lateral View Join Operator
- outputColumnNames: _col0, _col5
- Statistics: Num rows: 4 Data size: 52 Basic stats:
COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col5 (type:
string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 52 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 52 Basic
stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Select Operator
- expressions: bv (type: array<string>)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 26 Basic stats:
COMPLETE Column stats: NONE
- UDTF Operator
- Statistics: Num rows: 2 Data size: 26 Basic stats:
COMPLETE Column stats: NONE
- function name: explode
- Lateral View Join Operator
- outputColumnNames: _col0, _col5
- Statistics: Num rows: 4 Data size: 52 Basic stats:
COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col5 (type:
string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 52 Basic
stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 52 Basic
stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Map 2
- Map Operator Tree:
- TableScan
alias: expod1
Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE
Column stats: NONE
Filter Operator
@@ -192,7 +145,7 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Map 1
+ 1 Map 2
Statistics: Num rows: 4 Data size: 57 Basic
stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (_col0 = _col2) (type: boolean)
@@ -233,7 +186,7 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Map 1
+ 1 Map 2
Statistics: Num rows: 4 Data size: 57 Basic
stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (_col0 = _col2) (type: boolean)
@@ -249,6 +202,53 @@ STAGE PLANS:
input format:
org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: expod2
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE
Column stats: NONE
+ Filter Operator
+ predicate: bid is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 26 Basic stats:
COMPLETE Column stats: NONE
+ Lateral View Forward
+ Statistics: Num rows: 2 Data size: 26 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: bid (type: int)
+ outputColumnNames: bid
+ Statistics: Num rows: 2 Data size: 26 Basic stats:
COMPLETE Column stats: NONE
+ Lateral View Join Operator
+ outputColumnNames: _col0, _col5
+ Statistics: Num rows: 4 Data size: 52 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col5 (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 52 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 4 Data size: 52 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Select Operator
+ expressions: bv (type: array<string>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 26 Basic stats:
COMPLETE Column stats: NONE
+ UDTF Operator
+ Statistics: Num rows: 2 Data size: 26 Basic stats:
COMPLETE Column stats: NONE
+ function name: explode
+ Lateral View Join Operator
+ outputColumnNames: _col0, _col5
+ Statistics: Num rows: 4 Data size: 52 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col5 (type:
string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 52 Basic
stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 4 Data size: 52 Basic
stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Stage: Stage-0
Fetch Operator