Author: daijy
Date: Wed May 27 19:47:11 2015
New Revision: 1682106
URL: http://svn.apache.org/r1682106
Log:
PIG-4377: Skewed outer join produces wrong result if a key is oversampled
(PIG-4377-4.patch)
Modified:
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
Modified:
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
---
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
(original)
+++
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
Wed May 27 19:47:11 2015
@@ -1680,7 +1680,9 @@ public class TezCompiler extends PhyPlan
TezCompilerUtil.connect(tezPlan, prevOp, sampleJobPair.first);
POValueOutputTez sampleOut = (POValueOutputTez)
sampleJobPair.first.plan.getLeaves().get(0);
- for (int i = 0; i < 2; i++) {
+ for (int i = 0; i <= 2; i++) {
+ // We need to send sample to left relation partitioner vertex, right relation load vertex,
+ // and join vertex (IsFirstReduceOfKey in join vertex need sample file as well)
joinJobs[i].setSampleOperator(sampleJobPair.first);
// Configure broadcast edges for distribution map
@@ -1689,8 +1691,10 @@ public class TezCompiler extends PhyPlan
sampleOut.addOutputKey(joinJobs[i].getOperatorKey().toString());
// Configure skewed partitioner for join
- edge = joinJobs[2].inEdges.get(joinJobs[i].getOperatorKey());
- edge.partitionerClass = SkewedPartitionerTez.class;
+ if (i != 2) {
+ edge =
joinJobs[2].inEdges.get(joinJobs[i].getOperatorKey());
+ edge.partitionerClass = SkewedPartitionerTez.class;
+ }
}
joinJobs[2].markSkewedJoin();
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
Wed May 27 19:47:11 2015
@@ -5,7 +5,7 @@
# TEZ DAG plan: pig-0_scope-0
#--------------------------------------------------
Tez vertex scope-25 -> Tez vertex scope-29,Tez vertex scope-38,Tez
vertex scope-48,
-Tez vertex scope-38 -> Tez vertex scope-29,Tez vertex scope-48,
+Tez vertex scope-38 -> Tez vertex scope-29,Tez vertex scope-48,Tez
vertex scope-52,
Tez vertex scope-48 -> Tez vertex scope-52,
Tez vertex scope-29 -> Tez vertex scope-52,
Tez vertex scope-52
@@ -55,7 +55,7 @@ a: Split - scope-58
|---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
Tez vertex scope-38
# Plan on vertex
-POValueOutputTez - scope-47 -> [scope-29, scope-48]
+POValueOutputTez - scope-47 -> [scope-29, scope-48, scope-52]
|
|---New For Each(false)[tuple] - scope-46
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
Wed May 27 19:47:11 2015
@@ -5,7 +5,7 @@
# TEZ DAG plan: pig-0_scope-0
#--------------------------------------------------
Tez vertex scope-27 -> Tez vertex scope-36,Tez vertex scope-46,
-Tez vertex scope-36 -> Tez vertex scope-28,Tez vertex scope-46,
+Tez vertex scope-36 -> Tez vertex scope-28,Tez vertex scope-46,Tez
vertex scope-50,
Tez vertex scope-46 -> Tez vertex scope-50,
Tez vertex scope-28 -> Tez vertex scope-50,
Tez vertex scope-50
@@ -43,7 +43,7 @@ Local Rearrange[tuple]{tuple}(false) - s
|---a:
Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
Tez vertex scope-36
# Plan on vertex
-POValueOutputTez - scope-45 -> [scope-28, scope-46]
+POValueOutputTez - scope-45 -> [scope-28, scope-46, scope-50]
|
|---New For Each(false)[tuple] - scope-44
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
---
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
(original)
+++
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
Wed May 27 19:47:11 2015
@@ -7,7 +7,7 @@
Tez vertex scope-30 -> Tez vertex scope-34,Tez vertex scope-36,
Tez vertex scope-34 -> Tez vertex scope-36,
Tez vertex scope-36 -> Tez vertex scope-48,Tez vertex scope-58,
-Tez vertex scope-48 -> Tez vertex scope-40,Tez vertex scope-58,
+Tez vertex scope-48 -> Tez vertex scope-40,Tez vertex scope-58,Tez
vertex scope-62,
Tez vertex scope-58 -> Tez vertex scope-62,
Tez vertex scope-40 -> Tez vertex scope-62,
Tez vertex scope-62
@@ -67,7 +67,7 @@ Local Rearrange[tuple]{tuple}(false) - s
|---POShuffledValueInputTez - scope-37 <- [scope-30,
scope-34]
Tez vertex scope-48
# Plan on vertex
-POValueOutputTez - scope-57 -> [scope-40, scope-58]
+POValueOutputTez - scope-57 -> [scope-40, scope-58, scope-62]
|
|---New For Each(false)[tuple] - scope-56
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
Wed May 27 19:47:11 2015
@@ -5,7 +5,7 @@
# TEZ DAG plan: pig-0_scope-0
#--------------------------------------------------
Tez vertex scope-30 -> Tez vertex scope-48,Tez vertex scope-58,
-Tez vertex scope-48 -> Tez vertex scope-40,Tez vertex scope-58,
+Tez vertex scope-48 -> Tez vertex scope-40,Tez vertex scope-58,Tez
vertex scope-62,
Tez vertex scope-58 -> Tez vertex scope-62,
Tez vertex scope-40 -> Tez vertex scope-62,
Tez vertex scope-62
@@ -71,7 +71,7 @@ a: Split - scope-68
|---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0
Tez vertex scope-48
# Plan on vertex
-POValueOutputTez - scope-57 -> [scope-40, scope-58]
+POValueOutputTez - scope-57 -> [scope-40, scope-58, scope-62]
|
|---New For Each(false)[tuple] - scope-56
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
---
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
(original)
+++
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
Wed May 27 19:47:11 2015
@@ -5,7 +5,7 @@
# TEZ DAG plan: pig-0_scope-0
#--------------------------------------------------
Tez vertex scope-30 -> Tez vertex scope-48,Tez vertex scope-58,
-Tez vertex scope-48 -> Tez vertex scope-37,Tez vertex scope-58,
+Tez vertex scope-48 -> Tez vertex scope-37,Tez vertex scope-58,Tez
vertex scope-62,
Tez vertex scope-58 -> Tez vertex scope-62,
Tez vertex scope-31 -> Tez vertex scope-35,Tez vertex scope-37,
Tez vertex scope-35 -> Tez vertex scope-37,
@@ -45,7 +45,7 @@ Local Rearrange[tuple]{tuple}(false) - s
|---d:
Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
Tez vertex scope-48
# Plan on vertex
-POValueOutputTez - scope-57 -> [scope-37, scope-58]
+POValueOutputTez - scope-57 -> [scope-37, scope-58, scope-62]
|
|---New For Each(false)[tuple] - scope-56
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
Wed May 27 19:47:11 2015
@@ -5,7 +5,7 @@
# TEZ DAG plan: pig-0_scope-0
#--------------------------------------------------
Tez vertex scope-30 -> Tez vertex scope-48,Tez vertex scope-58,
-Tez vertex scope-48 -> Tez vertex scope-31,Tez vertex scope-58,
+Tez vertex scope-48 -> Tez vertex scope-31,Tez vertex scope-58,Tez
vertex scope-62,
Tez vertex scope-58 -> Tez vertex scope-62,
Tez vertex scope-31 -> Tez vertex scope-62,
Tez vertex scope-62
@@ -43,7 +43,7 @@ Local Rearrange[tuple]{tuple}(false) - s
|---d:
Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
Tez vertex scope-48
# Plan on vertex
-POValueOutputTez - scope-57 -> [scope-31, scope-58]
+POValueOutputTez - scope-57 -> [scope-31, scope-58, scope-62]
|
|---New For Each(false)[tuple] - scope-56
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
---
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
(original)
+++
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
Wed May 27 19:47:11 2015
@@ -7,7 +7,7 @@
Tez vertex scope-114 -> Tez vertex scope-116,
Tez vertex scope-115 -> Tez vertex scope-116,
Tez vertex scope-116 -> Tez vertex scope-128,Tez vertex scope-138,
-Tez vertex scope-128 -> Tez vertex scope-120,Tez vertex scope-138,
+Tez vertex scope-128 -> Tez vertex scope-120,Tez vertex scope-138,Tez
vertex scope-142,
Tez vertex scope-138 -> Tez vertex scope-142,
Tez vertex scope-120 -> Tez vertex scope-142,
Tez vertex scope-142
@@ -65,7 +65,7 @@ Local Rearrange[tuple]{tuple}(false) - s
|---POShuffledValueInputTez - scope-117 <- [scope-114,
scope-115]
Tez vertex scope-128
# Plan on vertex
-POValueOutputTez - scope-137 -> [scope-120, scope-138]
+POValueOutputTez - scope-137 -> [scope-120, scope-138, scope-142]
|
|---New For Each(false)[tuple] - scope-136
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
Wed May 27 19:47:11 2015
@@ -7,7 +7,7 @@
Tez vertex scope-29 -> Tez vertex group scope-63,Tez vertex group
scope-64,
Tez vertex scope-30 -> Tez vertex group scope-63,Tez vertex group
scope-64,
Tez vertex group scope-64 -> Tez vertex scope-43,
-Tez vertex scope-43 -> Tez vertex scope-35,Tez vertex scope-53,
+Tez vertex scope-43 -> Tez vertex scope-35,Tez vertex scope-53,Tez
vertex scope-57,
Tez vertex group scope-63 -> Tez vertex scope-53,
Tez vertex scope-53 -> Tez vertex scope-57,
Tez vertex scope-35 -> Tez vertex scope-57,
@@ -79,7 +79,7 @@ Tez vertex group scope-64 <- [scope-29,
# No plan on vertex group
Tez vertex scope-43
# Plan on vertex
-POValueOutputTez - scope-52 -> [scope-35, scope-53]
+POValueOutputTez - scope-52 -> [scope-35, scope-53, scope-57]
|
|---New For Each(false)[tuple] - scope-51
| |