Author: daijy
Date: Wed May 27 19:47:11 2015
New Revision: 1682106

URL: http://svn.apache.org/r1682106
Log:
PIG-4377: Skewed outer join produces wrong result if a key is oversampled
(PIG-4377-4.patch)

Modified:
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
    pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
    pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
    pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
    pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
    pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
    pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
    pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
    pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld

Modified: 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
 Wed May 27 19:47:11 2015
@@ -1680,7 +1680,9 @@ public class TezCompiler extends PhyPlan
             TezCompilerUtil.connect(tezPlan, prevOp, sampleJobPair.first);
 
             POValueOutputTez sampleOut = (POValueOutputTez) sampleJobPair.first.plan.getLeaves().get(0);
-            for (int i = 0; i < 2; i++) {
+            for (int i = 0; i <= 2; i++) {
+                // We need to send sample to left relation partitioner vertex, right relation load vertex,
+                // and join vertex (IsFirstReduceOfKey in join vertex need sample file as well)
                 joinJobs[i].setSampleOperator(sampleJobPair.first);
 
                 // Configure broadcast edges for distribution map
@@ -1689,8 +1691,10 @@ public class TezCompiler extends PhyPlan
                 sampleOut.addOutputKey(joinJobs[i].getOperatorKey().toString());
 
                 // Configure skewed partitioner for join
-                edge = joinJobs[2].inEdges.get(joinJobs[i].getOperatorKey());
-                edge.partitionerClass = SkewedPartitionerTez.class;
+                if (i != 2) {
+                    edge = joinJobs[2].inEdges.get(joinJobs[i].getOperatorKey());
+                    edge.partitionerClass = SkewedPartitionerTez.class;
+                }
             }
 
             joinJobs[2].markSkewedJoin();

Modified: 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld 
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld 
Wed May 27 19:47:11 2015
@@ -5,7 +5,7 @@
 # TEZ DAG plan: pig-0_scope-0
 #--------------------------------------------------
 Tez vertex scope-25    ->      Tez vertex scope-29,Tez vertex scope-38,Tez 
vertex scope-48,
-Tez vertex scope-38    ->      Tez vertex scope-29,Tez vertex scope-48,
+Tez vertex scope-38    ->      Tez vertex scope-29,Tez vertex scope-48,Tez 
vertex scope-52,
 Tez vertex scope-48    ->      Tez vertex scope-52,
 Tez vertex scope-29    ->      Tez vertex scope-52,
 Tez vertex scope-52
@@ -55,7 +55,7 @@ a: Split - scope-58
     |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
 Tez vertex scope-38
 # Plan on vertex
-POValueOutputTez - scope-47    ->       [scope-29, scope-48]
+POValueOutputTez - scope-47    ->       [scope-29, scope-48, scope-52]
 |
 |---New For Each(false)[tuple] - scope-46
     |   |

Modified: 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld 
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld 
Wed May 27 19:47:11 2015
@@ -5,7 +5,7 @@
 # TEZ DAG plan: pig-0_scope-0
 #--------------------------------------------------
 Tez vertex scope-27    ->      Tez vertex scope-36,Tez vertex scope-46,
-Tez vertex scope-36    ->      Tez vertex scope-28,Tez vertex scope-46,
+Tez vertex scope-36    ->      Tez vertex scope-28,Tez vertex scope-46,Tez 
vertex scope-50,
 Tez vertex scope-46    ->      Tez vertex scope-50,
 Tez vertex scope-28    ->      Tez vertex scope-50,
 Tez vertex scope-50
@@ -43,7 +43,7 @@ Local Rearrange[tuple]{tuple}(false) - s
                 |---a: 
Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
 Tez vertex scope-36
 # Plan on vertex
-POValueOutputTez - scope-45    ->       [scope-28, scope-46]
+POValueOutputTez - scope-45    ->       [scope-28, scope-46, scope-50]
 |
 |---New For Each(false)[tuple] - scope-44
     |   |

Modified: 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
 (original)
+++ 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
 Wed May 27 19:47:11 2015
@@ -7,7 +7,7 @@
 Tez vertex scope-30    ->      Tez vertex scope-34,Tez vertex scope-36,
 Tez vertex scope-34    ->      Tez vertex scope-36,
 Tez vertex scope-36    ->      Tez vertex scope-48,Tez vertex scope-58,
-Tez vertex scope-48    ->      Tez vertex scope-40,Tez vertex scope-58,
+Tez vertex scope-48    ->      Tez vertex scope-40,Tez vertex scope-58,Tez 
vertex scope-62,
 Tez vertex scope-58    ->      Tez vertex scope-62,
 Tez vertex scope-40    ->      Tez vertex scope-62,
 Tez vertex scope-62
@@ -67,7 +67,7 @@ Local Rearrange[tuple]{tuple}(false) - s
             |---POShuffledValueInputTez - scope-37     <-       [scope-30, 
scope-34]
 Tez vertex scope-48
 # Plan on vertex
-POValueOutputTez - scope-57    ->       [scope-40, scope-58]
+POValueOutputTez - scope-57    ->       [scope-40, scope-58, scope-62]
 |
 |---New For Each(false)[tuple] - scope-56
     |   |

Modified: 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld 
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld 
Wed May 27 19:47:11 2015
@@ -5,7 +5,7 @@
 # TEZ DAG plan: pig-0_scope-0
 #--------------------------------------------------
 Tez vertex scope-30    ->      Tez vertex scope-48,Tez vertex scope-58,
-Tez vertex scope-48    ->      Tez vertex scope-40,Tez vertex scope-58,
+Tez vertex scope-48    ->      Tez vertex scope-40,Tez vertex scope-58,Tez 
vertex scope-62,
 Tez vertex scope-58    ->      Tez vertex scope-62,
 Tez vertex scope-40    ->      Tez vertex scope-62,
 Tez vertex scope-62
@@ -71,7 +71,7 @@ a: Split - scope-68
     |---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0
 Tez vertex scope-48
 # Plan on vertex
-POValueOutputTez - scope-57    ->       [scope-40, scope-58]
+POValueOutputTez - scope-57    ->       [scope-40, scope-58, scope-62]
 |
 |---New For Each(false)[tuple] - scope-56
     |   |

Modified: 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
 (original)
+++ 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
 Wed May 27 19:47:11 2015
@@ -5,7 +5,7 @@
 # TEZ DAG plan: pig-0_scope-0
 #--------------------------------------------------
 Tez vertex scope-30    ->      Tez vertex scope-48,Tez vertex scope-58,
-Tez vertex scope-48    ->      Tez vertex scope-37,Tez vertex scope-58,
+Tez vertex scope-48    ->      Tez vertex scope-37,Tez vertex scope-58,Tez 
vertex scope-62,
 Tez vertex scope-58    ->      Tez vertex scope-62,
 Tez vertex scope-31    ->      Tez vertex scope-35,Tez vertex scope-37,
 Tez vertex scope-35    ->      Tez vertex scope-37,
@@ -45,7 +45,7 @@ Local Rearrange[tuple]{tuple}(false) - s
                 |---d: 
Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
 Tez vertex scope-48
 # Plan on vertex
-POValueOutputTez - scope-57    ->       [scope-37, scope-58]
+POValueOutputTez - scope-57    ->       [scope-37, scope-58, scope-62]
 |
 |---New For Each(false)[tuple] - scope-56
     |   |

Modified: 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld 
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld 
Wed May 27 19:47:11 2015
@@ -5,7 +5,7 @@
 # TEZ DAG plan: pig-0_scope-0
 #--------------------------------------------------
 Tez vertex scope-30    ->      Tez vertex scope-48,Tez vertex scope-58,
-Tez vertex scope-48    ->      Tez vertex scope-31,Tez vertex scope-58,
+Tez vertex scope-48    ->      Tez vertex scope-31,Tez vertex scope-58,Tez 
vertex scope-62,
 Tez vertex scope-58    ->      Tez vertex scope-62,
 Tez vertex scope-31    ->      Tez vertex scope-62,
 Tez vertex scope-62
@@ -43,7 +43,7 @@ Local Rearrange[tuple]{tuple}(false) - s
                 |---d: 
Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
 Tez vertex scope-48
 # Plan on vertex
-POValueOutputTez - scope-57    ->       [scope-31, scope-58]
+POValueOutputTez - scope-57    ->       [scope-31, scope-58, scope-62]
 |
 |---New For Each(false)[tuple] - scope-56
     |   |

Modified: 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld 
(original)
+++ 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld 
Wed May 27 19:47:11 2015
@@ -7,7 +7,7 @@
 Tez vertex scope-114   ->      Tez vertex scope-116,
 Tez vertex scope-115   ->      Tez vertex scope-116,
 Tez vertex scope-116   ->      Tez vertex scope-128,Tez vertex scope-138,
-Tez vertex scope-128   ->      Tez vertex scope-120,Tez vertex scope-138,
+Tez vertex scope-128   ->      Tez vertex scope-120,Tez vertex scope-138,Tez 
vertex scope-142,
 Tez vertex scope-138   ->      Tez vertex scope-142,
 Tez vertex scope-120   ->      Tez vertex scope-142,
 Tez vertex scope-142
@@ -65,7 +65,7 @@ Local Rearrange[tuple]{tuple}(false) - s
             |---POShuffledValueInputTez - scope-117    <-       [scope-114, 
scope-115]
 Tez vertex scope-128
 # Plan on vertex
-POValueOutputTez - scope-137   ->       [scope-120, scope-138]
+POValueOutputTez - scope-137   ->       [scope-120, scope-138, scope-142]
 |
 |---New For Each(false)[tuple] - scope-136
     |   |

Modified: 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld?rev=1682106&r1=1682105&r2=1682106&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld 
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld 
Wed May 27 19:47:11 2015
@@ -7,7 +7,7 @@
 Tez vertex scope-29    ->      Tez vertex group scope-63,Tez vertex group 
scope-64,
 Tez vertex scope-30    ->      Tez vertex group scope-63,Tez vertex group 
scope-64,
 Tez vertex group scope-64      ->      Tez vertex scope-43,
-Tez vertex scope-43    ->      Tez vertex scope-35,Tez vertex scope-53,
+Tez vertex scope-43    ->      Tez vertex scope-35,Tez vertex scope-53,Tez 
vertex scope-57,
 Tez vertex group scope-63      ->      Tez vertex scope-53,
 Tez vertex scope-53    ->      Tez vertex scope-57,
 Tez vertex scope-35    ->      Tez vertex scope-57,
@@ -79,7 +79,7 @@ Tez vertex group scope-64     <-       [scope-29,
 # No plan on vertex group
 Tez vertex scope-43
 # Plan on vertex
-POValueOutputTez - scope-52    ->       [scope-35, scope-53]
+POValueOutputTez - scope-52    ->       [scope-35, scope-53, scope-57]
 |
 |---New For Each(false)[tuple] - scope-51
     |   |


Reply via email to