Author: rohini
Date: Mon Dec 19 20:15:13 2016
New Revision: 1775168
URL: http://svn.apache.org/viewvc?rev=1775168&view=rev
Log:
PIG-3417: Job fails when skewed join is done on tuple key (nkollar via rohini)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
pig/trunk/test/e2e/pig/tests/nightly.conf
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-2.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12-OPTOFF.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
Modified: pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon Dec 19 20:15:13 2016
@@ -171,6 +171,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-3417: Job fails when skewed join is done on tuple key (nkollar via rohini)
+
PIG-5074: Build broken when hadoopversion=20 in branch 0.16 (szita via daijy)
PIG-5064: NPE in TestScriptUDF#testPythonBuiltinModuleImport1 when JAVA_HOME
is not set (water via daijy)
Modified:
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
---
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
(original)
+++
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
Mon Dec 19 20:15:13 2016
@@ -2432,7 +2432,7 @@ public class MRCompiler extends PhyPlanV
}else{
for(int i=0; i<transformPlans.size(); i++) {
eps1.add(transformPlans.get(i));
- flat1.add(true);
+ flat1.add(i == transformPlans.size() - 1 ? true : false);
}
}
Modified:
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
---
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
(original)
+++
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
Mon Dec 19 20:15:13 2016
@@ -1507,7 +1507,7 @@ public class TezCompiler extends PhyPlan
for (int i=0; i<transformPlans.size(); i++) {
eps1.add(transformPlans.get(i));
- flat1.add(true);
+ flat1.add(i == transformPlans.size() - 1 ? true : false);
}
// This foreach will pick the sort key columns from the POPoissonSample output
Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL:
http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Mon Dec 19 20:15:13 2016
@@ -3186,6 +3186,23 @@ e = join a by name full outer, b by name
store e into ':OUTPATH:';\,
},
+ # skew join with tuple key
+ {
+ 'num' => 15,
+ 'java_params' => ['-Dpig.skewedjoin.reduce.maxtuple=100'],
+ 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
+b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+c = group a by (name, age);
+d = group b by (name, age);
+e = join c by $0, d by $0 using 'skewed' parallel 5;
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
+b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+c = group a by (name, age);
+d = group b by (name, age);
+e = join c by $0, d by $0;
+store e into ':OUTPATH:';\
+ }
]
},
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SelfJoin-2.gld
Mon Dec 19 20:15:13 2016
@@ -18,7 +18,7 @@ a: Split - scope-58
| | |
| | Constant(DummyVal) - scope-32
| |
-| |---New For Each(true,true)[tuple] - scope-37
+| |---New For Each(false,true)[tuple] - scope-37
| | |
| | Project[int][0] - scope-21
| | |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-1.gld
Mon Dec 19 20:15:13 2016
@@ -16,7 +16,7 @@ Local Rearrange[tuple]{tuple}(false) - s
| |
| Constant(DummyVal) - scope-30
|
-|---New For Each(true,true)[tuple] - scope-41
+|---New For Each(false,true)[tuple] - scope-41
| |
| Project[int][0] - scope-16
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-2.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-2.gld?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-2.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-SkewJoin-2.gld
Mon Dec 19 20:15:13 2016
@@ -16,7 +16,7 @@ Local Rearrange[tuple]{tuple}(false) - s
| |
| Constant(DummyVal) - scope-34
|
-|---New For Each(true,true)[tuple] - scope-39
+|---New For Each(false,true)[tuple] - scope-39
| |
| Project[int][0] - scope-20
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12-OPTOFF.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12-OPTOFF.gld?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
---
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12-OPTOFF.gld
(original)
+++
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12-OPTOFF.gld
Mon Dec 19 20:15:13 2016
@@ -118,7 +118,7 @@ a1: Split - scope-138
| | |
| | Constant(DummyVal) - scope-112
| |
-| |---New For Each(true,true)[tuple] - scope-117
+| |---New For Each(false,true)[tuple] - scope-117
| | |
| | Project[int][0] - scope-73
| | |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12.gld?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-12.gld
Mon Dec 19 20:15:13 2016
@@ -24,7 +24,7 @@ a: Split - scope-141
| | | | |
| | | | Constant(DummyVal) - scope-174
| | | |
-| | | |---New For Each(true,true)[tuple] - scope-180
+| | | |---New For Each(false,true)[tuple] - scope-180
| | | | |
| | | | Project[int][0] - scope-177
| | | | |
@@ -52,7 +52,7 @@ a: Split - scope-141
| | | | |
| | | | Constant(DummyVal) - scope-186
| | | |
-| | | |---New For Each(true,true)[tuple] - scope-192
+| | | |---New For Each(false,true)[tuple] - scope-192
| | | | |
| | | | Project[int][0] - scope-189
| | | | |
@@ -90,7 +90,7 @@ a: Split - scope-141
| | | | |
| | | | Constant(DummyVal) - scope-201
| | | |
-| | | |---New For Each(true,true)[tuple] - scope-207
+| | | |---New For Each(false,true)[tuple] - scope-207
| | | | |
| | | | Project[int][0] - scope-204
| | | | |
@@ -124,7 +124,7 @@ a: Split - scope-141
| | | | |
| | | | Constant(DummyVal) - scope-216
| | | |
-| | | |---New For Each(true,true)[tuple] - scope-222
+| | | |---New For Each(false,true)[tuple] - scope-222
| | | | |
| | | | Project[int][0] - scope-219
| | | | |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
---
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
(original)
+++
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15-OPTOFF.gld
Mon Dec 19 20:15:13 2016
@@ -50,7 +50,7 @@ Local Rearrange[tuple]{tuple}(false) - s
| |
| Constant(DummyVal) - scope-42
|
-|---New For Each(true,true)[tuple] - scope-47
+|---New For Each(false,true)[tuple] - scope-47
| |
| Project[int][0] - scope-26
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-15.gld
Mon Dec 19 20:15:13 2016
@@ -18,7 +18,7 @@ a: Split - scope-68
| | |
| | Constant(DummyVal) - scope-72
| |
-| |---New For Each(true,true)[tuple] - scope-78
+| |---New For Each(false,true)[tuple] - scope-78
| | |
| | Project[int][0] - scope-75
| | |
@@ -36,7 +36,7 @@ a: Split - scope-68
| | |
| | Constant(DummyVal) - scope-82
| |
-| |---New For Each(true,true)[tuple] - scope-88
+| |---New For Each(false,true)[tuple] - scope-88
| | |
| | Project[int][0] - scope-85
| | |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
---
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
(original)
+++
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16-OPTOFF.gld
Mon Dec 19 20:15:13 2016
@@ -18,7 +18,7 @@ Local Rearrange[tuple]{tuple}(false) - s
| |
| Constant(DummyVal) - scope-42
|
-|---New For Each(true,true)[tuple] - scope-53
+|---New For Each(false,true)[tuple] - scope-53
| |
| Project[int][0] - scope-26
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-16.gld
Mon Dec 19 20:15:13 2016
@@ -16,7 +16,7 @@ Local Rearrange[tuple]{tuple}(false) - s
| |
| Constant(DummyVal) - scope-42
|
-|---New For Each(true,true)[tuple] - scope-53
+|---New For Each(false,true)[tuple] - scope-53
| |
| Project[int][0] - scope-26
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
---
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
(original)
+++
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6-OPTOFF.gld
Mon Dec 19 20:15:13 2016
@@ -48,7 +48,7 @@ Local Rearrange[tuple]{tuple}(false) - s
| |
| Constant(DummyVal) - scope-122
|
-|---New For Each(true,true)[tuple] - scope-127
+|---New For Each(false,true)[tuple] - scope-127
| |
| Project[int][0] - scope-110
| |
Modified:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld?rev=1775168&r1=1775167&r2=1775168&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
(original)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Union-6.gld
Mon Dec 19 20:15:13 2016
@@ -19,7 +19,7 @@ Local Rearrange[tuple]{tuple}(false) - s
| |
| Constant(DummyVal) - scope-68
|
-|---New For Each(true,true)[tuple] - scope-74
+|---New For Each(false,true)[tuple] - scope-74
| |
| Project[int][0] - scope-71
| |
@@ -50,7 +50,7 @@ Local Rearrange[tuple]{tuple}(false) - s
| |
| Constant(DummyVal) - scope-78
|
-|---New For Each(true,true)[tuple] - scope-84
+|---New For Each(false,true)[tuple] - scope-84
| |
| Project[int][0] - scope-81
| |