Author: daijy
Date: Fri Sep 25 17:44:03 2015
New Revision: 1705336
URL: http://svn.apache.org/viewvc?rev=1705336&view=rev
Log:
PIG-4635: NPE while running pig script in tez mode
Modified:
pig/branches/branch-0.15/CHANGES.txt
pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezOperator.java
pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java
pig/branches/branch-0.15/test/org/apache/pig/tez/TestTezGraceParallelism.java
Modified: pig/branches/branch-0.15/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/branches/branch-0.15/CHANGES.txt?rev=1705336&r1=1705335&r2=1705336&view=diff
==============================================================================
--- pig/branches/branch-0.15/CHANGES.txt (original)
+++ pig/branches/branch-0.15/CHANGES.txt Fri Sep 25 17:44:03 2015
@@ -28,6 +28,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-4635: NPE while running pig script in tez mode (daijy)
+
PIG-4683: Nested order is broken after PIG-3591 in some cases (daijy)
PIG-4628: Pig 0.14 job with order by fails in mapreduce mode with Oozie
(knoguchi)
Modified:
pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezOperator.java
URL:
http://svn.apache.org/viewvc/pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezOperator.java?rev=1705336&r1=1705335&r2=1705336&view=diff
==============================================================================
---
pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezOperator.java
(original)
+++
pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezOperator.java
Fri Sep 25 17:44:03 2015
@@ -187,6 +187,7 @@ public class TezOperator extends Operato
private transient VertexGroupInfo vertexGroupInfo;
// Mapping of OperatorKey of POStore OperatorKey to vertexGroup TezOperator
private Map<OperatorKey, OperatorKey> vertexGroupStores = null;
+ private boolean isVertexGroup = false;
public static class LoaderInfo implements Serializable {
private List<POLoad> loads = null;
@@ -477,7 +478,7 @@ public class TezOperator extends Operato
// Union is the only operator that uses alias vertex (VertexGroup) now. But
// more operators could be added to the list in the future.
public boolean isVertexGroup() {
- return vertexGroupInfo != null;
+ return isVertexGroup;
}
public VertexGroupInfo getVertexGroupInfo() {
@@ -486,6 +487,7 @@ public class TezOperator extends Operato
public void setVertexGroupInfo(VertexGroupInfo vertexGroup) {
this.vertexGroupInfo = vertexGroup;
+ this.isVertexGroup = true;
}
public void addVertexGroupStore(OperatorKey storeKey, OperatorKey
vertexGroupKey) {
Modified:
pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java
URL:
http://svn.apache.org/viewvc/pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java?rev=1705336&r1=1705335&r2=1705336&view=diff
==============================================================================
---
pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java
(original)
+++
pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java
Fri Sep 25 17:44:03 2015
@@ -55,9 +55,13 @@ public class TezPrinter extends TezOpPla
public void visitTezOp(TezOperator tezOper) throws VisitorException {
if (tezOper.isVertexGroup()) {
VertexGroupInfo info = tezOper.getVertexGroupInfo();
- mStream.println("Tez vertex group "
- + tezOper.getOperatorKey().toString() + "\t<-\t "
- + info.getInputs() + "\t->\t " + info.getOutput());
+ mStream.print("Tez vertex group "
+ + tezOper.getOperatorKey().toString());
+ if (info!=null) {
+ mStream.println("\t<-\t " + info.getInputs() + "\t->\t " + info.getOutput());
+ } else {
+ mStream.println();
+ }
mStream.println("# No plan on vertex group");
} else {
mStream.println("Tez vertex " +
tezOper.getOperatorKey().toString());
Modified:
pig/branches/branch-0.15/test/org/apache/pig/tez/TestTezGraceParallelism.java
URL:
http://svn.apache.org/viewvc/pig/branches/branch-0.15/test/org/apache/pig/tez/TestTezGraceParallelism.java?rev=1705336&r1=1705335&r2=1705336&view=diff
==============================================================================
---
pig/branches/branch-0.15/test/org/apache/pig/tez/TestTezGraceParallelism.java
(original)
+++
pig/branches/branch-0.15/test/org/apache/pig/tez/TestTezGraceParallelism.java
Fri Sep 25 17:44:03 2015
@@ -26,6 +26,7 @@ import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
+import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
@@ -248,4 +249,37 @@ public class TestTezGraceParallelism {
Util.removeLogAppender(PigGraceShuffleVertexManager.class,
"testJoinWithDifferentDepth2");
}
}
+
+ @Test
+ // See PIG-4635 for a NPE in TezOperDependencyParallelismEstimator
+ public void testJoinWithUnion() throws IOException{
+ NodeIdGenerator.reset();
+ PigServer.resetScope();
+ StringWriter writer = new StringWriter();
+ Util.createLogAppender("testJoinWithUnion", writer, PigGraceShuffleVertexManager.class);
+ try {
+ // DAG: 29 -> 32 -> 41 \
+ // -> 70 (vertex group) -> 61
+ // 42 -> 45 -> 54 /
+ pigServer.registerQuery("A = load '" + INPUT_DIR + "/" + INPUT_FILE2 + "' as (name:chararray, gender:chararray);");
+ pigServer.registerQuery("B = distinct A;");
+ pigServer.registerQuery("C = group B by name;");
+ pigServer.registerQuery("D = load '" + INPUT_DIR + "/" + INPUT_FILE2 + "' as (name:chararray, gender:chararray);");
+ pigServer.registerQuery("E = distinct D;");
+ pigServer.registerQuery("F = group E by name;");
+ pigServer.registerQuery("G = union C, F;");
+ pigServer.registerQuery("H = distinct G;");
+ Iterator<Tuple> iter = pigServer.openIterator("H");
+ int count = 0;
+ while (iter.hasNext()) {
+ iter.next();
+ count++;
+ }
+ assertEquals(count, 20);
+ assertTrue(writer.toString().contains("time to set parallelism for scope-41"));
+ assertTrue(writer.toString().contains("time to set parallelism for scope-54"));
+ } finally {
+ Util.removeLogAppender(PigGraceShuffleVertexManager.class, "testJoinWithUnion");
+ }
+ }
}