Repository: tez
Updated Branches:
  refs/heads/branch-0.7 083462d0b -> e2d584e40
TEZ-3137. Tez task failed with illegal state exception in recovery (zjffdu)

Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/e2d584e4
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/e2d584e4
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/e2d584e4

Branch: refs/heads/branch-0.7
Commit: e2d584e40521aadc759c233e7399f9046db8202a
Parents: 083462d
Author: Jeff Zhang <[email protected]>
Authored: Thu Feb 25 12:11:02 2016 +0800
Committer: Jeff Zhang <[email protected]>
Committed: Thu Feb 25 12:11:02 2016 +0800

----------------------------------------------------------------------
 CHANGES.txt                                                   | 1 +
 .../main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tez/blob/e2d584e4/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 3efe9fb..b97c129 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -9,6 +9,7 @@ INCOMPATIBLE CHANGES
   TEZ-2972. Avoid task rescheduling when a node turns unhealthy
 
 ALL CHANGES
+  TEZ-3137. Tez task failed with illegal state exception in recovery
   TEZ-3126. Log reason for not reducing parallelism
   TEZ-3123. Containers can get re-used even with conflicting local resources.
   TEZ-3117. Deadlock in Edge and Vertex code

http://git-wip-us.apache.org/repos/asf/tez/blob/e2d584e4/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index aec738c..e646363 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -274,6 +274,8 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
       new VertexStateChangedCallback();
 
   private VertexState recoveredState = VertexState.NEW;
+  private boolean isInRecovery = false;
+
   @VisibleForTesting
   List<TezEvent> recoveredEvents = new ArrayList<TezEvent>();
   private boolean vertexAlreadyInitialized = false;
@@ -2598,6 +2600,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
     public VertexState transition(VertexImpl vertex, VertexEvent vertexEvent) {
       VertexEventRecoverVertex recoverEvent = (VertexEventRecoverVertex) vertexEvent;
       VertexState desiredState = recoverEvent.getDesiredState();
+      vertex.isInRecovery = true;
 
       switch (desiredState) {
         case RUNNING:
@@ -2993,6 +2996,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
 
     @Override
     public VertexState transition(VertexImpl vertex, VertexEvent vertexEvent) {
+      vertex.isInRecovery = true;
       VertexEventSourceVertexRecovered sourceRecoveredEvent =
           (VertexEventSourceVertexRecovered) vertexEvent;
       // Use distance from root from Recovery events as upstream vertices may not
@@ -3626,7 +3630,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
       // this start event can only come directly from the DAG. That means this
       // is a top level vertex of the dag
       Preconditions.checkState(
-          (vertex.sourceVertices == null || vertex.sourceVertices.isEmpty()),
+          (vertex.sourceVertices == null || vertex.sourceVertices.isEmpty() || vertex.isInRecovery),
           "Vertex: " + vertex.logIdentifier + " got invalid start event");
       vertex.startSignalPending = true;
       vertex.startTimeRequested = vertex.clock.getTime();
