Repository: reef Updated Branches: refs/heads/master 9e0181653 -> 26780fd87
[REEF-1273] Race condition between Task failed and Task done This addressed the issue by * Checking task state before failing the job. * Adding CountDownEvents in Tasks to wait until jobs are Done in tests. JIRA: [REEF-1273](https://issues.apache.org/jira/browse/REEF-1273) This closes #898 Project: http://git-wip-us.apache.org/repos/asf/reef/repo Commit: http://git-wip-us.apache.org/repos/asf/reef/commit/26780fd8 Tree: http://git-wip-us.apache.org/repos/asf/reef/tree/26780fd8 Diff: http://git-wip-us.apache.org/repos/asf/reef/diff/26780fd8 Branch: refs/heads/master Commit: 26780fd87ae3836b098603567aaaf6bccd46638b Parents: 9e01816 Author: Andrew Chung <[email protected]> Authored: Tue Mar 22 18:40:35 2016 -0700 Committer: Julia Wang <[email protected]> Committed: Thu Mar 24 18:38:03 2016 -0700 ---------------------------------------------------------------------- .../Runtime/Evaluator/Task/TaskStatus.cs | 27 ++++++++++++++++---- .../ContextRuntimeTests.cs | 9 ++++++- 2 files changed, 30 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/reef/blob/26780fd8/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/Task/TaskStatus.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/Task/TaskStatus.cs b/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/Task/TaskStatus.cs index 88bdaa4..0a4491b 100644 --- a/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/Task/TaskStatus.cs +++ b/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/Task/TaskStatus.cs @@ -95,13 +95,30 @@ namespace Org.Apache.REEF.Common.Runtime.Evaluator.Task { lock (_stateLock) { - if (!_lastException.IsPresent()) + try { - _lastException = Optional<Exception>.Of(e); + if (HasEnded()) + { + // Note that this is possible if the job is already DONE, but a + // Task Close is triggered prior to the DONE signal propagates to the + // Driver. If the Task Close handler is not implemented, the Handler will + // mark the Task with an Exception, although for all intents and purposes + // the Task is already done and should not be affected. + return; + } + + if (!_lastException.IsPresent()) + { + _lastException = Optional<Exception>.Of(e); + } + + State = TaskState.Failed; + _taskLifeCycle.Stop(); + } + finally + { + Heartbeat(); } - State = TaskState.Failed; - _taskLifeCycle.Stop(); - Heartbeat(); } } http://git-wip-us.apache.org/repos/asf/reef/blob/26780fd8/lang/cs/Org.Apache.REEF.Evaluator.Tests/ContextRuntimeTests.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Evaluator.Tests/ContextRuntimeTests.cs b/lang/cs/Org.Apache.REEF.Evaluator.Tests/ContextRuntimeTests.cs index 0894ee7..da540a3 100644 --- a/lang/cs/Org.Apache.REEF.Evaluator.Tests/ContextRuntimeTests.cs +++ b/lang/cs/Org.Apache.REEF.Evaluator.Tests/ContextRuntimeTests.cs @@ -231,6 +231,7 @@ namespace Org.Apache.REEF.Evaluator.Tests } testTask.CountDownEvent.Signal(); + testTask.DisposedEvent.Wait(); } } } @@ -272,6 +273,7 @@ namespace Org.Apache.REEF.Evaluator.Tests } testTask.CountDownEvent.Signal(); + testTask.DisposedEvent.Wait(); } } } @@ -304,6 +306,7 @@ namespace Org.Apache.REEF.Evaluator.Tests testTask.CountDownEvent.Signal(); testTask.StopEvent.Wait(); Assert.False(contextRuntime.GetTaskStatus().IsPresent()); + testTask.DisposedEvent.Wait(); contextRuntime.StartTask(taskConfig, hbMgr); Assert.Equal(contextRuntime.GetTaskStatus().Value.state, State.RUNNING); @@ -319,6 +322,7 @@ namespace Org.Apache.REEF.Evaluator.Tests secondTestTask.CountDownEvent.Signal(); secondTestTask.StopEvent.Wait(); Assert.False(contextRuntime.GetTaskStatus().IsPresent()); + secondTestTask.DisposedEvent.Wait(); } } @@ -449,15 +453,18 @@ namespace Org.Apache.REEF.Evaluator.Tests { CountDownEvent = new CountdownEvent(1); StopEvent = new CountdownEvent(1); + DisposedEvent = new CountdownEvent(1); } public CountdownEvent CountDownEvent { get; private set; } public CountdownEvent StopEvent { get; private set; } + public CountdownEvent DisposedEvent { get; private set; } + public void Dispose() { - throw new NotImplementedException(); + DisposedEvent.Signal(); } public byte[] Call(byte[] memento)
