Repository: reef
Updated Branches:
  refs/heads/master 9e0181653 -> 26780fd87


[REEF-1273] Race condition between Task failed and Task done

This addressed the issue by
  * Checking task state before failing the job.
  * Adding CountDownEvents in Tasks to wait until jobs are Done in tests.

JIRA:
  [REEF-1273](https://issues.apache.org/jira/browse/REEF-1273)

This closes #898


Project: http://git-wip-us.apache.org/repos/asf/reef/repo
Commit: http://git-wip-us.apache.org/repos/asf/reef/commit/26780fd8
Tree: http://git-wip-us.apache.org/repos/asf/reef/tree/26780fd8
Diff: http://git-wip-us.apache.org/repos/asf/reef/diff/26780fd8

Branch: refs/heads/master
Commit: 26780fd87ae3836b098603567aaaf6bccd46638b
Parents: 9e01816
Author: Andrew Chung <[email protected]>
Authored: Tue Mar 22 18:40:35 2016 -0700
Committer: Julia Wang <[email protected]>
Committed: Thu Mar 24 18:38:03 2016 -0700

----------------------------------------------------------------------
 .../Runtime/Evaluator/Task/TaskStatus.cs        | 27 ++++++++++++++++----
 .../ContextRuntimeTests.cs                      |  9 ++++++-
 2 files changed, 30 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/reef/blob/26780fd8/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/Task/TaskStatus.cs
----------------------------------------------------------------------
diff --git 
a/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/Task/TaskStatus.cs 
b/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/Task/TaskStatus.cs
index 88bdaa4..0a4491b 100644
--- a/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/Task/TaskStatus.cs
+++ b/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/Task/TaskStatus.cs
@@ -95,13 +95,30 @@ namespace Org.Apache.REEF.Common.Runtime.Evaluator.Task
         {
             lock (_stateLock)
             {
-                if (!_lastException.IsPresent())
+                try
                 {
-                    _lastException = Optional<Exception>.Of(e);
+                    if (HasEnded())
+                    {
+                        // Note that this is possible if the job is already 
DONE, but a
+                        // Task Close is triggered prior to the DONE signal 
propagates to the
+                        // Driver. If the Task Close handler is not 
implemented, the Handler will 
+                        // mark the Task with an Exception, although for all 
intents and purposes
+                        // the Task is already done and should not be affected.
+                        return;
+                    }
+
+                    if (!_lastException.IsPresent())
+                    {
+                        _lastException = Optional<Exception>.Of(e);
+                    }
+
+                    State = TaskState.Failed;
+                    _taskLifeCycle.Stop();
+                }
+                finally
+                {
+                    Heartbeat();
                 }
-                State = TaskState.Failed;
-                _taskLifeCycle.Stop();
-                Heartbeat();
             }
         }
 

http://git-wip-us.apache.org/repos/asf/reef/blob/26780fd8/lang/cs/Org.Apache.REEF.Evaluator.Tests/ContextRuntimeTests.cs
----------------------------------------------------------------------
diff --git a/lang/cs/Org.Apache.REEF.Evaluator.Tests/ContextRuntimeTests.cs 
b/lang/cs/Org.Apache.REEF.Evaluator.Tests/ContextRuntimeTests.cs
index 0894ee7..da540a3 100644
--- a/lang/cs/Org.Apache.REEF.Evaluator.Tests/ContextRuntimeTests.cs
+++ b/lang/cs/Org.Apache.REEF.Evaluator.Tests/ContextRuntimeTests.cs
@@ -231,6 +231,7 @@ namespace Org.Apache.REEF.Evaluator.Tests
                     }
 
                     testTask.CountDownEvent.Signal();
+                    testTask.DisposedEvent.Wait();
                 }
             }
         }
@@ -272,6 +273,7 @@ namespace Org.Apache.REEF.Evaluator.Tests
                     }
 
                     testTask.CountDownEvent.Signal();
+                    testTask.DisposedEvent.Wait();
                 }
             }
         }
@@ -304,6 +306,7 @@ namespace Org.Apache.REEF.Evaluator.Tests
                 testTask.CountDownEvent.Signal();
                 testTask.StopEvent.Wait();
                 Assert.False(contextRuntime.GetTaskStatus().IsPresent());
+                testTask.DisposedEvent.Wait();
 
                 contextRuntime.StartTask(taskConfig, hbMgr);
                 Assert.Equal(contextRuntime.GetTaskStatus().Value.state, 
State.RUNNING);
@@ -319,6 +322,7 @@ namespace Org.Apache.REEF.Evaluator.Tests
                 secondTestTask.CountDownEvent.Signal();
                 secondTestTask.StopEvent.Wait();
                 Assert.False(contextRuntime.GetTaskStatus().IsPresent());
+                secondTestTask.DisposedEvent.Wait();
             }
         }
 
@@ -449,15 +453,18 @@ namespace Org.Apache.REEF.Evaluator.Tests
             {
                 CountDownEvent = new CountdownEvent(1);
                 StopEvent = new CountdownEvent(1);
+                DisposedEvent = new CountdownEvent(1);
             }
 
             public CountdownEvent CountDownEvent { get; private set; }
 
             public CountdownEvent StopEvent { get; private set; }
 
+            public CountdownEvent DisposedEvent { get; private set; }
+
             public void Dispose()
             {
-                throw new NotImplementedException();
+                DisposedEvent.Signal();
             }
 
             public byte[] Call(byte[] memento)

Reply via email to