Repository: reef
Updated Branches:
  refs/heads/master 4f786095d -> 80d6a1f6a


[REEF-1364] Report unhandled exceptions in the Evaluator to the Driver

This addressed the issue by
  * Adding a generic unhandled exception handler.
  * Adding an EvaluatorRuntime unhandled exception handler that sends a message to
    the Driver on failure.
  * Registering the generic unhandled exception handler at start and unregistering
    it when the EvaluatorRuntime unhandled exception handler is successfully set up.

JIRA:
  [REEF-1364](https://issues.apache.org/jira/browse/REEF-1364)

Pull Request:
  This closes #985


Project: http://git-wip-us.apache.org/repos/asf/reef/repo
Commit: http://git-wip-us.apache.org/repos/asf/reef/commit/80d6a1f6
Tree: http://git-wip-us.apache.org/repos/asf/reef/tree/80d6a1f6
Diff: http://git-wip-us.apache.org/repos/asf/reef/diff/80d6a1f6

Branch: refs/heads/master
Commit: 80d6a1f6acc3bbede45b92cee9f19c217186b1d5
Parents: 4f78609
Author: Andrew Chung <[email protected]>
Authored: Mon May 2 16:39:39 2016 -0700
Committer: Markus Weimer <[email protected]>
Committed: Wed May 4 10:31:45 2016 -0700

----------------------------------------------------------------------
 .../Runtime/Evaluator/EvaluatorRuntime.cs       |   9 +
 lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs  |  61 +++----
 .../Bridge/TestUnhandledTaskException.cs        | 180 +++++++++++++++++++
 .../Org.Apache.REEF.Tests.csproj                |   1 +
 .../DefaultUnhandledExceptionHandler.cs         |  59 ++++++
 .../Org.Apache.Reef.Utilities.csproj            |   1 +
 .../Properties/AssemblyInfo.cs                  |  14 ++
 7 files changed, 286 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/EvaluatorRuntime.cs
----------------------------------------------------------------------
diff --git 
a/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/EvaluatorRuntime.cs 
b/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/EvaluatorRuntime.cs
index 5c0b25a..c95d81f 100644
--- a/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/EvaluatorRuntime.cs
+++ b/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/EvaluatorRuntime.cs
@@ -21,6 +21,7 @@ using Org.Apache.REEF.Common.Protobuf.ReefProtocol;
 using Org.Apache.REEF.Common.Runtime.Evaluator.Context;
 using Org.Apache.REEF.Tang.Annotations;
 using Org.Apache.REEF.Utilities;
+using Org.Apache.REEF.Utilities.Diagnostics;
 using Org.Apache.REEF.Utilities.Logging;
 using Org.Apache.REEF.Wake.Time;
 using Org.Apache.REEF.Wake.Time.Runtime.Event;
@@ -64,11 +65,19 @@ namespace Org.Apache.REEF.Common.Runtime.Evaluator
                 // register the driver observer
                 _evaluatorControlChannel = 
remoteManager.RegisterObserver(driverObserver);
 
+                AppDomain.CurrentDomain.UnhandledException += 
EvaluatorRuntimeUnhandledException;
+                DefaultUnhandledExceptionHandler.Unregister();
+
                 // start the heart beat
                 _clock.ScheduleAlarm(0, _heartBeatManager);
             }
         }
 
+        private void EvaluatorRuntimeUnhandledException(object sender, 
UnhandledExceptionEventArgs e)
+        {
+            OnException((Exception)e.ExceptionObject);
+        }
+
         public State State
         {
             get

http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs
----------------------------------------------------------------------
diff --git a/lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs 
b/lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs
index fc5c635..4986866 100644
--- a/lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs
+++ b/lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs
@@ -29,6 +29,7 @@ using Org.Apache.REEF.Tang.Annotations;
 using Org.Apache.REEF.Tang.Formats;
 using Org.Apache.REEF.Tang.Implementations.Tang;
 using Org.Apache.REEF.Tang.Interface;
+using Org.Apache.REEF.Utilities.Diagnostics;
 using Org.Apache.REEF.Utilities.Logging;
 using Org.Apache.REEF.Wake.Time;
 using Org.Apache.REEF.Wake.Time.Parameters;
@@ -63,37 +64,31 @@ namespace Org.Apache.REEF.Evaluator
         /// <param name="args"></param>
         public static void Main(string[] args)
         {
-            try
-            {
-                if (args.Count() != 1)
-                {
-                    var e = new InvalidOperationException("Must supply only 
the evaluator.config file!");
-                    Utilities.Diagnostics.Exceptions.Throw(e, logger);
-                }
+            DefaultUnhandledExceptionHandler.Register();
 
-                if (IsDebuggingEnabled())
-                {
-                    AttachDebugger();
-                }
-                AppDomain.CurrentDomain.UnhandledException += 
UnhandledExceptionHandler;
-
-                var fullEvaluatorConfiguration = 
ReadEvaluatorConfiguration(args[0]);
-                var injector = 
TangFactory.GetTang().NewInjector(fullEvaluatorConfiguration);
-                var serializer = 
injector.GetInstance<AvroConfigurationSerializer>();
-                var rootEvaluatorConfiguration = 
-                    
TangFactory.GetTang().NewConfigurationBuilder(serializer.FromString(injector.GetNamedInstance<EvaluatorConfiguration,
 string>()))
-                        .BindSetEntry<RuntimeStartHandler, EvaluatorRuntime, 
IObserver<RuntimeStart>>()
-                        .BindSetEntry<RuntimeStopHandler, EvaluatorRuntime, 
IObserver<RuntimeStop>>()
-                        .Build();
-                var evaluator = 
injector.ForkInjector(rootEvaluatorConfiguration).GetInstance<Evaluator>();
-
-                evaluator.Run();
-                logger.Log(Level.Info, "Evaluator is returned from Run()");
+            if (args.Count() != 1)
+            {
+                var e = new InvalidOperationException("Must supply only the 
evaluator.config file!");
+                Utilities.Diagnostics.Exceptions.Throw(e, logger);
             }
-            catch (Exception e)
+
+            if (IsDebuggingEnabled())
             {
-                Fail(e);
+                AttachDebugger();
             }
+
+            var fullEvaluatorConfiguration = 
ReadEvaluatorConfiguration(args[0]);
+            var injector = 
TangFactory.GetTang().NewInjector(fullEvaluatorConfiguration);
+            var serializer = 
injector.GetInstance<AvroConfigurationSerializer>();
+            var rootEvaluatorConfiguration = 
+                
TangFactory.GetTang().NewConfigurationBuilder(serializer.FromString(injector.GetNamedInstance<EvaluatorConfiguration,
 string>()))
+                    .BindSetEntry<RuntimeStartHandler, EvaluatorRuntime, 
IObserver<RuntimeStart>>()
+                    .BindSetEntry<RuntimeStopHandler, EvaluatorRuntime, 
IObserver<RuntimeStop>>()
+                    .Build();
+            var evaluator = 
injector.ForkInjector(rootEvaluatorConfiguration).GetInstance<Evaluator>();
+
+            evaluator.Run();
+            logger.Log(Level.Info, "Evaluator is returned from Run()");
         }
 
         /// <summary>
@@ -162,17 +157,5 @@ namespace Org.Apache.REEF.Evaluator
             logger = Logger.GetLogger(typeof(Evaluator));
             Logger.SetCustomLevel(traceLevel.TraceLevel);
         }
-
-        private static void UnhandledExceptionHandler(object sender, 
UnhandledExceptionEventArgs e)
-        {
-            Fail((Exception)e.ExceptionObject);
-        }
-
-        private static void Fail(Exception ex)
-        {
-            var message = "Unhandled exception caught in Evaluator. Current 
files in the working directory: " +
-                          string.Join(", ", 
Directory.EnumerateFiles(Directory.GetCurrentDirectory(), "*.*", 
SearchOption.AllDirectories));
-            Utilities.Diagnostics.Exceptions.Throw(ex, message, logger);
-        }
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Tests/Functional/Bridge/TestUnhandledTaskException.cs
----------------------------------------------------------------------
diff --git 
a/lang/cs/Org.Apache.REEF.Tests/Functional/Bridge/TestUnhandledTaskException.cs 
b/lang/cs/Org.Apache.REEF.Tests/Functional/Bridge/TestUnhandledTaskException.cs
new file mode 100644
index 0000000..ec8fc07
--- /dev/null
+++ 
b/lang/cs/Org.Apache.REEF.Tests/Functional/Bridge/TestUnhandledTaskException.cs
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+//   http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+using System;
+using System.Threading;
+using Org.Apache.REEF.Common.Tasks;
+using Org.Apache.REEF.Driver;
+using Org.Apache.REEF.Driver.Evaluator;
+using Org.Apache.REEF.Driver.Task;
+using Org.Apache.REEF.Tang.Annotations;
+using Org.Apache.REEF.Tang.Interface;
+using Org.Apache.REEF.Tang.Util;
+using Org.Apache.REEF.Utilities.Logging;
+using Xunit;
+
+namespace Org.Apache.REEF.Tests.Functional.Bridge
+{
+    [Collection("FunctionalTests")]
+    public sealed class TestUnhandledTaskException : ReefFunctionalTest
+    {
+        private const string ExpectedEvaluatorFailureMessage = "Unhandled 
Exception.";
+        private const string ExpectedTaskId = "TaskID";
+        private const string SuccessMessage = "Evaluator successfully received 
unhandled Exception.";
+
+        /// <summary>
+        /// This test validates that an unhandled Task Exception crashes the 
Evaluator and the Evaluator
+        /// does an attempt to send a final message to the Driver.
+        /// TODO[JIRA REEF-1286]: Currently, this only validates the first 
portion, but does not yet validate the final message.
+        /// TODO[JIRA REEF-1286]: The verification of the final message can be 
done when the Exceptions are serializable.
+        /// </summary>
+        [Fact]
+        public void TestUnhandledTaskExceptionCrashesEvaluator()
+        {
+            var testFolder = DefaultRuntimeFolder + 
Guid.NewGuid().ToString("N").Substring(0, 4);
+            TestRun(GetDriverConfiguration(), 
typeof(TestUnhandledTaskException), 1, "testUnhandledTaskException", "local", 
testFolder);
+            ValidateSuccessForLocalRuntime(0, numberOfEvaluatorsToFail: 1, 
testFolder: testFolder);
+            ValidateMessageSuccessfullyLoggedForDriver(SuccessMessage, 
testFolder, 1);
+        }
+
+        private static IConfiguration GetDriverConfiguration()
+        {
+            return DriverConfiguration.ConfigurationModule
+                .Set(DriverConfiguration.OnDriverStarted, 
GenericType<UnhandledExceptionTestDriver>.Class)
+                .Set(DriverConfiguration.OnEvaluatorCompleted, 
GenericType<UnhandledExceptionTestDriver>.Class)
+                .Set(DriverConfiguration.OnEvaluatorFailed, 
GenericType<UnhandledExceptionTestDriver>.Class)
+                .Set(DriverConfiguration.OnEvaluatorAllocated, 
GenericType<UnhandledExceptionTestDriver>.Class)
+                .Set(DriverConfiguration.OnTaskCompleted, 
GenericType<UnhandledExceptionTestDriver>.Class)
+                .Build();
+        }
+
+        /// <summary>
+        /// This Task throws an unhandled Exception in the thread that it 
spins off to
+        /// trigger an Evaluator failure.
+        /// </summary>
+        private sealed class UnhandledExceptionTestTask : ITask
+        {
+            [Inject]
+            private UnhandledExceptionTestTask()
+            {
+            }
+
+            public byte[] Call(byte[] memento)
+            {
+                var thread = new Thread(() =>
+                {
+                    throw new Exception(ExpectedEvaluatorFailureMessage);
+                });
+
+                thread.Start();
+                thread.Join();
+                return null;
+            }
+
+            public void Dispose()
+            {
+                throw new NotImplementedException();
+            }
+        }
+
+        /// <summary>
+        /// This Driver verifies that the unhandled Exception triggers an 
Evaluator failure
+        /// and verifies the type of Exception and its message.
+        /// </summary>
+        private sealed class UnhandledExceptionTestDriver : 
+            IObserver<IDriverStarted>,
+            IObserver<IAllocatedEvaluator>, 
+            IObserver<IFailedEvaluator>,
+            IObserver<ICompletedEvaluator>,
+            IObserver<ICompletedTask>
+        {
+            private static readonly Logger Logger = 
Logger.GetLogger(typeof(UnhandledExceptionTestDriver));
+
+            private readonly IEvaluatorRequestor _evaluatorRequestor;
+
+            [Inject]
+            private UnhandledExceptionTestDriver(IEvaluatorRequestor 
evaluatorRequestor)
+            {
+                _evaluatorRequestor = evaluatorRequestor;
+            }
+
+            public void OnNext(IDriverStarted value)
+            {
+                _evaluatorRequestor.Submit(
+                    _evaluatorRequestor.NewBuilder()
+                        .SetCores(1)
+                        .SetNumber(1)
+                        .Build());
+            }
+
+            public void OnNext(IAllocatedEvaluator value)
+            {
+                var taskConf = TaskConfiguration.ConfigurationModule
+                    .Set(TaskConfiguration.Identifier, ExpectedTaskId)
+                    .Set(TaskConfiguration.Task, 
GenericType<UnhandledExceptionTestTask>.Class)
+                    .Build();
+
+                value.SubmitTask(taskConf);
+            }
+
+            public void OnNext(ICompletedTask value)
+            {
+                throw new Exception("Driver should not have received a 
completed Task.");
+            }
+
+            public void OnNext(ICompletedEvaluator value)
+            {
+                throw new Exception("Driver should not have received a 
completed Evaluator.");
+            }
+
+            public void OnNext(IFailedEvaluator value)
+            {
+                if (value.EvaluatorException == null)
+                {
+                    throw new Exception("Evaluator should contain a valid 
Exception.");
+                }
+
+                if 
(!value.EvaluatorException.Message.Contains(ExpectedEvaluatorFailureMessage))
+                {
+                    // TODO[JIRA REEF-1286]: Verify the Exception message and 
the type of Exception.
+                }
+
+                if (!value.FailedTask.IsPresent())
+                {
+                    throw new Exception("Failed task should be present.");
+                }
+
+                if (value.FailedTask.Value.Id != ExpectedTaskId)
+                {
+                    throw new Exception("Failed Task does not have the right 
Task ID.");
+                }
+
+                Logger.Log(Level.Info, SuccessMessage);
+            }
+
+            public void OnError(Exception error)
+            {
+                throw new NotImplementedException();
+            }
+
+            public void OnCompleted()
+            {
+                throw new NotImplementedException();
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Tests/Org.Apache.REEF.Tests.csproj
----------------------------------------------------------------------
diff --git a/lang/cs/Org.Apache.REEF.Tests/Org.Apache.REEF.Tests.csproj 
b/lang/cs/Org.Apache.REEF.Tests/Org.Apache.REEF.Tests.csproj
index d65497b..9f3a8f5 100644
--- a/lang/cs/Org.Apache.REEF.Tests/Org.Apache.REEF.Tests.csproj
+++ b/lang/cs/Org.Apache.REEF.Tests/Org.Apache.REEF.Tests.csproj
@@ -82,6 +82,7 @@ under the License.
     <Compile Include="Functional\Bridge\TestSimpleContext.cs" />
     <Compile Include="Functional\Bridge\TestSimpleEventHandlers.cs" />
     <Compile Include="Functional\Bridge\TestSuspendTask.cs" />
+    <Compile Include="Functional\Bridge\TestUnhandledTaskException.cs" />
     <Compile Include="Functional\Driver\DriverTestStartHandler.cs" />
     <Compile Include="Functional\FaultTolerant\TestContextStart.cs" />
     <Compile Include="Functional\FaultTolerant\TestResubmitEvaluator.cs" />

http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Utilities/Diagnostics/DefaultUnhandledExceptionHandler.cs
----------------------------------------------------------------------
diff --git 
a/lang/cs/Org.Apache.REEF.Utilities/Diagnostics/DefaultUnhandledExceptionHandler.cs
 
b/lang/cs/Org.Apache.REEF.Utilities/Diagnostics/DefaultUnhandledExceptionHandler.cs
new file mode 100644
index 0000000..25277c6
--- /dev/null
+++ 
b/lang/cs/Org.Apache.REEF.Utilities/Diagnostics/DefaultUnhandledExceptionHandler.cs
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+//   http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+using System;
+using System.IO;
+using Org.Apache.REEF.Utilities.Logging;
+
+namespace Org.Apache.REEF.Utilities.Diagnostics
+{
+    /// <summary>
+    /// A static class that allows the registering/unregistering of the 
default UnhandledException
+    /// Handler, which logs the Exception and lists all files in the current 
working directory.
+    /// </summary>
+    internal static class DefaultUnhandledExceptionHandler
+    {
+        private static readonly Logger Logger = 
Logger.GetLogger(typeof(DefaultUnhandledExceptionHandler));
+
+        /// <summary>
+        /// Registers the default unhandled Exception handler, which logs the 
Exception
+        /// and lists all files in the current working directory.
+        /// </summary>
+        public static void Register()
+        {
+            AppDomain.CurrentDomain.UnhandledException += Handler;
+        }
+
+        /// <summary>
+        /// Unregisters the default unhandled Exception handler.
+        /// </summary>
+        public static void Unregister()
+        {
+            AppDomain.CurrentDomain.UnhandledException -= Handler;
+        }
+
+        private static void Handler(object sender, UnhandledExceptionEventArgs 
e)
+        {
+            var message = string.Format(
+                "Unhandled exception {0}. Current files in the working 
directory: {1}",
+                e.ExceptionObject,
+                string.Join(", ", 
Directory.EnumerateFiles(Directory.GetCurrentDirectory(), "*.*", 
SearchOption.AllDirectories)));
+
+            Logger.Log(Level.Error, message);
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj
----------------------------------------------------------------------
diff --git a/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj 
b/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj
index 0fe5477..c2cd297 100644
--- a/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj
+++ b/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj
@@ -56,6 +56,7 @@ under the License.
     <Compile Include="Collections\ReadOnlySet.cs" />
     <Compile Include="Diagnostics\DiagnosticsMessages.cs" />
     <Compile Include="Diagnostics\Exceptions.cs" />
+    <Compile Include="Diagnostics\DefaultUnhandledExceptionHandler.cs" />
     <Compile Include="IIdentifiable.cs" />
     <Compile Include="IMessage.cs" />
     <Compile Include="Logging\JavaLoggingSetting.cs" />

http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Utilities/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/lang/cs/Org.Apache.REEF.Utilities/Properties/AssemblyInfo.cs 
b/lang/cs/Org.Apache.REEF.Utilities/Properties/AssemblyInfo.cs
index 74642e3..86d1af8 100644
--- a/lang/cs/Org.Apache.REEF.Utilities/Properties/AssemblyInfo.cs
+++ b/lang/cs/Org.Apache.REEF.Utilities/Properties/AssemblyInfo.cs
@@ -16,6 +16,7 @@
 // under the License.
 
 using System.Reflection;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 
 // General Information about an assembly is controlled through the following 
@@ -50,3 +51,16 @@ using System.Runtime.InteropServices;
 // [assembly: AssemblyVersion("1.0.*")]
 [assembly: AssemblyVersion("0.15.0.0")]
 [assembly: AssemblyFileVersion("0.15.0.0")]
+
+// Allow REEF projects to access internals of the Utilities project to prevent
+// exposing unnecessary APIs.
+[assembly: InternalsVisibleTo("Org.Apache.REEF.Common, publickey=" +
+ 
"00240000048000009400000006020000002400005253413100040000010001005df3e621d886a9"
 +
+ 
"9c03469d0f93a9f5d45aa2c883f50cd158759e93673f759ec4657fd84cc79d2db38ef1a2d914cc"
 +
+ 
"b7c717846a897e11dd22eb260a7ce2da2dccf0263ea63e2b3f7dac24f28882aa568ef544341d17"
 +
+ 
"618392a1095f4049ad079d4f4f0b429bb535699155fd6a7652ec7d6c1f1ba2b560f11ef3a86b5945d288cf")]
+[assembly: InternalsVisibleTo("Org.Apache.REEF.Evaluator, publickey=" +
+ 
"00240000048000009400000006020000002400005253413100040000010001005df3e621d886a9"
 +
+ 
"9c03469d0f93a9f5d45aa2c883f50cd158759e93673f759ec4657fd84cc79d2db38ef1a2d914cc"
 +
+ 
"b7c717846a897e11dd22eb260a7ce2da2dccf0263ea63e2b3f7dac24f28882aa568ef544341d17"
 +
+ 
"618392a1095f4049ad079d4f4f0b429bb535699155fd6a7652ec7d6c1f1ba2b560f11ef3a86b5945d288cf")]
\ No newline at end of file

Reply via email to