Repository: reef
Updated Branches:
refs/heads/master 4f786095d -> 80d6a1f6a
[REEF-1364] Report unhandled exceptions in the Evaluator to the Driver
This addressed the issue by
* Adding a generic unhandled exception handler.
* Adding an EvaluatorRuntime unhandled exception handler that sends a message to
the Driver on failure.
* Registering the generic unhandled exception handler at start and unregistering it when
the EvaluatorRuntime unhandled exception handler is successfully set up.
JIRA:
[REEF-1364](https://issues.apache.org/jira/browse/REEF-1364)
Pull Request:
This closes #985
Project: http://git-wip-us.apache.org/repos/asf/reef/repo
Commit: http://git-wip-us.apache.org/repos/asf/reef/commit/80d6a1f6
Tree: http://git-wip-us.apache.org/repos/asf/reef/tree/80d6a1f6
Diff: http://git-wip-us.apache.org/repos/asf/reef/diff/80d6a1f6
Branch: refs/heads/master
Commit: 80d6a1f6acc3bbede45b92cee9f19c217186b1d5
Parents: 4f78609
Author: Andrew Chung <[email protected]>
Authored: Mon May 2 16:39:39 2016 -0700
Committer: Markus Weimer <[email protected]>
Committed: Wed May 4 10:31:45 2016 -0700
----------------------------------------------------------------------
.../Runtime/Evaluator/EvaluatorRuntime.cs | 9 +
lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs | 61 +++----
.../Bridge/TestUnhandledTaskException.cs | 180 +++++++++++++++++++
.../Org.Apache.REEF.Tests.csproj | 1 +
.../DefaultUnhandledExceptionHandler.cs | 59 ++++++
.../Org.Apache.Reef.Utilities.csproj | 1 +
.../Properties/AssemblyInfo.cs | 14 ++
7 files changed, 286 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/EvaluatorRuntime.cs
----------------------------------------------------------------------
diff --git
a/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/EvaluatorRuntime.cs
b/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/EvaluatorRuntime.cs
index 5c0b25a..c95d81f 100644
--- a/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/EvaluatorRuntime.cs
+++ b/lang/cs/Org.Apache.REEF.Common/Runtime/Evaluator/EvaluatorRuntime.cs
@@ -21,6 +21,7 @@ using Org.Apache.REEF.Common.Protobuf.ReefProtocol;
using Org.Apache.REEF.Common.Runtime.Evaluator.Context;
using Org.Apache.REEF.Tang.Annotations;
using Org.Apache.REEF.Utilities;
+using Org.Apache.REEF.Utilities.Diagnostics;
using Org.Apache.REEF.Utilities.Logging;
using Org.Apache.REEF.Wake.Time;
using Org.Apache.REEF.Wake.Time.Runtime.Event;
@@ -64,11 +65,19 @@ namespace Org.Apache.REEF.Common.Runtime.Evaluator
// register the driver observer
_evaluatorControlChannel =
remoteManager.RegisterObserver(driverObserver);
+ AppDomain.CurrentDomain.UnhandledException +=
EvaluatorRuntimeUnhandledException;
+ DefaultUnhandledExceptionHandler.Unregister();
+
// start the heart beat
_clock.ScheduleAlarm(0, _heartBeatManager);
}
}
+ private void EvaluatorRuntimeUnhandledException(object sender,
UnhandledExceptionEventArgs e)
+ {
+ OnException((Exception)e.ExceptionObject);
+ }
+
public State State
{
get
http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs
----------------------------------------------------------------------
diff --git a/lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs
b/lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs
index fc5c635..4986866 100644
--- a/lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs
+++ b/lang/cs/Org.Apache.REEF.Evaluator/Evaluator.cs
@@ -29,6 +29,7 @@ using Org.Apache.REEF.Tang.Annotations;
using Org.Apache.REEF.Tang.Formats;
using Org.Apache.REEF.Tang.Implementations.Tang;
using Org.Apache.REEF.Tang.Interface;
+using Org.Apache.REEF.Utilities.Diagnostics;
using Org.Apache.REEF.Utilities.Logging;
using Org.Apache.REEF.Wake.Time;
using Org.Apache.REEF.Wake.Time.Parameters;
@@ -63,37 +64,31 @@ namespace Org.Apache.REEF.Evaluator
/// <param name="args"></param>
public static void Main(string[] args)
{
- try
- {
- if (args.Count() != 1)
- {
- var e = new InvalidOperationException("Must supply only
the evaluator.config file!");
- Utilities.Diagnostics.Exceptions.Throw(e, logger);
- }
+ DefaultUnhandledExceptionHandler.Register();
- if (IsDebuggingEnabled())
- {
- AttachDebugger();
- }
- AppDomain.CurrentDomain.UnhandledException +=
UnhandledExceptionHandler;
-
- var fullEvaluatorConfiguration =
ReadEvaluatorConfiguration(args[0]);
- var injector =
TangFactory.GetTang().NewInjector(fullEvaluatorConfiguration);
- var serializer =
injector.GetInstance<AvroConfigurationSerializer>();
- var rootEvaluatorConfiguration =
-
TangFactory.GetTang().NewConfigurationBuilder(serializer.FromString(injector.GetNamedInstance<EvaluatorConfiguration,
string>()))
- .BindSetEntry<RuntimeStartHandler, EvaluatorRuntime,
IObserver<RuntimeStart>>()
- .BindSetEntry<RuntimeStopHandler, EvaluatorRuntime,
IObserver<RuntimeStop>>()
- .Build();
- var evaluator =
injector.ForkInjector(rootEvaluatorConfiguration).GetInstance<Evaluator>();
-
- evaluator.Run();
- logger.Log(Level.Info, "Evaluator is returned from Run()");
+ if (args.Count() != 1)
+ {
+ var e = new InvalidOperationException("Must supply only the
evaluator.config file!");
+ Utilities.Diagnostics.Exceptions.Throw(e, logger);
}
- catch (Exception e)
+
+ if (IsDebuggingEnabled())
{
- Fail(e);
+ AttachDebugger();
}
+
+ var fullEvaluatorConfiguration =
ReadEvaluatorConfiguration(args[0]);
+ var injector =
TangFactory.GetTang().NewInjector(fullEvaluatorConfiguration);
+ var serializer =
injector.GetInstance<AvroConfigurationSerializer>();
+ var rootEvaluatorConfiguration =
+
TangFactory.GetTang().NewConfigurationBuilder(serializer.FromString(injector.GetNamedInstance<EvaluatorConfiguration,
string>()))
+ .BindSetEntry<RuntimeStartHandler, EvaluatorRuntime,
IObserver<RuntimeStart>>()
+ .BindSetEntry<RuntimeStopHandler, EvaluatorRuntime,
IObserver<RuntimeStop>>()
+ .Build();
+ var evaluator =
injector.ForkInjector(rootEvaluatorConfiguration).GetInstance<Evaluator>();
+
+ evaluator.Run();
+ logger.Log(Level.Info, "Evaluator is returned from Run()");
}
/// <summary>
@@ -162,17 +157,5 @@ namespace Org.Apache.REEF.Evaluator
logger = Logger.GetLogger(typeof(Evaluator));
Logger.SetCustomLevel(traceLevel.TraceLevel);
}
-
- private static void UnhandledExceptionHandler(object sender,
UnhandledExceptionEventArgs e)
- {
- Fail((Exception)e.ExceptionObject);
- }
-
- private static void Fail(Exception ex)
- {
- var message = "Unhandled exception caught in Evaluator. Current
files in the working directory: " +
- string.Join(", ",
Directory.EnumerateFiles(Directory.GetCurrentDirectory(), "*.*",
SearchOption.AllDirectories));
- Utilities.Diagnostics.Exceptions.Throw(ex, message, logger);
- }
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Tests/Functional/Bridge/TestUnhandledTaskException.cs
----------------------------------------------------------------------
diff --git
a/lang/cs/Org.Apache.REEF.Tests/Functional/Bridge/TestUnhandledTaskException.cs
b/lang/cs/Org.Apache.REEF.Tests/Functional/Bridge/TestUnhandledTaskException.cs
new file mode 100644
index 0000000..ec8fc07
--- /dev/null
+++
b/lang/cs/Org.Apache.REEF.Tests/Functional/Bridge/TestUnhandledTaskException.cs
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+using System;
+using System.Threading;
+using Org.Apache.REEF.Common.Tasks;
+using Org.Apache.REEF.Driver;
+using Org.Apache.REEF.Driver.Evaluator;
+using Org.Apache.REEF.Driver.Task;
+using Org.Apache.REEF.Tang.Annotations;
+using Org.Apache.REEF.Tang.Interface;
+using Org.Apache.REEF.Tang.Util;
+using Org.Apache.REEF.Utilities.Logging;
+using Xunit;
+
+namespace Org.Apache.REEF.Tests.Functional.Bridge
+{
+ [Collection("FunctionalTests")]
+ public sealed class TestUnhandledTaskException : ReefFunctionalTest
+ {
+ private const string ExpectedEvaluatorFailureMessage = "Unhandled
Exception.";
+ private const string ExpectedTaskId = "TaskID";
+ private const string SuccessMessage = "Evaluator successfully received
unhandled Exception.";
+
+ /// <summary>
+ /// This test validates that an unhandled Task Exception crashes the
Evaluator and the Evaluator
+ /// does an attempt to send a final message to the Driver.
+ /// TODO[JIRA REEF-1286]: Currently, this only validates the first
portion, but does not yet validate the final message.
+ /// TODO[JIRA REEF-1286]: The verification of the final message can be
done when the Exceptions are serializable.
+ /// </summary>
+ [Fact]
+ public void TestUnhandledTaskExceptionCrashesEvaluator()
+ {
+ var testFolder = DefaultRuntimeFolder +
Guid.NewGuid().ToString("N").Substring(0, 4);
+ TestRun(GetDriverConfiguration(),
typeof(TestUnhandledTaskException), 1, "testUnhandledTaskException", "local",
testFolder);
+ ValidateSuccessForLocalRuntime(0, numberOfEvaluatorsToFail: 1,
testFolder: testFolder);
+ ValidateMessageSuccessfullyLoggedForDriver(SuccessMessage,
testFolder, 1);
+ }
+
+ private static IConfiguration GetDriverConfiguration()
+ {
+ return DriverConfiguration.ConfigurationModule
+ .Set(DriverConfiguration.OnDriverStarted,
GenericType<UnhandledExceptionTestDriver>.Class)
+ .Set(DriverConfiguration.OnEvaluatorCompleted,
GenericType<UnhandledExceptionTestDriver>.Class)
+ .Set(DriverConfiguration.OnEvaluatorFailed,
GenericType<UnhandledExceptionTestDriver>.Class)
+ .Set(DriverConfiguration.OnEvaluatorAllocated,
GenericType<UnhandledExceptionTestDriver>.Class)
+ .Set(DriverConfiguration.OnTaskCompleted,
GenericType<UnhandledExceptionTestDriver>.Class)
+ .Build();
+ }
+
+ /// <summary>
+ /// This Task throws an unhandled Exception in the thread that it
spins off to
+ /// trigger an Evaluator failure.
+ /// </summary>
+ private sealed class UnhandledExceptionTestTask : ITask
+ {
+ [Inject]
+ private UnhandledExceptionTestTask()
+ {
+ }
+
+ public byte[] Call(byte[] memento)
+ {
+ var thread = new Thread(() =>
+ {
+ throw new Exception(ExpectedEvaluatorFailureMessage);
+ });
+
+ thread.Start();
+ thread.Join();
+ return null;
+ }
+
+ public void Dispose()
+ {
+ throw new NotImplementedException();
+ }
+ }
+
+ /// <summary>
+ /// This Driver verifies that the unhandled Exception triggers an
Evaluator failure
+ /// and verifies the type of Exception and its message.
+ /// </summary>
+ private sealed class UnhandledExceptionTestDriver :
+ IObserver<IDriverStarted>,
+ IObserver<IAllocatedEvaluator>,
+ IObserver<IFailedEvaluator>,
+ IObserver<ICompletedEvaluator>,
+ IObserver<ICompletedTask>
+ {
+ private static readonly Logger Logger =
Logger.GetLogger(typeof(UnhandledExceptionTestDriver));
+
+ private readonly IEvaluatorRequestor _evaluatorRequestor;
+
+ [Inject]
+ private UnhandledExceptionTestDriver(IEvaluatorRequestor
evaluatorRequestor)
+ {
+ _evaluatorRequestor = evaluatorRequestor;
+ }
+
+ public void OnNext(IDriverStarted value)
+ {
+ _evaluatorRequestor.Submit(
+ _evaluatorRequestor.NewBuilder()
+ .SetCores(1)
+ .SetNumber(1)
+ .Build());
+ }
+
+ public void OnNext(IAllocatedEvaluator value)
+ {
+ var taskConf = TaskConfiguration.ConfigurationModule
+ .Set(TaskConfiguration.Identifier, ExpectedTaskId)
+ .Set(TaskConfiguration.Task,
GenericType<UnhandledExceptionTestTask>.Class)
+ .Build();
+
+ value.SubmitTask(taskConf);
+ }
+
+ public void OnNext(ICompletedTask value)
+ {
+ throw new Exception("Driver should not have received a
completed Task.");
+ }
+
+ public void OnNext(ICompletedEvaluator value)
+ {
+ throw new Exception("Driver should not have received a
completed Evaluator.");
+ }
+
+ public void OnNext(IFailedEvaluator value)
+ {
+ if (value.EvaluatorException == null)
+ {
+ throw new Exception("Evaluator should contain a valid
Exception.");
+ }
+
+ if
(!value.EvaluatorException.Message.Contains(ExpectedEvaluatorFailureMessage))
+ {
+ // TODO[JIRA REEF-1286]: Verify the Exception message and
the type of Exception.
+ }
+
+ if (!value.FailedTask.IsPresent())
+ {
+ throw new Exception("Failed task should be present.");
+ }
+
+ if (value.FailedTask.Value.Id != ExpectedTaskId)
+ {
+ throw new Exception("Failed Task does not have the right
Task ID.");
+ }
+
+ Logger.Log(Level.Info, SuccessMessage);
+ }
+
+ public void OnError(Exception error)
+ {
+ throw new NotImplementedException();
+ }
+
+ public void OnCompleted()
+ {
+ throw new NotImplementedException();
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Tests/Org.Apache.REEF.Tests.csproj
----------------------------------------------------------------------
diff --git a/lang/cs/Org.Apache.REEF.Tests/Org.Apache.REEF.Tests.csproj
b/lang/cs/Org.Apache.REEF.Tests/Org.Apache.REEF.Tests.csproj
index d65497b..9f3a8f5 100644
--- a/lang/cs/Org.Apache.REEF.Tests/Org.Apache.REEF.Tests.csproj
+++ b/lang/cs/Org.Apache.REEF.Tests/Org.Apache.REEF.Tests.csproj
@@ -82,6 +82,7 @@ under the License.
<Compile Include="Functional\Bridge\TestSimpleContext.cs" />
<Compile Include="Functional\Bridge\TestSimpleEventHandlers.cs" />
<Compile Include="Functional\Bridge\TestSuspendTask.cs" />
+ <Compile Include="Functional\Bridge\TestUnhandledTaskException.cs" />
<Compile Include="Functional\Driver\DriverTestStartHandler.cs" />
<Compile Include="Functional\FaultTolerant\TestContextStart.cs" />
<Compile Include="Functional\FaultTolerant\TestResubmitEvaluator.cs" />
http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Utilities/Diagnostics/DefaultUnhandledExceptionHandler.cs
----------------------------------------------------------------------
diff --git
a/lang/cs/Org.Apache.REEF.Utilities/Diagnostics/DefaultUnhandledExceptionHandler.cs
b/lang/cs/Org.Apache.REEF.Utilities/Diagnostics/DefaultUnhandledExceptionHandler.cs
new file mode 100644
index 0000000..25277c6
--- /dev/null
+++
b/lang/cs/Org.Apache.REEF.Utilities/Diagnostics/DefaultUnhandledExceptionHandler.cs
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+using System;
+using System.IO;
+using Org.Apache.REEF.Utilities.Logging;
+
+namespace Org.Apache.REEF.Utilities.Diagnostics
+{
+ /// <summary>
+ /// A static class that allows the registering/unregistering of the
default UnhandledException
+ /// Handler, which logs the Exception and lists all files in the current
working directory.
+ /// </summary>
+ internal static class DefaultUnhandledExceptionHandler
+ {
+ private static readonly Logger Logger =
Logger.GetLogger(typeof(DefaultUnhandledExceptionHandler));
+
+ /// <summary>
+ /// Registers the default unhandled Exception handler, which logs the
Exception
+ /// and lists all files in the current working directory.
+ /// </summary>
+ public static void Register()
+ {
+ AppDomain.CurrentDomain.UnhandledException += Handler;
+ }
+
+ /// <summary>
+ /// Unregisters the default unhandled Exception handler.
+ /// </summary>
+ public static void Unregister()
+ {
+ AppDomain.CurrentDomain.UnhandledException -= Handler;
+ }
+
+ private static void Handler(object sender, UnhandledExceptionEventArgs
e)
+ {
+ var message = string.Format(
+ "Unhandled exception {0}. Current files in the working
directory: {1}",
+ e.ExceptionObject,
+ string.Join(", ",
Directory.EnumerateFiles(Directory.GetCurrentDirectory(), "*.*",
SearchOption.AllDirectories)));
+
+ Logger.Log(Level.Error, message);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj
----------------------------------------------------------------------
diff --git a/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj
b/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj
index 0fe5477..c2cd297 100644
--- a/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj
+++ b/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj
@@ -56,6 +56,7 @@ under the License.
<Compile Include="Collections\ReadOnlySet.cs" />
<Compile Include="Diagnostics\DiagnosticsMessages.cs" />
<Compile Include="Diagnostics\Exceptions.cs" />
+ <Compile Include="Diagnostics\DefaultUnhandledExceptionHandler.cs" />
<Compile Include="IIdentifiable.cs" />
<Compile Include="IMessage.cs" />
<Compile Include="Logging\JavaLoggingSetting.cs" />
http://git-wip-us.apache.org/repos/asf/reef/blob/80d6a1f6/lang/cs/Org.Apache.REEF.Utilities/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/lang/cs/Org.Apache.REEF.Utilities/Properties/AssemblyInfo.cs
b/lang/cs/Org.Apache.REEF.Utilities/Properties/AssemblyInfo.cs
index 74642e3..86d1af8 100644
--- a/lang/cs/Org.Apache.REEF.Utilities/Properties/AssemblyInfo.cs
+++ b/lang/cs/Org.Apache.REEF.Utilities/Properties/AssemblyInfo.cs
@@ -16,6 +16,7 @@
// under the License.
using System.Reflection;
+using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
@@ -50,3 +51,16 @@ using System.Runtime.InteropServices;
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("0.15.0.0")]
[assembly: AssemblyFileVersion("0.15.0.0")]
+
+// Allow REEF projects to access internals of the Utilities project to prevent
+// exposing unnecessary APIs.
+[assembly: InternalsVisibleTo("Org.Apache.REEF.Common, publickey=" +
+
"00240000048000009400000006020000002400005253413100040000010001005df3e621d886a9"
+
+
"9c03469d0f93a9f5d45aa2c883f50cd158759e93673f759ec4657fd84cc79d2db38ef1a2d914cc"
+
+
"b7c717846a897e11dd22eb260a7ce2da2dccf0263ea63e2b3f7dac24f28882aa568ef544341d17"
+
+
"618392a1095f4049ad079d4f4f0b429bb535699155fd6a7652ec7d6c1f1ba2b560f11ef3a86b5945d288cf")]
+[assembly: InternalsVisibleTo("Org.Apache.REEF.Evaluator, publickey=" +
+
"00240000048000009400000006020000002400005253413100040000010001005df3e621d886a9"
+
+
"9c03469d0f93a9f5d45aa2c883f50cd158759e93673f759ec4657fd84cc79d2db38ef1a2d914cc"
+
+
"b7c717846a897e11dd22eb260a7ce2da2dccf0263ea63e2b3f7dac24f28882aa568ef544341d17"
+
+
"618392a1095f4049ad079d4f4f0b429bb535699155fd6a7652ec7d6c1f1ba2b560f11ef3a86b5945d288cf")]
\ No newline at end of file