Repository: reef Updated Branches: refs/heads/master f8e45ff36 -> c48c3f042
[REEF-1719] Log machine status in TaskHostBase JIRA: [REEF-1719](https://issues.apache.org/jira/browse/REEF-1719) This closes #1231 Project: http://git-wip-us.apache.org/repos/asf/reef/repo Commit: http://git-wip-us.apache.org/repos/asf/reef/commit/c48c3f04 Tree: http://git-wip-us.apache.org/repos/asf/reef/tree/c48c3f04 Diff: http://git-wip-us.apache.org/repos/asf/reef/diff/c48c3f04 Branch: refs/heads/master Commit: c48c3f0420626171ffb85d1216da2e718ecf36f7 Parents: f8e45ff Author: Julia Wang <[email protected]> Authored: Tue Jan 17 19:09:49 2017 -0800 Committer: Mariia Mykhailova <[email protected]> Committed: Wed Jan 18 10:34:42 2017 -0800 ---------------------------------------------------------------------- lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/IMRUDriver.cs | 1 + lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/TaskManager.cs | 3 ++- .../Org.Apache.REEF.IMRU/OnREEF/IMRUTasks/TaskHostBase.cs | 10 +++++++++- 3 files changed, 12 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/reef/blob/c48c3f04/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/IMRUDriver.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/IMRUDriver.cs b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/IMRUDriver.cs index 5f18aaf..a169575 100644 --- a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/IMRUDriver.cs +++ b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/IMRUDriver.cs @@ -756,6 +756,7 @@ namespace Org.Apache.REEF.IMRU.OnREEF.Driver if (waitingTasks.Any()) { + Logger.Log(Level.Info, "There are {0} tasks that timed out", waitingTasks.Count); WaitingForCloseTaskNoResponseAction(waitingTasks); } break; http://git-wip-us.apache.org/repos/asf/reef/blob/c48c3f04/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/TaskManager.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/TaskManager.cs b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/TaskManager.cs index 4ba9745..72e1d75 100644 --- a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/TaskManager.cs +++ b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/TaskManager.cs @@ -552,11 +552,12 @@ namespace Org.Apache.REEF.IMRU.OnREEF.Driver { try { - return string.Format("State={0}, taskId={1}, ContextId={2}, evaluatorId={3}, evaluatorHost={4}", + return string.Format("State={0}, taskId={1}, ContextId={2}, evaluatorId={3}, TimeInCurrentStateinMs {4}, evaluatorHost={5}", t.Value.TaskState.CurrentState, t.Key, t.Value.ActiveContext.Id, t.Value.ActiveContext.EvaluatorId, + (DateTime.Now - t.Value.TimeStateUpdated).Milliseconds, t.Value.ActiveContext.EvaluatorDescriptor.NodeDescriptor.HostName); } catch (Exception ex) http://git-wip-us.apache.org/repos/asf/reef/blob/c48c3f04/lang/cs/Org.Apache.REEF.IMRU/OnREEF/IMRUTasks/TaskHostBase.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/IMRUTasks/TaskHostBase.cs b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/IMRUTasks/TaskHostBase.cs index 718a794..1bf1ff4 100644 --- a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/IMRUTasks/TaskHostBase.cs +++ b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/IMRUTasks/TaskHostBase.cs @@ -20,6 +20,7 @@ using System.IO; using System.Net.Sockets; using System.Runtime.Remoting; using System.Threading; +using Org.Apache.REEF.Common.Runtime; using Org.Apache.REEF.Common.Tasks; using Org.Apache.REEF.Common.Tasks.Events; using Org.Apache.REEF.IMRU.OnREEF.Driver; @@ -64,6 +65,11 @@ namespace Org.Apache.REEF.IMRU.OnREEF.IMRUTasks protected readonly CancellationTokenSource _cancellationSource; /// <summary> + /// Machine status for log purpose + /// </summary> + private readonly MachineStatus _machineStatus = new MachineStatus(); + + /// <summary> /// Task host base class to hold the common stuff of both mapper and update tasks /// </summary> /// <param name="groupCommunicationsClient">Group Communication Client</param> @@ -74,6 +80,7 @@ namespace Org.Apache.REEF.IMRU.OnREEF.IMRUTasks TaskCloseCoordinator taskCloseCoordinator, bool invokeGc) { + Logger.Log(Level.Info, "Entering TaskHostBase constructor with machine status {0}.", _machineStatus.ToString()); _groupCommunicationsClient = groupCommunicationsClient; _communicationGroupClient = groupCommunicationsClient.GetCommunicationGroup(IMRUConstants.CommunicationGroupName); @@ -88,7 +95,7 @@ namespace Org.Apache.REEF.IMRU.OnREEF.IMRUTasks /// </summary> public byte[] Call(byte[] memento) { - Logger.Log(Level.Info, "Entering {0} Call().", TaskHostName); + Logger.Log(Level.Info, "Entering {0} Call() with machine status {1}.", TaskHostName, _machineStatus.ToString()); try { _groupCommunicationsClient.Initialize(_cancellationSource); @@ -111,6 +118,7 @@ namespace Org.Apache.REEF.IMRU.OnREEF.IMRUTasks } finally { + Logger.Log(Level.Info, "TaskHostBase::Finally"); _taskCloseCoordinator.SignalTaskStopped(); } Logger.Log(Level.Info, "{0} returned with cancellation token:{1}.", TaskHostName, _cancellationSource.IsCancellationRequested);
