fixing monitoring issue
Project: http://git-wip-us.apache.org/repos/asf/airavata/repo Commit: http://git-wip-us.apache.org/repos/asf/airavata/commit/53dd791b Tree: http://git-wip-us.apache.org/repos/asf/airavata/tree/53dd791b Diff: http://git-wip-us.apache.org/repos/asf/airavata/diff/53dd791b Branch: refs/heads/workflow-support Commit: 53dd791bd1689c3d43196f7654b6b91f17cab189 Parents: a0f5419 Author: lahiru <[email protected]> Authored: Sat Jul 12 00:08:55 2014 -0400 Committer: lahiru <[email protected]> Committed: Sat Jul 12 00:08:55 2014 -0400 ---------------------------------------------------------------------- .../gfac/monitor/impl/pull/qstat/HPCPullMonitor.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/airavata/blob/53dd791b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java ---------------------------------------------------------------------- diff --git a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java index a1f38fc..b34cfe7 100644 --- a/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java +++ b/modules/gfac/gfac-monitor/src/main/java/org/apache/airavata/gfac/monitor/impl/pull/qstat/HPCPullMonitor.java @@ -149,13 +149,13 @@ public class HPCPullMonitor extends PullMonitor { if (iHostMonitorData.getHost().getType() instanceof GsisshHostType || iHostMonitorData.getHost().getType() instanceof SSHHostType) { currentHostDescription = iHostMonitorData.getHost(); - String hostName = iHostMonitorData.getHost().getType().getHostAddress(); + String hostName = iHostMonitorData.getHost().getType().getHostAddress(); ResourceConnection connection = null; if 
(connections.containsKey(hostName)) { logger.debug("We already have this connection so not going to create one"); connection = connections.get(hostName); } else { - connection = new ResourceConnection(iHostMonitorData,getAuthenticationInfo()); + connection = new ResourceConnection(iHostMonitorData, getAuthenticationInfo()); connections.put(hostName, connection); } List<MonitorID> monitorID = iHostMonitorData.getMonitorIDs(); @@ -176,20 +176,21 @@ public class HPCPullMonitor extends PullMonitor { try { gfac.invokeOutFlowHandlers(iMonitorID.getJobExecutionContext()); } catch (GFacException e) { - publisher.publish(new TaskStatusChangeRequest(new TaskIdentity(iMonitorID.getExperimentID(), iMonitorID.getWorkflowNodeID(), - iMonitorID.getTaskID()), TaskState.FAILED)); - publisher.publish(new ExperimentStatusChangeRequest(new ExperimentIdentity(iMonitorID.getExperimentID()), - ExperimentState.FAILED)); + publisher.publish(new TaskStatusChangeRequest(new TaskIdentity(iMonitorID.getExperimentID(), iMonitorID.getWorkflowNodeID(), + iMonitorID.getTaskID()), TaskState.FAILED)); + publisher.publish(new ExperimentStatusChangeRequest(new ExperimentIdentity(iMonitorID.getExperimentID()), + ExperimentState.FAILED)); logger.info(e.getLocalizedMessage(), e); } } else if (iMonitorID.getFailedCount() > 2 && iMonitorID.getStatus().equals(JobState.UNKNOWN)) { logger.error("Tried to monitor the job with ID " + iMonitorID.getJobID() + " But failed 3 times, so skip this Job from Monitor"); iMonitorID.setLastMonitored(new Timestamp((new Date()).getTime())); completedJobs.add(iMonitorID); + } else if (!iMonitorID.getStatus().equals(JobState.UNKNOWN)) { + iMonitorID.setFailedCount(0); } else { // Evey iMonitorID.setLastMonitored(new Timestamp((new Date()).getTime())); - iMonitorID.setFailedCount(0); // if the job is complete we remove it from the Map, if any of these maps // get empty this userMonitorData will get delete from the queue }
