Repository: airavata Updated Branches: refs/heads/master ec4172700 -> 47afa398b
changing the monitoring to work with dynamic host configuration - AIRAVATA-1028 Project: http://git-wip-us.apache.org/repos/asf/airavata/repo Commit: http://git-wip-us.apache.org/repos/asf/airavata/commit/47afa398 Tree: http://git-wip-us.apache.org/repos/asf/airavata/tree/47afa398 Diff: http://git-wip-us.apache.org/repos/asf/airavata/diff/47afa398 Branch: refs/heads/master Commit: 47afa398b8f0a89c509e07347432333558bf4a90 Parents: ec41727 Author: lahiru <[email protected]> Authored: Mon Mar 3 13:48:09 2014 -0500 Committer: lahiru <[email protected]> Committed: Mon Mar 3 13:48:09 2014 -0500 ---------------------------------------------------------------------- .../monitor/impl/pull/qstat/QstatMonitor.java | 26 +++++++++++--------- .../impl/pull/qstat/ResourceConnection.java | 2 ++ 2 files changed, 17 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/airavata/blob/47afa398/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/QstatMonitor.java ---------------------------------------------------------------------- diff --git a/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/QstatMonitor.java b/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/QstatMonitor.java index 8f0b79d..c9f8331 100644 --- a/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/QstatMonitor.java +++ b/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/QstatMonitor.java @@ -118,7 +118,11 @@ public class QstatMonitor extends PullMonitor implements Runnable { logger.debug("We already have this connection so not going to create one"); connection = connections.get(hostName); } else { - connection = new ResourceConnection(take, gsisshHostType.getInstalledPath()); + if(gsisshHostType.getInstalledPath() == null){ + connection = new ResourceConnection(take, "/opt/torque/bin"); + }else{ + connection = new ResourceConnection(take, gsisshHostType.getInstalledPath()); + } connections.put(hostName, connection); } jobStatus.setMonitorID(take); @@ -148,7 +152,7 @@ public class QstatMonitor extends PullMonitor implements Runnable { }else if(e.getMessage().contains("illegally formed job identifier")){ logger.error("Wrong job ID is given so dropping the job from monitoring system"); } else if (!this.queue.contains(take)) { // we put the job back to the queue only if its state is not unknown - if (take.getFailedCount() < 3) { + if (take.getFailedCount() < 2) { try { take.setFailedCount(take.getFailedCount() + 1); this.queue.put(take); @@ -159,19 +163,19 @@ public class QstatMonitor extends PullMonitor implements Runnable { logger.error("Tried to monitor the job 3 times, so dropping of the the Job with ID: " + take.getJobID()); } } - logger.error("Error retrieving the job status"); throw new AiravataMonitorException("Error retrieving the job status", e); } catch (Exception e){ if (take.getFailedCount() < 3) { - try { - take.setFailedCount(take.getFailedCount() + 1); - this.queue.put(take); - } catch (InterruptedException e1) { - e1.printStackTrace(); - } - } else { - logger.error("Tryied to monitor the job 3 times, so dropping of the the Job with ID: " + take.getJobID()); + try { + take.setFailedCount(take.getFailedCount() + 1); + this.queue.put(take); + } catch (InterruptedException e1) { + e1.printStackTrace(); + } + } else { + logger.error("Tryied to monitor the job 3 times, so dropping of the the Job with ID: " + take.getJobID()); } + throw new AiravataMonitorException("Error retrieving the job status", e); } } http://git-wip-us.apache.org/repos/asf/airavata/blob/47afa398/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/ResourceConnection.java ---------------------------------------------------------------------- diff --git a/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/ResourceConnection.java b/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/ResourceConnection.java index 8568da5..1a2d04d 100644 --- a/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/ResourceConnection.java +++ b/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/ResourceConnection.java @@ -204,6 +204,8 @@ public class ResourceConnection { public JobState getJobStatus(MonitorID monitorID) throws SSHApiException { String jobID = monitorID.getJobID(); + //todo so currently we execute the qstat for each job but we can use user based monitoring + //todo or we should concatenate all the commands and execute them in one go and parse the response RawCommandInfo rawCommandInfo = new RawCommandInfo(this.installedPath + "qstat -f " + jobID); StandardOutReader stdOutReader = new StandardOutReader();
