Repository: airavata
Updated Branches:
  refs/heads/master ec4172700 -> 47afa398b


changing the monitoring to work with dynamic host configuration - AIRAVATA-1028


Project: http://git-wip-us.apache.org/repos/asf/airavata/repo
Commit: http://git-wip-us.apache.org/repos/asf/airavata/commit/47afa398
Tree: http://git-wip-us.apache.org/repos/asf/airavata/tree/47afa398
Diff: http://git-wip-us.apache.org/repos/asf/airavata/diff/47afa398

Branch: refs/heads/master
Commit: 47afa398b8f0a89c509e07347432333558bf4a90
Parents: ec41727
Author: lahiru <[email protected]>
Authored: Mon Mar 3 13:48:09 2014 -0500
Committer: lahiru <[email protected]>
Committed: Mon Mar 3 13:48:09 2014 -0500

----------------------------------------------------------------------
 .../monitor/impl/pull/qstat/QstatMonitor.java   | 26 +++++++++++---------
 .../impl/pull/qstat/ResourceConnection.java     |  2 ++
 2 files changed, 17 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/airavata/blob/47afa398/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/QstatMonitor.java
----------------------------------------------------------------------
diff --git a/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/QstatMonitor.java b/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/QstatMonitor.java
index 8f0b79d..c9f8331 100644
--- a/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/QstatMonitor.java
+++ b/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/QstatMonitor.java
@@ -118,7 +118,11 @@ public class QstatMonitor extends PullMonitor implements Runnable {
                         logger.debug("We already have this connection so not 
going to create one");
                         connection = connections.get(hostName);
                     } else {
-                        connection = new ResourceConnection(take, 
gsisshHostType.getInstalledPath());
+                        if(gsisshHostType.getInstalledPath() == null){
+                            connection = new ResourceConnection(take, 
"/opt/torque/bin");
+                        }else{
+                            connection = new ResourceConnection(take, 
gsisshHostType.getInstalledPath());
+                        }
                         connections.put(hostName, connection);
                     }
                     jobStatus.setMonitorID(take);
@@ -148,7 +152,7 @@ public class QstatMonitor extends PullMonitor implements Runnable {
                 }else if(e.getMessage().contains("illegally formed job 
identifier")){
                    logger.error("Wrong job ID is given so dropping the job 
from monitoring system");
                 } else if (!this.queue.contains(take)) {   // we put the job 
back to the queue only if its state is not unknown
-                    if (take.getFailedCount() < 3) {
+                    if (take.getFailedCount() < 2) {
                         try {
                             take.setFailedCount(take.getFailedCount() + 1);
                             this.queue.put(take);
@@ -159,19 +163,19 @@ public class QstatMonitor extends PullMonitor implements Runnable {
                         logger.error("Tried to monitor the job 3 times, so 
dropping of the the Job with ID: " + take.getJobID());
                     }
                 }
-                logger.error("Error retrieving the job status");
                 throw new AiravataMonitorException("Error retrieving the job 
status", e);
             } catch (Exception e){
                 if (take.getFailedCount() < 3) {
-                        try {
-                            take.setFailedCount(take.getFailedCount() + 1);
-                            this.queue.put(take);
-                        } catch (InterruptedException e1) {
-                            e1.printStackTrace();
-                        }
-                    } else {
-                        logger.error("Tryied to monitor the job 3 times, so 
dropping of the the Job with ID: " + take.getJobID());
+                    try {
+                        take.setFailedCount(take.getFailedCount() + 1);
+                        this.queue.put(take);
+                    } catch (InterruptedException e1) {
+                        e1.printStackTrace();
+                    }
+                } else {
+                    logger.error("Tryied to monitor the job 3 times, so 
dropping of the the Job with ID: " + take.getJobID());
                 }
+                throw new AiravataMonitorException("Error retrieving the job 
status", e);
             }
         }
 

http://git-wip-us.apache.org/repos/asf/airavata/blob/47afa398/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/ResourceConnection.java
----------------------------------------------------------------------
diff --git a/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/ResourceConnection.java b/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/ResourceConnection.java
index 8568da5..1a2d04d 100644
--- a/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/ResourceConnection.java
+++ b/modules/airavata-job-monitor/src/main/java/org/apache/airavata/job/monitor/impl/pull/qstat/ResourceConnection.java
@@ -204,6 +204,8 @@ public class ResourceConnection {
 
     public JobState getJobStatus(MonitorID monitorID) throws SSHApiException {
         String jobID = monitorID.getJobID();
+        //todo so currently we execute the qstat for each job but we can use 
user based monitoring
+        //todo or we should concatenate all the commands and execute them in 
one go and parse the response
         RawCommandInfo rawCommandInfo = new RawCommandInfo(this.installedPath 
+ "qstat -f " + jobID);
 
         StandardOutReader stdOutReader = new StandardOutReader();

Reply via email to