This is an automated email from the ASF dual-hosted git repository.
isjarana pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/airavata.git
The following commit(s) were added to refs/heads/develop by this push:
new f9980f76ba update cluster status when job failed
new dcae5a3575 Merge pull request #412 from isururanawaka/metaschedular
f9980f76ba is described below
commit f9980f76ba1a1bb845232048d9f817fe7ff9e2fb
Author: Isuru Ranawaka <[email protected]>
AuthorDate: Tue Apr 4 12:25:28 2023 -0400
update cluster status when job failed
---
.../scigap/develop/group_vars/all/vars.yml | 1 +
.../templates/airavata-server.properties.j2 | 1 +
.../rescheduler/ExponentialBackOffReScheduler.java | 2 +-
.../rescheduler/ProcessReschedulingService.java | 2 +-
.../airavata/common/utils/ServerSettings.java | 12 +++++++---
.../ComputationalResourceMonitoringService.java | 2 +-
.../server/OrchestratorServerHandler.java | 28 ++++++++++++++++++----
7 files changed, 38 insertions(+), 10 deletions(-)
diff --git
a/dev-tools/ansible/inventories/scigap/develop/group_vars/all/vars.yml
b/dev-tools/ansible/inventories/scigap/develop/group_vars/all/vars.yml
index ef57562e75..c631efc95b 100644
--- a/dev-tools/ansible/inventories/scigap/develop/group_vars/all/vars.yml
+++ b/dev-tools/ansible/inventories/scigap/develop/group_vars/all/vars.yml
@@ -244,6 +244,7 @@ metascheduler_gateway: dev-ultrascan
metascheduler_group_resource_profile: bd83e541-fb7b-4878-a11b-dc56696700d5
metascheduler_username: metascheacc
cluster_scanning_interval: 7800
+job_scanning_interval: 180
cluster_scanning_parallel_jobs: 1
metaschedluer_job_scanning_enable: true
metascheduler_maximum_rescheduler_threshold: 5
diff --git
a/dev-tools/ansible/roles/api-orch/templates/airavata-server.properties.j2
b/dev-tools/ansible/roles/api-orch/templates/airavata-server.properties.j2
index 3d71434cc2..107c2d32c6 100644
--- a/dev-tools/ansible/roles/api-orch/templates/airavata-server.properties.j2
+++ b/dev-tools/ansible/roles/api-orch/templates/airavata-server.properties.j2
@@ -349,6 +349,7 @@ metascheduler.gateway={{ metascheduler_gateway }}
metascheduler.group.resource.profile={{ metascheduler_group_resource_profile }}
metascheduler.username={{ metascheduler_username }}
cluster.scanning.interval={{ cluster_scanning_interval }}
+job.scanning.interval={{job_scanning_interval}}
cluster.scanning.parallel.jobs={{ cluster_scanning_parallel_jobs }}
metaschedluer.job.scanning.enable={{ metaschedluer_job_scanning_enable }}
metascheduler.maximum.rescheduler.threshold = {{ metascheduler_maximum_rescheduler_threshold }}
diff --git
a/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ExponentialBackOffReScheduler.java
b/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ExponentialBackOffReScheduler.java
index ed2088711b..26383b6296 100644
---
a/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ExponentialBackOffReScheduler.java
+++
b/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ExponentialBackOffReScheduler.java
@@ -72,7 +72,7 @@ public class ExponentialBackOffReScheduler implements
ReScheduler {
long currentTime = System.currentTimeMillis();
- double scanningInterval =
ServerSettings.getMetaschedulerScanningInterval();
+ double scanningInterval =
ServerSettings.getMetaschedulerJobScanningInterval();
if (currentTime >= (pastValue + value * scanningInterval *
1000)) {
updateResourceSchedulingModel(processModel,experimentModel,client);
diff --git
a/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ProcessReschedulingService.java
b/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ProcessReschedulingService.java
index e281b3c3c6..be8b5f7de0 100644
---
a/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ProcessReschedulingService.java
+++
b/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ProcessReschedulingService.java
@@ -44,7 +44,7 @@ public class ProcessReschedulingService implements IServer {
scheduler = schedulerFactory.getScheduler();
final int parallelJobs =
ServerSettings.getMetaschedulerNoOfScanningParallelJobs();
- final double scanningInterval =
ServerSettings.getMetaschedulerScanningInterval();
+ final double scanningInterval =
ServerSettings.getMetaschedulerJobScanningInterval();
for (int i = 0; i < parallelJobs; i++) {
diff --git
a/modules/commons/src/main/java/org/apache/airavata/common/utils/ServerSettings.java
b/modules/commons/src/main/java/org/apache/airavata/common/utils/ServerSettings.java
index 0221d17c93..6ed5c3f72b 100644
---
a/modules/commons/src/main/java/org/apache/airavata/common/utils/ServerSettings.java
+++
b/modules/commons/src/main/java/org/apache/airavata/common/utils/ServerSettings.java
@@ -148,7 +148,8 @@ public class ServerSettings extends ApplicationSettings {
public static final String METASCHEDULER_GATEWAY = "metascheduler.gateway";
public static final String METASCHEDULER_GRP_ID =
"metascheduler.group.resource.profile";
public static final String METASCHEDULER_USERNAME =
"metascheduler.username";
- public static final String METASCHEDULER_SCANNING_INTERVAL =
"cluster.scanning.interval";
+ public static final String METASCHEDULER_CLUSTER_SCANNING_INTERVAL =
"cluster.scanning.interval";
+ public static final String METASCHEDULER_JOB_SCANNING_INTERVAL =
"job.scanning.interval";
public static final String METASCHEDULER_NO_OF_SCANNING_PARALLEL_JOBS =
"cluster.scanning.parallel.jobs";
public static final String COMPUTE_RESOURCE_RESCHEDULER_CLASS =
"compute.resource.rescheduler.policy.class";
public static final String METASCHEDULER_MAXIMUM_RESCHEDULED_THRESHOLD=
"metascheduler.maximum.rescheduler.threshold";
@@ -547,8 +548,13 @@ public class ServerSettings extends ApplicationSettings {
"1"));
}
- public static double getMetaschedulerScanningInterval() throws
ApplicationSettingsException {
- return Double.parseDouble(getSetting(METASCHEDULER_SCANNING_INTERVAL,
+ public static double getMetaschedulerClusterScanningInterval() throws
ApplicationSettingsException {
+ return
Double.parseDouble(getSetting(METASCHEDULER_CLUSTER_SCANNING_INTERVAL,
+ "1800000"));
+ }
+
+ public static double getMetaschedulerJobScanningInterval() throws
ApplicationSettingsException {
+ return
Double.parseDouble(getSetting(METASCHEDULER_JOB_SCANNING_INTERVAL,
"1800000"));
}
diff --git
a/modules/computer-resource-monitoring-service/src/main/java/org/apache/airavata/compute/resource/monitoring/ComputationalResourceMonitoringService.java
b/modules/computer-resource-monitoring-service/src/main/java/org/apache/airavata/compute/resource/monitoring/ComputationalResourceMonitoringService.java
index 1d2dc78cfe..015c3b729d 100644
---
a/modules/computer-resource-monitoring-service/src/main/java/org/apache/airavata/compute/resource/monitoring/ComputationalResourceMonitoringService.java
+++
b/modules/computer-resource-monitoring-service/src/main/java/org/apache/airavata/compute/resource/monitoring/ComputationalResourceMonitoringService.java
@@ -48,7 +48,7 @@ public class ComputationalResourceMonitoringService
implements IServer {
final String metaGatewayId = ServerSettings.getMetaschedulerGateway();
final String metaGroupResourceProfileId =
ServerSettings.getMetaschedulerGrpId();
final int parallelJobs =
ServerSettings.getMetaschedulerNoOfScanningParallelJobs();
- final double scanningInterval =
ServerSettings.getMetaschedulerScanningInterval();
+ final double scanningInterval =
ServerSettings.getMetaschedulerClusterScanningInterval();
for (int i = 0; i < parallelJobs; i++) {
diff --git
a/modules/orchestrator/orchestrator-service/src/main/java/org/apache/airavata/orchestrator/server/OrchestratorServerHandler.java
b/modules/orchestrator/orchestrator-service/src/main/java/org/apache/airavata/orchestrator/server/OrchestratorServerHandler.java
index 8a64de7757..4d8b9ac716 100644
---
a/modules/orchestrator/orchestrator-service/src/main/java/org/apache/airavata/orchestrator/server/OrchestratorServerHandler.java
+++
b/modules/orchestrator/orchestrator-service/src/main/java/org/apache/airavata/orchestrator/server/OrchestratorServerHandler.java
@@ -47,10 +47,7 @@ import
org.apache.airavata.model.experiment.UserConfigurationDataModel;
import org.apache.airavata.model.messaging.event.*;
import org.apache.airavata.model.process.ProcessModel;
import
org.apache.airavata.model.scheduling.ComputationalResourceSchedulingModel;
-import org.apache.airavata.model.status.ExperimentState;
-import org.apache.airavata.model.status.ExperimentStatus;
-import org.apache.airavata.model.status.ProcessState;
-import org.apache.airavata.model.status.ProcessStatus;
+import org.apache.airavata.model.status.*;
import org.apache.airavata.model.task.TaskTypes;
import org.apache.airavata.model.util.ExperimentModelUtil;
import org.apache.airavata.orchestrator.core.exception.OrchestratorException;
@@ -789,6 +786,29 @@ public class OrchestratorServerHandler implements
OrchestratorService.Iface {
case REQUEUED:
status.setState(ExperimentState.SCHEDULED);
status.setReason("Job submission failed, requeued to resubmit");
+ List<QueueStatusModel> queueStatusModels = new
ArrayList<>();
+ final RegistryService.Client registryClient =
getRegistryServiceClient();
+ ExperimentModel experimentModel =
registryClient.getExperiment(processIdentity.getExperimentId());
+ UserConfigurationDataModel
userConfigurationDataModel = experimentModel.getUserConfigurationData();
+ if(userConfigurationDataModel != null) {
+ ComputationalResourceSchedulingModel
computationalResourceSchedulingModel =
+
userConfigurationDataModel.getComputationalResourceScheduling();
+ if(computationalResourceSchedulingModel !=
null) {
+ String queueName =
computationalResourceSchedulingModel.getQueueName();
+ String resourceId =
computationalResourceSchedulingModel.getResourceHostId();
+ ComputeResourceDescription comResourceDes =
registryClient.getComputeResource(resourceId);
+ QueueStatusModel queueStatusModel = new
QueueStatusModel();
+
queueStatusModel.setHostName(comResourceDes.getHostName());
+ queueStatusModel.setQueueName(queueName);
+ queueStatusModel.setQueueUp(false);
+ queueStatusModel.setRunningJobs(0);
+ queueStatusModel.setQueuedJobs(0);
+
queueStatusModel.setTime(System.currentTimeMillis());
+ queueStatusModels.add(queueStatusModel);
+
registryClient.registerQueueStatuses(queueStatusModels);
+ }
+ }
+
break;
case DEQUEUING:
try {