This is an automated email from the ASF dual-hosted git repository.
isjarana pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/airavata.git
The following commit(s) were added to refs/heads/develop by this push:
new db8bf345f8 cleaning up metascheduler multiple CRs
new e7d9b90c20 Merge pull request #407 from isururanawaka/metaschedular
db8bf345f8 is described below
commit db8bf345f8398ee8858998a4abaad12a083aace1
Author: Isuru Ranawaka <[email protected]>
AuthorDate: Fri Mar 10 14:43:44 2023 -0500
cleaning up metascheduler multiple CRs
---
.../scigap/develop/group_vars/all/vars.yml | 12 ++++----
.../templates/airavata-server.properties.j2 | 8 +++++
.../selection/MultipleComputeResourcePolicy.java | 24 +--------------
.../rescheduler/ExponentialBackOffReScheduler.java | 36 ++++++++++++----------
.../src/main/resources/airavata-server.properties | 23 +++++++++++++-
5 files changed, 57 insertions(+), 46 deletions(-)
diff --git
a/dev-tools/ansible/inventories/scigap/develop/group_vars/all/vars.yml
b/dev-tools/ansible/inventories/scigap/develop/group_vars/all/vars.yml
index 4fc3f3bc90..e61119b31c 100644
--- a/dev-tools/ansible/inventories/scigap/develop/group_vars/all/vars.yml
+++ b/dev-tools/ansible/inventories/scigap/develop/group_vars/all/vars.yml
@@ -238,13 +238,13 @@ zabbix_server: "rt-watch.uits.indiana.edu"
#variables for metscheduler
-cluster_monitoring_enabled: false
-metascheduler_resource_selection_policy_class:
org.apache.airavata.metascheduler.process.scheduling.cr.DefaultComputeResourceSelectionPolicy
-metascheduler_gateway: seagrid
-metascheduler_group_resource_profile: a2076a5a-0fbf-44f4-9d47-060153bc578b
+cluster_monitoring_enabled: true
+metascheduler_resource_selection_policy_class:
org.apache.airavata.metascheduler.process.scheduling.engine.cr.selection.MultipleComputeResourcePolicy
+metascheduler_gateway: dev-ultrascan
+metascheduler_group_resource_profile: bd83e541-fb7b-4878-a11b-dc56696700d5
metascheduler_username: metascheacc
-cluster_scanning_interval: 1800000
+cluster_scanning_interval: 7800
cluster_scanning_parallel_jobs: 1
-metaschedluer_job_scanning_enable: false
+metaschedluer_job_scanning_enable: true
metascheduler_maximum_rescheduler_threshold: 5
compute_resource_rescheduler_policy_class:
org.apache.airavata.metascheduler.process.scheduling.rescheduler.ExponentialBackOffReScheduler
\ No newline at end of file
diff --git
a/dev-tools/ansible/roles/api-orch/templates/airavata-server.properties.j2
b/dev-tools/ansible/roles/api-orch/templates/airavata-server.properties.j2
index 0f2644f7ce..3d71434cc2 100644
--- a/dev-tools/ansible/roles/api-orch/templates/airavata-server.properties.j2
+++ b/dev-tools/ansible/roles/api-orch/templates/airavata-server.properties.j2
@@ -353,3 +353,11 @@ cluster.scanning.parallel.jobs={{
cluster_scanning_parallel_jobs }}
metaschedluer.job.scanning.enable={{ metaschedluer_job_scanning_enable }}
metascheduler.maximum.rescheduler.threshold = {{
metascheduler_maximum_rescheduler_threshold }}
compute.resource.rescheduler.policy.class = {{
compute_resource_rescheduler_policy_class }}
+data.analyzer.scanning.interval=3600
+data.analyzer.scanning.parallel.jobs=1
+data.analyzer.enabled.gateways=dev-ultrascan
+data.analyzer.time.step.seconds=5
+data.analyzer.job.scanning.enable=False
+metascheduler.multiple.cr.enabling.factor=1
+
+
diff --git
a/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/cr/selection/MultipleComputeResourcePolicy.java
b/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/cr/selection/MultipleComputeResourcePolicy.java
index 559930b3e2..6a2bd22f80 100644
---
a/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/cr/selection/MultipleComputeResourcePolicy.java
+++
b/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/cr/selection/MultipleComputeResourcePolicy.java
@@ -64,29 +64,7 @@ public class MultipleComputeResourcePolicy extends
ComputeResourceSelectionPolic
}
}
-// int crPoolFraction =
ServerSettings.getMetaschedulerMultipleCREnablingFactor();
-//
-// List<ComputeResourcePolicy> policyList = registryClient.
-//
getGroupComputeResourcePolicyList(processModel.getGroupResourceProfileId());
-//
-// int count = 0;
-// int maxCount = (int) (policyList.size() * crPoolFraction);
-//
-// while (count < maxCount) {
-// ComputeResourcePolicy resourcePolicy =
policyList.get(count);
-// List<String> queues =
resourcePolicy.getAllowedBatchQueues();
-//
-// String computeResourceId =
resourcePolicy.getComputeResourceId();
-// ComputeResourceDescription comResourceDes =
registryClient.getComputeResource(computeResourceId);
-//
-// if (!queues.isEmpty()) {
-// QueueStatusModel queueStatusModel =
registryClient.getQueueStatus(comResourceDes.getHostName(), queues.get(0));
-// if (queueStatusModel.isQueueUp()) {
-// return
Optional.of(computationalResourceSchedulingModel);
-// }
-// }
-// count++;
-// }
+
} catch (Exception exception) {
LOGGER.error(" Exception occurred while scheduling Process with Id
{}", processId, exception);
this.registryClientPool.returnBrokenResource(registryClient);
diff --git
a/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ExponentialBackOffReScheduler.java
b/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ExponentialBackOffReScheduler.java
index 1bd8c88852..16d82c72b2 100644
---
a/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ExponentialBackOffReScheduler.java
+++
b/modules/airavata-metascheduler/process-scheduler/src/main/java/org/apache/airavata/metascheduler/process/scheduling/engine/rescheduler/ExponentialBackOffReScheduler.java
@@ -9,6 +9,7 @@ import org.apache.airavata.metascheduler.core.utils.Utils;
import org.apache.airavata.model.application.io.InputDataObjectType;
import org.apache.airavata.model.error.ExperimentNotFoundException;
import org.apache.airavata.model.experiment.ExperimentModel;
+import org.apache.airavata.model.experiment.UserConfigurationDataModel;
import org.apache.airavata.model.process.ProcessModel;
import
org.apache.airavata.model.scheduling.ComputationalResourceSchedulingModel;
import org.apache.airavata.model.status.ProcessState;
@@ -20,6 +21,7 @@ import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
@@ -122,15 +124,15 @@ public class ExponentialBackOffReScheduler implements
ReScheduler {
if (computationalResourceSchedulingModel.isPresent()) {
ComputationalResourceSchedulingModel resourceSchedulingModel =
computationalResourceSchedulingModel.get();
-// List<InputDataObjectType> inputDataObjectTypeList =
experimentModel.getExperimentInputs();
-// inputDataObjectTypeList.forEach(obj -> {
-// if (obj.getName().equals("Wall_Time")) {
-// obj.setValue("-walltime=" +
resourceSchedulingModel.getWallTimeLimit());
-// }
-// if (obj.getName().equals("Parallel_Group_Count")) {
-// obj.setValue("-mgroupcount=" +
resourceSchedulingModel.getMGroupCount());
-// }
-// });
+ List<InputDataObjectType> inputDataObjectTypeList =
experimentModel.getExperimentInputs();
+ inputDataObjectTypeList.forEach(obj -> {
+ if (obj.getName().equals("Wall_Time")) {
+ obj.setValue("-walltime=" +
resourceSchedulingModel.getWallTimeLimit());
+ }
+ if (obj.getName().equals("Parallel_Group_Count")) {
+ obj.setValue("-mgroupcount=" +
resourceSchedulingModel.getMGroupCount());
+ }
+ });
List<InputDataObjectType> processInputDataObjectTypeList =
processModel.getProcessInputs();
processInputDataObjectTypeList.forEach(obj->{
@@ -143,17 +145,19 @@ public class ExponentialBackOffReScheduler implements
ReScheduler {
});
processModel.setProcessInputs(processInputDataObjectTypeList);
-// experimentModel.setExperimentInputs(inputDataObjectTypeList);
-// experimentModel.getProcesses().forEach(pr->{
-// if (pr.getProcessId().equals(processModel.getProcessId())){
-// pr.setProcessInputs(processModel.getProcessInputs());
-// }
-// });
+ experimentModel.setExperimentInputs(inputDataObjectTypeList);
+
+ //update experiment model with selected compute resource
+ experimentModel.setProcesses(new ArrayList<>()); // avoid
duplication issues
+ UserConfigurationDataModel userConfigurationDataModel =
experimentModel.getUserConfigurationData();
+
userConfigurationDataModel.setComputationalResourceScheduling(resourceSchedulingModel);
+
experimentModel.setUserConfigurationData(userConfigurationDataModel);
+
registryClient.updateExperiment(processModel.getExperimentId(),experimentModel);
processModel.setProcessResourceSchedule(resourceSchedulingModel);
processModel.setComputeResourceId(resourceSchedulingModel.getResourceHostId());
registryClient.updateProcess(processModel,
processModel.getProcessId());
-// registryClient.updateExperiment(processModel.getExperimentId(),
experimentModel);
+
}
}
}
diff --git
a/modules/configuration/server/src/main/resources/airavata-server.properties
b/modules/configuration/server/src/main/resources/airavata-server.properties
index 7800d1907b..681e698435 100644
--- a/modules/configuration/server/src/main/resources/airavata-server.properties
+++ b/modules/configuration/server/src/main/resources/airavata-server.properties
@@ -54,7 +54,7 @@ super.tenant.gatewayId=php_reference_gateway
# Properties for cluster status monitoring
# cluster status monitoring job repeat time in seconds
-cluster.status.monitoring.enable=false
+#cluster.status.monitoring.enable=false
cluster.status.monitoring.repeat.time=18000
###########################################################################
@@ -362,3 +362,24 @@
db_event_manager=org.apache.airavata.db.event.manager.DBEventManagerRunner
# ThriftClientPool Configuration
###########################################################################
thrift.client.pool.abandoned.removal.enabled=false
+
+###########################################################################
+# Metascheduler And Compute Resource Monitoring Configuration
+###########################################################################
+cluster.status.monitoring.enable=True
+compute.resource.selection.policy.class=org.apache.airavata.metascheduler.process.scheduling.engine.cr.selection.MultipleComputeResourcePolicy
+metascheduler.gateway=
+metascheduler.group.resource.profile=
+metascheduler.username=metascheacc
+cluster.scanning.interval=7800
+cluster.scanning.parallel.jobs=1
+metaschedluer.job.scanning.enable=True
+metascheduler.maximum.rescheduler.threshold = 5
+compute.resource.rescheduler.policy.class
=org.apache.airavata.metascheduler.process.scheduling.engine.rescheduler.ExponentialBackOffReScheduler
+data.analyzer.scanning.interval=3600
+data.analyzer.scanning.parallel.jobs=1
+data.analyzer.enabled.gateways=
+data.analyzer.time.step.seconds=5
+data.analyzer.job.scanning.enable=False
+metascheduler.multiple.cr.enabling.factor=1
+