Repository: airavata Updated Branches: refs/heads/develop 9ee4022f5 -> db8beb69e
adding module to parent pom Project: http://git-wip-us.apache.org/repos/asf/airavata/repo Commit: http://git-wip-us.apache.org/repos/asf/airavata/commit/b5d77963 Tree: http://git-wip-us.apache.org/repos/asf/airavata/tree/b5d77963 Diff: http://git-wip-us.apache.org/repos/asf/airavata/diff/b5d77963 Branch: refs/heads/develop Commit: b5d7796386a4e3019e56088a7f6bf512256f717e Parents: 4d9c56e Author: scnakandala <[email protected]> Authored: Tue Nov 8 12:48:41 2016 -0500 Committer: scnakandala <[email protected]> Committed: Tue Nov 8 12:48:41 2016 -0500 ---------------------------------------------------------------------- modules/cluster-monitoring/pom.xml | 51 ++++ .../monitoring/ClusterHealthMonitor.java | 265 +++++++++++++++++++ .../src/main/resources/cluster-properties.json | 53 ++++ .../src/main/resources/id_rsa | 30 +++ .../src/main/resources/id_rsa.pub | 1 + pom.xml | 1 + 6 files changed, 401 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/airavata/blob/b5d77963/modules/cluster-monitoring/pom.xml ---------------------------------------------------------------------- diff --git a/modules/cluster-monitoring/pom.xml b/modules/cluster-monitoring/pom.xml new file mode 100644 index 0000000..0fb9e40 --- /dev/null +++ b/modules/cluster-monitoring/pom.xml @@ -0,0 +1,51 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>airavata</artifactId> + <groupId>org.apache.airavata</groupId> + <version>0.17-SNAPSHOT</version> + <relativePath>../../pom.xml</relativePath> + </parent> + <modelVersion>4.0.0</modelVersion> + + <artifactId>cluster-monitoring</artifactId> + + <dependencies> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-io</artifactId> + <version>1.3.2</version> + </dependency> + <dependency> + <groupId>com.jcraft</groupId> + <artifactId>jsch</artifactId> + <version>0.1.50</version> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>jcl-over-slf4j</artifactId> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>com.google.code.gson</groupId> + <artifactId>gson</artifactId> + <version>2.3.1</version> + </dependency> + </dependencies> + + +</project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/airavata/blob/b5d77963/modules/cluster-monitoring/src/main/java/org/apache/airavata/cluster/monitoring/ClusterHealthMonitor.java ---------------------------------------------------------------------- diff --git a/modules/cluster-monitoring/src/main/java/org/apache/airavata/cluster/monitoring/ClusterHealthMonitor.java b/modules/cluster-monitoring/src/main/java/org/apache/airavata/cluster/monitoring/ClusterHealthMonitor.java new file mode 100644 index 0000000..2f1dc1d --- /dev/null +++ b/modules/cluster-monitoring/src/main/java/org/apache/airavata/cluster/monitoring/ClusterHealthMonitor.java @@ -0,0 +1,265 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ +package org.apache.airavata.cluster.monitoring; + + +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; +import com.jcraft.jsch.*; +import org.apache.commons.io.IOUtils; +import org.apache.log4j.Logger; + +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +public class ClusterHealthMonitor { + private static final Logger logger = Logger.getLogger(ClusterHealthMonitor.class); + + public static void main(String[] args) throws IOException { + + byte[] publicKeyBytes = IOUtils.toByteArray(ClusterHealthMonitor.class.getResourceAsStream("/id_rsa.pub")); + byte[] privateKeyBytes = IOUtils.toByteArray(ClusterHealthMonitor.class.getResourceAsStream("/id_rsa")); + String passPhrase = "ultrascan"; + + Gson gson = new Gson(); + List<ComputeResourceProfile> computeResourceProfiles = gson.fromJson(new FileReader(ClusterHealthMonitor.class + .getResource("/cluster-properties.json").getFile()), new TypeToken<List<ComputeResourceProfile>>(){}.getType()); + + ArrayList<QueueStatus> queueStatuses = new ArrayList<>(); + + for(ComputeResourceProfile computeResourceProfile : computeResourceProfiles){ + + String userName = computeResourceProfile.getUserName(); + String hostName = computeResourceProfile.getHostName(); + int port = computeResourceProfile.getPort(); + + try{ + JSch jsch = new JSch(); + jsch.addIdentity(hostName, privateKeyBytes, publicKeyBytes, passPhrase.getBytes()); + + Session session=jsch.getSession(userName, hostName, port); + java.util.Properties config = new java.util.Properties(); + config.put("StrictHostKeyChecking", "no"); + session.setConfig(config); + + + logger.debug("Connected to " + hostName); + + session.connect(); + for(String queue : computeResourceProfile.getQueueNames()) { + String command = ""; + if (computeResourceProfile.getResourceManagerType().equals("SLURM")) + command = "sinfo -s -p " + queue + " -o \"%a %F\" | tail -1"; + else if (computeResourceProfile.getResourceManagerType().equals("PBS")) + command = "qstat -Q " + queue + "| tail -1"; + + if (command.equals("")) { + logger.warn("No matching resource manager type found for " + computeResourceProfile.getResourceManagerType()); + continue; + } + + Channel channel = session.openChannel("exec"); + ((ChannelExec) channel).setCommand(command); + channel.setInputStream(null); + ((ChannelExec) channel).setErrStream(System.err); + InputStream in = channel.getInputStream(); + channel.connect(); + byte[] tmp = new byte[1024]; + String result = ""; + while (true) { + while (in.available() > 0) { + int i = in.read(tmp, 0, 1024); + if (i < 0) break; + result += new String(tmp, 0, i); + } + if (channel.isClosed()) { + if (in.available() > 0) continue; + logger.debug(hostName + " " + queue + " " + "exit-status: " + channel.getExitStatus()); + break; + } + try { + Thread.sleep(1000); + } catch (Exception ee) { + } + } + channel.disconnect(); + + if (result != null && result.length() > 0) { + QueueStatus queueStatus = null; + if (computeResourceProfile.getResourceManagerType().equals("SLURM")) { + String[] sparts = result.split(" "); + boolean isUp = sparts[0].equalsIgnoreCase("up"); + String knts = sparts[1]; + sparts = knts.split("/"); + int running = Integer.parseInt(sparts[0].trim()); + int queued = Integer.parseInt(sparts[1].trim()); + queueStatus = new QueueStatus(hostName, queue, isUp, running, queued, System.currentTimeMillis()); + + } else if (computeResourceProfile.getResourceManagerType().equals("PBS")) { + result = result.replaceAll("\\s+", " "); + String[] sparts = result.split(" "); + boolean isUp = sparts[3].equalsIgnoreCase("yes"); + int running = Integer.parseInt(sparts[6].trim()); + int queued = Integer.parseInt(sparts[5].trim()); + queueStatus = new QueueStatus(hostName, queue, isUp, running, queued, System.currentTimeMillis()); + } + + if (queueStatus != null) + queueStatuses.add(queueStatus); + } + } + session.disconnect(); + }catch (JSchException ex){ + logger.error(ex.getMessage(), ex); + } + } + + System.out.println(queueStatuses.size()); + + } + + private static class ComputeResourceProfile{ + + private String hostName; + private String userName; + private int port; + private List<String> queueNames; + private String resourceManagerType; + + public ComputeResourceProfile(String hostName, String userName, int port, List<String> queueNames, String resourceManagerType) { + this.hostName = hostName; + this.userName = userName; + this.port = port; + this.queueNames = queueNames; + this.resourceManagerType = resourceManagerType; + } + + public String getHostName() { + return hostName; + } + + public void setHostName(String hostName) { + this.hostName = hostName; + } + + public String getUserName() { + return userName; + } + + public void setUserName(String userName) { + this.userName = userName; + } + + public int getPort() { + return port; + } + + public void setPort(int port) { + this.port = port; + } + + public List<String> getQueueNames() { + return queueNames; + } + + public void setQueueNames(List<String> queueNames) { + this.queueNames = queueNames; + } + + public String getResourceManagerType() { + return resourceManagerType; + } + + public void setResourceManagerType(String resourceManagerType) { + this.resourceManagerType = resourceManagerType; + } + } + + private static class QueueStatus{ + + private String hostName; + private String queueName; + private boolean queueUp; + private int runningJobs; + private int queuedJobs; + private long time; + + public QueueStatus(String hostName, String queueName, boolean queueUp, int runningJobs, int queuedJobs, long time) { + this.hostName = hostName; + this.queueName = queueName; + this.queueUp = queueUp; + this.runningJobs = runningJobs; + this.queuedJobs = queuedJobs; + this.time = time; + } + + public String getHostName() { + return hostName; + } + + public void setHostName(String hostName) { + this.hostName = hostName; + } + + public String getQueueName() { + return queueName; + } + + public void setQueueName(String queueName) { + this.queueName = queueName; + } + + public boolean isQueueUp() { + return queueUp; + } + + public void setQueueUp(boolean queueUp) { + this.queueUp = queueUp; + } + + public int getRunningJobs() { + return runningJobs; + } + + public void setRunningJobs(int runningJobs) { + this.runningJobs = runningJobs; + } + + public int getQueuedJobs() { + return queuedJobs; + } + + public void setQueuedJobs(int queuedJobs) { + this.queuedJobs = queuedJobs; + } + + public long getTime() { + return time; + } + + public void setTime(long time) { + this.time = time; + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/airavata/blob/b5d77963/modules/cluster-monitoring/src/main/resources/cluster-properties.json ---------------------------------------------------------------------- diff --git a/modules/cluster-monitoring/src/main/resources/cluster-properties.json b/modules/cluster-monitoring/src/main/resources/cluster-properties.json new file mode 100644 index 0000000..f016565 --- /dev/null +++ b/modules/cluster-monitoring/src/main/resources/cluster-properties.json @@ -0,0 +1,53 @@ +[ + { + "hostName": "stampede.tacc.xsede.org", + "userName": "scigap", + "port": 22, + "queueNames": [ + "normal", + "development", + "gpu" + ], + "resourceManagerType": "SLURM" + }, + { + "hostName": "comet.sdsc.edu", + "userName": "scigap", + "port": 22, + "queueNames": [ + "compute", + "shared", + "gpu" + ], + "resourceManagerType": "SLURM" + }, + { + "hostName": "bigred2.uits.iu.edu", + "userName": "cgateway", + "port": 22, + "queueNames": [ + "gpu", + "cpu", + "serial" + ], + "resourceManagerType": "PBS" + }, + { + "hostName": "gordon.sdsc.edu", + "userName": "ogce", + "port": 22, + "queueNames": [ + "normal" + ], + "resourceManagerType": "PBS" + }, + { + "hostName": "karst.uits.iu.edu", + "userName": "seagrid", + "port": 22, + "queueNames": [ + "batch" + ], + "resourceManagerType": "PBS" + } +] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/airavata/blob/b5d77963/modules/cluster-monitoring/src/main/resources/id_rsa ---------------------------------------------------------------------- diff --git a/modules/cluster-monitoring/src/main/resources/id_rsa b/modules/cluster-monitoring/src/main/resources/id_rsa new file mode 100644 index 0000000..b5d1099 --- /dev/null +++ b/modules/cluster-monitoring/src/main/resources/id_rsa @@ -0,0 +1,30 @@ +-----BEGIN RSA PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: DES-EDE3-CBC,AF720E62F3BF175C + +81OQtQCUQNZ9SmbdeU6zh0mDpjfmaBcDu0lPs9P/GrjE/DkepgIdYyc6f/zKzDOe +V4SGtgO4Lazv1l/LSwjRLJ0FSheTFDAgCj5TN10Kza44dQ03rX8XIylqjpDoVbKb +S+CvDN+1HCfcmFfMrw/x3MvWt7BcBeOYcKtemRClfkSFSLqvZrEkRtO6TqHwIumd +mvSQTdez2MXECmlxMeLGAN88OBA3qXdG3q1k5eojm5/MM60rWNg5kW+aUIpjwCer +l7Z01e2WkMMkw7E6roO+pRV5UO53cgqQcoYjlwGngLXPQi5QBdD9JLbaxWSAS5OZ +H04BlEVFvbbSJIP2gKlKfFsclCtg5ldi5Sefhrqi2ewWAlL0ibVf0Ed83ygmm7DS +0ljFv+U3q1O62ODiQyLpxqUZkROTYy6u18B11ck6n1F625jEK3Sh8Jhu4svxIu4x +FTNJqZvG1vwPunePp6dzaBoSa6739mC1mSaDgaCvU2BxWfxN0ePKEumZg9NpskpK +Pu9lHAWkuWs+nBuPzcloiizzd9eC9am0fKRMdGAKM0bE0eHFmsSESkmPjxVyHsK8 +dOA22IX2Ars6utlF4JqSK/vD3frYOSVJ55hIhQrKj0Kq6TyTWv4RVKRolOiOioOY +7JSKCoYwR5eDA/UyLswBghydteyq1ljmOIE276xO4VnZAbK0gyhlKEReB8Gxbi80 +rtNe11wFKUu8OVUzm2bsDGYY6ZyvKnOXcL7/gwR7oObC0Y67tUkEXS15uiTH+mec +/YVji54TT5GX7BH23sT3DxoPzTKngjAzBiRKlKlJEh2H3fLYNB5xhyrxO9rSIsBD +y3nE1cSMUUVR/IPkJKKOmrZ8JfhwQpAiDLux1NBtveo342VY2twJs/U4Y6zYspcU +d0UPFGWWC/SuxVhR191LnuT5f90V8HMW2S7vohSXx60qoB0OvYOrclYFX9eqcw2v +e7ka/Rk922HcP1Yt5ak2pIIZMaTRWNBBE17EExmKkErzbJWosNDC/3HnmxkUb/v6 +3Y1wYxIjtcAjWjDQjstKGNty82zy8yeG9NSWTrMzljkosBLXrlVhn6VBqmqhUxUS +N6ma5ORuziL5FMdSrBsqqIs8Sam7JjVGUZmeGrTiOgQSUM5GV9EyML+jBBqj9/RD +CbFQCNBCVRNX2LLXHZGnODZ0i1TZ+P7Nap7TIaD1PJwoiFwl9gysf4WYJrWBqWU3 +ORPPWSGkWFdmXGOPyXQFSEMppdHagiFDgCC+5F9VfnrKkKrB+BMmLB8fh2BSmoQH +LkrXksTTVRDP/8SaIZs+KdBIEnL6UGLiej4p27bj0B9lG0nacyj8E8u0UfxTy2YS +dhdLeG55ZU+ori0HNBT4/bGY+0gMMMrmXhLYQSR/IoWU0hgzidM67ExYQ0tFpYx9 +g2AWMghQKWnSy9Cyi9sE2umXCISOV3jkPDClBKVxwrt/DTxZcUVzz5YqlZKMXIE3 +tTq4xHsRP/3KJOTun/2elmTFQ7Ml158df2dzTtKCRQ1la0YAMx4gdNWU8a/napN3 +g7l0KSygOWVPKuVFVbR6ZlLTzeJYpeR5ZOi5o4dRAfUB0qgblwn0Hg== +-----END RSA PRIVATE KEY----- \ No newline at end of file http://git-wip-us.apache.org/repos/asf/airavata/blob/b5d77963/modules/cluster-monitoring/src/main/resources/id_rsa.pub ---------------------------------------------------------------------- diff --git a/modules/cluster-monitoring/src/main/resources/id_rsa.pub b/modules/cluster-monitoring/src/main/resources/id_rsa.pub new file mode 100644 index 0000000..b8308db --- /dev/null +++ b/modules/cluster-monitoring/src/main/resources/id_rsa.pub @@ -0,0 +1 @@ +ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAzESt/JtW7JxJ0JNSW6570OQMYtmZjWw5zMLOVffsOoTbSZ6ZM/udT7mwHrlprIi40rvBKIeUFiyB5CeTwOz5Fok/j4D0uXVLIBqRoJdYlKRquyH5EDaZmY/+5BAjJmuwsOqO6rwAFy6KXnIWXafzixUXKKUxKbx5aRLnzDw+JXb8N/6wJcZQ9UhRbnMQkyb9wZxfv1lruVCmK+OrVShIMaRb1df36khY1uj53ISVjSkY1FD3mECUP1u0nHQiE6aqccAa+9+rPD+6lgXD7eljLUiIc9lb+JqYmzDYM/BC8NZegw1hw1hU22Y3Uq3nFec798CyD1PtNkIFKjxg1VFVNw== [email protected] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/airavata/blob/b5d77963/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 14918d4..b6f9969 100644 --- a/pom.xml +++ b/pom.xml @@ -634,6 +634,7 @@ <module>modules/workflow</module> <module>modules/test-suite</module> <module>modules/sharing-registry</module> + <module>modules/cluster-monitoring</module> <!-- Deprecated Modules--> <!--<module>modules/integration-tests</module>--> <!--<module>modules/workflow-model</module>-->
