Repository: eagle Updated Branches: refs/heads/master 3027e5fbc -> 579e83f5f
[EAGLE-912] make application health check message more readable Author: wujinhu <[email protected]> Closes #820 from wujinhu/EAGLE-912. Project: http://git-wip-us.apache.org/repos/asf/eagle/repo Commit: http://git-wip-us.apache.org/repos/asf/eagle/commit/579e83f5 Tree: http://git-wip-us.apache.org/repos/asf/eagle/tree/579e83f5 Diff: http://git-wip-us.apache.org/repos/asf/eagle/diff/579e83f5 Branch: refs/heads/master Commit: 579e83f5f78b79e61569d64708b895985ba1d72e Parents: 3027e5f Author: wujinhu <[email protected]> Authored: Mon Feb 20 15:13:25 2017 +0800 Committer: wujinhu <[email protected]> Committed: Mon Feb 20 15:13:25 2017 +0800 ---------------------------------------------------------------------- .../impl/ApplicationHealthCheckBase.java | 16 +++++++++++ ...adoopQueueRunningApplicationHealthCheck.java | 30 +++++++++++--------- .../MRHistoryJobApplicationHealthCheck.java | 30 +++++++++++--------- .../SparkHistoryJobApplicationHealthCheck.java | 30 +++++++++++--------- .../TopologyCheckApplicationHealthCheck.java | 4 +-- 5 files changed, 69 insertions(+), 41 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/eagle/blob/579e83f5/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckBase.java ---------------------------------------------------------------------- diff --git a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckBase.java b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckBase.java index 75b7c2d..c607fe7 100644 --- a/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckBase.java +++ b/eagle-core/eagle-app/eagle-app-base/src/main/java/org/apache/eagle/app/service/impl/ApplicationHealthCheckBase.java @@ -53,4 +53,20 @@ public abstract class ApplicationHealthCheckBase extends HealthCheck { } return sw.getBuffer().toString(); } + + protected String formatMillSeconds(long millseconds) { + millseconds = millseconds / 1000; + String result; + if (millseconds <= 60) { + result = millseconds + " seconds"; + } else if (millseconds > 60 && millseconds <= 3600) { + result = String.format("%.2f minutes", millseconds * 1.0 / 60); + } else if (millseconds > 3600 && millseconds <= 3600 * 24) { + result = String.format("%.2f hours", millseconds * 1.0 / 3600); + } else { + result = String.format("%.2f days", millseconds * 1.0 / 3600 / 24); + } + + return result; + } } http://git-wip-us.apache.org/repos/asf/eagle/blob/579e83f5/eagle-jpm/eagle-hadoop-queue/src/main/java/org/apache/eagle/hadoop/queue/HadoopQueueRunningApplicationHealthCheck.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-hadoop-queue/src/main/java/org/apache/eagle/hadoop/queue/HadoopQueueRunningApplicationHealthCheck.java b/eagle-jpm/eagle-hadoop-queue/src/main/java/org/apache/eagle/hadoop/queue/HadoopQueueRunningApplicationHealthCheck.java index 5a5d0ee..fd6a35e 100644 --- a/eagle-jpm/eagle-hadoop-queue/src/main/java/org/apache/eagle/hadoop/queue/HadoopQueueRunningApplicationHealthCheck.java +++ b/eagle-jpm/eagle-hadoop-queue/src/main/java/org/apache/eagle/hadoop/queue/HadoopQueueRunningApplicationHealthCheck.java @@ -69,24 +69,28 @@ public class HadoopQueueRunningApplicationHealthCheck extends ApplicationHealthC GenericServiceAPIResponseEntity response = client .search(query) .metricName(HadoopClusterConstants.MetricName.HADOOP_CLUSTER_ALLOCATED_MEMORY) - .startTime(System.currentTimeMillis() - 24 * 60 * 60000L) + .startTime(System.currentTimeMillis() - 30 * 24 * 60 * 60000L) .endTime(System.currentTimeMillis()) .pageSize(10) .send(); List<Map<List<String>, List<Double>>> results = response.getObj(); - long currentProcessTimeStamp = results.get(0).get("value").get(0).longValue(); - long currentTimeStamp = System.currentTimeMillis(); - long maxDelayTime = DEFAULT_MAX_DELAY_TIME; - if (hadoopQueueRunningAppConfig.getConfig().hasPath(MAX_DELAY_TIME_KEY)) { - maxDelayTime = hadoopQueueRunningAppConfig.getConfig().getLong(MAX_DELAY_TIME_KEY); - } + try { + long currentProcessTimeStamp = results.get(0).get("value").get(0).longValue(); + long maxDelayTime = DEFAULT_MAX_DELAY_TIME; + long currentTimeStamp = System.currentTimeMillis(); + if (hadoopQueueRunningAppConfig.getConfig().hasPath(MAX_DELAY_TIME_KEY)) { + maxDelayTime = hadoopQueueRunningAppConfig.getConfig().getLong(MAX_DELAY_TIME_KEY); + } - if (!message.isEmpty() || currentTimeStamp - currentProcessTimeStamp > maxDelayTime) { - message += String.format("Current process time is %sms, delay %s minutes.", - currentProcessTimeStamp, (currentTimeStamp - currentProcessTimeStamp) * 1.0 / 60000L); - return Result.unhealthy(message); - } else { - return Result.healthy(); + if (!message.isEmpty() || currentTimeStamp - currentProcessTimeStamp > maxDelayTime) { + message += String.format("Current process time is %sms, delay %s.", + currentProcessTimeStamp, formatMillSeconds(currentTimeStamp - currentProcessTimeStamp)); + return Result.unhealthy(message); + } else { + return Result.healthy(); + } + } catch (Exception e) { + return Result.unhealthy("delay more than 30 days"); } } catch (Exception e) { return Result.unhealthy(printMessages(message, "An exception was caught when fetch application current process time: ", ExceptionUtils.getStackTrace(e))); http://git-wip-us.apache.org/repos/asf/eagle/blob/579e83f5/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/MRHistoryJobApplicationHealthCheck.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/MRHistoryJobApplicationHealthCheck.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/MRHistoryJobApplicationHealthCheck.java index c3d08d4..4016e6d 100644 --- a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/MRHistoryJobApplicationHealthCheck.java +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/MRHistoryJobApplicationHealthCheck.java @@ -66,25 +66,29 @@ public class MRHistoryJobApplicationHealthCheck extends ApplicationHealthCheckBa GenericServiceAPIResponseEntity response = client .search(query) - .startTime(System.currentTimeMillis() - 24 * 60 * 60000L) + .startTime(System.currentTimeMillis() - 30 * 24 * 60 * 60000L) .endTime(System.currentTimeMillis()) .pageSize(10) .send(); List<Map<List<String>, List<Double>>> results = response.getObj(); - long currentProcessTimeStamp = results.get(0).get("value").get(0).longValue(); - long currentTimeStamp = System.currentTimeMillis(); - long maxDelayTime = DEFAULT_MAX_DELAY_TIME; - if (mrHistoryJobConfig.getConfig().hasPath(MAX_DELAY_TIME_KEY)) { - maxDelayTime = mrHistoryJobConfig.getConfig().getLong(MAX_DELAY_TIME_KEY); - } + try { + long currentProcessTimeStamp = results.get(0).get("value").get(0).longValue(); + long currentTimeStamp = System.currentTimeMillis(); + long maxDelayTime = DEFAULT_MAX_DELAY_TIME; + if (mrHistoryJobConfig.getConfig().hasPath(MAX_DELAY_TIME_KEY)) { + maxDelayTime = mrHistoryJobConfig.getConfig().getLong(MAX_DELAY_TIME_KEY); + } - if (!message.isEmpty() || currentTimeStamp - currentProcessTimeStamp > maxDelayTime) { - message += String.format("Current process time is %sms, delay %s hours.", - currentProcessTimeStamp, (currentTimeStamp - currentProcessTimeStamp) * 1.0 / 60000L / 60); - return Result.unhealthy(message); - } else { - return Result.healthy(); + if (!message.isEmpty() || currentTimeStamp - currentProcessTimeStamp > maxDelayTime) { + message += String.format("Current process time is %sms, delay %s.", + currentProcessTimeStamp, formatMillSeconds(currentTimeStamp - currentProcessTimeStamp)); + return Result.unhealthy(message); + } else { + return Result.healthy(); + } + } catch (Exception e) { + return Result.unhealthy("delay more than 30 days"); } } catch (Exception e) { return Result.unhealthy(printMessages(message, "An exception was caught when fetch application current process time: ", ExceptionUtils.getStackTrace(e))); http://git-wip-us.apache.org/repos/asf/eagle/blob/579e83f5/eagle-jpm/eagle-jpm-spark-history/src/main/java/org/apache/eagle/jpm/spark/history/SparkHistoryJobApplicationHealthCheck.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-spark-history/src/main/java/org/apache/eagle/jpm/spark/history/SparkHistoryJobApplicationHealthCheck.java b/eagle-jpm/eagle-jpm-spark-history/src/main/java/org/apache/eagle/jpm/spark/history/SparkHistoryJobApplicationHealthCheck.java index 8127aa2..594f87a 100644 --- a/eagle-jpm/eagle-jpm-spark-history/src/main/java/org/apache/eagle/jpm/spark/history/SparkHistoryJobApplicationHealthCheck.java +++ b/eagle-jpm/eagle-jpm-spark-history/src/main/java/org/apache/eagle/jpm/spark/history/SparkHistoryJobApplicationHealthCheck.java @@ -65,25 +65,29 @@ public class SparkHistoryJobApplicationHealthCheck extends ApplicationHealthChec GenericServiceAPIResponseEntity response = client .search(query) - .startTime(System.currentTimeMillis() - 12 * 60 * 60000L) + .startTime(System.currentTimeMillis() - 15 * 24 * 60 * 60000L) .endTime(System.currentTimeMillis()) .pageSize(10) .send(); List<Map<List<String>, List<Double>>> results = response.getObj(); - long currentProcessTimeStamp = results.get(0).get("value").get(0).longValue(); - long currentTimeStamp = System.currentTimeMillis(); - long maxDelayTime = DEFAULT_MAX_DELAY_TIME; - if (sparkHistoryJobAppConfig.getConfig().hasPath(MAX_DELAY_TIME_KEY)) { - maxDelayTime = sparkHistoryJobAppConfig.getConfig().getLong(MAX_DELAY_TIME_KEY); - } + try { + long currentProcessTimeStamp = results.get(0).get("value").get(0).longValue(); + long currentTimeStamp = System.currentTimeMillis(); + long maxDelayTime = DEFAULT_MAX_DELAY_TIME; + if (sparkHistoryJobAppConfig.getConfig().hasPath(MAX_DELAY_TIME_KEY)) { + maxDelayTime = sparkHistoryJobAppConfig.getConfig().getLong(MAX_DELAY_TIME_KEY); + } - if (!message.isEmpty() || currentTimeStamp - currentProcessTimeStamp > maxDelayTime * 3) { - message += String.format("Current process time is %sms, delay %s hours.", - currentProcessTimeStamp, (currentTimeStamp - currentProcessTimeStamp) * 1.0 / 60000L / 60); - return Result.unhealthy(message); - } else { - return Result.healthy(); + if (!message.isEmpty() || currentTimeStamp - currentProcessTimeStamp > maxDelayTime * 3) { + message += String.format("Current process time is %sms, delay %s", + currentProcessTimeStamp, formatMillSeconds(currentTimeStamp - currentProcessTimeStamp)); + return Result.unhealthy(message); + } else { + return Result.healthy(); + } + } catch (Exception e) { + return Result.unhealthy("delay more than 15 days"); } } catch (Exception e) { return Result.unhealthy(printMessages(message, "An exception was caught when fetch application current process time: ", ExceptionUtils.getStackTrace(e))); http://git-wip-us.apache.org/repos/asf/eagle/blob/579e83f5/eagle-topology-check/eagle-topology-app/src/main/java/org/apache/eagle/topology/TopologyCheckApplicationHealthCheck.java ---------------------------------------------------------------------- diff --git a/eagle-topology-check/eagle-topology-app/src/main/java/org/apache/eagle/topology/TopologyCheckApplicationHealthCheck.java b/eagle-topology-check/eagle-topology-app/src/main/java/org/apache/eagle/topology/TopologyCheckApplicationHealthCheck.java index 950bb04..8f68398 100644 --- a/eagle-topology-check/eagle-topology-app/src/main/java/org/apache/eagle/topology/TopologyCheckApplicationHealthCheck.java +++ b/eagle-topology-check/eagle-topology-app/src/main/java/org/apache/eagle/topology/TopologyCheckApplicationHealthCheck.java @@ -71,8 +71,8 @@ public class TopologyCheckApplicationHealthCheck extends ApplicationHealthCheckB } if (!message.isEmpty() || currentTimeStamp - currentProcessTimeStamp > maxDelayTime) { - message += String.format("Current process time is %sms, delay %s minutes.", - currentProcessTimeStamp, (currentTimeStamp - currentProcessTimeStamp) * 1.0 / 60000L); + message += String.format("Current process time is %sms, delay %s.", + currentProcessTimeStamp, formatMillSeconds(currentTimeStamp - currentProcessTimeStamp)); return Result.unhealthy(message); } else { return Result.healthy();
