Updated Branches: refs/heads/trunk f547305d2 -> dd35a6d54
AMBARI-2867. WARNING error in nagios alert for resourcemanager. (Vitaly Brodetskyi via odiachenko)

Project: http://git-wip-us.apache.org/repos/asf/incubator-ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ambari/commit/dd35a6d5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ambari/tree/dd35a6d5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ambari/diff/dd35a6d5

Branch: refs/heads/trunk
Commit: dd35a6d54413a35bac26cd0503483b371a66e9be
Parents: f547305
Author: Oleksandr Diachenko <[email protected]>
Authored: Mon Aug 12 20:07:37 2013 +0300
Committer: Oleksandr Diachenko <[email protected]>
Committed: Mon Aug 12 20:07:37 2013 +0300

----------------------------------------------------------------------
 .../hdp-nagios/files/check_nodemanager_health.sh | 4 ++--
 .../check_resourcemanager_nodes_percentage.sh | 17 +++++++++++++----
 2 files changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/dd35a6d5/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
index ca13909..2a26f4e 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
@@ -23,10 +23,10 @@ HOST=$1
 PORT=$2
 NODEMANAGER_URL="http://$HOST:$PORT/ws/v1/node/info"
 export PATH="/usr/bin:$PATH"
-RESPONSE=`curl $NODEMANAGER_URL`
+RESPONSE=`curl -s $NODEMANAGER_URL`
 if [[ "$RESPONSE" == *'"nodeHealthy":true'* ]]; then
   echo "OK: nodemanager healthy true";
   exit 0;
 fi
 echo "CRITICAL: nodemanager healthy false";
-exit 2;
\ No newline at end of file
+exit 2;

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/dd35a6d5/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
index 48a2aae..cc899fa 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_resourcemanager_nodes_percentage.sh
@@ -26,18 +26,27 @@ NODE_STATUS=$3
 WARN_PERCENT=$4
 CRIT_PERCENT=$5
 NODES="Nodes"
+
 RESOURCEMANAGER_URL="http://$HOST:$PORT/ws/v1/cluster/metrics"
 export PATH="/usr/bin:$PATH"
-RESPONSE=`curl $RESOURCEMANAGER_URL`
-#code below is parsing RESPONSE that we get from resourcemanager api, for number between "totalNodes": and ','
-TOTAL_NODES_NUM=`echo "$RESPONSE" | sed -nre 's/^.*"totalNodes":([[:digit:]]+).*$/\1/gp'`
+RESPONSE=`curl -s $RESOURCEMANAGER_URL`
+
+#code below is parsing RESPONSE that we get from resourcemanager api, for number between "activeNodes": and ','
+ACTIVE_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"activeNodes":([[:digit:]]+).*$/\1/gp'`
+LOST_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"lostNodes":([[:digit:]]+).*$/\1/gp'`
+UNHEALTHY_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"unhealthyNodes":([[:digit:]]+).*$/\1/gp'`
+DECOMMISSIONED_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"decommissionedNodes":([[:digit:]]+).*$/\1/gp'`
+REBOOTED_NODES=`echo "$RESPONSE" | sed -nre 's/^.*"rebootedNodes":([[:digit:]]+).*$/\1/gp'`
+
+TOTAL_NODES_NUM=$(($ACTIVE_NODES+$LOST_NODES+$UNHEALTHY_NODES+$DECOMMISSIONED_NODES+$REBOOTED_NODES))
 NODES_NUM=`echo "$RESPONSE" | sed -nre "s/^.*\"$NODE_STATUS$NODES\":([[:digit:]]+).*$/\1/gp"`
 PERCENT=$(($NODES_NUM*100/$TOTAL_NODES_NUM))
+
 if [[ "$PERCENT" -lt "$WARN_PERCENT" ]]; then
   echo "OK: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"
   exit 0;
 elif [[ "$PERCENT" -lt "$CRIT_PERCENT" ]]; then
-  echo "WARN: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"
+  echo "WARNING: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"
   exit 1;
 else
   echo "CRITICAL: total:<$TOTAL_NODES_NUM>, affected:<$NODES_NUM>"
