Filippo Giunchedi has uploaded a new change for review. https://gerrit.wikimedia.org/r/300863
Change subject: nagios_common: add check_prometheus_metric
......................................................................

nagios_common: add check_prometheus_metric

From 0.3.0:
https://github.com/prometheus/nagios_plugins/commit/f6ea7cc07075826a0ecf30595c95f974e55c5c95

Change-Id: I7adfd3c860049a562e8d6dfcedd7b7c54db9d96e
---
A modules/nagios_common/files/check_commands/check_prometheus_metric
A modules/nagios_common/files/check_commands/check_prometheus_metric.cfg
M modules/nagios_common/manifests/commands.pp
3 files changed, 204 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/63/300863/1

diff --git a/modules/nagios_common/files/check_commands/check_prometheus_metric b/modules/nagios_common/files/check_commands/check_prometheus_metric
new file mode 100644
index 0000000..5f73037
--- /dev/null
+++ b/modules/nagios_common/files/check_commands/check_prometheus_metric
@@ -0,0 +1,190 @@
+#!/bin/bash
+#
+# check_prometheus_metric.sh - nagios plugin wrapper for checking prometheus
+#                              metrics - requires curl and jq to be in $PATH
+
+# default configuration
+CURL=curl
+JQ=jq
+COMPARISON_METHOD=ge
+NAN_OK="false"
+
+# nagios status codes
+OK=0
+WARNING=1
+CRITICAL=2
+UNKNOWN=3
+
+
+function usage {
+
+  cat <<'EoL'
+
+  check_prometheus_metric.sh - simple prometheus metric extractor for nagios
+
+  usage:
+    check_prometheus_metric.sh -H HOST -q QUERY -w INT -c INT -n NAME [-m METHOD] [-O]
+
+  options:
+    -H HOST     URL of Prometheus host to query
+    -q QUERY    Prometheus query that returns a float or int
+    -w INT      Warning level value (must be zero or positive)
+    -c INT      Critical level value (must be zero or positive)
+    -n NAME     A name for the metric being checked
+    -m METHOD   Comparison method, one of gt, ge, lt, le, eq, ne
+                (defaults to ge unless otherwise specified)
+    -O          Accept NaN as an "OK" result
+
+EoL
+}
+
+
+function process_command_line {
+
+  while getopts ':H:q:w:c:m:n:O' OPT "$@"
+  do
+    case ${OPT} in
+      H) PROMETHEUS_SERVER="$OPTARG" ;;
+      q) PROMETHEUS_QUERY="$OPTARG" ;;
+      n) METRIC_NAME="$OPTARG" ;;
+
+      m) if [[ ${OPTARG} =~ ^([lg][et]|eq|ne)$ ]]
+         then
+           COMPARISON_METHOD=${OPTARG}
+         else
+           NAGIOS_SHORT_TEXT="invalid comparison method: ${OPTARG}"
+           NAGIOS_LONG_TEXT="$(usage)"
+           exit
+         fi
+         ;;
+
+      c) if [[ ${OPTARG} =~ ^[0-9]+$ ]]
+         then
+           CRITICAL_LEVEL=${OPTARG}
+         else
+           NAGIOS_SHORT_TEXT='-c CRITICAL_LEVEL requires an integer'
+           NAGIOS_LONG_TEXT="$(usage)"
+           exit
+         fi
+         ;;
+
+      w) if [[ ${OPTARG} =~ ^[0-9]+$ ]]
+         then
+           WARNING_LEVEL=${OPTARG}
+         else
+           NAGIOS_SHORT_TEXT='-w WARNING_LEVEL requires an integer'
+           NAGIOS_LONG_TEXT="$(usage)"
+           exit
+         fi
+         ;;
+
+      O) NAN_OK="true"
+         ;;
+
+      \?) NAGIOS_SHORT_TEXT="invalid option: -$OPTARG"
+          NAGIOS_LONG_TEXT="$(usage)"
+          exit
+          ;;
+
+      \:) NAGIOS_SHORT_TEXT="-$OPTARG requires an arguement"
+          NAGIOS_LONG_TEXT="$(usage)"
+          exit
+          ;;
+    esac
+  done
+
+  # check for missing parameters
+  if [[ -z ${PROMETHEUS_SERVER} ]] ||
+     [[ -z ${PROMETHEUS_QUERY} ]] ||
+     [[ -z ${METRIC_NAME} ]] ||
+     [[ -z ${WARNING_LEVEL} ]] ||
+     [[ -z ${CRITICAL_LEVEL} ]]
+  then
+    NAGIOS_SHORT_TEXT='missing required option'
+    NAGIOS_LONG_TEXT="$(usage)"
+    exit
+  fi
+}
+
+
+function on_exit {
+
+  if [[ -z ${NAGIOS_STATUS} ]]
+  then
+    NAGIOS_STATUS=UNKNOWN
+  fi
+
+  if [[ -z ${NAGIOS_SHORT_TEXT} ]]
+  then
+    NAGIOS_SHORT_TEXT='an unknown error occured'
+  fi
+
+  printf '%s - %s\n' ${NAGIOS_STATUS} "${NAGIOS_SHORT_TEXT}"
+
+  if [[ -n ${NAGIOS_LONG_TEXT} ]]
+  then
+    printf '%s\n' "${NAGIOS_LONG_TEXT}"
+  fi
+
+  exit ${!NAGIOS_STATUS} # hint: an indirect variable reference
+}
+
+
+function get_prometheus_result {
+
+  local _RESULT
+
+  _RESULT=$( ${CURL} -sgG --data-urlencode "query=${PROMETHEUS_QUERY}" "${PROMETHEUS_SERVER}/api/v1/query" | $JQ -r '.data.result[1]' )
+
+  # check result
+  if [[ ${_RESULT} =~ ^-?[0-9]+\.?[0-9]*$ ]]
+  then
+    printf '%.0F' ${_RESULT} # return an int if result is a number
+  else
+    case "${_RESULT}" in
+      +Inf) printf '%.0F' $(( ${WARNING_LEVEL} + ${CRITICAL_LEVEL} )) # something greater than either level
+            ;;
+      -Inf) printf -- '-1' # something smaller than any level
+            ;;
+      *) printf '%s' "${_RESULT}" # otherwise return as a string
+         ;;
+    esac
+  fi
+}
+
+# set up exit function
+trap on_exit EXIT TERM
+
+# process the cli options
+process_command_line "$@"
+
+# get the metric value from prometheus
+PROMETHEUS_RESULT="$( get_prometheus_result )"
+
+# check the value
+if [[ ${PROMETHEUS_RESULT} =~ ^-?[0-9]+$ ]]
+then
+  if eval [[ ${PROMETHEUS_RESULT} -${COMPARISON_METHOD} ${CRITICAL_LEVEL} ]]
+  then
+    NAGIOS_STATUS=CRITICAL
+    NAGIOS_SHORT_TEXT="${METRIC_NAME} is ${PROMETHEUS_RESULT}"
+  elif eval [[ ${PROMETHEUS_RESULT} -${COMPARISON_METHOD} $WARNING_LEVEL ]]
+  then
+    NAGIOS_STATUS=WARNING
+    NAGIOS_SHORT_TEXT="${METRIC_NAME} is ${PROMETHEUS_RESULT}"
+  else
+    NAGIOS_STATUS=OK
+    NAGIOS_SHORT_TEXT="${METRIC_NAME} is ${PROMETHEUS_RESULT}"
+  fi
+else
+  if [[ "${NAN_OK}" = "true" && "${PROMETHEUS_RESULT}" = "NaN" ]]
+  then
+    NAGIOS_STATUS=OK
+    NAGIOS_SHORT_TEXT="${METRIC_NAME} is ${PROMETHEUS_RESULT}"
+  else
+    NAGIOS_SHORT_TEXT="unable to parse prometheus response"
+    NAGIOS_LONG_TEXT="${METRIC_NAME} is ${PROMETHEUS_RESULT}"
+  fi
+fi
+
+exit
diff --git a/modules/nagios_common/files/check_commands/check_prometheus_metric.cfg b/modules/nagios_common/files/check_commands/check_prometheus_metric.cfg
new file mode 100644
index 0000000..6c5d826
--- /dev/null
+++ b/modules/nagios_common/files/check_commands/check_prometheus_metric.cfg
@@ -0,0 +1,10 @@
+define command {
+    command_name check_prometheus
+    command_line $USER1$/check_prometheus_metric.sh -H '$ARG1$' -q '$ARG2$' -w '$ARG3$' -c '$ARG4$' -n '$ARG5$' -m '$ARG6$'
+}
+
+# check_prometheus, treating a NaN result as ok
+define command {
+    command_name check_prometheus_nan_ok
+    command_line $USER1$/check_prometheus_metric.sh -H '$ARG1$' -q '$ARG2$' -w '$ARG3$' -c '$ARG4$' -n '$ARG5$' -m '$ARG6$' -O
+}
diff --git a/modules/nagios_common/manifests/commands.pp b/modules/nagios_common/manifests/commands.pp
index 0434d59..70cdfce 100644
--- a/modules/nagios_common/manifests/commands.pp
+++ b/modules/nagios_common/manifests/commands.pp
@@ -28,6 +28,9 @@
         # check_bgp/check_jnx_alarms
         'libnet-snmp-perl',
         'libtime-duration-perl',
+        # check_prometheus_metric
+        'jq',
+        'curl',
     ]:
         ensure => present,
     }
@@ -51,6 +54,7 @@
         'check_bgp',
         'check_jnx_alarms',
         'check_ores_workers',
+        'check_prometheus_metric',
     ] :
         require => File["${config_dir}/commands"],
         config_dir => $config_dir,

-- 
To view, visit https://gerrit.wikimedia.org/r/300863
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7adfd3c860049a562e8d6dfcedd7b7c54db9d96e
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Filippo Giunchedi <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
