Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/107887


Change subject: Adding define monitor_ganglia
......................................................................

Adding define monitor_ganglia

This define is a wrapper around monitor_service that allows
icinga/nagios monitoring of arbitrary metric values in
Ganglia.  This will allow monitoring of Ganglia values
without having to add new entries to checkcommands.cfg.erb.

Change-Id: If8da7c36e5f16819b3c974940f9ce3ba800fa6fb
---
M manifests/nagios.pp
M templates/icinga/checkcommands.cfg.erb
2 files changed, 115 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/87/107887/1

diff --git a/manifests/nagios.pp b/manifests/nagios.pp
index bd23ac8..f22e3a7 100644
--- a/manifests/nagios.pp
+++ b/manifests/nagios.pp
@@ -265,3 +265,111 @@
         check_command => 'nrpe_check_zfs',
     }
 }
+
+# == Define monitor_ganglia
+# Wrapper for monitor_service using check_ganglia command.
+# This allows you to monitor arbitrary values in ganglia
+# with icinga without having to add entries to checkcommands.cfg.erb
+#
+# Specifying threshold values
+# ===========================
+#
+# (This is extracted from ``check_gmond.checkval``; see the embedded
+# documentation for the most current version).
+#
+# The arguments to the ``-w`` and ``-c`` options use the following syntax:
+#
+# For numeric values
+# ------------------
+# - 5       -- match if v >= 5
+# - 3:5     -- match if 3 <= v <= 5
+# - :5      -- match if v <=5
+# - 1,2,3   -- match if v in (1,2,3)
+#
+# For string values
+# ------------------
+# - foo     -- match if v == foo
+# - foo,bar -- match if v in (foo, bar)
+#
+# Negation
+# --------
+# You can negate a threshold expression by preceding it with '!'.  For
+# example:
+#
+# - !5      -- match if v < 5
+# - !3:5    -- match if v<3 || v>5
+# - !1,2,3  -- match if v not in (1,2,3)
+#
+# ( Pasted from
+# https://github.com/wikimedia/operations-debs-check_ganglia#specifying-threshold-values )
+#
+# == Usage
+#   # Alert if free space in HDFS is less than 1TB
+#   monitor_ganglia { 'hdfs-capacity-remaining':
+#       description          => 'GB free in HDFS',
+#       metric               => 'Hadoop.NameNode.FSNamesystem.CapacityRemainingGB',
+#       warning_threshold    => ':1024',
+#       critical_threshold   => ':512',
+#   }
+#
+# == Parameters
+# $description          - Description of icinga alert
+# $metric               - ganglia metric name
+# $warning_threshold    - alert warning threshold
+# $critical_threshold   - alert critical threshold
+# $gmetad_host          - Default: 'nickel.wikimedia.org'
+# $gmetad_query_port    - gmetad XML query interface port.  Default: 8654
+# $host
+# $retries
+# $group
+# $ensure
+# $critical
+# $passive
+# $freshness
+# $normal_check_interval
+# $retry_check_interval
+# $contact_group
+#
+define monitor_ganglia(
+    $description,
+    $metric,
+    $warning_threshold,
+    $critical_threshold,
+    $gmetad_host           = 'nickel.wikimedia.org',
+    $gmetad_query_port     = 8654,
+    $host                  = $::hostname,
+    $retries               = 3,
+    $group                 = $nagios_group,
+    $ensure                = present,
+    $critical              = 'false',
+    $passive               = 'false',
+    $freshness             = 36000,
+    $normal_check_interval = 1,
+    $retry_check_interval  = 1,
+    $contact_group         = 'admins'
+)
+{
+    Class['icinga::ganglia::check'] -> Monitor_ganglia[$title]
+
+    # checkcommands.cfg's check_ganglia command has
+    # many positional arguments that
+    # are passed to check_ganglia script:
+    #   $ARG1$  -g gmetad host
+    #   $ARG2$  -p gmetad xml query port
+    #   $ARG3$  -m ganglia metric name
+    #   $ARG4$  -w warning threshold
+    #   $ARG5$  -c critical threshold
+
+     monitor_service { $title:
+         ensure                => $ensure,
+         description           => $description,
+         check_command         => 
"check_ganglia!${gmetad_host}!${gmetad_query_port}!${metric}!${warning_threshold}!${critical_threshold}",
+         retries               => $retries,
+         group                 => $group,
+         critical              => $critical,
+         passive               => $passive,
+         freshness             => $freshness,
+         normal_check_interval => $normal_check_interval,
+         retry_check_interval  => $retry_check_interval,
+         contact_group         => $contact_group,
+     }
+}
diff --git a/templates/icinga/checkcommands.cfg.erb 
b/templates/icinga/checkcommands.cfg.erb
index 9fe2110..45b6aa1 100644
--- a/templates/icinga/checkcommands.cfg.erb
+++ b/templates/icinga/checkcommands.cfg.erb
@@ -501,11 +501,17 @@
        command_line    $USER1$/check_ganglios_generic_value -H $HOSTADDRESS$ 
-m packet_loss_average -w $ARG1$ -c $ARG2$ -o gt
 }
 
-
 define command{
        command_name    check_memory_used
        command_line    $USER3$/check_ganglios_memory_v2 -H $HOSTADDRESS -w 
$ARG1$ -c $ARG2$
 }
+
+# check arbitrary ganglia metric values
+define command{
+       command_name    check_ganglia
+       command_line    $USER1$/check_ganglia -q -g $ARG1$ -p $ARG2$ -H 
$HOSTADDRESS$ -m '$ARG3$' -w '$ARG4$' -c '$ARG5$'
+}
+
 # percona mysql checks
 define command{
        command_name    nrpe_check_lvs

-- 
To view, visit https://gerrit.wikimedia.org/r/107887
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If8da7c36e5f16819b3c974940f9ce3ba800fa6fb
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <o...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to