Ottomata has submitted this change and it was merged.

Change subject: Fix for check_ganglia, removing unused checkcommands
......................................................................


Fix for check_ganglia, removing unused checkcommands

Cannot use $HOSTADDRESS$ as the argument to check_ganglia's -H flag,
because Ganglia identifies hosts by FQDN.  The FQDN is now passed in
manually as a positional argument macro.

Also renamed threshold parameters.

Change-Id: Ie52bd80b37159b0e074fe37514b7fe79373c27b7
---
M manifests/ganglia.pp
M manifests/misc/icinga.pp
M manifests/misc/udp2log.pp
M manifests/nagios.pp
M manifests/role/analytics/kafka.pp
M manifests/role/cache.pp
M templates/icinga/checkcommands.cfg.erb
7 files changed, 26 insertions(+), 44 deletions(-)

Approvals:
  Ottomata: Verified; Looks good to me, approved
  jenkins-bot: Verified



diff --git a/manifests/ganglia.pp b/manifests/ganglia.pp
index 6c859f8..9bf2f72 100644
--- a/manifests/ganglia.pp
+++ b/manifests/ganglia.pp
@@ -264,14 +264,13 @@
             case $::hostname {
                 # manutius runs gmetad to get varnish data into torrus
                 # unlike other servers, manutius uses the default rrd_rootdir
-                # neon needs gmetad for ganglios
                 /^manutius$/: {
                     $data_sources = {
                         "Upload caches eqiad" => "cp1048.eqiad.wmnet 
cp1061.eqiad.wmnet"
                     }
                     $rra_sizes = '"RRA:AVERAGE:0:1:4032" 
"RRA:AVERAGE:0.17:6:2016" "RRA:MAX:0.17:6:2016" "RRA:AVERAGE:0.042:288:732" 
"RRA:MAX:0.042:288:732"'
                 }
-                # neon runs gmetad for ganglios
+                # neon needs gmetad config for ganglios
                 /^neon$/: {
                     $data_sources = {
                         "Miscellaneous"                  => 
"tarin.pmtpa.wmnet",
diff --git a/manifests/misc/icinga.pp b/manifests/misc/icinga.pp
index 93ad0e3..e22954a 100644
--- a/manifests/misc/icinga.pp
+++ b/manifests/misc/icinga.pp
@@ -893,8 +893,6 @@
 # directly, rather than downloading and mangling
 # xmlfiles from each aggregator.
 #
-# TODO: will deprectate and remove ganglios soon.
-#
 class icinga::ganglia::check {
     package { 'check-ganglia':
         ensure  => 'installed',
diff --git a/manifests/misc/udp2log.pp b/manifests/misc/udp2log.pp
index ee941f0..db31cc0 100644
--- a/manifests/misc/udp2log.pp
+++ b/manifests/misc/udp2log.pp
@@ -256,8 +256,8 @@
                monitor_ganglia{ "udp2log-${name}-packetloss":
                        description           => 'Packetloss_Average',
                        metric                => 'packet_loss_average',
-                       warning_threshold     => '4',
-                       critical_threshold    => '8',
+                       warning               => '4',
+                       critical              => '8',
                        contact_group         => "admins,analytics",
                        # ganglia-logtailer only runs every 5.
                        # let's make nagios check every 2 minutes (to match 
ganglia_parser)
diff --git a/manifests/nagios.pp b/manifests/nagios.pp
index 311559e..c6282c1 100644
--- a/manifests/nagios.pp
+++ b/manifests/nagios.pp
@@ -307,15 +307,19 @@
 #   monitor_ganglia { 'hdfs-capacity-remaining':
 #       description          => 'GB free in HDFS',
 #       metric               => 
'Hadoop.NameNode.FSNamesystem.CapacityRemainingGB',
-#       warning_threshold    => ':1024',
-#       critical_threshold   => ':512,
+#       warning              => ':1024',
+#       critical             => ':512',
 #   }
 #
 # == Parameters
 # $description          - Description of icinga alert
 # $metric               - ganglia metric name
 # $warning              - alert warning threshold
-# $critical_threshold   - alert critical threshold
+# $critical             - alert critical threshold
+# $metric_host          - hostname in ganglia we want to monitor.
+#                         Can't use nagios macro in checkcommands.cfg
+#                         because fqdn is not available.
+#                         Default: $::fqdn of this node
 # $gmetad_host          - Default: 'nickel.wikimedia.org'
 # $gmetad_query_port    - gmetad XML query interface port.  Default: 8654
 # $host
@@ -332,8 +336,9 @@
 define monitor_ganglia(
     $description,
     $metric,
-    $warning_threshold,
-    $critical_threshold,
+    $warning,
+    $critical,
+    $metric_host           = $::fqdn,
     $gmetad_host           = 'nickel.wikimedia.org',
     $gmetad_query_port     = 8654,
     $host                  = $::hostname,
@@ -353,6 +358,7 @@
     # are passed to check_ganglia script:
     #   $ARG1$  -g gmetad host
     #   $ARG2$  -p gmetad xml query port
+    #   $ARG3$  -H Host for which we want metrics
     #   $ARG3$  -m ganglia metric name
     #   $ARG4$  -w warning threshold
     #   $ARG5$  -c critical threshold
@@ -360,7 +366,7 @@
      monitor_service { $title:
          ensure                => $ensure,
          description           => $description,
-         check_command         => 
"check_ganglia!${gmetad_host}!${gmetad_query_port}!${metric}!${warning_threshold}!${critical_threshold}",
+         check_command         => 
"check_ganglia!${gmetad_host}!${gmetad_query_port}!${metric_host}!${metric}!${warning}!${critical}",
          retries               => $retries,
          group                 => $group,
          critical              => $critical,
diff --git a/manifests/role/analytics/kafka.pp 
b/manifests/role/analytics/kafka.pp
index 1750fca..22f30fe 100644
--- a/manifests/role/analytics/kafka.pp
+++ b/manifests/role/analytics/kafka.pp
@@ -155,11 +155,11 @@
     # These thresholds have to be manually set.
     # adjust them if you add or remove data from Kafka topics.
     monitor_ganglia { 'kafka-broker-MessagesIn':
-        description        => 'Kafka Broker Messages In',
-        metric             => 
'kafka.server.BrokerTopicMetrics.AllTopicsMessagesInPerSec.FifteenMinuteRate',
-        warning_threshold  => ':1500.0',
-        critical_threshold => ':1000.0',
-        require            => Class['::kafka::server::jmxtrans'],
+        description => 'Kafka Broker Messages In',
+        metric      => 
'kafka.server.BrokerTopicMetrics.AllTopicsMessagesInPerSec.FifteenMinuteRate',
+        warning     => ':1500.0',
+        critical    => ':1000.0',
+        require     => Class['::kafka::server::jmxtrans'],
     }
 }
 
diff --git a/manifests/role/cache.pp b/manifests/role/cache.pp
index a12ef1c..0490b96 100644
--- a/manifests/role/cache.pp
+++ b/manifests/role/cache.pp
@@ -448,11 +448,11 @@
 
             # Generate an alert if we ever see any delivery report errors
             monitor_ganglia { 'varnishkafka-drerr':
-                description        => 'Varnishkafka Delivery Errors',
-                metric             => 
'kafka.varnishkafka.kafka_drerr.per_second',
-                warning_threshold  => '0.0',
-                critical_threshold => '0.0',
-                require            => Class['::varnishkafka::monitoring'],
+                description => 'Varnishkafka Delivery Errors',
+                metric      => 'kafka.varnishkafka.kafka_drerr.per_second',
+                warning     => '0.0',
+                critical    => '0.0',
+                require     => Class['::varnishkafka::monitoring'],
             }
         }
     }
diff --git a/templates/icinga/checkcommands.cfg.erb 
b/templates/icinga/checkcommands.cfg.erb
index 45b6aa1..6a148d3 100644
--- a/templates/icinga/checkcommands.cfg.erb
+++ b/templates/icinga/checkcommands.cfg.erb
@@ -495,12 +495,6 @@
        command_line    $USER1$/check_procs -w $ARG1$:$ARG2$ -c $ARG3$:$ARG4$ 
-C $ARG5$
 }
 
-# check that logging packet loss is not too high
-define command{
-       command_name    check_packet_loss_ave
-       command_line    $USER1$/check_ganglios_generic_value -H $HOSTADDRESS$ 
-m packet_loss_average -w $ARG1$ -c $ARG2$ -o gt
-}
-
 define command{
        command_name    check_memory_used
        command_line    $USER3$/check_ganglios_memory_v2 -H $HOSTADDRESS -w 
$ARG1$ -c $ARG2$
@@ -509,7 +503,7 @@
 # check arbitrary ganglia metric values
 define command{
        command_name    check_ganglia
-       command_line    $USER1$/check_ganglia -q -g $ARG1$ -p $ARG2$ -H 
$HOSTADDRESS$ -m '$ARG3$' -w '$ARG4$' -c '$ARG5$'
+       command_line    $USER1$/check_ganglia -q -g $ARG1$ -p $ARG2$ -H $ARG3$ 
-m '$ARG4$' -w '$ARG5$' -c '$ARG6$'
 }
 
 # percona mysql checks
@@ -559,21 +553,6 @@
         command_name    check_to_check_nagios_paging
         command_line    $USER1$/check_to_check_nagios_paging
 }
-
-
-# Analytics checks
-
-# Check that Kafka Brokers are getting messages produced to them.
-define command{
-       command_name    check_kafka_broker_messages_in
-       command_line    $USER1$/check_ganglios_generic_value -H $HOSTADDRESS$ 
-m kafka.server.BrokerTopicMetrics.AllTopicsMessagesInPerSec.FifteenMinuteRate 
-w $ARG1$ -c $ARG2$ -o lt
-}
-
-define command{
-       command_name    check_varnishkafka_drerr
-       command_line    $USER1$/check_ganglios_generic_value -H $HOSTADDRESS$ 
-m kafka.varnishkafka.kafka_drerr.per_second -w $ARG1$ -c $ARG2$ -o gt
-}
-
 
 # Elasticsearch Checks
 

-- 
To view, visit https://gerrit.wikimedia.org/r/107896
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie52bd80b37159b0e074fe37514b7fe79373c27b7
Gerrit-PatchSet: 3
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to