Filippo Giunchedi has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/315107

Change subject: raid: tweak check_interval for forking checks
......................................................................

raid: tweak check_interval for forking checks

Also extend nrpe::monitor_service to accept normal_check_interval and
retry_check_interval and pass them through to monitoring::service.

Change-Id: I00dc72e1757697dd72f4f621f9054342839a6efe
---
M modules/nrpe/manifests/monitor_service.pp
M modules/raid/manifests/init.pp
2 files changed, 36 insertions(+), 22 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/07/315107/1

diff --git a/modules/nrpe/manifests/monitor_service.pp b/modules/nrpe/manifests/monitor_service.pp
index aa856b7..43ee28e 100644
--- a/modules/nrpe/manifests/monitor_service.pp
+++ b/modules/nrpe/manifests/monitor_service.pp
@@ -27,11 +27,13 @@
 #
 define nrpe::monitor_service( $description,
                               $nrpe_command,
-                              $contact_group = hiera('contactgroups', 'admins'),
-                              $retries       = 3,
-                              $timeout       = 10,
-                              $critical      = false,
-                              $ensure        = 'present') {
+                              $contact_group         = hiera('contactgroups', 'admins'),
+                              $retries               = 3,
+                              $timeout               = 10,
+                              $critical              = false,
+                              $normal_check_interval = 1,
+                              $retry_check_interval  = 1,
+                              $ensure                = 'present') {
 
     nrpe::check { "check_${title}":
         command => $nrpe_command,
@@ -39,11 +41,13 @@
     }
 
     monitoring::service { $title:
-        ensure        => $ensure,
-        description   => $description,
-        check_command => "nrpe_check!check_${title}!${timeout}",
-        contact_group => $contact_group,
-        retries       => $retries,
-        critical      => $critical,
+        ensure                => $ensure,
+        description           => $description,
+        check_command         => "nrpe_check!check_${title}!${timeout}",
+        contact_group         => $contact_group,
+        retries               => $retries,
+        critical              => $critical,
+        normal_check_interval => $normal_check_interval,
+        retry_check_interval  => $retry_check_interval,
     }
 }
diff --git a/modules/raid/manifests/init.pp b/modules/raid/manifests/init.pp
index 660949a..5517164 100644
--- a/modules/raid/manifests/init.pp
+++ b/modules/raid/manifests/init.pp
@@ -38,8 +38,10 @@
         }
 
         nrpe::monitor_service { 'raid_megaraid':
-            description  => 'MegaRAID',
-            nrpe_command => "${check_raid} megacli",
+            description           => 'MegaRAID',
+            nrpe_command          => "${check_raid} megacli",
+            normal_check_interval => 10,
+            retry_check_interval  => 6,
         }
     }
 
@@ -78,9 +80,11 @@
         }
 
         nrpe::monitor_service { 'raid_hpssacli':
-            description  => 'HP RAID',
-            nrpe_command => '/usr/local/lib/nagios/plugins/check_hpssacli',
-            timeout      => 50, # can take > 10s on servers with lots of disks
+            description           => 'HP RAID',
+            nrpe_command          => '/usr/local/lib/nagios/plugins/check_hpssacli',
+            timeout               => 50, # can take > 10s on servers with lots of disks
+            normal_check_interval => 10,
+            retry_check_interval  => 6,
         }
 
         $get_raid_status_hpssacli = '/usr/local/lib/nagios/plugins/get-raid-status-hpssacli'
@@ -113,8 +117,10 @@
         }
 
         nrpe::monitor_service { 'raid_mpt':
-            description  => 'MPT RAID',
-            nrpe_command => "${check_raid} mpt",
+            description           => 'MPT RAID',
+            nrpe_command          => "${check_raid} mpt",
+            normal_check_interval => 10,
+            retry_check_interval  => 6,
         }
 
         nrpe::check { 'get_raid_status_mpt':
@@ -139,8 +145,10 @@
         require_package('arcconf')
 
         nrpe::monitor_service { 'raid_aac':
-            description  => 'Adaptec RAID',
-            nrpe_command => "${check_raid} aac",
+            description           => 'Adaptec RAID',
+            nrpe_command          => "${check_raid} aac",
+            normal_check_interval => 10,
+            retry_check_interval  => 6,
         }
     }
 
@@ -148,8 +156,10 @@
         require_package('tw-cli')
 
         nrpe::monitor_service { 'raid_twe':
-            description  => '3ware TW',
-            nrpe_command => "${check_raid} twe",
+            description           => '3ware TW',
+            nrpe_command          => "${check_raid} twe",
+            normal_check_interval => 10,
+            retry_check_interval  => 6,
         }
     }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/315107
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I00dc72e1757697dd72f4f621f9054342839a6efe
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Filippo Giunchedi <fgiunch...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to