Filippo Giunchedi has submitted this change and it was merged.

Change subject: raid: tweak check_interval for forking checks
......................................................................


raid: tweak check_interval for forking checks

Also tweak nrpe defines to honour the new retry_interval arguments.

Change-Id: I00dc72e1757697dd72f4f621f9054342839a6efe
---
M modules/nrpe/manifests/monitor_service.pp
M modules/raid/manifests/init.pp
2 files changed, 43 insertions(+), 22 deletions(-)

Approvals:
  Filippo Giunchedi: Looks good to me, approved
  Alexandros Kosiaris: Looks good to me, but someone else must approve
  jenkins-bot: Verified



diff --git a/modules/nrpe/manifests/monitor_service.pp b/modules/nrpe/manifests/monitor_service.pp
index aa856b7..43ee28e 100644
--- a/modules/nrpe/manifests/monitor_service.pp
+++ b/modules/nrpe/manifests/monitor_service.pp
@@ -27,11 +27,13 @@
 #
 define nrpe::monitor_service( $description,
                               $nrpe_command,
-                              $contact_group = hiera('contactgroups', 'admins'),
-                              $retries       = 3,
-                              $timeout       = 10,
-                              $critical      = false,
-                              $ensure        = 'present') {
+                              $contact_group         = hiera('contactgroups', 'admins'),
+                              $retries               = 3,
+                              $timeout               = 10,
+                              $critical              = false,
+                              $normal_check_interval = 1,
+                              $retry_check_interval  = 1,
+                              $ensure                = 'present') {
 
     nrpe::check { "check_${title}":
         command => $nrpe_command,
@@ -39,11 +41,13 @@
     }
 
     monitoring::service { $title:
-        ensure        => $ensure,
-        description   => $description,
-        check_command => "nrpe_check!check_${title}!${timeout}",
-        contact_group => $contact_group,
-        retries       => $retries,
-        critical      => $critical,
+        ensure                => $ensure,
+        description           => $description,
+        check_command         => "nrpe_check!check_${title}!${timeout}",
+        contact_group         => $contact_group,
+        retries               => $retries,
+        critical              => $critical,
+        normal_check_interval => $normal_check_interval,
+        retry_check_interval  => $retry_check_interval,
     }
 }
diff --git a/modules/raid/manifests/init.pp b/modules/raid/manifests/init.pp
index 660949a..d99e052 100644
--- a/modules/raid/manifests/init.pp
+++ b/modules/raid/manifests/init.pp
@@ -16,6 +16,13 @@
 
     $check_raid = '/usr/bin/sudo /usr/local/lib/nagios/plugins/check_raid'
 
+    # for 'forking' checks (i.e. all but mdadm, which essentially just reads
+    # kernel memory from /proc/mdstat) check every $normal_check_interval
+    # minutes instead of default of one minute. If the check is non-OK, retry
+    # every $retry_check_interval.
+    $normal_check_interval = 10
+    $retry_check_interval = 5
+
     if 'megaraid' in $raid {
         require_package('megacli')
         $get_raid_status_megacli = '/usr/local/lib/nagios/plugins/get-raid-status-megacli'
@@ -38,8 +45,10 @@
         }
 
         nrpe::monitor_service { 'raid_megaraid':
-            description  => 'MegaRAID',
-            nrpe_command => "${check_raid} megacli",
+            description           => 'MegaRAID',
+            nrpe_command          => "${check_raid} megacli",
+            normal_check_interval => $normal_check_interval,
+            retry_check_interval  => $retry_check_interval,
         }
     }
 
@@ -78,9 +87,11 @@
         }
 
         nrpe::monitor_service { 'raid_hpssacli':
-            description  => 'HP RAID',
-            nrpe_command => '/usr/local/lib/nagios/plugins/check_hpssacli',
-            timeout      => 50, # can take > 10s on servers with lots of disks
+            description           => 'HP RAID',
+            nrpe_command          => '/usr/local/lib/nagios/plugins/check_hpssacli',
+            timeout               => 50, # can take > 10s on servers with lots of disks
+            normal_check_interval => $normal_check_interval,
+            retry_check_interval  => $retry_check_interval,
         }
 
         $get_raid_status_hpssacli = '/usr/local/lib/nagios/plugins/get-raid-status-hpssacli'
@@ -113,8 +124,10 @@
         }
 
         nrpe::monitor_service { 'raid_mpt':
-            description  => 'MPT RAID',
-            nrpe_command => "${check_raid} mpt",
+            description           => 'MPT RAID',
+            nrpe_command          => "${check_raid} mpt",
+            normal_check_interval => $normal_check_interval,
+            retry_check_interval  => $retry_check_interval,
         }
 
         nrpe::check { 'get_raid_status_mpt':
@@ -139,8 +152,10 @@
         require_package('arcconf')
 
         nrpe::monitor_service { 'raid_aac':
-            description  => 'Adaptec RAID',
-            nrpe_command => "${check_raid} aac",
+            description           => 'Adaptec RAID',
+            nrpe_command          => "${check_raid} aac",
+            normal_check_interval => $normal_check_interval,
+            retry_check_interval  => $retry_check_interval,
         }
     }
 
@@ -148,8 +163,10 @@
         require_package('tw-cli')
 
         nrpe::monitor_service { 'raid_twe':
-            description  => '3ware TW',
-            nrpe_command => "${check_raid} twe",
+            description           => '3ware TW',
+            nrpe_command          => "${check_raid} twe",
+            normal_check_interval => $normal_check_interval,
+            retry_check_interval  => $retry_check_interval,
         }
     }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/315107
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I00dc72e1757697dd72f4f621f9054342839a6efe
Gerrit-PatchSet: 5
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Filippo Giunchedi <fgiunch...@wikimedia.org>
Gerrit-Reviewer: Alexandros Kosiaris <akosia...@wikimedia.org>
Gerrit-Reviewer: Faidon Liambotis <fai...@wikimedia.org>
Gerrit-Reviewer: Filippo Giunchedi <fgiunch...@wikimedia.org>
Gerrit-Reviewer: Volans <rcocci...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to