Dzahn has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/327388 )
Change subject: hiera override to skip base icinga for test/decom hosts
......................................................................
hiera override to skip base icinga for test/decom hosts
Adds a Hiera override to simply skip the entire Icinga host monitoring
section in base.
This is so that test/new/decom hosts can easily disable all of it by
just editing Hiera. This applies it to "role::spare" and, as an
example, to host cp1008, which is on the list from T151632.
Context is Volans' ticket description. This is just a simpler approach
to skip all of the monitoring instead of marking it in a special way
and then disabling all the notifications anyway (after the config was
created).
Bug: T151632
Change-Id: I2c07a4fcd9f91a1935d14c1e61d719bb54594f02
---
M hieradata/hosts/cp1008.yaml
M hieradata/role/common/spare/system.yaml
M modules/base/manifests/monitoring/host.pp
3 files changed, 103 insertions(+), 95 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/88/327388/1
diff --git a/hieradata/hosts/cp1008.yaml b/hieradata/hosts/cp1008.yaml
index b07ce38..64f8eff 100644
--- a/hieradata/hosts/cp1008.yaml
+++ b/hieradata/hosts/cp1008.yaml
@@ -7,3 +7,5 @@
- 'cp1008.wikimedia.org'
codfw:
- 'cp1008.wikimedia.org'
+
+base::icinga::monitoring: false
diff --git a/hieradata/role/common/spare/system.yaml
b/hieradata/role/common/spare/system.yaml
index f877951..83d18ec 100644
--- a/hieradata/role/common/spare/system.yaml
+++ b/hieradata/role/common/spare/system.yaml
@@ -1,3 +1,5 @@
debdeploy::grains:
debdeploy-spare:
value: standard
+
+base::icinga::monitoring: false
diff --git a/modules/base/manifests/monitoring/host.pp
b/modules/base/manifests/monitoring/host.pp
index 4f535e5..c66bae1 100644
--- a/modules/base/manifests/monitoring/host.pp
+++ b/modules/base/manifests/monitoring/host.pp
@@ -28,112 +28,116 @@
# that are purposefully at 99%. Better ideas are welcome.
$nrpe_check_disk_options = '-w 6% -c 3% -l -e -A -i "/srv/sd[a-b][1-3]"
--exclude-type=tracefs',
$nrpe_check_disk_critical = false,
+ $enabled = hiera('base::icinga::monitoring', 'true'),
) {
include base::puppet::params # In order to be able to use some variables
- # RAID checks
- include raid
+ if $enabled {
- monitoring::host { $::hostname: }
+ # RAID checks
+ include raid
- monitoring::service { 'ssh':
- description => 'SSH',
- check_command => 'check_ssh',
- }
+ monitoring::host { $::hostname: }
- file { '/usr/local/lib/nagios/plugins/check_puppetrun':
- ensure => present,
- owner => 'root',
- group => 'root',
- mode => '0555',
- source => 'puppet:///modules/base/monitoring/check_puppetrun';
- }
- file { '/usr/local/lib/nagios/plugins/check_eth':
- ensure => present,
- owner => 'root',
- group => 'root',
- mode => '0555',
- content => template('base/check_eth.erb'),
- }
- file { '/usr/lib/nagios/plugins/check_sysctl':
- ensure => present,
- owner => 'root',
- group => 'root',
- mode => '0555',
- source => 'puppet:///modules/base/check_sysctl',
- }
+ monitoring::service { 'ssh':
+ description => 'SSH',
+ check_command => 'check_ssh',
+ }
- file { '/usr/lib/nagios/plugins/check-fresh-files-in-dir.py':
- ensure => present,
- owner => 'root',
- group => 'root',
- mode => '0555',
- source =>
'puppet:///modules/base/monitoring/check-fresh-files-in-dir.py',
- }
+ file { '/usr/local/lib/nagios/plugins/check_puppetrun':
+ ensure => present,
+ owner => 'root',
+ group => 'root',
+ mode => '0555',
+ source => 'puppet:///modules/base/monitoring/check_puppetrun';
+ }
+ file { '/usr/local/lib/nagios/plugins/check_eth':
+ ensure => present,
+ owner => 'root',
+ group => 'root',
+ mode => '0555',
+ content => template('base/check_eth.erb'),
+ }
+ file { '/usr/lib/nagios/plugins/check_sysctl':
+ ensure => present,
+ owner => 'root',
+ group => 'root',
+ mode => '0555',
+ source => 'puppet:///modules/base/check_sysctl',
+ }
- file { '/usr/local/lib/nagios/plugins/check_ipmi_sensor':
- ensure => present,
- owner => 'root',
- group => 'root',
- mode => '0555',
- source => 'puppet:///modules/base/monitoring/check_ipmi_sensor',
- }
+ file { '/usr/lib/nagios/plugins/check-fresh-files-in-dir.py':
+ ensure => present,
+ owner => 'root',
+ group => 'root',
+ mode => '0555',
+ source =>
'puppet:///modules/base/monitoring/check-fresh-files-in-dir.py',
+ }
- sudo::user { 'nagios_puppetrun':
- user => 'nagios',
- privileges => ['ALL = NOPASSWD:
/usr/local/lib/nagios/plugins/check_puppetrun'],
- }
+ file { '/usr/local/lib/nagios/plugins/check_ipmi_sensor':
+ ensure => present,
+ owner => 'root',
+ group => 'root',
+ mode => '0555',
+ source => 'puppet:///modules/base/monitoring/check_ipmi_sensor',
+ }
- # Check for disk usage on the root partition for labs instances
- # This is mapped to the monitoring template - ensure you update
- # labsnagiosbuilder/templates/classes/base.cfg under labs/nagios-builder
- # to reflect this check name
- if $::realm == 'labs' {
- nrpe::monitor_service { 'root_disk_space':
- description => 'Disk space on /',
- nrpe_command => '/usr/lib/nagios/plugins/check_disk -w 5% -c 2% -l
-e -p /',
- }
- }
+ sudo::user { 'nagios_puppetrun':
+ user => 'nagios',
+ privileges => ['ALL = NOPASSWD:
/usr/local/lib/nagios/plugins/check_puppetrun'],
+ }
- nrpe::monitor_service { 'disk_space':
- description => 'Disk space',
- critical => $nrpe_check_disk_critical,
- nrpe_command => "/usr/lib/nagios/plugins/check_disk
${nrpe_check_disk_options}",
- }
+ # Check for disk usage on the root partition for labs instances
+ # This is mapped to the monitoring template - ensure you update
+ # labsnagiosbuilder/templates/classes/base.cfg under labs/nagios-builder
+ # to reflect this check name
+ if $::realm == 'labs' {
+ nrpe::monitor_service { 'root_disk_space':
+ description => 'Disk space on /',
+ nrpe_command => '/usr/lib/nagios/plugins/check_disk -w 5% -c 2%
-l -e -p /',
+ }
+ }
- nrpe::monitor_service { 'dpkg':
- description => 'DPKG',
- nrpe_command => '/usr/local/lib/nagios/plugins/check_dpkg',
- }
- $warninginterval = $base::puppet::params::freshnessinterval
- $criticalinterval = $base::puppet::params::freshnessinterval * 2
- nrpe::monitor_service { 'puppet_checkpuppetrun':
- description => 'puppet last run',
- nrpe_command => "/usr/bin/sudo
/usr/local/lib/nagios/plugins/check_puppetrun -w ${warninginterval} -c
${criticalinterval}",
- }
- nrpe::monitor_service {'check_eth':
- description => 'configured eth',
- nrpe_command => '/usr/local/lib/nagios/plugins/check_eth',
- }
- nrpe::monitor_service { 'check_dhclient':
- description => 'dhclient process',
- nrpe_command => '/usr/lib/nagios/plugins/check_procs -w 0:0 -c 0:0 -C
dhclient',
- }
- nrpe::monitor_service { 'check_salt_minion':
- description => 'salt-minion processes',
- nrpe_command => "/usr/lib/nagios/plugins/check_procs -w 1: -c 1:4
--ereg-argument-array '^/usr/bin/python /usr/bin/salt-minion'",
- }
- if $::initsystem == 'systemd' {
- file { '/usr/local/lib/nagios/plugins/check_systemd_state':
- ensure => present,
- source => 'puppet:///modules/base/check_systemd_state.py',
- owner => 'root',
- group => 'root',
- mode => '0555',
- }
- nrpe::monitor_service { 'check_systemd_state':
- description => 'Check systemd state',
- nrpe_command =>
'/usr/local/lib/nagios/plugins/check_systemd_state',
- }
+ nrpe::monitor_service { 'disk_space':
+ description => 'Disk space',
+ critical => $nrpe_check_disk_critical,
+ nrpe_command => "/usr/lib/nagios/plugins/check_disk
${nrpe_check_disk_options}",
+ }
+
+ nrpe::monitor_service { 'dpkg':
+ description => 'DPKG',
+ nrpe_command => '/usr/local/lib/nagios/plugins/check_dpkg',
+ }
+ $warninginterval = $base::puppet::params::freshnessinterval
+ $criticalinterval = $base::puppet::params::freshnessinterval * 2
+ nrpe::monitor_service { 'puppet_checkpuppetrun':
+ description => 'puppet last run',
+ nrpe_command => "/usr/bin/sudo
/usr/local/lib/nagios/plugins/check_puppetrun -w ${warninginterval} -c
${criticalinterval}",
+ }
+ nrpe::monitor_service {'check_eth':
+ description => 'configured eth',
+ nrpe_command => '/usr/local/lib/nagios/plugins/check_eth',
+ }
+ nrpe::monitor_service { 'check_dhclient':
+ description => 'dhclient process',
+ nrpe_command => '/usr/lib/nagios/plugins/check_procs -w 0:0 -c 0:0
-C dhclient',
+ }
+ nrpe::monitor_service { 'check_salt_minion':
+ description => 'salt-minion processes',
+ nrpe_command => "/usr/lib/nagios/plugins/check_procs -w 1: -c 1:4
--ereg-argument-array '^/usr/bin/python /usr/bin/salt-minion'",
+ }
+ if $::initsystem == 'systemd' {
+ file { '/usr/local/lib/nagios/plugins/check_systemd_state':
+ ensure => present,
+ source => 'puppet:///modules/base/check_systemd_state.py',
+ owner => 'root',
+ group => 'root',
+ mode => '0555',
+ }
+ nrpe::monitor_service { 'check_systemd_state':
+ description => 'Check systemd state',
+ nrpe_command =>
'/usr/local/lib/nagios/plugins/check_systemd_state',
+ }
+ }
}
}
--
To view, visit https://gerrit.wikimedia.org/r/327388
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I2c07a4fcd9f91a1935d14c1e61d719bb54594f02
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Dzahn <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits