Giuseppe Lavagetto has submitted this change and it was merged. Change subject: puppet: get rid of the nagios_group global variable ......................................................................
puppet: get rid of the nagios_group global variable Change-Id: I8fbef8279d4b7d00997086a2851cb65dcd898823 Signed-off-by: Giuseppe Lavagetto <[email protected]> --- A hieradata/hosts/analytics1004.yaml A hieradata/hosts/analytics1009.yaml A hieradata/hosts/analytics1010.yaml A hieradata/hosts/analytics1026.yaml A hieradata/hosts/analytics1027.yaml D hieradata/hosts/ms-fe2001.yaml D hieradata/hosts/ms-fe2002.yaml D hieradata/hosts/ms-fe2003.yaml D hieradata/hosts/ms-fe2004.yaml A hieradata/hosts/searchidx1001.yaml A hieradata/mainrole/analytics_hadoop_master.yml A hieradata/mainrole/analytics_hadoop_standby.yaml A hieradata/mainrole/analytics_hadoop_worker.yaml A hieradata/mainrole/analytics_kafka.yaml A hieradata/mainrole/analytics_zookeeper.yaml A hieradata/mainrole/swift_storage.yaml M hieradata/regex.yaml M manifests/nagios.pp M manifests/site.pp M modules/monitoring/manifests/host.pp 20 files changed, 114 insertions(+), 112 deletions(-) Approvals: Giuseppe Lavagetto: Looks good to me, approved jenkins-bot: Verified diff --git a/hieradata/hosts/analytics1004.yaml b/hieradata/hosts/analytics1004.yaml new file mode 100644 index 0000000..4e4c788 --- /dev/null +++ b/hieradata/hosts/analytics1004.yaml @@ -0,0 +1 @@ +mainrole: analytics_hadoop_standby diff --git a/hieradata/hosts/analytics1009.yaml b/hieradata/hosts/analytics1009.yaml new file mode 100644 index 0000000..2501ef9 --- /dev/null +++ b/hieradata/hosts/analytics1009.yaml @@ -0,0 +1,2 @@ +nagios_group: analytics_eqiad +cluster: analytics diff --git a/hieradata/hosts/analytics1010.yaml b/hieradata/hosts/analytics1010.yaml new file mode 100644 index 0000000..2159967 --- /dev/null +++ b/hieradata/hosts/analytics1010.yaml @@ -0,0 +1 @@ +mainrole: analytics_hadoop_master diff --git a/hieradata/hosts/analytics1026.yaml b/hieradata/hosts/analytics1026.yaml new file mode 100644 index 0000000..2501ef9 --- /dev/null +++ b/hieradata/hosts/analytics1026.yaml @@ -0,0 +1,2 @@ +nagios_group: analytics_eqiad +cluster: analytics diff --git a/hieradata/hosts/analytics1027.yaml b/hieradata/hosts/analytics1027.yaml new file mode 100644 index 0000000..2501ef9 --- /dev/null +++ b/hieradata/hosts/analytics1027.yaml @@ -0,0 +1,2 @@ +nagios_group: analytics_eqiad +cluster: analytics diff --git a/hieradata/hosts/ms-fe2001.yaml b/hieradata/hosts/ms-fe2001.yaml deleted file mode 100644 index 35175b5..0000000 --- a/hieradata/hosts/ms-fe2001.yaml +++ /dev/null @@ -1 +0,0 @@ -mainrole: swift_proxy_codfw diff --git a/hieradata/hosts/ms-fe2002.yaml b/hieradata/hosts/ms-fe2002.yaml deleted file mode 100644 index 35175b5..0000000 --- a/hieradata/hosts/ms-fe2002.yaml +++ /dev/null @@ -1 +0,0 @@ -mainrole: swift_proxy_codfw diff --git a/hieradata/hosts/ms-fe2003.yaml b/hieradata/hosts/ms-fe2003.yaml deleted file mode 100644 index 35175b5..0000000 --- a/hieradata/hosts/ms-fe2003.yaml +++ /dev/null @@ -1 +0,0 @@ -mainrole: swift_proxy_codfw diff --git a/hieradata/hosts/ms-fe2004.yaml b/hieradata/hosts/ms-fe2004.yaml deleted file mode 100644 index 35175b5..0000000 --- a/hieradata/hosts/ms-fe2004.yaml +++ /dev/null @@ -1 +0,0 @@ -mainrole: swift_proxy_codfw diff --git a/hieradata/hosts/searchidx1001.yaml b/hieradata/hosts/searchidx1001.yaml new file mode 100644 index 0000000..465b455 --- /dev/null +++ b/hieradata/hosts/searchidx1001.yaml @@ -0,0 +1,2 @@ +nagios_group: lucene +cluster: search diff --git a/hieradata/mainrole/analytics_hadoop_master.yml b/hieradata/mainrole/analytics_hadoop_master.yml new file mode 100644 index 0000000..288dde3 --- /dev/null +++ b/hieradata/mainrole/analytics_hadoop_master.yml @@ -0,0 +1,4 @@ +nagios_group: analytics_eqiad +cluster: analytics + + diff --git a/hieradata/mainrole/analytics_hadoop_standby.yaml b/hieradata/mainrole/analytics_hadoop_standby.yaml new file mode 100644 index 0000000..288dde3 --- /dev/null +++ b/hieradata/mainrole/analytics_hadoop_standby.yaml @@ -0,0 +1,4 @@ +nagios_group: analytics_eqiad +cluster: analytics + + diff --git a/hieradata/mainrole/analytics_hadoop_worker.yaml b/hieradata/mainrole/analytics_hadoop_worker.yaml new file mode 100644 index 0000000..288dde3 --- /dev/null +++ b/hieradata/mainrole/analytics_hadoop_worker.yaml @@ -0,0 +1,4 @@ +nagios_group: analytics_eqiad +cluster: analytics + + diff --git a/hieradata/mainrole/analytics_kafka.yaml b/hieradata/mainrole/analytics_kafka.yaml new file mode 100644 index 0000000..d002fc6 --- /dev/null +++ b/hieradata/mainrole/analytics_kafka.yaml @@ -0,0 +1,2 @@ +cluster: analytics_kafka +nagios_group: analytics_eqiad diff --git a/hieradata/mainrole/analytics_zookeeper.yaml b/hieradata/mainrole/analytics_zookeeper.yaml new file mode 100644 index 0000000..cf8dfea --- /dev/null +++ b/hieradata/mainrole/analytics_zookeeper.yaml @@ -0,0 +1,2 @@ +cluster: analytics +nagios_group: analytics_eqiad diff --git a/hieradata/mainrole/swift_storage.yaml b/hieradata/mainrole/swift_storage.yaml new file mode 100644 index 0000000..21bef47 --- /dev/null +++ b/hieradata/mainrole/swift_storage.yaml @@ -0,0 +1,2 @@ +cluster: swift +nagios_group: swift diff --git a/hieradata/regex.yaml b/hieradata/regex.yaml index cda10e3..d0d88ac 100644 --- a/hieradata/regex.yaml +++ b/hieradata/regex.yaml @@ -51,3 +51,48 @@ mainrole: appserver admin::groups: - deployment + +swift_fe_eqiad: + __regex: !ruby/regexp /^ms-fe100[1-4]\.eqiad\.wmnet$/ + cluster: swift + nagios_group: swift + +swift_be_eqiad: + __regex: !ruby/regexp /^ms-be10[0-9][0-9]\.eqiad\.wmnet$/ + cluster: swift + nagios_group: swift + +swift_fe_esams: + __regex: !ruby/regexp /^ms-fe300[1-2]\.esams\.wmnet$/ + cluster: swift + nagios_group: swift + +swift_be_esams: + __regex: !ruby/regexp /^ms-be300[1-4]\.esams\.wmnet$/ + cluster: swift + nagios_group: swift + +swift_fe_codfw: + __regex: !ruby/regexp /^ms-fe200[1-4]\.codfw\.wmnet$/ + mainrole: swift_proxy_codfw + +swift_be_codfw: + __regex: !ruby/regexp /^ms-be20[0-9][0-9]\.codfw\.wmnet$/ + mainrole: swift_storage + +hadoop_workers: + __regex: !ruby/regexp /analytics10(11|1[3-7]|19|2[089]|3[0-9]|4[01]).eqiad.wmnet/ + mainrole: analytics_hadoop_worker + +kafka_brokers: + __regex: !ruby/regexp /analytics10(12|18|21|22)\.eqiad\.wmnet/ + mainrole: analytics_kafka + +zookeeper: + __regex: !ruby/regexp /analytics102[345].eqiad.wmnet/ + mainrole: analytics_zookeeper + +lsearchd: + __regex: !ruby/regexp /^search10[0-2][0-9]\.eqiad\.wmnet$/ + cluster: search + nagios_group: lucene diff --git a/manifests/nagios.pp b/manifests/nagios.pp index 5e2fdc8..ce545c5 100644 --- a/manifests/nagios.pp +++ b/manifests/nagios.pp @@ -9,7 +9,7 @@ $check_command, $host = $::hostname, $retries = 3, - $group = undef, + $group = hiera('nagios_group', undef), $ensure = present, $critical = 'false', $passive = 'false', @@ -23,55 +23,44 @@ fail("Parameter $host not defined!") } - if $group != undef { - $servicegroup = $group - } - elsif $nagios_group != undef { - # nagios group should be defined at the node level with hiera. - $servicegroup = $nagios_group - } else { - # this check is part of no servicegroup. - $servicegroup = undef - } - - # Export the nagios service instance - @@nagios_service { "$::hostname $title": - ensure => $ensure, - target => "${::nagios_config_dir}/puppet_checks.d/${host}.cfg", - host_name => $host, - servicegroups => $servicegroup, - service_description => $description, - check_command => $check_command, - max_check_attempts => $retries, - normal_check_interval => $normal_check_interval, - retry_check_interval => $retry_check_interval, - check_period => '24x7', - notification_interval => $critical ? { - 'true' => 240, - default => 0, - }, - notification_period => '24x7', - notification_options => 'c,r,f', - contact_groups => $critical ? { - 'true' => 'admins,sms', - default => $contact_group, - }, - passive_checks_enabled => 1, - active_checks_enabled => $passive ? { - 'true' => 0, - default => 1, - }, - is_volatile => $passive ? { - 'true' => 1, - default => 0, - }, - check_freshness => $passive ? { - 'true' => 1, - default => 0, - }, - freshness_threshold => $passive ? { - 'true' => $freshness, - default => undef, - }, + # Export the nagios service instance + @@nagios_service { "${::hostname} ${title}": + ensure => $ensure, + target => "${::nagios_config_dir}/puppet_checks.d/${host}.cfg", + host_name => $host, + servicegroups => $group, + service_description => $description, + check_command => $check_command, + max_check_attempts => $retries, + normal_check_interval => $normal_check_interval, + retry_check_interval => $retry_check_interval, + check_period => '24x7', + notification_interval => $critical ? { + 'true' => 240, + default => 0, + }, + notification_period => '24x7', + notification_options => 'c,r,f', + contact_groups => $critical ? { + 'true' => 'admins,sms', + default => $contact_group, + }, + passive_checks_enabled => 1, + active_checks_enabled => $passive ? { + 'true' => 0, + default => 1, + }, + is_volatile => $passive ? { + 'true' => 1, + default => 0, + }, + check_freshness => $passive ? { + 'true' => 1, + default => 0, + }, + freshness_threshold => $passive ? { + 'true' => $freshness, + default => undef, + }, } } diff --git a/manifests/site.pp b/manifests/site.pp index ede03b6..3b16485 100644 --- a/manifests/site.pp +++ b/manifests/site.pp @@ -113,9 +113,6 @@ # had an error when booting. analytics1004 # has been repurposed as analytics standby NameNode. node 'analytics1009.eqiad.wmnet' { - $nagios_group = 'analytics_eqiad' - # ganglia cluster name. - $cluster = 'analytics' # analytics1009 is analytics Ganglia aggregator for Row A # $ganglia_aggregator = true @@ -137,9 +134,6 @@ # TODO: either fix analytics1009, or move this # node to Row A. node 'analytics1004.eqiad.wmnet' { - $nagios_group = 'analytics_eqiad' - # ganglia cluster name. - $cluster = 'analytics' class { 'admin': groups => [ @@ -157,9 +151,6 @@ # analytics1010 is the Hadoop master node # (primary NameNode, ResourceManager, etc.) node 'analytics1010.eqiad.wmnet' { - $nagios_group = 'analytics_eqiad' - # ganglia cluster name. - $cluster = 'analytics' # analytics1010 is analytics Ganglia aggregator for Row B $ganglia_aggregator = true @@ -184,9 +175,6 @@ # hostname -> /datacenter/rack/row id is correct. This is # used for Hadoop network topology awareness. node /analytics10(11|1[3-7]|19|2[089]|3[0-9]|4[01]).eqiad.wmnet/ { - $nagios_group = 'analytics_eqiad' - # ganglia cluster name. - $cluster = 'analytics' # analytics1014 is analytics Ganglia aggregator for Row C if $::hostname == 'analytics1014' { $ganglia_aggregator = true @@ -204,10 +192,6 @@ # analytics1012, analytics1018, analytics1021 and analytics1022 are Kafka Brokers. node /analytics10(12|18|21|22)\.eqiad\.wmnet/ { - $nagios_group = 'analytics_eqiad' - # ganglia cluster name. - $cluster = 'analytics_kafka' - # one ganglia aggregator per ganglia 'cluster' per row. if ($::hostname == 'analytics1012' or # Row A $::hostname == 'analytics1018' or # Row D @@ -235,9 +219,6 @@ # analytics1023-1025 are zookeeper server nodes node /analytics102[345].eqiad.wmnet/ { - $nagios_group = 'analytics_eqiad' - # ganglia cluster name. - $cluster = 'analytics' class { 'admin': groups => [ @@ -253,9 +234,6 @@ # analytics1026 does not currently have a role node 'analytics1026.eqiad.wmnet' { - $nagios_group = 'analytics_eqiad' - # ganglia cluster name. - $cluster = 'analytics' class { 'admin': groups => [ @@ -274,9 +252,6 @@ # (Hue, Oozie, Hive, etc.). It also submits regularly scheduled # batch Hadoop jobs. node 'analytics1027.eqiad.wmnet' { - $nagios_group = 'analytics_eqiad' - # ganglia cluster name. - $cluster = 'analytics' class { 'admin': groups => [ @@ -1905,8 +1880,6 @@ # new server IP as a trusted proxy so X-Forwarded-For headers are trusted for # rate limiting purposes (bug 64622) node /^ms-fe100[1-4]\.eqiad\.wmnet$/ { - $cluster = 'swift' - $nagios_group = 'swift' if $::hostname =~ /^ms-fe100[12]$/ { $ganglia_aggregator = true } @@ -1922,8 +1895,6 @@ } node /^ms-be10[0-9][0-9]\.eqiad\.wmnet$/ { - $cluster = 'swift' - $nagios_group = 'swift' $all_drives = [ '/dev/sda', '/dev/sdb', '/dev/sdc', '/dev/sdd', '/dev/sde', '/dev/sdf', '/dev/sdg', '/dev/sdh', @@ -1942,15 +1913,11 @@ } node /^ms-fe300[1-2]\.esams\.wmnet$/ { - $cluster = 'swift' - $nagios_group = 'swift' include admin include role::swift::esams-prod::proxy } node /^ms-be300[1-4]\.esams\.wmnet$/ { - $cluster = 'swift' - $nagios_group = 'swift' # 720xd *without* SSDs; sda & sdb serve both as root and as Swift disks $all_drives = [ '/dev/sdc', '/dev/sdd', '/dev/sde', '/dev/sdf', @@ -1973,8 +1940,6 @@ node /^ms-fe200[1-4]\.codfw\.wmnet$/ { include admin - $cluster = 'swift' - $nagios_group = 'swift' if $::hostname =~ /^ms-fe200[12]$/ { $ganglia_aggregator = true } @@ -1989,9 +1954,6 @@ node /^ms-be20[0-9][0-9]\.codfw\.wmnet$/ { include admin - - $cluster = 'swift' - $nagios_group = 'swift' include role::swift::storage } @@ -2375,8 +2337,6 @@ } node /^search100[0-6]\.eqiad\.wmnet/ { - $cluster = 'search' - $nagios_group = 'lucene' if $::hostname =~ /^search100(1|2)$/ { $ganglia_aggregator = true } @@ -2391,8 +2351,6 @@ } node /^search10(0[7-9]|10)\.eqiad\.wmnet/ { - $cluster = 'search' - $nagios_group = 'lucene' class { 'admin': groups => [ 'deployment', @@ -2403,8 +2361,6 @@ } node /^search101[1-4]\.eqiad\.wmnet/ { - $cluster = 'search' - $nagios_group = 'lucene' class { 'admin': groups => [ 'deployment', @@ -2415,8 +2371,6 @@ } node /^search101[56]\.eqiad\.wmnet/ { - $cluster = 'search' - $nagios_group = 'lucene' class { 'admin': groups => [ 'deployment', @@ -2427,8 +2381,6 @@ } node /^search10(19|20)\.eqiad\.wmnet/ { - $cluster = 'search' - $nagios_group = 'lucene' class { 'admin': groups => [ 'deployment', @@ -2439,8 +2391,6 @@ } node /^search101[78]\.eqiad\.wmnet/ { - $cluster = 'search' - $nagios_group = 'lucene' class { 'admin': groups => [ 'deployment', @@ -2451,8 +2401,6 @@ } node /^search10(19|2[0-2])\.eqiad\.wmnet/ { - $cluster = 'search' - $nagios_group = 'lucene' class { 'admin': groups => [ 'deployment', @@ -2463,8 +2411,6 @@ } node /^search102[3-4]\.eqiad\.wmnet/ { - $cluster = 'search' - $nagios_group = 'lucene' class { 'admin': groups => [ 'deployment', @@ -2475,8 +2421,6 @@ } node /^searchidx100[0-2]\.eqiad\.wmnet/ { - $cluster = 'search' - $nagios_group = 'lucene' class { 'admin': groups => [ 'deployment', diff --git a/modules/monitoring/manifests/host.pp b/modules/monitoring/manifests/host.pp index 8a7aaa7..1f68801 100644 --- a/modules/monitoring/manifests/host.pp +++ b/modules/monitoring/manifests/host.pp @@ -3,7 +3,7 @@ # define monitoring::host ( $ip_address = $::ipaddress, - $group = $nagios_group, + $group = hiera('nagios_group', "${cluster}_${::site}"), $ensure = present, $critical = 'false', $contact_group = 'admins' -- To view, visit https://gerrit.wikimedia.org/r/172531 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I8fbef8279d4b7d00997086a2851cb65dcd898823 Gerrit-PatchSet: 3 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Giuseppe Lavagetto <[email protected]> Gerrit-Reviewer: Giuseppe Lavagetto <[email protected]> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
