Ottomata has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/403185 )
Change subject: Add $monitoring_enabled parameter to cache::kafka::webrequest profile
......................................................................
Add $monitoring_enabled parameter to cache::kafka::webrequest profile
Also rename $statsd_host to $statsd to match other profiles.
This should be a no-op.
The cache::kafka::webrequest profile is included in cache::base profile,
which is in turn included by the cache role classes. As such, we set
this parameter in each cache role hiera.
Change-Id: I86dc34d21bc990ddccc94d5ab43a1763c6ada6d0
---
M hieradata/role/common/cache/canary.yaml
M hieradata/role/common/cache/misc.yaml
M hieradata/role/common/cache/text.yaml
M hieradata/role/common/cache/upload.yaml
M modules/profile/manifests/cache/kafka/webrequest.pp
5 files changed, 60 insertions(+), 34 deletions(-)
Approvals:
Ottomata: Looks good to me, approved
jenkins-bot: Verified
diff --git a/hieradata/role/common/cache/canary.yaml b/hieradata/role/common/cache/canary.yaml
index 40bb4c2..c943922 100644
--- a/hieradata/role/common/cache/canary.yaml
+++ b/hieradata/role/common/cache/canary.yaml
@@ -94,4 +94,14 @@
# Profile::cache::ssl::unified
profile::cache::ssl::unified::monitoring: true
profile::cache::ssl::unified::letsencrypt: false
+
+# Enable varnishkafka-webrequest instance monitoring.
+profile::cache::kafka::webrequest::monitoring_enabled: true
+
+# This should match an entry in the kafka_clusters hash (defined in common.yaml).
+# We use the fully qualified kafka cluster name (with datacenter), because we want
+# to route all statsv -> statsd traffic to the datacenter that hosts the master
+# statsd instance. If the active statsd instance changes to codfw (for an extended period of time)
+# should probably change this to main-codfw. If you don't things will probably be fine,
+# but statsv will have to send messages over UDP cross-DC to the active statsd instance.
profile::cache::kafka::statsv::kafka_cluster_name: main-eqiad
diff --git a/hieradata/role/common/cache/misc.yaml b/hieradata/role/common/cache/misc.yaml
index 3f552b5..47b242b 100644
--- a/hieradata/role/common/cache/misc.yaml
+++ b/hieradata/role/common/cache/misc.yaml
@@ -305,3 +305,6 @@
# Profile::cache::ssl::unified
profile::cache::ssl::unified::monitoring: true
profile::cache::ssl::unified::letsencrypt: false
+
+# Enable varnishkafka-webrequest instance monitoring.
+profile::cache::kafka::webrequest::monitoring_enabled: true
diff --git a/hieradata/role/common/cache/text.yaml b/hieradata/role/common/cache/text.yaml
index 40e5c5d..a319c17 100644
--- a/hieradata/role/common/cache/text.yaml
+++ b/hieradata/role/common/cache/text.yaml
@@ -100,6 +100,9 @@
profile::cache::ssl::unified::monitoring: true
profile::cache::ssl::unified::letsencrypt: false
+# Enable varnishkafka-webrequest instance monitoring.
+profile::cache::kafka::webrequest::monitoring_enabled: true
+
# This should match an entry in the kafka_clusters hash (defined in common.yaml).
# We use the fully qualified kafka cluster name (with datacenter), because we want
# to route all statsv -> statsd traffic to the datacenter that hosts the master
diff --git a/hieradata/role/common/cache/upload.yaml b/hieradata/role/common/cache/upload.yaml
index b5c97ec..5f32a80 100644
--- a/hieradata/role/common/cache/upload.yaml
+++ b/hieradata/role/common/cache/upload.yaml
@@ -71,3 +71,6 @@
# Profile::cache::ssl::unified
profile::cache::ssl::unified::monitoring: true
profile::cache::ssl::unified::letsencrypt: false
+
+# Enable varnishkafka-webrequest instance monitoring.
+profile::cache::kafka::webrequest::monitoring_enabled: true
diff --git a/modules/profile/manifests/cache/kafka/webrequest.pp b/modules/profile/manifests/cache/kafka/webrequest.pp
index 6c4a17c..655779b 100644
--- a/modules/profile/manifests/cache/kafka/webrequest.pp
+++ b/modules/profile/manifests/cache/kafka/webrequest.pp
@@ -5,15 +5,19 @@
#
# === Parameters
#
+# [*monitoring_enabled*]
+# True if the varnishkafka instance should be monitored.
+#
# [*cache_cluster*]
# the name of the cache cluster
#
-# [*statsd_host*]
-# the host to send statsd data to.
+# [*statsd*]
+# The host:port to send statsd data to.
#
class profile::cache::kafka::webrequest(
- $cache_cluster = hiera('cache::cluster'),
- $statsd_host = hiera('statsd'),
+ $monitoring_enabled =
hiera('profile::cache::kafka::webrequest::monitoring_enabled', false),
+ $cache_cluster = hiera('cache::cluster'),
+ $statsd = hiera('statsd'),
) {
$config = kafka_config('analytics')
# NOTE: This is used by inheriting classes role::cache::kafka::*
@@ -120,38 +124,41 @@
force_protocol_version => $kafka_protocol_version,
}
- # Generate icinga alert if varnishkafka is not running.
- nrpe::monitor_service { 'varnishkafka-webrequest':
- description => 'Webrequests Varnishkafka log producer',
- nrpe_command => "/usr/lib/nagios/plugins/check_procs -c 1 -a
'/usr/bin/varnishkafka -S /etc/varnishkafka/webrequest.conf'",
- contact_group => 'admins,analytics',
- require => Class['::varnishkafka'],
+ if $monitoring_enabled {
+ # Generate icinga alert if varnishkafka is not running.
+ nrpe::monitor_service { 'varnishkafka-webrequest':
+ description => 'Webrequests Varnishkafka log producer',
+ nrpe_command => "/usr/lib/nagios/plugins/check_procs -c 1 -a
'/usr/bin/varnishkafka -S /etc/varnishkafka/webrequest.conf'",
+ contact_group => 'admins,analytics',
+ require => Class['::varnishkafka'],
+ }
+
+ $graphite_metric_prefix =
"varnishkafka.${::hostname}.webrequest.${cache_cluster}"
+
+ # Sets up Logster to read from the Varnishkafka instance stats JSON file
+ # and report metrics to statsd.
+ varnishkafka::monitor::statsd { 'webrequest':
+ graphite_metric_prefix => $graphite_metric_prefix,
+ statsd_host_port => $statsd,
+ }
+
+ # Generate an alert if too many delivery report errors per minute
+ # (logster only reports once a minute)
+ monitoring::graphite_threshold { 'varnishkafka-kafka_drerr':
+ ensure => 'present',
+ description => 'Varnishkafka Delivery Errors per minute',
+ dashboard_links =>
['https://grafana.wikimedia.org/dashboard/db/varnishkafka?panelId=20&fullscreen&orgId=1'],
+ metric =>
"derivative(transformNull(${graphite_metric_prefix}.varnishkafka.kafka_drerr,
0))",
+ warning => 0,
+ critical => 5000,
+ # But only alert if a large percentage of the examined datapoints
+ # are over the threshold.
+ percentage => 80,
+ from => '10min',
+ require => Logster::Job['varnishkafka-webrequest'],
+ }
}
- $graphite_metric_prefix =
"varnishkafka.${::hostname}.webrequest.${cache_cluster}"
-
- # Sets up Logster to read from the Varnishkafka instance stats JSON file
- # and report metrics to statsd.
- varnishkafka::monitor::statsd { 'webrequest':
- graphite_metric_prefix => $graphite_metric_prefix,
- statsd_host_port => $statsd_host,
- }
-
- # Generate an alert if too many delivery report errors per minute
- # (logster only reports once a minute)
- monitoring::graphite_threshold { 'varnishkafka-kafka_drerr':
- ensure => 'present',
- description => 'Varnishkafka Delivery Errors per minute',
- dashboard_links =>
['https://grafana.wikimedia.org/dashboard/db/varnishkafka?panelId=20&fullscreen&orgId=1'],
- metric =>
"derivative(transformNull(${graphite_metric_prefix}.varnishkafka.kafka_drerr,
0))",
- warning => 0,
- critical => 5000,
- # But only alert if a large percentage of the examined datapoints
- # are over the threshold.
- percentage => 80,
- from => '10min',
- require => Logster::Job['varnishkafka-webrequest'],
- }
# Make sure varnishes are configured and started for the first time
# before the instances as well, or they fail to start initially...
Service <| tag == 'varnish_instance' |> ->
Varnishkafka::Instance['webrequest']
--
To view, visit https://gerrit.wikimedia.org/r/403185
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I86dc34d21bc990ddccc94d5ab43a1763c6ada6d0
Gerrit-PatchSet: 3
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
Gerrit-Reviewer: Elukey <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits