Elukey has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/372155 )
Change subject: role::cache::kafka::webrequest: tune graphite alarms
......................................................................
role::cache::kafka::webrequest: tune graphite alarms
Bug: T172681
Change-Id: Iedaad271bc1a5b3f42ff3475b1d74ff0c70d741b
---
M modules/role/manifests/cache/kafka/webrequest.pp
1 file changed, 3 insertions(+), 3 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/55/372155/1
diff --git a/modules/role/manifests/cache/kafka/webrequest.pp b/modules/role/manifests/cache/kafka/webrequest.pp
index 6af96f7..c69abcc 100644
--- a/modules/role/manifests/cache/kafka/webrequest.pp
+++ b/modules/role/manifests/cache/kafka/webrequest.pp
@@ -145,15 +145,15 @@
monitoring::graphite_threshold { 'varnishkafka-kafka_drerr':
ensure => 'present',
description => 'Varnishkafka Delivery Errors per minute',
- metric => "derivative(transformNull(${graphite_metric_prefix}.varnishkafka.kafka_drerr, 0))",
+ metric => "sumSeries(summarize(perSecond(varnishkafka.cp*.$instance.*.varnishkafka.kafka_drerr), '2h', 'sum', false))",
# More than 0 errors is warning threshold.
warning => 0,
# More than 20000 errors is critical threshold.
- critical => 20000,
+ critical => 5,
# But only alert if a large percentage of the examined datapoints
# are over the threshold.
percentage => 80,
- from => '10min',
+ from => '2hours',
require => Logster::Job['varnishkafka-webrequest'],
}
}
--
To view, visit https://gerrit.wikimedia.org/r/372155
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Iedaad271bc1a5b3f42ff3475b1d74ff0c70d741b
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Elukey <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits