Mark Bergsma has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/371104 )
Change subject: Add monitoring specific metric to DNSQuery ...................................................................... Add monitoring specific metric to DNSQuery pybal_monitor_dnsquery_request_duration_seconds is a gauge that represents the request latency, differentiated by the 'result' label (successful/failed). Bug: T171710 Change-Id: Ia4814427f11fea266532e13c771e81becfceda52 --- M pybal/monitors/dnsquery.py 1 file changed, 28 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/debs/pybal refs/changes/04/371104/1 diff --git a/pybal/monitors/dnsquery.py b/pybal/monitors/dnsquery.py index 32c7a2a..0b0c2fa 100644 --- a/pybal/monitors/dnsquery.py +++ b/pybal/monitors/dnsquery.py @@ -6,6 +6,7 @@ """ from pybal import monitor +from pybal.metrics import Gauge from twisted.internet import reactor, defer from twisted.names import client, dns, error @@ -29,6 +30,19 @@ error.DNSQueryRefusedError, error.DNSQueryTimeoutError, error.DNSServerError, error.DNSUnknownError) + metric_labelnames = ('service', 'host', 'monitor') + metric_keywords = { + 'namespace': 'pybal', + 'subsystem': 'monitor_' + __name__.lower() + } + + dnsquery_metrics = { + 'request_duration_seconds': Gauge( + 'request_duration_seconds', + 'DNS query duration', + labelnames=metric_labelnames + ('result',), + **metric_keywords) + } def __init__(self, coordinator, server, configuration): """Constructor""" @@ -98,9 +112,15 @@ else: resultStr = None - self.report('DNS query successful, %.3f s' % (runtime.seconds() - self.checkStartTime) + duration = runtime.seconds() - self.checkStartTime + self.report('DNS query successful, %.3f s' % (duration) + (resultStr and (': ' + resultStr) or "")) self._resultUp() + + self.dnsquery_metrics['request_duration_seconds'].labels( + result='successful', + **self.metric_labels + ).set(duration) return answers, authority, additional @@ -127,13 +147,19 @@ else: errorStr = str(failure) + duration = runtime.seconds() - self.checkStartTime self.report( - 'DNS query failed, %.3f s' % (runtime.seconds() - self.checkStartTime), + 'DNS query failed, %.3f s' % (duration), level=logging.ERROR ) self._resultDown(errorStr) + self.dnsquery_metrics['request_duration_seconds'].labels( + result='failed', + **self.metric_labels + ).set(duration) + failure.trap(*self.catchList) def _checkFinished(self, result): -- To view, visit https://gerrit.wikimedia.org/r/371104 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ia4814427f11fea266532e13c771e81becfceda52 Gerrit-PatchSet: 1 Gerrit-Project: operations/debs/pybal Gerrit-Branch: master Gerrit-Owner: Mark Bergsma <m...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits