Ema has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/371105 )

Change subject: Add monitoring specific metric to RunCommand
......................................................................


Add monitoring specific metric to RunCommand

pybal_monitor_runcommand_run_duration_seconds is a gauge that represents
the run latency, differentiated by the 'result' (successful/failed) and
'exitcode' labels.

Bug: T171710
Change-Id: I2aeea030955c1b2b1abe872e6ed8f5b334c09069
---
M pybal/monitors/runcommand.py
1 file changed, 30 insertions(+), 0 deletions(-)

Approvals:
  Ema: Verified; Looks good to me, approved
  Mark Bergsma: Looks good to me, but someone else must approve



diff --git a/pybal/monitors/runcommand.py b/pybal/monitors/runcommand.py
index a0ca0ec..b10afce 100644
--- a/pybal/monitors/runcommand.py
+++ b/pybal/monitors/runcommand.py
@@ -7,11 +7,13 @@
 
 from pybal import monitor
 from pybal.util import log
+from pybal.metrics import Gauge
 
 import os, sys, signal, errno
 import logging
 
 from twisted.internet import reactor, process, error
+from twisted.python.runtime import seconds
 
 class ProcessGroupProcess(process.Process, object):
     """
@@ -89,6 +91,20 @@
 
     TIMEOUT_RUN = 20
 
+    metric_labelnames = ('service', 'host', 'monitor')
+    metric_keywords = {
+        'namespace': 'pybal',
+        'subsystem': 'monitor_' + __name__.lower()
+    }
+
+    runcommand_metrics = {
+        'run_duration_seconds': Gauge(
+            'run_duration_seconds',
+            'Command duration',
+            labelnames=metric_labelnames + ('result', 'exitcode'),
+            **metric_keywords)
+    }
+
     def __init__(self, coordinator, server, configuration={}):
         """Constructor"""
 
@@ -131,6 +147,7 @@
     def runCommand(self):
         """Periodically called method that does a single uptime check."""
 
+        self.checkStartTime = seconds()
         self.runningProcess = self._spawnProcess(self, self.command, [self.command] + self.arguments,
                                                  sessionLeader=True, timeout=(self.timeout or None))
 
@@ -157,10 +174,23 @@
         Called when the process has ended
         """
 
+        duration = seconds() - self.checkStartTime
         if reason.check(error.ProcessDone):
             self._resultUp()
+            result = 'successful'
+            exitcode = 0
         elif reason.check(error.ProcessTerminated):
             self._resultDown(reason.getErrorMessage())
+            result = 'failed'
+            exitcode = reason.value.exitCode
+        else:
+            result = None
+            exitcode = None
+
+        self.runcommand_metrics['run_duration_seconds'].labels(
+            result=result, exitcode=exitcode,
+            **self.metric_labels
+            ).set(duration)
 
         # Schedule the next check
         if self.active:

-- 
To view, visit https://gerrit.wikimedia.org/r/371105
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I2aeea030955c1b2b1abe872e6ed8f5b334c09069
Gerrit-PatchSet: 2
Gerrit-Project: operations/debs/pybal
Gerrit-Branch: master
Gerrit-Owner: Mark Bergsma <m...@wikimedia.org>
Gerrit-Reviewer: Ema <e...@wikimedia.org>
Gerrit-Reviewer: Giuseppe Lavagetto <glavage...@wikimedia.org>
Gerrit-Reviewer: Mark Bergsma <m...@wikimedia.org>
Gerrit-Reviewer: Volans <rcocci...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to