Ori.livneh has uploaded a new change for review.
https://gerrit.wikimedia.org/r/177663
Change subject: hhvm: Make Ganglia monitoring scripts resilient to transient failures
......................................................................
hhvm: Make Ganglia monitoring scripts resilient to transient failures
The metric gathering modules should not die silently if HHVM is unresponsive.
Change-Id: I81346bb319d439a191df03d50953376921b4ca92
---
M modules/hhvm/files/monitoring/hhvm_health.py
M modules/hhvm/files/monitoring/hhvm_mem.py
2 files changed, 24 insertions(+), 10 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/63/177663/1
diff --git a/modules/hhvm/files/monitoring/hhvm_health.py b/modules/hhvm/files/monitoring/hhvm_health.py
index 4612998..763dfcd 100644
--- a/modules/hhvm/files/monitoring/hhvm_health.py
+++ b/modules/hhvm/files/monitoring/hhvm_health.py
@@ -5,10 +5,14 @@
"""
import json
+import logging
import re
import sys
import time
import urllib2
+
+
+logging.basicConfig(level=logging.INFO, stream=sys.stderr)
def flatten(mapping, prefix=''):
@@ -27,10 +31,13 @@
self.update()
def update(self):
- req = urllib2.urlopen(self.url)
- res = flatten(json.load(req), 'HHVM.')
- self.data.update(res)
- self.last_fetched = time.time()
+ try:
+ req = urllib2.urlopen(self.url)
+ res = flatten(json.load(req), 'HHVM.')
+ self.data.update(res)
+ self.last_fetched = time.time()
+ except (AttributeError, EnvironmentError, ValueError):
+ logging.exception('Failed to update stats:')
def expired(self):
return time.time() - self.last_fetched > self.expiry
@@ -72,7 +79,7 @@
for metric in metrics:
name = metric['name']
call_back = metric['call_back']
- print '%s: %s' % (name, call_back(name))
+ logging.info('%s: %s', name, call_back(name))
time.sleep(5)
diff --git a/modules/hhvm/files/monitoring/hhvm_mem.py b/modules/hhvm/files/monitoring/hhvm_mem.py
index f63ccf4..137b70c 100644
--- a/modules/hhvm/files/monitoring/hhvm_mem.py
+++ b/modules/hhvm/files/monitoring/hhvm_mem.py
@@ -5,10 +5,14 @@
"""
import json
+import logging
import re
import sys
import time
import urllib2
+
+
+logging.basicConfig(level=logging.INFO, stream=sys.stderr)
def flatten(mapping, prefix=''):
@@ -27,10 +31,13 @@
self.update()
def update(self):
- req = urllib2.urlopen(self.url)
- res = flatten(json.load(req)['Memory'], 'HHVM.')
- self.data.update(res)
- self.last_fetched = time.time()
+ try:
+ req = urllib2.urlopen(self.url)
+ res = flatten(json.load(req)['Memory'], 'HHVM.')
+ self.data.update(res)
+ self.last_fetched = time.time()
+ except (AttributeError, EnvironmentError, ValueError):
+ logging.exception('Failed to update stats:')
def expired(self):
return time.time() - self.last_fetched > self.expiry
@@ -66,7 +73,7 @@
for metric in metrics:
name = metric['name']
call_back = metric['call_back']
- print '%s: %s' % (name, call_back(name))
+ logging.info('%s: %s', name, call_back(name))
time.sleep(5)
--
To view, visit https://gerrit.wikimedia.org/r/177663
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I81346bb319d439a191df03d50953376921b4ca92
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ori.livneh <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits