Signed-off-by: Tom Limoncelli <t...@google.com> --- daemons/ganeti-watcher | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/utils.py | 13 +++++++++++++ 2 files changed, 61 insertions(+), 0 deletions(-)
def IsRapiResponding(hostname):
  """Query the RAPI daemon on a host for its version and check the answer.

  A RAPI client is created for C{constants.DEFAULT_RAPI_PORT} on the given
  host, using the SSL configuration returned by
  C{rapi.client.CertAuthorityVerify(constants.RAPI_CERT_FILE)} and empty
  credentials, and its C{GetVersion()} method is called.

  @type hostname: string
  @param hostname: hostname of the node to connect to
  @rtype: boolean
  @return: True if the version reported by RAPI equals
      C{constants.RAPI_VERSION}; False if it differs, or if the query raised
      C{urllib2.URLError}, C{rapi.client.CertificateError} or
      C{rapi.client.GanetiApiError} (a warning is logged in those cases)

  """
  verify_config = rapi.client.CertAuthorityVerify(constants.RAPI_CERT_FILE)

  # Both building the client and issuing the request stay inside the "try"
  # block, so an error raised by either step is reported as "not responding".
  try:
    client = rapi.client.GanetiRapiClient(hostname,
                                          port=constants.DEFAULT_RAPI_PORT,
                                          config_ssl_verification=verify_config,
                                          username="",
                                          password="")
    reported_version = client.GetVersion()
  except urllib2.URLError:
    logging.warning("RAPI Result: Error: URLError")
    return False
  except rapi.client.CertificateError:
    logging.warning("RAPI Result: Error: CertificateError")
    return False
  except rapi.client.GanetiApiError:
    logging.warning("RAPI Result: GanetiApiError")
    return False

  logging.debug("RAPI Result: master_version is %s", reported_version)

  # An answer with an unexpected version also counts as a failed test
  if reported_version == constants.RAPI_VERSION:
    return True
  return False
def StopDaemon(name):
  """Stop a daemon through the daemon utility.

  Runs C{constants.DAEMON_UTIL} with the "stop" action for the given daemon.

  @param name: name of the daemon, passed as the last command argument
  @rtype: boolean
  @return: True if the command did not fail; False otherwise, after logging
      the failure reason and the command output as an error

  """
  stop_cmd = [constants.DAEMON_UTIL, "stop", name]
  outcome = RunCmd(stop_cmd)

  if not outcome.failed:
    return True

  logging.error("Can't stop daemon '%s', failure %s, output: %s",
                name, outcome.fail_reason, outcome.output)
  return False