Signed-off-by: Tom Limoncelli <t...@google.com>
---
 daemons/ganeti-watcher |   48 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/utils.py           |   13 +++++++++++++
 2 files changed, 61 insertions(+), 0 deletions(-)

diff --git a/daemons/ganeti-watcher b/daemons/ganeti-watcher
index 1f82db8..e9d9e59 100755
--- a/daemons/ganeti-watcher
+++ b/daemons/ganeti-watcher
@@ -36,6 +36,7 @@ import sys
 import time
 import logging
 from optparse import OptionParser
+import urllib2
 
 from ganeti import utils
 from ganeti import constants
@@ -48,6 +49,8 @@ from ganeti import ssconf
 from ganeti import bdev
 from ganeti import hypervisor
 from ganeti.confd import client as confd_client
+from ganeti import rapi
+import ganeti.rapi.client
 
 
 MAXTRIES = 5
@@ -595,6 +598,37 @@ def OpenStateFile(path):
   return os.fdopen(statefile_fd, "w+")
 
 
+def IsRapiResponding(hostname):
+  """Connects to RAPI port and does a simple test.
+
+  The test requests the RAPI version and checks it against RAPI_VERSION.
+
+  @type hostname: string
+  @param hostname: hostname of the node to connect to.
+  @rtype: bool
+  @return: True if the test passed, False if it failed
+
+  """
+  ssl_config = rapi.client.CertAuthorityVerify(constants.RAPI_CERT_FILE)
+  try:
+    master_version = rapi.client.GanetiRapiClient(
+        hostname,
+        port=constants.DEFAULT_RAPI_PORT,
+        config_ssl_verification=ssl_config,
+        username="", password="").GetVersion()
+  except urllib2.URLError, err:
+    logging.warning("RAPI Result: Error: URLError (%s)", err)
+    return False
+  except rapi.client.CertificateError, err:
+    logging.warning("RAPI Result: Error: CertificateError (%s)", err)
+    return False
+  except rapi.client.GanetiApiError, err:
+    logging.warning("RAPI Result: GanetiApiError (%s)", err)
+    return False
+  logging.debug("RAPI Result: master_version is %s", master_version)
+  return master_version == constants.RAPI_VERSION
+
+
 def ParseOptions():
   """Parse the command line options.
 
@@ -668,6 +702,20 @@ def main():
       # we are on master now
       utils.EnsureDaemon(constants.RAPI)
 
+      # If RAPI isn't responding to queries, try one restart.
+      logging.debug("Attempting to talk with RAPI.")
+      rapi_responding = IsRapiResponding(constants.LOCALHOST_IP_ADDRESS)
+      if not rapi_responding:
+        logging.warning("Couldn't get answer from Ganeti RAPI daemon."
+                        " Restarting Ganeti RAPI.")
+        utils.StopDaemon(constants.RAPI)
+        utils.EnsureDaemon(constants.RAPI)
+        logging.debug("Second attempt to talk with RAPI")
+        rapi_responding = IsRapiResponding(constants.LOCALHOST_IP_ADDRESS)
+      if rapi_responding:
+        logging.debug("Successfully talked to RAPI.")
+      else:
+        logging.fatal("RAPI is not responding. Please investigate.")
       try:
         watcher = Watcher(options, notepad)
       except errors.ConfigurationError:
diff --git a/lib/utils.py b/lib/utils.py
index 7b93870..620d465 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -2166,6 +2166,19 @@ def EnsureDaemon(name):
   return True
 
 
+def StopDaemon(name):
+  """Stop daemon using constants.DAEMON_UTIL.
+
+  @return: True if the stop command succeeded, False (after logging) if not
+
+  """
+  result = RunCmd([constants.DAEMON_UTIL, "stop", name])
+  if result.failed:
+    logging.error("Can't stop daemon '%s', failure %s, output: %s",
+                  name, result.fail_reason, result.output)
+  return not result.failed
+
+
 def WritePidFile(name):
   """Write the current process pidfile.
 
-- 
1.7.0.1

Reply via email to