If renewing the master's client SSL certificate fails, try
two more times before giving up. Unit test included.

Signed-off-by: Helga Velroyen <[email protected]>
---
 lib/cmdlib/cluster.py              | 49 ++++++++++++++++++++++----------------
 test/py/cmdlib/cluster_unittest.py | 45 ++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 21 deletions(-)

diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py
index 43ced02..fb32eec 100644
--- a/lib/cmdlib/cluster.py
+++ b/lib/cmdlib/cluster.py
@@ -111,6 +111,8 @@ class LUClusterRenewCrypto(NoHooksLU):
   takes care of the renewal of the client SSL certificates.
 
   """
+  _MAX_NUM_RETRIES = 3
+
   def Exec(self, feedback_fn):
     master_uuid = self.cfg.GetMasterNode()
     cluster = self.cfg.GetClusterInfo()
@@ -129,28 +131,33 @@ class LUClusterRenewCrypto(NoHooksLU):
     except IOError:
       logging.info("No old certificate available.")
 
-    try:
-      # Technically it should not be necessary to set the cert
-      # paths. However, due to a bug in the mock library, we
-      # have to do this to be able to test the function properly.
-      _UpdateMasterClientCert(
-          self, master_uuid, cluster, feedback_fn,
-          client_cert=pathutils.NODED_CLIENT_CERT_FILE,
-          client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP)
-    except errors.OpExecError as e:
-      feedback_fn("Could not renew the master's client SSL certificate."
-                  " Cleaning up. Error: %s." % e)
-      # Cleaning up temporary certificates
-      utils.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid,
-                                         cluster.candidate_certs)
-      utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
-                                         cluster.candidate_certs)
-      return
-    finally:
+    num_try = 0
+    successful = False
+    while not successful and num_try < self._MAX_NUM_RETRIES:
+      num_try += 1
       try:
-        utils.RemoveFile(pathutils.NODED_CLIENT_CERT_FILE_TMP)
-      except IOError:
-        pass
+        # Technically it should not be necessary to set the cert
+        # paths. However, due to a bug in the mock library, we
+        # have to do this to be able to test the function properly.
+        _UpdateMasterClientCert(
+            self, master_uuid, cluster, feedback_fn,
+            client_cert=pathutils.NODED_CLIENT_CERT_FILE,
+            client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP)
+        successful = True
+      except errors.OpExecError as e:
+        if num_try == self._MAX_NUM_RETRIES:
+          feedback_fn("Could not renew the master's client SSL certificate."
+                       " Cleaning up. Error: %s." % e)
+          # Cleaning up temporary certificates
+          utils.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid,
+                                             cluster.candidate_certs)
+          utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
+                                             cluster.candidate_certs)
+          try:
+            utils.RemoveFile(pathutils.NODED_CLIENT_CERT_FILE_TMP)
+          except IOError:
+            pass
+          return
 
     node_errors = {}
     nodes = self.cfg.GetAllNodesInfo()
diff --git a/test/py/cmdlib/cluster_unittest.py 
b/test/py/cmdlib/cluster_unittest.py
index 1630e90..917ff14 100644
--- a/test/py/cmdlib/cluster_unittest.py
+++ b/test/py/cmdlib/cluster_unittest.py
@@ -2427,5 +2427,50 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
         expected_digest = self._GetFakeDigest(node_uuid)
         self.assertEqual(expected_digest, cluster.candidate_certs[node_uuid])
 
+  def _RpcSuccessfulAfterRetries(self, node_uuid, _):
+    if self._retries < self._max_retries:
+      self._retries += 1
+      return self.RpcResultsBuilder() \
+        .CreateFailedNodeResult(node_uuid)
+    else:
+      return self.RpcResultsBuilder() \
+        .CreateSuccessfulNodeResult(node_uuid,
+          [(constants.CRYPTO_TYPE_SSL_DIGEST, self._GetFakeDigest(node_uuid))])
+
+  @patchPathutils("cluster")
+  def testMasterRetriesSuccess(self, pathutils):
+    self._InitPathutils(pathutils)
+
+    self._max_retries = 2
+    self._retries = 0
+    self.rpc.call_node_crypto_tokens = self._RpcSuccessfulAfterRetries
+
+    op = opcodes.OpClusterRenewCrypto()
+    self.ExecOpCode(op)
+
+    self._AssertCertFiles(pathutils)
+
+    cluster = self.cfg.GetClusterInfo()
+    master_uuid = self.cfg.GetMasterNode()
+    self.assertTrue(self._GetFakeDigest(master_uuid)
+                    in cluster.candidate_certs.values())
+
+  @patchPathutils("cluster")
+  def testMasterRetriesFail(self, pathutils):
+    self._InitPathutils(pathutils)
+
+    self._max_retries = 5
+    self._retries = 0
+    self.rpc.call_node_crypto_tokens = self._RpcSuccessfulAfterRetries
+
+    op = opcodes.OpClusterRenewCrypto()
+    self.ExecOpCode(op)
+
+    self._AssertCertFiles(pathutils)
+
+    cluster = self.cfg.GetClusterInfo()
+    self.assertFalse(cluster.candidate_certs.values)
+
+
 if __name__ == "__main__":
   testutils.GanetiTestProgram()
-- 
2.2.0.rc0.207.ga3a616c

Reply via email to