On Mon, 16 Mar 2015 at 15:40 Petr Pudlak <[email protected]> wrote:

> On Thu, Mar 05, 2015 at 11:11:01PM +0100, 'Helga Velroyen' via
> ganeti-devel wrote:
> >Currently an unreachable node can make LURenewCrypto fail
> >completely. This patch adds a unit test for it, and
> >improves the error handling of unreachable nodes in
> >a way, that the rest of the nodes are still handled
> >properly.
> >
> >Signed-off-by: Helga Velroyen <[email protected]>
> >---
> > lib/cmdlib/cluster.py              | 23 ++++++++++++++----
> > test/py/cmdlib/cluster_unittest.py | 50 ++++++++++++++++++++++++++++++
> ++++++++
> > 2 files changed, 68 insertions(+), 5 deletions(-)
> >
> >diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py
> >index 3762489..43ced02 100644
> >--- a/lib/cmdlib/cluster.py
> >+++ b/lib/cmdlib/cluster.py
> >@@ -152,17 +152,30 @@ class LUClusterRenewCrypto(NoHooksLU):
> >       except IOError:
> >         pass
> >
> >+    node_errors = {}
> >     nodes = self.cfg.GetAllNodesInfo()
> >     for (node_uuid, node_info) in nodes.items():
> >       if node_info.offline:
> >         feedback_fn("* Skipping offline node %s" % node_info.name)
> >         continue
> >       if node_uuid != master_uuid:
> >-        new_digest = CreateNewClientCert(self, node_uuid)
> >-        if node_info.master_candidate:
> >-          utils.AddNodeToCandidateCerts(node_uuid,
> >-                                        new_digest,
> >-                                        cluster.candidate_certs)
> >+        try:
> >+          new_digest = CreateNewClientCert(self, node_uuid)
> >+          if node_info.master_candidate:
> >+            utils.AddNodeToCandidateCerts(node_uuid,
> >+                                          new_digest,
> >+                                          cluster.candidate_certs)
> >+        except errors.OpExecError as e:
> >+          node_errors[node_uuid] = e
> >+
> >+    if node_errors:
> >+      msg = ("Some nodes' SSL client certificates could not be renewed."
> >+             " Please make sure those nodes are reachable and rerun"
> >+             " the operation. The affected nodes and their errors
> are:\n")
> >+      for uuid, e in node_errors.items():
> >+        msg += "Node %s: %s\n" % (uuid, e)
> >+      feedback_fn(msg)
> >+
> >     utils.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid,
> >                                        cluster.candidate_certs)
> >     utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
> >diff --git a/test/py/cmdlib/cluster_unittest.py b/test/py/cmdlib/cluster_
> unittest.py
> >index 87fd9c7..34983d8 100644
> >--- a/test/py/cmdlib/cluster_unittest.py
> >+++ b/test/py/cmdlib/cluster_unittest.py
> >@@ -2351,6 +2351,56 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
> >     cluster = self.cfg.GetClusterInfo()
> >     self.assertFalse(cluster.candidate_certs)
> >
> >+  def _partiallyFailingRpc(self, node_uuid, _):
> >+    if node_uuid == self._failed_node:
> >+      return self.RpcResultsBuilder() \
> >+        .CreateFailedNodeResult(node_uuid)
> >+    else:
> >+      return self.RpcResultsBuilder() \
> >+        .CreateSuccessfulNodeResult(node_uuid,
> >+          [(constants.CRYPTO_TYPE_SSL_DIGEST,
> self._GetFakeDigest(node_uuid))])
> >+
> >+  @patchPathutils("cluster")
> >+  def testNonMasterFails(self, pathutils):
> >+
> >+    # patch pathutils to point to temporary files
> >+    pathutils.NODED_CERT_FILE = self._node_cert
> >+    pathutils.NODED_CLIENT_CERT_FILE = self._client_node_cert
> >+    pathutils.NODED_CLIENT_CERT_FILE_TMP = \
> >+        self._client_node_cert_tmp
> >+
> >+    # create a few non-master, online nodes
> >+    num_nodes = 3
> >+    for _ in range(num_nodes):
> >+      self.cfg.AddNewNode()
> >+    nodes = self.cfg.GetAllNodesInfo()
> >+
> >+    # pick one node as the failing one
> >+    master_uuid = self.cfg.GetMasterNode()
> >+    self._failed_node = [node_uuid for node_uuid in nodes
> >+                        if node_uuid != master_uuid][1]
>
> Just nitpicking, the 'if' should be indented one more space, pep8/pylint
> might complain about it.
>

Thanks for pointing that out. I think we still don't lint the tests, so I
missed that.


>
> >+    self.rpc.call_node_crypto_tokens = self._partiallyFailingRpc
> >+
> >+    op = opcodes.OpClusterRenewCrypto()
> >+    self.ExecOpCode(op)
> >+
> >+    # Check if the correct certificates exist and don't exist on the
> master
> >+    self.assertTrue(os.path.exists(pathutils.NODED_CERT_FILE))
> >+    self.assertTrue(os.path.exists(pathutils.NODED_CLIENT_CERT_FILE))
> >+    self.assertFalse(os.path.exists(pathutils.NODED_CLIENT_
> CERT_FILE_TMP))
> >+
> >+    # Check if we have the correct digests in the configuration
> >+    cluster = self.cfg.GetClusterInfo()
> >+    # There should be one digest missing.
> >+    self.assertEqual(num_nodes, len(cluster.candidate_certs))
> >+    nodes = self.cfg.GetAllNodesInfo()
> >+    for (node_uuid, _) in nodes.items():
> >+      if node_uuid == self._failed_node:
> >+        self.assertTrue(node_uuid not in cluster.candidate_certs)
> >+      else:
> >+        expected_digest = self._GetFakeDigest(node_uuid)
> >+        self.assertEqual(expected_digest, cluster.candidate_certs[node_
> uuid])
> >+
> >
> > if __name__ == "__main__":
> >   testutils.GanetiTestProgram()
> >--
> >2.2.0.rc0.207.ga3a616c
> >
>
> Rest LGTM, thanks
>

Reply via email to