On Tue, Feb 22, 2011 at 2:40 PM, Michael Hanselmann <[email protected]> wrote:
> On 22 February 2011 13:55, René Nussbaumer <[email protected]> wrote:
>> --- a/lib/client/gnt_cluster.py
>> +++ b/lib/client/gnt_cluster.py
>> +def _InstanceStart(opts, inst_list, start):
>> […]
>> +  results = jex.GetResults()
>> +  bad_cnt = len([False for (success, _) in results if not success])
>
> No need for a list, just use a generator.

I don't get that.
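If the idea is just to avoid building a throwaway list for counting, I assume
it boils down to feeding a generator expression to sum(). A minimal sketch of
what I understand the suggestion to be (the results list here is made up):

  # results as returned by jex.GetResults(): a list of (success, msg) pairs
  results = [(True, "ok"), (False, "timeout"), (False, "unreachable")]
  # Count failures without materializing an intermediate list:
  bad_cnt = sum(1 for (success, _) in results if not success)
  assert bad_cnt == 2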

>> +class _RunWhenNodesReachableHelper:
>> +  """Helper class to make shared internal state sharing easier.
>> +
>> +  @ivar success: Indicates if all action_cb calls were successful
>> +
>> +  """
>> +  def __init__(self, node_list, action_cb, node2ip, port=None,
>> +               _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep):
>> +    self.down = set(node_list)
>> +    self.up = set()
>> +    self.node2ip = node2ip
>> +    self.success = True
>> +    self.action_cb = action_cb
>> +    self._ping_fn = _ping_fn
>> +    self._sleep_fn = _sleep_fn
>> +
>> +    if port is None:
>> +      self.port = netutils.GetDaemonPort(constants.NODED)
>> +    else:
>> +      self.port = port
>
> Please make the port a mandatory argument and call GetDaemonPort in
> the caller. Doing so reduces dependencies of the helper class.

Done.

>> +  def Wait(self, secs):
>> +    """Checks if a host is up or waits reamingin seconds.
>
> s/reamingin/remaining

Done.

>
>> --- a/man/gnt-cluster.rst
>> +++ b/man/gnt-cluster.rst
>> +EPO
>> +~~~
>> +
>> +**epo** [--on] [--groups|--all] *arguments*
>> +
>> +Performans an emergency power-off on nodes given as arguments. If ``--groups``
>
> s/Performans/Performs/

Done.

>
>> +is given, arguments are node groups. If ``--all`` is provided, the whole
>> +cluster will be shut down.
>
>> --- a/qa/qa_cluster.py
>> +++ b/qa/qa_cluster.py
>> @@ -27,13 +27,14 @@ import tempfile
>>  import os.path
>>
>>  from ganeti import constants
>> +from ganeti import compat
>>  from ganeti import utils
>>
>>  import qa_config
>>  import qa_utils
>>  import qa_error
>>
>> -from qa_utils import AssertEqual, AssertCommand
>> +from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
>>
>>
>>  def _RemoveFileFromAllNodes(filename):
>> @@ -150,6 +151,37 @@ def TestClusterOob():
>>                  "oob_program="])
>>
>>
>> +def TestClusterEpo():
>> +  """gnt-cluster epo"""
>> +  master = qa_config.GetMasterNode()
>> +
>> +  # Conflicting
>> +  AssertCommand(["gnt-cluster", "epo", "--groups", "--all"], fail=True)
>> +  # --all doesn't expect arguments
>> +  AssertCommand(["gnt-cluster", "epo", "--all", "some_arg"], fail=True)
>> +
>> +  # Unless --all is given master is not allowed to be in the list
>> +  AssertCommand(["gnt-cluster", "epo", "-f", master["primary"]], fail=True)
>> +
>> +  # This shouldn't fail
>> +  AssertCommand(["gnt-cluster", "epo", "-f", "--all"])
>
> Won't this shut down all machines?

No, at this stage of the QA there is no OOB script; if the nodes do not
support OOB they will not be shut down, the output just notes that they
need manual care.

>> +  # All instances should have been stopped now
>> +  result_output = GetCommandOutput(master["primary"],
>> +                                   "gnt-instance list --no-header -o status")
>> +  AssertEqual(compat.any(status == "running"
>> +                         for status in result_output.splitlines()), False)
>
> You should rather check that all are stopped. They could be, after
> all, in some error status.

That was intentional, because the command would have failed if one of the
instances had failed to stop, so we wouldn't reach that point. But I can
change … to ADMIN_down, I guess?
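Something along these lines, then (just a sketch; it assumes the status
column prints "ADMIN_down" for cleanly stopped instances and reuses the
qa_utils helpers from the patch):

  result_output = GetCommandOutput(master["primary"],
                                   "gnt-instance list --no-header -o status")
  # Stricter check: every instance must be ADMIN_down, not merely "not running"
  AssertEqual(compat.all(status == "ADMIN_down"
                         for status in result_output.splitlines()), True)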

>> +  # Now start everything again
>> +  AssertCommand(["gnt-cluster", "epo", "--on", "-f", "--all"])
>
> Don't restart the QA machines. Can't you use a fake OOB script?

They will not, see above.

>> --- /dev/null
>> +++ b/test/ganeti.client.gnt_cluster_unittest.py
>> +from ganeti.client import gnt_cluster
>> +from ganeti.utils import retry
>
> Just import “utils” and use the functions directly, e.g. “utils.Retry”.

Okay, done. I don't get the point of splitting utils then ;).

>> +class TestEpo(unittest.TestCase):
>> +  def setUp(self):
>> +    self.nodes = set()
>> +    self.nodes2ip = {}
>> +    self.ips2node = {}
>> +    for ip in range(1, 10):
>> +      name = "node%d" % ip
>> +      self.nodes.add(name)
>> +      self.nodes2ip[name] = ip
>> +      self.ips2node[ip] = name
>
> This can be done with less code:
>
> nodes2ip = dict(("node%s" % i, "192.0.2.%s" % i) for i in range(1, 10))
> nodes = nodes.keys()
> ip2node = dict((v, k) for (k, v) in nodes2ip.items())

Done. While this is less code, it is in my eyes less readable and you have
to unwind the list twice.
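For reference, this is roughly how I read the suggestion (the second line
presumably means nodes2ip.keys(), wrapped in set() so the later set
comparisons keep working; the 192.0.2.x addresses are only illustrative):

  nodes2ip = dict(("node%s" % i, "192.0.2.%s" % i) for i in range(1, 10))
  nodes = set(nodes2ip.keys())
  ips2node = dict((v, k) for (k, v) in nodes2ip.items())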

>> +  def _FakeAction(*args):
>> +    return True
>> +
>> +  def _FakePing(*args, **kwargs):
>> +    return True
>
> Here you should verify that live_port, etc. is set correctly.

I don't see the point in that. This function should just emulate a host
being up, not check anything.

>
>> +  def _FakeSleep(secs):
>
> Assert that secs is >= 0 and <= some value.

Same applies here.

>> +    return
>> +
>> +  def testPingFnRemoveHostsUp(self):
>> +    seen = {}
>> +    def _FakeSeenPing(ip, *args, **kwargs):
>> +      node = self.ips2node[ip]
>> +
>> +      if node in seen:
>> +        seen[node] += 1
>> +      else:
>> +        seen[node] = 1
>> +      return True
>
> If you make “seen” a set, this becomes much simpler.
>
> self.assertFalse(node not in seen)
> seen.add(node)

But this is not doing the same thing. I want exactly one call for every
node: not 0, not 2, just 1. If I simply add the nodes to a set, I don't
know how many times each one was called.
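If the per-node call count has to stay, one compact middle ground (only an
illustration, not necessarily what ends up in the patch) is dict.get with a
default, which still allows the exactly-one-call assertion:

  ips2node = {1: "node1", 2: "node2"}
  seen = {}

  def _FakeSeenPing(ip, *args, **kwargs):
    node = ips2node[ip]
    seen[node] = seen.get(node, 0) + 1  # count calls per node
    return True

  _FakeSeenPing(1)
  _FakeSeenPing(2)
  assert seen == {"node1": 1, "node2": 1}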

>> +    helper = gnt_cluster._RunWhenNodesReachableHelper(self.nodes,
>> +                                                      self._FakeAction,
>> +                                                      self.nodes2ip, port=0,
>> +                                                      _ping_fn=_FakeSeenPing,
>> +                                                      _sleep_fn=self._FakeSleep)
>> +
>> +    nodes_len = len(self.nodes)
>> +    for num in range(nodes_len):
>
> for num in enumerate(self.nodes):

Done.
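One caveat: taken literally, "for num in enumerate(self.nodes)" binds num to
(index, node) tuples, so the "num < nodes_len - 1" comparison below would no
longer work as intended. I assume the idea is to unpack the index, roughly
like this (the names are only stand-ins for the test's attributes):

  nodes = ["node1", "node2", "node3"]      # stand-in for self.nodes
  for (num, _node) in enumerate(nodes):    # unpack, so num stays an integer
    is_last = (num == len(nodes) - 1)
    # in the test: expect RetryAgain from helper() unless is_last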

>
>> +      helper.Wait(0)
>> +      if num < nodes_len - 1:
>> +        self.assertRaises(retry.RetryAgain, helper)
>> +      else:
>> +        helper()
>> +
>> +    self.assert_(compat.all(count == 1 for count in seen.values()))
>
> See above.

See above :-P

>
>> +    self.assertEqual(set(seen.keys()), self.nodes)
>> +    self.assertFalse(helper.down)
>> +    self.assertEqual(helper.up, self.nodes)
>> +
>> +  def testActionReturnFalseSetsHelperFalse(self):
>> +    called = False
>> +    def _FalseAction(*args):
>> +      return called
>> +
>> +    helper = gnt_cluster._RunWhenNodesReachableHelper(self.nodes, _FalseAction,
>> +                                                      self.nodes2ip, port=0,
>> +                                                      _ping_fn=self._FakePing,
>> +                                                      _sleep_fn=self._FakeSleep)
>> +    for _ in self.nodes:
>> +      try:
>> +        helper()
>> +      except retry.RetryAgain:
>> +        called = True
>
> Unfortunately there are no real closures in Python :-(

Yes.
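The usual workaround, for what it's worth, is to mutate a container instead
of rebinding the outer name; a small self-contained sketch, not taken from
the patch:

  def MakeAction():
    state = {"called": False}
    def _Action(*args):
      state["called"] = True  # mutate the dict, no rebinding needed
      return state["called"]
    return _Action, state

  action, state = MakeAction()
  action()
  assert state["called"]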

>
>> +        pass
>
> The “pass” is pointless.

Indeed, an uncaught leftover. Fixed.

>
>> +    self.assertFalse(helper.success)
>> +
>> +  def testMaybeInstanceStartup(self):
>> +    instances_arg = []
>> +    def _FakeInstanceStart(opts, instances, start):
>> +      instances_arg.append(set(instances))
>> +      return None
>> +
>> +    inst_map = {
>> +      "inst1": set(["node1", "node2"]),
>> +      "inst2": set(["node1", "node3"]),
>> +      "inst3": set(["node2", "node1"]),
>
> Can you also test with instances with one, three and four nodes? The
> algorithm must be generic.

Good point. Will add.
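Roughly along these lines (instance and node names are only illustrative):

  inst_map = {
    "inst1": set(["node1"]),                              # one node
    "inst2": set(["node1", "node2"]),                     # two nodes
    "inst3": set(["node1", "node2", "node3"]),            # three nodes
    "inst4": set(["node1", "node2", "node3", "node4"]),   # four nodes
    }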

>
>> +      }
>> +
>> +    fn = _FakeInstanceStart
>> +    self.assert_(gnt_cluster._MaybeInstanceStartup(None, inst_map, set(),
>> +                                                   _instance_start_fn=fn))
>> +    self.assertFalse(instances_arg)
>> +    result = gnt_cluster._MaybeInstanceStartup(None, inst_map, set(["node1"]),
>> +                                               _instance_start_fn=fn)
>> +    self.assert_(result)
>> +    self.assertFalse(instances_arg)
>> +    result = gnt_cluster._MaybeInstanceStartup(None, inst_map,
>> +                                               set(["node1", "node3"]),
>> +                                               _instance_start_fn=fn)
>> +    self.assert_(result is None)
>> +    self.assertEqual(instances_arg.pop(0), set(["inst2"]))
>> +    self.assertFalse("inst2" in inst_map)
>> +    result = gnt_cluster._MaybeInstanceStartup(None, inst_map,
>> +                                               set(["node1", "node3"]),
>> +                                               _instance_start_fn=fn)
>> +    self.assert_(result)
>> +    self.assertFalse(instances_arg)
>> +    result = gnt_cluster._MaybeInstanceStartup(None, inst_map,
>> +                                               set(["node1", "node3", "node2"]),
>> +                                               _instance_start_fn=fn)
>> +    self.assert_(result is None)
>> +    self.assertEqual(instances_arg.pop(0), set(["inst1", "inst3"]))
>> +    self.assertFalse(inst_map)
>
> I may have missed it, but is there a test verifying that the action
> callback only gets started nodes?

Well, the "seen" check does that indirectly. I don't have to verify that
Python provides self.up, but I do care that the nodes get moved; if they
weren't moved, the seen count wouldn't be 1.

René
