On Thu, Jul 24, 2014 at 2:31 AM, Yuto KAWAMURA(kawamuray) < [email protected]> wrote:
> - Do not return even if instance root dir doesn't exist. > - Branching procedures by force parameter. > - Change raise HypervisorError(...) to logging.warn to not abort retry > loop which is performed in backend in soft shutdown. > - No need to sleep for waiting instance shutdown. The method will be > called until the timeout. > - Raise HypervisorError if lxc-stop failed in hard shutdown. > - Use "lxc-stop --nokill" for soft shutdown. > - In LXC >= 1.0.0, killing container by lxc-stop needs explicit "--kill" > switch. > > Signed-off-by: Yuto KAWAMURA(kawamuray) <[email protected]> > --- > lib/hypervisor/hv_lxc.py | 36 ++++++++++++------------------------ > 1 file changed, 12 insertions(+), 24 deletions(-) > > diff --git a/lib/hypervisor/hv_lxc.py b/lib/hypervisor/hv_lxc.py > index 21b947f..4971649 100644 > --- a/lib/hypervisor/hv_lxc.py > +++ b/lib/hypervisor/hv_lxc.py > @@ -25,7 +25,6 @@ > > import os > import os.path > -import time > import logging > > from ganeti import constants > @@ -334,37 +333,26 @@ class LXCHypervisor(hv_base.BaseHypervisor): > timeout=None): > """Stop an instance. > > - This method has complicated cleanup tests, as we must: > - - try to kill all leftover processes > - - try to unmount any additional sub-mountpoints > - - finally unmount the instance dir > - > """ > assert(timeout is None or force is not None) > > if name is None: > name = instance.name > > - timeout_cmd = [] > - if timeout is not None: > - timeout_cmd.extend(["timeout", str(timeout)]) > - > - root_dir = self._InstanceDir(name) > - if not os.path.exists(root_dir): > - return > - > if name in self.ListInstances(): > - # Signal init to shutdown; this is a hack > - if not retry and not force: > - result = utils.RunCmd(["chroot", root_dir, "poweroff"]) > + lxc_stop_cmd = ["lxc-stop", "-n", name] > + > + if force: > + lxc_stop_cmd.append("--kill") > + result = utils.RunCmd(lxc_stop_cmd) > Regardless of the fact that we do not provide the force and timeout options simultaneously right now, I would opt to put the timeout parameter here as well. > if result.failed: > - raise HypervisorError("Running 'poweroff' on the instance" > - " failed: %s" % result.output) > - time.sleep(2) > - result = utils.RunCmd(timeout_cmd.extend(["lxc-stop", "-n", name])) > - if result.failed: > - logging.warning("Error while doing lxc-stop for %s: %s", name, > - result.output) > + raise HypervisorError("Failed to kill instance %s: %s" % > + (name, result.output)) > + else: > + lxc_stop_cmd.extend(["--nokill", "--timeout", "-1"]) > I would guess that the --timeout -1 switch simply ensures that the command terminates cleanly after a second, and does not use the secondary meaning of the switch and initiates a hard shutdown of the container due to --nokill. Is this the case? And if so, why are you effectively overriding the function-provided timeout? + result = utils.RunCmd(lxc_stop_cmd, timeout=timeout) > + if result.failed: > + logging.error("Failed to stop instance %s: %s", name, > result.output) > > if not os.path.ismount(root_dir): > return > -- > 1.8.5.5 > > Hrvoje Ribicic Ganeti Engineering Google Germany GmbH Dienerstr. 12, 80331, München Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg Geschäftsführer: Graham Law, Christine Elizabeth Flores Steuernummer: 48/725/00206 Umsatzsteueridentifikationsnummer: DE813741370
