On Thu, Jul 24, 2014 at 2:31 AM, Yuto KAWAMURA(kawamuray) <[email protected]> wrote:

> Existing implementation doesn't care about LXC container state once it
>

The existing ... about an LXC container's state ...


> has been daemonized and detached from the lxc-start process which was
>

s/which was//


> executed by LXCHypervisor.
> This causes a problem if the LXC container exited abnormally after
> being daemonized. StartInstance won't report any error because lxc-start
>

... because the ...


> command reported success, but the container won't live long.
> Followings changes have been made to solve this problem:
>

The following changes ...


> - Add _WaitForInstanceState method to wait for instance state transition.
> - Split part of StartInstance into _SpawnLXC method. _SpawnLXC executes
>   lxc-start to run LXC container and calls _WaitForInstanceState method
>   to ensure that the daemonized container didn't exit abnormally.
> - Introduce hvparam 'lxc_startup_wait' which specifies the timeout for
>   waiting instance state transition on starting.
>

on the instance state transition when starting.


>
> Signed-off-by: Yuto KAWAMURA(kawamuray) <[email protected]>
> ---
>  lib/hypervisor/hv_lxc.py | 67 ++++++++++++++++++++++++++++++++++++++++--------
>  man/gnt-instance.rst     | 10 ++++++++
>  src/Ganeti/Constants.hs  | 13 +++++++++-
>  3 files changed, 79 insertions(+), 11 deletions(-)
>
> diff --git a/lib/hypervisor/hv_lxc.py b/lib/hypervisor/hv_lxc.py
> index 037d3b7..131b76b 100644
> --- a/lib/hypervisor/hv_lxc.py
> +++ b/lib/hypervisor/hv_lxc.py
> @@ -78,6 +78,7 @@ class LXCHypervisor(hv_base.BaseHypervisor):
>
>    PARAMETERS = {
>      constants.HV_CPU_MASK: hv_base.OPT_CPU_MASK_CHECK,
> +    constants.HV_LXC_STARTUP_WAIT: hv_base.OPT_NONNEGATIVE_INT_CHECK,
>      }
>
>    def __init__(self):
> @@ -386,6 +387,55 @@ class LXCHypervisor(hv_base.BaseHypervisor):
>        stash["loopback-device"] = loop_dev_path
>        return dm_dev_paths[0]
>
> +  @classmethod
> +  def _WaitForInstanceState(cls, instance_name, state, timeout):
> +    """Wait for instance state transition within timeout
>

... an instance ...


> +
> +    Return True if instance state is changed to state within timeout secs.
> +    Currently only state RUNNING is supported.
>

Currently supports only the RUNNING state.

Btw, is this true? Looking at the lxc-wait man page for 1.0.5, it seems all
states are supported.


> +
> +    """
> +    result = utils.RunCmd(["lxc-wait", "-n", instance_name, "-s", state],
> +                          timeout=timeout)
> +    if result.failed_by_timeout:
> +      return False
> +    elif result.failed:
> +      raise HypervisorError("Failed to wait instance state transition: %s" %
>

Perhaps:
Failure while waiting for instance state transition


> +                            result.output)
> +    else:
> +      return True
> +
> +  def _SpawnLXC(self, instance, log_file, conf_file):
> +    """Execute lxc-start and wait until container health is confirmed.
>

Nice wording!


> +
> +    """
> +    lxc_start_cmd = [
> +      "lxc-start",
> +      "-n", instance.name,
> +      "-o", log_file,
> +      "-l", "DEBUG",
> +      "-f", conf_file,
> +      "-d"
> +      ]
> +
> +    result = utils.RunCmd(lxc_start_cmd)
> +    if result.failed:
> +      raise HypervisorError("Failed to start instance %s : %s" %
> +                            (instance.name, result.output))
> +
> +    lxc_startup_wait = instance.hvparams[constants.HV_LXC_STARTUP_WAIT]
> +    if not self._WaitForInstanceState(instance.name,
> +                                      constants.LXC_STATE_RUNNING,
> +                                      lxc_startup_wait):
> +      raise HypervisorError("Instance %s state didn't change to RUNNING within"
> +                            " %s secs" % (instance.name, lxc_startup_wait))
> +
> +    # Ensure that the instance is running correctly after daemonized
>

s/daemonized/daemonization/ or s/daemonized/being daemonized/


> +    if not self._IsInstanceAlive(instance.name):
> +      raise HypervisorError("Failed to start instance %s :"
> +                            " lxc process exited after being daemonized" %
> +                            instance.name)
> +
>    def StartInstance(self, instance, block_devices, startup_paused):
>      """Start an instance.
>
> @@ -421,16 +471,13 @@ class LXCHypervisor(hv_base.BaseHypervisor):
>        conf = self._CreateConfigFile(instance, sda_dev_path)
>        utils.WriteFile(conf_file, data=conf)
>
> -      logging.info("Running lxc-start")
> -      result = utils.RunCmd(["lxc-start",
> -                             "-n", instance.name,
> -                             "-o", log_file,
> -                             "-l", "DEBUG",
> -                             "-f", conf_file,
> -                             "-d"])
> -      if result.failed:
> -        raise HypervisorError("Running the lxc-start failed: %s" %
> -                              result.output)
> +      logging.info("Starting LXC container")
> +      try:
> +        self._SpawnLXC(instance, log_file, conf_file)
> +      except:
> +        logging.error("Failed to start instance %s. Please take a look at %s to"
> +                      " see errors from LXC.", instance.name, log_file)
>

s/errors from LXC/LXC errors/


> +        raise
>      except:
>        # Save an original error
>        exc_info = sys.exc_info()
> diff --git a/man/gnt-instance.rst b/man/gnt-instance.rst
> index 997771e..d74b3c5 100644
> --- a/man/gnt-instance.rst
> +++ b/man/gnt-instance.rst
> @@ -869,6 +869,16 @@ virtio\_net\_queues
>
>      It is set to ``1`` by default.
>
> +lxc\_startup\_wait
> +    Valid for the LXC hypervisor.
> +
> +    This integer option specifies the number of seconds to wait
> +    for the state of an LXC container changes to "RUNNING" after
> +    startup, as reported by lxc-wait.
> +    Otherwise we assume an error has occurred and report it.
> +
> +    It is set to ``30`` by default.
> +
>  The ``-O (--os-parameters)`` option allows customisation of the OS
>  parameters. The actual parameter names and values depend on the OS being
>  used, but the syntax is the same key=value. For example, setting a
> diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs
> index 154ee37..965cb4a 100644
> --- a/src/Ganeti/Constants.hs
> +++ b/src/Ganeti/Constants.hs
> @@ -519,6 +519,10 @@ socatUseCompress = AutoConf.socatUseCompress
>  socatUseEscape :: Bool
>  socatUseEscape = AutoConf.socatUseEscape
>
> +-- * LXC
> +lxcStateRunning :: String
> +lxcStateRunning = "RUNNING"
> +
>  -- * Console types
>
>  -- | Display a message for console access
> @@ -1641,6 +1645,9 @@ hvKvmUseChroot = "use_chroot"
>  hvKvmUserShutdown :: String
>  hvKvmUserShutdown = "user_shutdown"
>
> +hvLxcStartupWait :: String
> +hvLxcStartupWait = "lxc_startup_wait"
> +
>  hvMemPath :: String
>  hvMemPath = "mem_path"
>
> @@ -1803,6 +1810,7 @@ hvsParameterTypes = Map.fromList
>    , (hvKvmSpiceZlibGlzImgCompr,         VTypeString)
>    , (hvKvmUseChroot,                    VTypeBool)
>    , (hvKvmUserShutdown,                 VTypeBool)
> +  , (hvLxcStartupWait,                  VTypeInt)
>    , (hvMemPath,                         VTypeString)
>    , (hvMigrationBandwidth,              VTypeInt)
>    , (hvMigrationDowntime,               VTypeInt)
> @@ -3881,7 +3889,10 @@ hvcDefaults =
>            , (hvVnetHdr,                         PyValueEx True)])
>    , (Fake, Map.fromList [(hvMigrationMode, PyValueEx htMigrationLive)])
>    , (Chroot, Map.fromList [(hvInitScript, PyValueEx "/ganeti-chroot")])
> -  , (Lxc, Map.fromList [(hvCpuMask, PyValueEx "")])
> +  , (Lxc, Map.fromList
> +          [ (hvCpuMask,        PyValueEx "")
> +          , (hvLxcStartupWait, PyValueEx (30 :: Int))
> +          ])
>    ]
>
>  hvcGlobals :: FrozenSet String
> --
> 1.8.5.5
>
>
Hrvoje Ribicic
Ganeti Engineering
Google Germany GmbH
Dienerstr. 12, 80331, München

Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg
Geschäftsführer: Graham Law, Christine Elizabeth Flores
Steuernummer: 48/725/00206
Umsatzsteueridentifikationsnummer: DE813741370

Reply via email to