2014-07-28 10:01 GMT+09:00 Hrvoje Ribicic <[email protected]>:
> On Thu, Jul 24, 2014 at 2:31 AM, Yuto KAWAMURA(kawamuray)
> <[email protected]> wrote:
>>
>> Existing implementation doesn't care about LXC container state once it
>
>
> The existing ... about an LXC container's state ...
>
>>
>> has been daemonized and detached from the lxc-start process which was
>
>
> s/which was//
>
>>
>> executed by LXCHypervisor.
>> This causes a problem if the LXC container exited abnormally after
>> being daemonized. StartInstance won't report any error because lxc-start
>
>
> ... because the ...
>
>>
>> command reported success, but the container won't live long.
>> Followings changes have been made to solve this problem:
>
>
> The following changes ...
>
>>
>> - Add _WaitForInstanceState method to wait for instance state transition.
>> - Split part of StartInstance into _SpawnLXC method. _SpawnLXC executes
>>   lxc-start to run LXC container and calls _WaitForInstanceState method
>>   to ensure that the daemonized container didn't exit abnormally.
>> - Introduce hvparam 'lxc_startup_wait' which specifies the timeout for
>>   waiting instance state transition on starting.
>
>
> on the instance state transition when starting.
>
>>
>>
>> Signed-off-by: Yuto KAWAMURA(kawamuray) <[email protected]>
>> ---
>>  lib/hypervisor/hv_lxc.py | 67
>> ++++++++++++++++++++++++++++++++++++++++--------
>>  man/gnt-instance.rst     | 10 ++++++++
>>  src/Ganeti/Constants.hs  | 13 +++++++++-
>>  3 files changed, 79 insertions(+), 11 deletions(-)
>>
>> diff --git a/lib/hypervisor/hv_lxc.py b/lib/hypervisor/hv_lxc.py
>> index 037d3b7..131b76b 100644
>> --- a/lib/hypervisor/hv_lxc.py
>> +++ b/lib/hypervisor/hv_lxc.py
>> @@ -78,6 +78,7 @@ class LXCHypervisor(hv_base.BaseHypervisor):
>>
>>    PARAMETERS = {
>>      constants.HV_CPU_MASK: hv_base.OPT_CPU_MASK_CHECK,
>> +    constants.HV_LXC_STARTUP_WAIT: hv_base.OPT_NONNEGATIVE_INT_CHECK,
>>      }
>>
>>    def __init__(self):
>> @@ -386,6 +387,55 @@ class LXCHypervisor(hv_base.BaseHypervisor):
>>        stash["loopback-device"] = loop_dev_path
>>        return dm_dev_paths[0]
>>
>> +  @classmethod
>> +  def _WaitForInstanceState(cls, instance_name, state, timeout):
>> +    """Wait for instance state transition within timeout
>
>
> ... an instance ...
>
>>
>> +
>> +    Return True if instance state is changed to state within timeout
>> secs.
>> +    Currently only state RUNNING is supported.
>
>
> Currently supports only the RUNNING state.
>
> Btw, is this true? Looking at the lxc-wait man page for 1.0.5, it seems all
> states are supported.
>
Right, lxc-wait supports more states, but since we have no use case
for any state other than RUNNING, I'll just remove this misleading
line.

>>
>> +
>> +    """
>> +    result = utils.RunCmd(["lxc-wait", "-n", instance_name, "-s", state],
>> +                          timeout=timeout)
>> +    if result.failed_by_timeout:
>> +      return False
>> +    elif result.failed:
>> +      raise HypervisorError("Failed to wait instance state transition:
>> %s" %
>
>
> Perhaps:
> Failure while waiting for instance state transition
>
>>
>> +                            result.output)
>> +    else:
>> +      return True
>> +
>> +  def _SpawnLXC(self, instance, log_file, conf_file):
>> +    """Execute lxc-start and wait until container health is confirmed.
>
>
> Nice wording!
>
>>
>> +
>> +    """
>> +    lxc_start_cmd = [
>> +      "lxc-start",
>> +      "-n", instance.name,
>> +      "-o", log_file,
>> +      "-l", "DEBUG",
>> +      "-f", conf_file,
>> +      "-d"
>> +      ]
>> +
>> +    result = utils.RunCmd(lxc_start_cmd)
>> +    if result.failed:
>> +      raise HypervisorError("Failed to start instance %s : %s" %
>> +                            (instance.name, result.output))
>> +
>> +    lxc_startup_wait = instance.hvparams[constants.HV_LXC_STARTUP_WAIT]
>> +    if not self._WaitForInstanceState(instance.name,
>> +                                      constants.LXC_STATE_RUNNING,
>> +                                      lxc_startup_wait):
>> +      raise HypervisorError("Instance %s state didn't change to RUNNING
>> within"
>> +                            " %s secs" % (instance.name,
>> lxc_startup_wait))
>> +
>> +    # Ensure that the instance is running correctly after daemonized
>
>
> s/daemonized/daemonization or being daemonized/
>
>>
>> +    if not self._IsInstanceAlive(instance.name):
>> +      raise HypervisorError("Failed to start instance %s :"
>> +                            " lxc process exited after being daemonized"
>> %
>> +                            instance.name)
>> +
>>    def StartInstance(self, instance, block_devices, startup_paused):
>>      """Start an instance.
>>
>> @@ -421,16 +471,13 @@ class LXCHypervisor(hv_base.BaseHypervisor):
>>        conf = self._CreateConfigFile(instance, sda_dev_path)
>>        utils.WriteFile(conf_file, data=conf)
>>
>> -      logging.info("Running lxc-start")
>> -      result = utils.RunCmd(["lxc-start",
>> -                             "-n", instance.name,
>> -                             "-o", log_file,
>> -                             "-l", "DEBUG",
>> -                             "-f", conf_file,
>> -                             "-d"])
>> -      if result.failed:
>> -        raise HypervisorError("Running the lxc-start failed: %s" %
>> -                              result.output)
>> +      logging.info("Starting LXC container")
>> +      try:
>> +        self._SpawnLXC(instance, log_file, conf_file)
>> +      except:
>> +        logging.error("Failed to start instance %s. Please take a look at
>> %s to"
>> +                      " see errors from LXC.", instance.name, log_file)
>
>
> s/errors from LXC/LXC errors/
>
>>
>> +        raise
>>      except:
>>        # Save an original error
>>        exc_info = sys.exc_info()
>> diff --git a/man/gnt-instance.rst b/man/gnt-instance.rst
>> index 997771e..d74b3c5 100644
>> --- a/man/gnt-instance.rst
>> +++ b/man/gnt-instance.rst
>> @@ -869,6 +869,16 @@ virtio\_net\_queues
>>
>>      It is set to ``1`` by default.
>>
>> +lxc\_startup\_wait
>> +    Valid for the LXC hypervisor.
>> +
>> +    This integer option specifies the number of seconds to wait
>> +    for the state of an LXC container to change to "RUNNING" after
>> +    startup, as reported by lxc-wait.
>> +    Otherwise we assume an error has occurred and report it.
>> +
>> +    It is set to ``30`` by default.
>> +
>>  The ``-O (--os-parameters)`` option allows customisation of the OS
>>  parameters. The actual parameter names and values depend on the OS being
>>  used, but the syntax is the same key=value. For example, setting a
>> diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs
>> index 154ee37..965cb4a 100644
>> --- a/src/Ganeti/Constants.hs
>> +++ b/src/Ganeti/Constants.hs
>> @@ -519,6 +519,10 @@ socatUseCompress = AutoConf.socatUseCompress
>>  socatUseEscape :: Bool
>>  socatUseEscape = AutoConf.socatUseEscape
>>
>> +-- * LXC
>> +lxcStateRunning :: String
>> +lxcStateRunning = "RUNNING"
>> +
>>  -- * Console types
>>
>>  -- | Display a message for console access
>> @@ -1641,6 +1645,9 @@ hvKvmUseChroot = "use_chroot"
>>  hvKvmUserShutdown :: String
>>  hvKvmUserShutdown = "user_shutdown"
>>
>> +hvLxcStartupWait :: String
>> +hvLxcStartupWait = "lxc_startup_wait"
>> +
>>  hvMemPath :: String
>>  hvMemPath = "mem_path"
>>
>> @@ -1803,6 +1810,7 @@ hvsParameterTypes = Map.fromList
>>    , (hvKvmSpiceZlibGlzImgCompr,         VTypeString)
>>    , (hvKvmUseChroot,                    VTypeBool)
>>    , (hvKvmUserShutdown,                 VTypeBool)
>> +  , (hvLxcStartupWait,                  VTypeInt)
>>    , (hvMemPath,                         VTypeString)
>>    , (hvMigrationBandwidth,              VTypeInt)
>>    , (hvMigrationDowntime,               VTypeInt)
>> @@ -3881,7 +3889,10 @@ hvcDefaults =
>>            , (hvVnetHdr,                         PyValueEx True)])
>>    , (Fake, Map.fromList [(hvMigrationMode, PyValueEx htMigrationLive)])
>>    , (Chroot, Map.fromList [(hvInitScript, PyValueEx "/ganeti-chroot")])
>> -  , (Lxc, Map.fromList [(hvCpuMask, PyValueEx "")])
>> +  , (Lxc, Map.fromList
>> +          [ (hvCpuMask,        PyValueEx "")
>> +          , (hvLxcStartupWait, PyValueEx (30 :: Int))
>> +          ])
>>    ]
>>
>>  hvcGlobals :: FrozenSet String
>> --
>> 1.8.5.5
>>
>
> Hrvoje Ribicic
> Ganeti Engineering
> Google Germany GmbH
> Dienerstr. 12, 80331, München
>
> Registergericht und -nummer: Hamburg, HRB 86891
> Sitz der Gesellschaft: Hamburg
> Geschäftsführer: Graham Law, Christine Elizabeth Flores
> Steuernummer: 48/725/00206
> Umsatzsteueridentifikationsnummer: DE813741370

Reply via email to