On Tue, Mar 29, 2011 at 03:57:42PM +0200, René Nussbaumer wrote:
> As the prereq-checking code for failover is almost identical, it's an
> easy task to switch it over to TLMigrateInstance. This allows us to
> fall back to failover if the migrate prereq check fails for some reason.
> 
> Please note that everything from LUInstanceFailover.Exec is taken over
> unchanged into TLMigrateInstance._ExecFailover, with only adaptations to
> opcode fields and variable references, but no changes in logic. Some
> effort still needs to go into merging this logic with the migration code
> (for example DRBD handling), but that should happen in a separate
> iteration.

Mmm, thanks for doing this. Sorry for the slow review, the patch is a
bit long :)

Question (unrelated to the patch itself):

- what happens if the instance is ADMIN_up but ERROR_down?

Also, I wonder if we shouldn't merge the failover logic into the migration
logic, with the supported modes being:

- live migration
- non-live migration
- offline (=failover)

But that's for later, as you say.
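
Very rough sketch of what I mean — a single "mode" with three values
instead of a separate failover flag; HT_MIGRATION_OFFLINE and self.mode are
invented names, the rest reuses what the patch already has:

  if self.mode == constants.HT_MIGRATION_OFFLINE:  # hypothetical constant
    self._ExecFailover()
  else:
    self.live = (self.mode == constants.HT_MIGRATION_LIVE)
    self._ExecMigration()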

A few comments below.

> ---
>  lib/cmdlib.py  |  381 +++++++++++++++++++++++++-------------------------
>  lib/opcodes.py |    2 +
>  2 files changed, 174 insertions(+), 209 deletions(-)
> 
> diff --git a/lib/cmdlib.py b/lib/cmdlib.py
> index d663f8d..944f853 100644
> --- a/lib/cmdlib.py
> +++ b/lib/cmdlib.py
> @@ -5864,6 +5864,17 @@ class LUInstanceFailover(LogicalUnit):
>      self.needed_locks[locking.LEVEL_NODE] = []
>      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
>  
> +    ignore_consistency = self.op.ignore_consistency
> +    shutdown_timeout = self.op.shutdown_timeout
> +    self._migrater = TLMigrateInstance(self, self.op.instance_name,
> +                                       cleanup=False,
> +                                       iallocator=self.op.iallocator,
> +                                       target_node=self.op.target_node,
> +                                       failover=True,
> +                                       ignore_consistency=ignore_consistency,
> +                                       shutdown_timeout=shutdown_timeout)
> +    self.tasklets = [self._migrater]
> +
>    def DeclareLocks(self, level):
>      if level == locking.LEVEL_NODE:
>        instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
> @@ -5883,13 +5894,14 @@ class LUInstanceFailover(LogicalUnit):
>      This runs on master, primary and secondary nodes of the instance.
>  
>      """
> -    instance = self.instance
> +    instance = self._migrater.instance
>      source_node = instance.primary_node
> +    target_node = self._migrater.target_node
>      env = {
>        "IGNORE_CONSISTENCY": self.op.ignore_consistency,
>        "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
>        "OLD_PRIMARY": source_node,
> -      "NEW_PRIMARY": self.op.target_node,
> +      "NEW_PRIMARY": target_node,
>        }
>  
>      if instance.disk_template in constants.DTS_INT_MIRROR:
> @@ -5906,171 +5918,9 @@ class LUInstanceFailover(LogicalUnit):
>      """Build hooks nodes.
>  
>      """
> -    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
> -    return (nl, nl + [self.instance.primary_node])
> -
> -  def CheckPrereq(self):
> -    """Check prerequisites.
> -
> -    This checks that the instance is in the cluster.
> -
> -    """
> -    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
> -    assert self.instance is not None, \
> -      "Cannot retrieve locked instance %s" % self.op.instance_name
> -
> -    bep = self.cfg.GetClusterInfo().FillBE(instance)
> -    if instance.disk_template not in constants.DTS_MIRRORED:
> -      raise errors.OpPrereqError("Instance's disk layout is not"
> -                                 " mirrored, cannot failover.",
> -                                 errors.ECODE_STATE)
> -
> -    if instance.disk_template in constants.DTS_EXT_MIRROR:
> -      _CheckIAllocatorOrNode(self, "iallocator", "target_node")
> -      if self.op.iallocator:
> -        self._RunAllocator()
> -        # Release all unnecessary node locks
> -        nodes_keep = [instance.primary_node, self.op.target_node]
> -        nodes_rel = [node for node in self.acquired_locks[locking.LEVEL_NODE]
> -                     if node not in nodes_keep]
> -        self.context.glm.release(locking.LEVEL_NODE, nodes_rel)
> -        self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
> -
> -      # self.op.target_node is already populated, either directly or by the
> -      # iallocator run
> -      target_node = self.op.target_node
> -
> -    else:
> -      secondary_nodes = instance.secondary_nodes
> -      if not secondary_nodes:
> -        raise errors.ConfigurationError("No secondary node but using"
> -                                        " %s disk template" %
> -                                        instance.disk_template)
> -      target_node = secondary_nodes[0]
> -
> -      if self.op.iallocator or (self.op.target_node and
> -                                self.op.target_node != target_node):
> -        raise errors.OpPrereqError("Instances with disk template %s cannot"
> -                                   " be failed over to arbitrary nodes"
> -                                   " (neither an iallocator nor a target"
> -                                   " node can be passed)" %
> -                                   instance.disk_template, errors.ECODE_INVAL)
> -    _CheckNodeOnline(self, target_node)
> -    _CheckNodeNotDrained(self, target_node)
> -
> -    # Save target_node so that we can use it in BuildHooksEnv
> -    self.op.target_node = target_node
> -
> -    if instance.admin_up:
> -      # check memory requirements on the secondary node
> -      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
> -                           instance.name, bep[constants.BE_MEMORY],
> -                           instance.hypervisor)
> -    else:
> -      self.LogInfo("Not checking memory on the secondary node as"
> -                   " instance will not be started")
> -
> -    # check bridge existance
> -    _CheckInstanceBridgesExist(self, instance, node=target_node)
> -
> -  def Exec(self, feedback_fn):
> -    """Failover an instance.
> -
> -    The failover is done by shutting it down on its present node and
> -    starting it on the secondary.
> -
> -    """
> -    instance = self.instance
> -    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
> -
> -    source_node = instance.primary_node
> -    target_node = self.op.target_node
> -
> -    if instance.admin_up:
> -      feedback_fn("* checking disk consistency between source and target")
> -      for dev in instance.disks:
> -        # for drbd, these are drbd over lvm
> -        if not _CheckDiskConsistency(self, dev, target_node, False):
> -          if not self.op.ignore_consistency:
> -            raise errors.OpExecError("Disk %s is degraded on target node,"
> -                                     " aborting failover." % dev.iv_name)
> -    else:
> -      feedback_fn("* not checking disk consistency as instance is not running")
> -
> -    feedback_fn("* shutting down instance on source node")
> -    logging.info("Shutting down instance %s on node %s",
> -                 instance.name, source_node)
> -
> -    result = self.rpc.call_instance_shutdown(source_node, instance,
> -                                             self.op.shutdown_timeout)
> -    msg = result.fail_msg
> -    if msg:
> -      if self.op.ignore_consistency or primary_node.offline:
> -        self.proc.LogWarning("Could not shutdown instance %s on node %s."
> -                             " Proceeding anyway. Please make sure node"
> -                             " %s is down. Error details: %s",
> -                             instance.name, source_node, source_node, msg)
> -      else:
> -        raise errors.OpExecError("Could not shutdown instance %s on"
> -                                 " node %s: %s" %
> -                                 (instance.name, source_node, msg))
> -
> -    feedback_fn("* deactivating the instance's disks on source node")
> -    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
> -      raise errors.OpExecError("Can't shut down the instance's disks.")
> -
> -    instance.primary_node = target_node
> -    # distribute new instance config to the other nodes
> -    self.cfg.Update(instance, feedback_fn)
> -
> -    # Only start the instance if it's marked as up
> -    if instance.admin_up:
> -      feedback_fn("* activating the instance's disks on target node")
> -      logging.info("Starting instance %s on node %s",
> -                   instance.name, target_node)
> -
> -      disks_ok, _ = _AssembleInstanceDisks(self, instance,
> -                                           ignore_secondaries=True)
> -      if not disks_ok:
> -        _ShutdownInstanceDisks(self, instance)
> -        raise errors.OpExecError("Can't activate the instance's disks")
> -
> -      feedback_fn("* starting the instance on the target node")
> -      result = self.rpc.call_instance_start(target_node, instance, None, None)
> -      msg = result.fail_msg
> -      if msg:
> -        _ShutdownInstanceDisks(self, instance)
> -        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
> -                                 (instance.name, target_node, msg))
> -
> -  def _RunAllocator(self):
> -    """Run the allocator based on input opcode.
> -
> -    """
> -    ial = IAllocator(self.cfg, self.rpc,
> -                     mode=constants.IALLOCATOR_MODE_RELOC,
> -                     name=self.instance.name,
> -                     # TODO See why hail breaks with a single node below
> -                     relocate_from=[self.instance.primary_node,
> -                                    self.instance.primary_node],
> -                     )
> -
> -    ial.Run(self.op.iallocator)
> -
> -    if not ial.success:
> -      raise errors.OpPrereqError("Can't compute nodes using"
> -                                 " iallocator '%s': %s" %
> -                                 (self.op.iallocator, ial.info),
> -                                 errors.ECODE_NORES)
> -    if len(ial.result) != ial.required_nodes:
> -      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
> -                                 " of nodes (%s), required %s" %
> -                                 (self.op.iallocator, len(ial.result),
> -                                  ial.required_nodes), errors.ECODE_FAULT)
> -    self.op.target_node = ial.result[0]
> -    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
> -                 self.instance.name, self.op.iallocator,
> -                 utils.CommaJoin(ial.result))
> +    instance = self._migrater.instance
> +    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
> +    return (nl, nl + [instance.primary_node])
>  
>  
>  class LUInstanceMigrate(LogicalUnit):
> @@ -6094,8 +5944,11 @@ class LUInstanceMigrate(LogicalUnit):
>      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
>  
>      self._migrater = TLMigrateInstance(self, self.op.instance_name,
> -                                       self.op.cleanup, self.op.iallocator,
> -                                       self.op.target_node)
> +                                       cleanup=self.op.cleanup,
> +                                       iallocator=self.op.iallocator,
> +                                       target_node=self.op.target_node,
> +                                       failover=False,
> +                                       fallback=self.op.fallback)
>      self.tasklets = [self._migrater]
>  
>    def DeclareLocks(self, level):
> @@ -6355,8 +6208,9 @@ class LUNodeMigrate(LogicalUnit):
>        logging.debug("Migrating instance %s", inst.name)
>        names.append(inst.name)
>  
> -      tasklets.append(TLMigrateInstance(self, inst.name, False,
> -                                        self.op.iallocator, None))
> +      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
> +                                        iallocator=self.op.iallocator,
> +                                        target_node=None))
>  
>        if inst.disk_template in constants.DTS_EXT_MIRROR:
>          # We need to lock all nodes, as the iallocator will choose the
> @@ -6405,8 +6259,10 @@ class TLMigrateInstance(Tasklet):
>        this variable is initalized only after CheckPrereq has run
>  
>    """
> -  def __init__(self, lu, instance_name, cleanup,
> -               iallocator=None, target_node=None):
> +  def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
> +               target_node=None, failover=False, fallback=False,
> +               ignore_consistency=False,
> +               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
>      """Initializes this class.
>  
>      """

The new instance variables are not documented, and the TLMigrateInstance
has even more local state. Could you please @ivar them in the class
docstring?
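
Something along these lines (the descriptions are just my guesses, adjust
as needed):

  @ivar failover: whether we should do a failover instead of a migration
  @ivar fallback: whether we can fall back to failover if migration fails
  @ivar ignore_consistency: whether to ignore disk consistency on failover
  @ivar shutdown_timeout: how long to wait for the instance to shut down
      when failing over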

> @@ -6418,6 +6274,10 @@ class TLMigrateInstance(Tasklet):
>      self.live = False # will be overridden later
>      self.iallocator = iallocator
>      self.target_node = target_node
> +    self.failover = failover
> +    self.fallback = fallback
> +    self.ignore_consistency = ignore_consistency
> +    self.shutdown_timeout = shutdown_timeout

There should be an assert somewhere (either here or in CheckPrereq) that
cleanup is always False when failover is True.
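
Something like (untested):

  assert not (self.cleanup and self.failover), \
    "Cannot do a migration cleanup and a failover at the same time"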

>    def CheckPrereq(self):
>      """Check prerequisites.
> @@ -6430,9 +6290,18 @@ class TLMigrateInstance(Tasklet):
>      assert instance is not None
>      self.instance = instance
>  
> +    if not instance.admin_up and not self.failover and self.fallback:
> +      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
> +                      " to failover")
> +      self.failover = True

Hmm… This means the opcode result won't give us a way to determine
(programmatically) whether a migration or a failover was actually done;
we'd only find out by parsing the logs.
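
(Not a blocker, just thinking out loud: one option would be for Exec() to
return/record which path was actually taken, e.g. something like

  return {"failover_done": self.failover}

— field name invented — so it shows up in the job result rather than only
in the log messages.)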

> +
>      if instance.disk_template not in constants.DTS_MIRRORED:
> +      if self.failover:
> +        text = "failovers"
> +      else:
> +        text = "migrations"
>        raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
> -                                 " migrations" % instance.disk_template,
> +                                 " %s" % (instance.disk_template, text),
>                                   errors.ECODE_STATE)
>  
>      if instance.disk_template in constants.DTS_EXT_MIRROR:
> @@ -6460,30 +6329,45 @@ class TLMigrateInstance(Tasklet):
>                                          " %s disk template" %
>                                          instance.disk_template)
>        target_node = secondary_nodes[0]
> -      if self.lu.op.iallocator or (self.lu.op.target_node and
> -                                   self.lu.op.target_node != target_node):
> +      if self.iallocator or (self.target_node and
> +                             self.target_node != target_node):
> +        if self.failover:
> +          text = "failed over"
> +        else:
> +          text = "migrated"
>          raise errors.OpPrereqError("Instances with disk template %s cannot"
> -                                   " be migrated over to arbitrary nodes"
> +                                   " be %s over to arbitrary nodes"
>                                     " (neither an iallocator nor a target"
>                                     " node can be passed)" %
> -                                   instance.disk_template, errors.ECODE_INVAL)
> +                                   (instance.disk_template, text),
> +                                   errors.ECODE_INVAL)
>  
>      i_be = self.cfg.GetClusterInfo().FillBE(instance)
>  
>      # check memory requirements on the secondary node
> -    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
> -                         instance.name, i_be[constants.BE_MEMORY],
> -                         instance.hypervisor)
> +    if not self.failover or instance.admin_up:
> +      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
> +                           instance.name, i_be[constants.BE_MEMORY],
> +                           instance.hypervisor)
> +    else:
> +      self.lu.LogInfo("Not checking memory on the secondary node as"
> +                      " instance will not be started")
>  
>      # check bridge existance
>      _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
>  
>      if not self.cleanup:
>        _CheckNodeNotDrained(self.lu, target_node)
> -      result = self.rpc.call_instance_migratable(instance.primary_node,
> -                                                 instance)
> -      result.Raise("Can't migrate, please use failover",
> -                   prereq=True, ecode=errors.ECODE_STATE)
> +      if not self.failover:
> +        result = self.rpc.call_instance_migratable(instance.primary_node,
> +                                                   instance)
> +        if result.fail_msg and self.fallback:
> +          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
> +                          " failover")
> +          self.failover = True
> +        else:
> +          result.Raise("Can't migrate, please use failover",
> +                       prereq=True, ecode=errors.ECODE_STATE)
>  
>  
>    def _RunAllocator(self):
> @@ -6515,24 +6399,29 @@ class TLMigrateInstance(Tasklet):
>                   self.instance_name, self.iallocator,
>                   utils.CommaJoin(ial.result))
>  
> -    if self.lu.op.live is not None and self.lu.op.mode is not None:
> -      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
> -                                 " parameters are accepted",
> -                                 errors.ECODE_INVAL)
> -    if self.lu.op.live is not None:
> -      if self.lu.op.live:
> -        self.lu.op.mode = constants.HT_MIGRATION_LIVE
> -      else:
> -        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
> -      # reset the 'live' parameter to None so that repeated
> -      # invocations of CheckPrereq do not raise an exception
> -      self.lu.op.live = None
> -    elif self.lu.op.mode is None:
> -      # read the default value from the hypervisor
> -      i_hv = self.cfg.GetClusterInfo().FillHV(self.instance, skip_globals=False)
> -      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
> -
> -    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
> +    if not self.failover:
> +      if self.lu.op.live is not None and self.lu.op.mode is not None:
> +        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
> +                                   " parameters are accepted",
> +                                   errors.ECODE_INVAL)
> +      if self.lu.op.live is not None:
> +        if self.lu.op.live:
> +          self.lu.op.mode = constants.HT_MIGRATION_LIVE
> +        else:
> +          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
> +        # reset the 'live' parameter to None so that repeated
> +        # invocations of CheckPrereq do not raise an exception
> +        self.lu.op.live = None
> +      elif self.lu.op.mode is None:
> +        # read the default value from the hypervisor
> +        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
> +                                                skip_globals=False)
> +        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
> +
> +      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
> +    else:
> +      # Failover is never live
> +      self.live = False
>  
>    def _WaitUntilSync(self):
>      """Poll with custom rpc for disk sync.
> @@ -6798,14 +6687,82 @@ class TLMigrateInstance(Tasklet):
>  
>      self.feedback_fn("* done")
>  
> +  def _ExecFailover(self):
> +    """Failover an instance.
> +
> +    The failover is done by shutting it down on its present node and
> +    starting it on the secondary.
> +
> +    """
> +    instance = self.instance
> +    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
> +
> +    source_node = instance.primary_node
> +    target_node = self.target_node
> +
> +    if instance.admin_up:
> +      self.feedback_fn("* checking disk consistency between source and target")
> +      for dev in instance.disks:
> +        # for drbd, these are drbd over lvm
> +        if not _CheckDiskConsistency(self, dev, target_node, False):
> +          if not self.ignore_consistency:
> +            raise errors.OpExecError("Disk %s is degraded on target node,"
> +                                     " aborting failover." % dev.iv_name)
> +    else:
> +      self.feedback_fn("* not checking disk consistency as instance is not"
> +                       " running")
> +
> +    self.feedback_fn("* shutting down instance on source node")
> +    logging.info("Shutting down instance %s on node %s",
> +                 instance.name, source_node)
> +
> +    result = self.rpc.call_instance_shutdown(source_node, instance,
> +                                             self.shutdown_timeout)
> +    msg = result.fail_msg
> +    if msg:
> +      if self.ignore_consistency or primary_node.offline:
> +        self.lu.LogWarning("Could not shutdown instance %s on node %s."
> +                           " Proceeding anyway. Please make sure node"
> +                           " %s is down. Error details: %s",
> +                           instance.name, source_node, source_node, msg)
> +      else:
> +        raise errors.OpExecError("Could not shutdown instance %s on"
> +                                 " node %s: %s" %
> +                                 (instance.name, source_node, msg))
> +
> +    self.feedback_fn("* deactivating the instance's disks on source node")
> +    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
> +      raise errors.OpExecError("Can't shut down the instance's disks.")
> +
> +    instance.primary_node = target_node
> +    # distribute new instance config to the other nodes
> +    self.cfg.Update(instance, self.feedback_fn)
> +
> +    # Only start the instance if it's marked as up
> +    if instance.admin_up:
> +      self.feedback_fn("* activating the instance's disks on target node")
> +      logging.info("Starting instance %s on node %s",
> +                   instance.name, target_node)
> +
> +      disks_ok, _ = _AssembleInstanceDisks(self, instance,
> +                                           ignore_secondaries=True)
> +      if not disks_ok:
> +        _ShutdownInstanceDisks(self, instance)
> +        raise errors.OpExecError("Can't activate the instance's disks")
> +
> +      self.feedback_fn("* starting the instance on the target node")
> +      result = self.rpc.call_instance_start(target_node, instance, None, None)
> +      msg = result.fail_msg
> +      if msg:
> +        _ShutdownInstanceDisks(self, instance)
> +        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
> +                                 (instance.name, target_node, msg))
> +
>    def Exec(self, feedback_fn):
>      """Perform the migration.
>  
>      """
> -    feedback_fn("Migrating instance %s" % self.instance.name)
> -
>      self.feedback_fn = feedback_fn
> -
>      self.source_node = self.instance.primary_node
>  
>      # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
> @@ -6820,10 +6777,16 @@ class TLMigrateInstance(Tasklet):
>        self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
>        }
>  
> -    if self.cleanup:
> -      return self._ExecCleanup()
> +    if self.failover:
> +      feedback_fn("Failover instance %s" % self.instance.name)
> +      self._ExecFailover()
>      else:
> -      return self._ExecMigration()
> +      feedback_fn("Migrating instance %s" % self.instance.name)
> +
> +      if self.cleanup:
> +        return self._ExecCleanup()
> +      else:
> +        return self._ExecMigration()
>  
>  
>  def _CreateBlockDev(lu, node, instance, device, force_create,
> diff --git a/lib/opcodes.py b/lib/opcodes.py
> index b09ab30..afd7ecb 100644
> --- a/lib/opcodes.py
> +++ b/lib/opcodes.py
> @@ -1047,6 +1047,8 @@ class OpInstanceMigrate(OpCode):
>       "Iallocator for deciding the target node for shared-storage instances"),
>      ("target_node", None, ht.TMaybeString,
>       "Target node for shared-storage instances"),
> +    ("fallback", False, ht.TBool,
> +     "Whether we can fallback to failover if migration is not possible"),

I think this is confusing. It's fine as an internal variable name (in
TLMigrateInstance), but as an opcode parameter I'd rather have something
more descriptive, e.g. allow_failover (think of "gnt-instance migrate
--fallback" versus "--allow-failover").
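
I.e. something like:

    ("allow_failover", False, ht.TBool,
     "Whether we can fall back to failover if migration is not possible"),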

thanks,
iustin
