On Fri, Dec 02, 2016 at 02:01:37PM +0000, 'Viktor Bachraty' via ganeti-devel wrote: > In some failure modes, Ganeti state of record may desync from actual > state of world. This patch allows migrate --cleanup to adopt an instance > if it is detected running on an unexpected node. > > Signed-off-by: Viktor Bachraty <vbachr...@google.com>
LGTM! Thanks, Brian. > lib/cmdlib/instance_migration.py | 20 +++++++++++++++----- > 1 file changed, 15 insertions(+), 5 deletions(-) > > diff --git a/lib/cmdlib/instance_migration.py > b/lib/cmdlib/instance_migration.py > index 3225861..4421eba 100644 > --- a/lib/cmdlib/instance_migration.py > +++ b/lib/cmdlib/instance_migration.py > @@ -611,8 +611,9 @@ class TLMigrateInstance(Tasklet): > " hangs, the hypervisor might be in a bad state)") > > cluster_hvparams = self.cfg.GetClusterInfo().hvparams > + online_node_uuids = self.cfg.GetOnlineNodeList() > instance_list = self.rpc.call_instance_list( > - self.all_node_uuids, [self.instance.hypervisor], cluster_hvparams) > + online_node_uuids, [self.instance.hypervisor], cluster_hvparams) > > # Verify each result and raise an exception if failed > for node_uuid, result in instance_list.items(): > @@ -679,10 +680,19 @@ class TLMigrateInstance(Tasklet): > " and restart this operation") > > if not (runningon_source or runningon_target): > - raise errors.OpExecError("Instance does not seem to be running at all;" > - " in this case it's safer to repair by" > - " running 'gnt-instance stop' to ensure disk" > - " shutdown, and then restarting it") > + if len(instance_locations) == 1: > + # The instance is running on a different node than expected, let's > + # adopt it as if it was running on the secondary > + self.target_node_uuid = instance_locations[0] > + self.feedback_fn("* instance running on unexpected node (%s)," > + " updating as the new secondary" % > + self.cfg.GetNodeName(self.target_node_uuid)) > + runningon_target = True > + else: > + raise errors.OpExecError("Instance does not seem to be running at all;" > + " in this case it's safer to repair by" > + " running 'gnt-instance stop' to ensure disk" > + " shutdown, and then restarting it") > > if runningon_target: > # the migration has actually succeeded, we need to update the config > -- > 2.8.0.rc3.226.g39d4020 >