This flag tracks if the disks of an instance are supposed to be active.
That's the case when an instance is running or when its disks got
activated explicitly (and in a couple of other cases).
It will be used by watcher to re-activate disks after a node reboot.

Signed-off-by: Thomas Thrainer <[email protected]>
---
 lib/cmdlib.py                  | 35 +++++++++++++++++++++-----------
 lib/config.py                  | 45 ++++++++++++++++++++++++++++++++++--------
 lib/masterd/iallocator.py      |  1 +
 lib/objects.py                 |  1 +
 src/Ganeti/Objects.hs          |  1 +
 test/hs/Test/Ganeti/Objects.hs |  2 +-
 6 files changed, 64 insertions(+), 21 deletions(-)

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 6b1647d..75b1665 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -2717,12 +2717,12 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       # node here
       snode = node_image[nname]
       bad_snode = snode.ghost or snode.offline
-      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
+      _ErrorIf(inst_config.disks_active and
                not success and not bad_snode,
                constants.CV_EINSTANCEFAULTYDISK, instance,
                "couldn't retrieve status for disk/%s on %s: %s",
                idx, nname, bdev_status)
-      _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
+      _ErrorIf((inst_config.disks_active and
                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
                constants.CV_EINSTANCEFAULTYDISK, instance,
                "disk/%s on %s is faulty", idx, nname)
@@ -3000,8 +3000,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         node_drbd[minor] = (instance, False)
       else:
         instance = instanceinfo[instance]
-        node_drbd[minor] = (instance.name,
-                            instance.admin_state == constants.ADMINST_UP)
+        node_drbd[minor] = (instance.name, instance.disks_active)
 
     # and now check them
     used_minors = nresult.get(constants.NV_DRBDLIST, [])
@@ -3900,8 +3899,7 @@ class LUGroupVerifyDisks(NoHooksLU):
     res_missing = {}
 
     nv_dict = _MapInstanceDisksToNodes(
-      [inst for inst in self.instances.values()
-       if inst.admin_state == constants.ADMINST_UP])
+      [inst for inst in self.instances.values() if inst.disks_active])
 
     if nv_dict:
       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
@@ -7012,6 +7010,7 @@ class LUInstanceActivateDisks(NoHooksLU):
 
     if self.op.wait_for_sync:
       if not _WaitForSync(self, self.instance):
+        self.cfg.MarkInstanceDisksInactive(self.instance.name)
         raise errors.OpExecError("Some disks of the instance are degraded!")
 
     return disks_info
@@ -7055,6 +7054,10 @@ def _AssembleInstanceDisks(lu, instance, disks=None, 
ignore_secondaries=False,
   # into any other network-connected state (Connected, SyncTarget,
   # SyncSource, etc.)
 
+  # mark instance disks as active before doing actual work, so watcher does
+  # not try to shut them down erroneously
+  lu.cfg.MarkInstanceDisksActive(iname)
+
   # 1st pass, assemble on all nodes in secondary mode
   for idx, inst_disk in enumerate(disks):
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
@@ -7106,6 +7109,9 @@ def _AssembleInstanceDisks(lu, instance, disks=None, 
ignore_secondaries=False,
   for disk in disks:
     lu.cfg.SetDiskID(disk, instance.primary_node)
 
+  if not disks_ok:
+    lu.cfg.MarkInstanceDisksInactive(iname)
+
   return disks_ok, device_info
 
 
@@ -7198,6 +7204,7 @@ def _ShutdownInstanceDisks(lu, instance, disks=None, 
ignore_primary=False):
   ignored.
 
   """
+  lu.cfg.MarkInstanceDisksInactive(instance.name)
   all_result = True
   disks = _ExpandCheckDisks(instance, disks)
 
@@ -9344,7 +9351,7 @@ class TLMigrateInstance(Tasklet):
     source_node = instance.primary_node
     target_node = self.target_node
 
-    if instance.admin_state == constants.ADMINST_UP:
+    if instance.disks_active:
       self.feedback_fn("* checking disk consistency between source and target")
       for (idx, dev) in enumerate(instance.disks):
         # for drbd, these are drbd over lvm
@@ -11159,6 +11166,7 @@ class LUInstanceCreate(LogicalUnit):
                             primary_node=pnode_name,
                             nics=self.nics, disks=disks,
                             disk_template=self.op.disk_template,
+                            disks_active=False,
                             admin_state=constants.ADMINST_DOWN,
                             network_port=network_port,
                             beparams=self.op.beparams,
@@ -11238,6 +11246,9 @@ class LUInstanceCreate(LogicalUnit):
       raise errors.OpExecError("There are some degraded disks for"
                                " this instance")
 
+    # instance disks are now active
+    iobj.disks_active = True
+
     # Release all node resource locks
     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
 
@@ -12002,7 +12013,7 @@ class TLReplaceDisks(Tasklet):
     feedback_fn("Current seconary node: %s" %
                 utils.CommaJoin(self.instance.secondary_nodes))
 
-    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
+    activate_disks = not self.instance.disks_active
 
     # Activate the instance disks if we're replacing them on a down instance
     if activate_disks:
@@ -12517,7 +12528,7 @@ class LURepairNodeStorage(NoHooksLU):
     """
     # Check whether any instance on this node has faulty disks
     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
-      if inst.admin_state != constants.ADMINST_UP:
+      if not inst.disks_active:
         continue
       check_nodes = set(inst.all_nodes)
       check_nodes.discard(self.op.node_name)
@@ -12996,9 +13007,9 @@ class LUInstanceGrowDisk(LogicalUnit):
       if disk_abort:
         self.LogWarning("Disk syncing has not returned a good status; check"
                         " the instance")
-      if instance.admin_state != constants.ADMINST_UP:
+      if not instance.disks_active:
         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
-    elif instance.admin_state != constants.ADMINST_UP:
+    elif not instance.disks_active:
       self.LogWarning("Not shutting down the disk even if the instance is"
                       " not supposed to be running because no wait for"
                       " sync mode was requested")
@@ -15092,7 +15103,7 @@ class LUBackupExport(LogicalUnit):
     for disk in instance.disks:
       self.cfg.SetDiskID(disk, src_node)
 
-    activate_disks = (instance.admin_state != constants.ADMINST_UP)
+    activate_disks = not instance.disks_active
 
     if activate_disks:
       # Activate the instance disks if we'exporting a stopped instance
diff --git a/lib/config.py b/lib/config.py
index b968fcf..e3799d5 100644
--- a/lib/config.py
+++ b/lib/config.py
@@ -1436,19 +1436,27 @@ class ConfigWriter:
       raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
                                       " in use" % (item.name, item.uuid))
 
-  def _SetInstanceStatus(self, instance_name, status):
+  def _SetInstanceStatus(self, instance_name, status, disks_active):
     """Set the instance's status to a given value.
 
     """
-    assert status in constants.ADMINST_ALL, \
-           "Invalid status '%s' passed to SetInstanceStatus" % (status,)
-
     if instance_name not in self._config_data.instances:
       raise errors.ConfigurationError("Unknown instance '%s'" %
                                       instance_name)
     instance = self._config_data.instances[instance_name]
-    if instance.admin_state != status:
+
+    if status is None:
+      status = instance.admin_state
+    if disks_active is None:
+      disks_active = instance.disks_active
+
+    assert status in constants.ADMINST_ALL, \
+           "Invalid status '%s' passed to SetInstanceStatus" % (status,)
+
+    if instance.admin_state != status or \
+        instance.disks_active != disks_active:
       instance.admin_state = status
+      instance.disks_active = disks_active
       instance.serial_no += 1
       instance.mtime = time.time()
       self._WriteConfig()
@@ -1457,15 +1465,19 @@ class ConfigWriter:
   def MarkInstanceUp(self, instance_name):
     """Mark the instance status to up in the config.
 
+    This also sets the instance disks active flag.
+
     """
-    self._SetInstanceStatus(instance_name, constants.ADMINST_UP)
+    self._SetInstanceStatus(instance_name, constants.ADMINST_UP, True)
 
   @locking.ssynchronized(_config_lock)
   def MarkInstanceOffline(self, instance_name):
     """Mark the instance status to down in the config.
 
+    This also clears the instance disks active flag.
+
     """
-    self._SetInstanceStatus(instance_name, constants.ADMINST_OFFLINE)
+    self._SetInstanceStatus(instance_name, constants.ADMINST_OFFLINE, False)
 
   @locking.ssynchronized(_config_lock)
   def RemoveInstance(self, instance_name):
@@ -1531,8 +1543,25 @@ class ConfigWriter:
   def MarkInstanceDown(self, instance_name):
     """Mark the status of an instance to down in the configuration.
 
+    This does not touch the instance disks active flag, as shut down instances
+    can still have active disks.
+
+    """
+    self._SetInstanceStatus(instance_name, constants.ADMINST_DOWN, None)
+
+  @locking.ssynchronized(_config_lock)
+  def MarkInstanceDisksActive(self, instance_name):
+    """Mark the status of instance disks active.
+
+    """
+    self._SetInstanceStatus(instance_name, None, True)
+
+  @locking.ssynchronized(_config_lock)
+  def MarkInstanceDisksInactive(self, instance_name):
+    """Mark the status of instance disks inactive.
+
     """
-    self._SetInstanceStatus(instance_name, constants.ADMINST_DOWN)
+    self._SetInstanceStatus(instance_name, None, False)
 
   def _UnlockedGetInstanceList(self):
     """Get the list of instances.
diff --git a/lib/masterd/iallocator.py b/lib/masterd/iallocator.py
index 7179b0e..b1fd9f1 100644
--- a/lib/masterd/iallocator.py
+++ b/lib/masterd/iallocator.py
@@ -605,6 +605,7 @@ class IAllocator(object):
                    constants.IDISK_MODE: dsk.mode}
                   for dsk in iinfo.disks],
         "disk_template": iinfo.disk_template,
+        "disks_active": iinfo.disks_active,
         "hypervisor": iinfo.hypervisor,
         }
       pir["disk_space_total"] = gmi.ComputeDiskSize(iinfo.disk_template,
diff --git a/lib/objects.py b/lib/objects.py
index 8d809c4..f7ce3b6 100644
--- a/lib/objects.py
+++ b/lib/objects.py
@@ -1057,6 +1057,7 @@ class Instance(TaggableObject):
     "nics",
     "disks",
     "disk_template",
+    "disks_active",
     "network_port",
     "serial_no",
     ] + _TIMESTAMPS + _UUID
diff --git a/src/Ganeti/Objects.hs b/src/Ganeti/Objects.hs
index 1550309..cd8d024 100644
--- a/src/Ganeti/Objects.hs
+++ b/src/Ganeti/Objects.hs
@@ -472,6 +472,7 @@ $(buildObject "Instance" "inst" $
   , simpleField "nics"           [t| [PartialNic]       |]
   , simpleField "disks"          [t| [Disk]             |]
   , simpleField "disk_template"  [t| DiskTemplate       |]
+  , simpleField "disks_active"   [t| Bool               |]
   , optionalField $ simpleField "network_port" [t| Int  |]
   ]
   ++ timeStampFields
diff --git a/test/hs/Test/Ganeti/Objects.hs b/test/hs/Test/Ganeti/Objects.hs
index 23ea054..5d7a090 100644
--- a/test/hs/Test/Ganeti/Objects.hs
+++ b/test/hs/Test/Ganeti/Objects.hs
@@ -111,7 +111,7 @@ instance Arbitrary Instance where
       <*> pure (GenericContainer Map.empty) <*> arbitrary
       -- ... and for OSParams
       <*> pure (GenericContainer Map.empty) <*> arbitrary <*> arbitrary
-      <*> arbitrary <*> arbitrary <*> arbitrary
+      <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary
       -- ts
       <*> arbitrary <*> arbitrary
       -- uuid
-- 
1.8.2.1

Reply via email to