MacVTap devices are persistent. In some cases, such as a shutdown
initiated from inside an instance while the KVM daemon is disabled, the
instance's MacVTap NICs are not cleaned up. So, we've added a new
function to the ganeti-watcher, named '_CleanupStaleMacvtapDevs', which
removes all stale MacVTap NICs that are not used by any instance.

To find those NICs, we compute the difference between all the Ganeti
macvtap devices on the node where the watcher is running ("gnt.macvtap."
prefix) and the macvtap devices that are in use by an instance (based on
the runtime NIC files).

Signed-off-by: Dimitris Bliablias <db...@skroutz.gr>
---
 lib/watcher/__init__.py | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/lib/watcher/__init__.py b/lib/watcher/__init__.py
index 7f5de10..7d71dd7 100644
--- a/lib/watcher/__init__.py
+++ b/lib/watcher/__init__.py
@@ -36,6 +36,7 @@ by a node reboot.  Run from cron or similar.
 
 """
 
+import re
 import os
 import os.path
 import sys
@@ -59,6 +60,7 @@ from ganeti import qlang
 from ganeti import ssconf
 from ganeti import ht
 from ganeti import pathutils
+from ganeti import hypervisor
 
 import ganeti.rapi.client # pylint: disable=W0611
 from ganeti.rapi.client import UsesRapiClient
@@ -344,6 +346,42 @@ def _CheckForOfflineNodes(nodes, instance):
   return compat.any(nodes[node_name].offline for node_name in instance.snodes)
 
 
+def _CleanupStaleMacvtapDevs():
+  """Remove Ganeti macvtap devices that no instance on this node uses.
+
+  In-use devices are merged over all enabled hypervisors; nothing is deleted
+  if one of them fails to report. Raises CommandError if "ip" fails.
+
+  """
+  def _GetAllMacvtapDevs():
+    """Return the set of Ganeti macvtap device names on the current node."""
+    # "-o" gives one record per line; by default a device can span 3+ lines
+    result = utils.RunCmd(["ip", "-o", "link", "show"])
+    if result.failed:
+      raise errors.CommandError("Failed to list TUN/TAP interfaces")
+
+    # Escape the prefix (it has literal dots); names may end in "@parent"
+    devname_re = re.compile(r"^[0-9]+: (%s[0-9]+)[@:]" %
+                            re.escape(constants.MACVTAP_DEVICE_PREFIX), re.M)
+    return set(devname_re.findall(result.output))
+
+  macvtaps_in_use = set()
+  for hv_name in ssconf.SimpleStore().GetHypervisorList():
+    try:
+      hyper = hypervisor.GetHypervisor(hv_name)
+      macvtaps_in_use.update(hyper.ListInstancesMacvtapNICs())
+    except errors.HypervisorError, err:
+      logging.warning(str(err))
+      return  # in-use set is incomplete; deleting now could hit live NICs
+
+  stale = _GetAllMacvtapDevs() - macvtaps_in_use
+  if stale:
+    logging.info("Removing stale MacVTap devs: %s", utils.CommaJoin(stale))
+  for macvtap in stale:
+    if utils.RunCmd(["ip", "link", "delete", macvtap]).failed:
+      logging.warning("Failed to delete stale MacVTap iface '%s'", macvtap)
+
+
 def _VerifyDisks(cl, uuid, nodes, instances):
   """Run a per-group "gnt-cluster verify-disks".
 
@@ -689,6 +727,9 @@ def _GlobalWatcher(opts):
   StartNodeDaemons()
   RunWatcherHooks()
 
+  # Ensure that stale macvtap devices get cleaned up
+  _CleanupStaleMacvtapDevs()
+
   # Run node maintenance in all cases, even if master, so that old masters can
   # be properly cleaned up
   if nodemaint.NodeMaintenance.ShouldRun(): # pylint: disable=E0602
-- 
2.1.4

Reply via email to