On Mon, Dec 13, 2010 at 3:22 PM, Apollon Oikonomopoulos <[email protected]> wrote: > This patch introduces network configuration for KVM in Ganeti. > > There are three problems with having KVM perform network configuration via > ifup > scripts: > a) Ganeti never gets to know the tap interface that is associated with an > instance's NIC > b) Migration of routed instances will cause network problems because the > incoming KVM side configures the network as soon as it is spawned and not > as soon as the migration finishes. This means that all routing > configuration will be present in both, primary and secondary, nodes at the > same time, possibly causing network disruption during the migration. > c) We never get to know if the network configuration succeeded or not. > > This patch moves network configuration from KVM to Ganeti, using KVM's ability > to receive already open tap devices as file descriptors. >
Wonderful. > Minor modifications are made to _ExecKVMRuntime to handle tap device > initialization. NIC <-> tap associations are stored under a new directory, > _ROOT_DIR/nic in a file-per-nic fashion. > > The script generated by _WriteNetScript must be moved to live under > _KVM_NET_SCRIPT as the default network configuration script. > Currently _KVM_NET_SCRIPT is an optional override for the net script. You make it something the user must provide without documenting the requirement or giving an example. Either we ship/install an example, or even better we should provide a script in /usr/lib/ganeti/ and make it optionally overridable (or make it possible to optionally override parts of it) from /etc. Requiring the user to do more work for the "simple" setup is not a good idea. Also the way to override it should change from all/or/nothing to hooks and various parts. :) > Signed-off-by: Apollon Oikonomopoulos <[email protected]> > --- > lib/hypervisor/hv_kvm.py | 219 > +++++++++++++++++++--------------------------- > 1 files changed, 92 insertions(+), 127 deletions(-) > > diff --git a/lib/hypervisor/hv_kvm.py b/lib/hypervisor/hv_kvm.py > index c5ed994..dd16a73 100644 > --- a/lib/hypervisor/hv_kvm.py > +++ b/lib/hypervisor/hv_kvm.py > @@ -33,6 +33,7 @@ import logging > import pwd > import struct > import fcntl > +import glob > from cStringIO import StringIO > > from ganeti import utils > @@ -126,106 +127,6 @@ def _OpenTap(vnet_hdr=True): > return (ifname, tapfd) > > > -def _WriteNetScript(instance, nic, index): > - """Write a script to connect a net interface to the proper bridge. > - > - This can be used by any qemu-type hypervisor. > - > - �...@type instance: L{objects.Instance} > - �...@param instance: Instance object > - �...@type nic: L{objects.NIC} > - �...@param nic: NIC object > - �...@type index: int > - �...@param index: NIC index > - �...@return: Script > - �...@rtype: string > - > - """ > - if instance.tags: > - tags = " ".join(instance.tags) > - else: > - tags = "" > - > - buf = StringIO() > - sw = utils.ShellWriter(buf) > - sw.Write("#!/bin/sh") > - sw.Write("# this is autogenerated by Ganeti, please do not edit") > - sw.Write("export PATH=$PATH:/sbin:/usr/sbin") > - sw.Write("export INSTANCE=%s", utils.ShellQuote(instance.name)) > - sw.Write("export MAC=%s", utils.ShellQuote(nic.mac)) > - sw.Write("export MODE=%s", > - utils.ShellQuote(nic.nicparams[constants.NIC_MODE])) > - sw.Write("export INTERFACE=\"$1\"") > - sw.Write("export TAGS=%s", utils.ShellQuote(tags)) > - > - if nic.ip: > - sw.Write("export IP=%s", utils.ShellQuote(nic.ip)) > - > - if nic.nicparams[constants.NIC_LINK]: > - sw.Write("export LINK=%s", > - utils.ShellQuote(nic.nicparams[constants.NIC_LINK])) > - > - if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: > - sw.Write("export BRIDGE=%s", > - utils.ShellQuote(nic.nicparams[constants.NIC_LINK])) > - > - # TODO: make this configurable at ./configure time > - sw.Write("if [ -x %s ]; then", utils.ShellQuote(_KVM_NETWORK_SCRIPT)) > - sw.IncIndent() > - try: > - sw.Write("# Execute the user-specific vif file") > - sw.Write(_KVM_NETWORK_SCRIPT) > - finally: > - sw.DecIndent() > - sw.Write("else") > - sw.IncIndent() > - try: > - sw.Write("ifconfig $INTERFACE 0.0.0.0 up") > - > - if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: > - sw.Write("# Connect the interface to the bridge") > - sw.Write("brctl addif $BRIDGE $INTERFACE") > - > - elif nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_ROUTED: > - if not nic.ip: > - raise errors.HypervisorError("nic/%d is routed, but has no IP" > - " address" % index) > - > - sw.Write("# Route traffic targeted at the IP to the interface") > - if nic.nicparams[constants.NIC_LINK]: > - sw.Write("while ip rule del dev $INTERFACE; do :; done") > - sw.Write("ip rule add dev $INTERFACE table $LINK") > - sw.Write("ip route replace $IP table $LINK proto static" > - " dev $INTERFACE") > - else: > - sw.Write("ip route replace $IP proto static dev $INTERFACE") > - > - interface_v4_conf = "/proc/sys/net/ipv4/conf/$INTERFACE" > - sw.Write(" if [ -d %s ]; then", interface_v4_conf) > - sw.IncIndent() > - try: > - sw.Write("echo 1 > %s/proxy_arp", interface_v4_conf) > - sw.Write("echo 1 > %s/forwarding", interface_v4_conf) > - finally: > - sw.DecIndent() > - sw.Write("fi") > - > - interface_v6_conf = "/proc/sys/net/ipv6/conf/$INTERFACE" > - sw.Write("if [ -d %s ]; then", interface_v6_conf) > - sw.IncIndent() > - try: > - sw.Write("echo 1 > %s/proxy_ndp", interface_v6_conf) > - sw.Write("echo 1 > %s/forwarding", interface_v6_conf) > - finally: > - sw.DecIndent() > - sw.Write("fi") > - finally: > - sw.DecIndent() > - sw.Write("fi") > - > - return buf.getvalue() > - > - > class KVMHypervisor(hv_base.BaseHypervisor): > """KVM hypervisor interface""" > CAN_MIGRATE = True > @@ -235,6 +136,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): > _UIDS_DIR = _ROOT_DIR + "/uid" # contains instances reserved uids > _CTRL_DIR = _ROOT_DIR + "/ctrl" # contains instances control sockets > _CONF_DIR = _ROOT_DIR + "/conf" # contains instances startup data > + _NICS_DIR = _ROOT_DIR + "/nic" # contains instances nic <-> tap > associations > # KVM instances with chroot enabled are started in empty chroot directories. > _CHROOT_DIR = _ROOT_DIR + "/chroot" # for empty chroot directories > # After an instance is stopped, its chroot directory is removed. > @@ -243,7 +145,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): > # To support forensics, the non-empty chroot directory is quarantined in > # a separate directory, called 'chroot-quarantine'. > _CHROOT_QUARANTINE_DIR = _ROOT_DIR + "/chroot-quarantine" > - _DIRS = [_ROOT_DIR, _PIDS_DIR, _UIDS_DIR, _CTRL_DIR, _CONF_DIR, > + _DIRS = [_ROOT_DIR, _PIDS_DIR, _UIDS_DIR, _CTRL_DIR, _CONF_DIR, _NICS_DIR, > _CHROOT_DIR, _CHROOT_QUARANTINE_DIR] > > PARAMETERS = { > @@ -436,6 +338,20 @@ class KVMHypervisor(hv_base.BaseHypervisor): > return utils.PathJoin(cls._CHROOT_DIR, instance_name) > > @classmethod > + def _InstanceNICFile(cls, instance_name, seq): > + """Returns the name of the file containing the tap device for a given NIC > + > + """ > + return utils.PathJoin(cls._NICS_DIR, "%s-%d" % (instance_name, seq)) > + > + �...@classmethod > + def _InstanceNICFiles(cls, instance_name): > + """Returns all existing NIC filenames > + > + """ > + return glob.glob(utils.PathJoin(cls._NICS_DIR, "%s-[0-9]" % > instance_name)) > + Ouch, these two functions differ by a single "s" at the end? Not good, we'll end up confusing them. _AllInstanceNICFiles(...) or _InstanceAllNICFiles(....) vs _InstanceNICFile(...) would be better. > + �...@classmethod > def _TryReadUidFile(cls, uid_file): > """Try to read a uid file > > @@ -464,6 +380,8 @@ class KVMHypervisor(hv_base.BaseHypervisor): > utils.RemoveFile(uid_file) > if uid is not None: > uidpool.ReleaseUid(uid) > + for nicfile in cls._InstanceNICFiles(instance_name): > + utils.RemoveFile(nicfile) Good, for now. We should probably make sure a shutdown script is called, before, now that we can. > try: > chroot_dir = cls._InstanceChrootDir(instance_name) > utils.RemoveDir(chroot_dir) > @@ -484,10 +402,8 @@ class KVMHypervisor(hv_base.BaseHypervisor): > raise > > @staticmethod > - def _WriteNetScriptFile(instance, seq, nic): > - """Write a script to connect a net interface to the proper bridge. > - > - This can be used by any qemu-type hypervisor. > + def _ConfigureNIC(instance, seq, nic, tap): > + """Run the network configuration script for a specified NIC > > @param instance: instance we're acting on > @type instance: instance object > @@ -495,22 +411,39 @@ class KVMHypervisor(hv_base.BaseHypervisor): > @type seq: int > @param nic: nic we're acting on > @type nic: nic object > - �...@return: netscript file name > - �...@rtype: string > + �...@param tap: the host's tap interface this NIC corresponds to > + �...@type tap: str > > """ > - script = _WriteNetScript(instance, nic, seq) > > - # As much as we'd like to put this in our _ROOT_DIR, that will happen to > be > - # mounted noexec sometimes, so we'll have to find another place. > - (tmpfd, tmpfile_name) = tempfile.mkstemp() > - tmpfile = os.fdopen(tmpfd, 'w') > - try: > - tmpfile.write(script) > - finally: > - tmpfile.close() > - os.chmod(tmpfile_name, 0755) > - return tmpfile_name > + if instance.tags: > + tags = " ".join(instance.tags) > + else: > + tags = "" > + > + env = { > + "PATH": "%s:/sbin:/usr/sbin" % os.environ["PATH"], > + "INSTANCE": instance.name, > + "MAC": nic.mac, > + "MODE": nic.nicparams[constants.NIC_MODE], > + "INTERFACE": tap, > + "INTERFACE_INDEX": str(seq), > + "TAGS": tags, > + } > + > + if nic.ip: > + env["IP"] = nic.ip > + > + if nic.nicparams[constants.NIC_LINK]: > + env["LINK"] = nic.nicparams[constants.NIC_LINK] > + > + if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: > + env["BRIDGE"] = nic.nicparams[constants.NIC_LINK] > + > + result = utils.RunCmd([_KVM_NETWORK_SCRIPT, tap], env=env) > + if result.failed: > + logging.warning("Failed to configure interface %s: %s (%s)" % > + (tap, result.fail_reason, result.output)) > See the comments in the change description. > def ListInstances(self): > """Get the list of running instances. > @@ -761,16 +694,22 @@ class KVMHypervisor(hv_base.BaseHypervisor): > kvm_nics = [objects.NIC.FromDict(snic) for snic in serialized_nics] > return (kvm_cmd, kvm_nics, hvparams) > > - def _RunKVMCmd(self, name, kvm_cmd): > + def _RunKVMCmd(self, name, kvm_cmd, tap_fds=None): > """Run the KVM cmd and check for errors > > @type name: string > @param name: instance name > @type kvm_cmd: list of strings > @param kvm_cmd: runcmd input for kvm > + �...@type tap_fds: list of int > + �...@param tap_fds: fds of tap devices opened by Ganeti > > """ > - result = utils.RunCmd(kvm_cmd) > + result = utils.RunCmd(kvm_cmd, noclose_fds=tap_fds) > + > + for fd in tap_fds: > + utils._CloseFDNoErr(fd) > + > if result.failed: > raise errors.HypervisorError("Failed to start instance %s: %s (%s)" % > (name, result.fail_reason, result.output)) > @@ -816,15 +755,19 @@ class KVMHypervisor(hv_base.BaseHypervisor): > # We have reasons to believe changing something like the nic driver/type > # upon migration won't exactly fly with the instance kernel, so for nic > # related parameters we'll use up_hvp > + tapfds = [] > + taps = [] > if not kvm_nics: > kvm_cmd.extend(["-net", "none"]) > else: > + vnet_hdr = False > tap_extra = "" > nic_type = up_hvp[constants.HV_NIC_TYPE] > if nic_type == constants.HT_NIC_PARAVIRTUAL: > # From version 0.12.0, kvm uses a new sintax for network > configuration. > if (v_major, v_min) >= (0, 12): > nic_model = "virtio-net-pci" > + vnet_hdr = True > else: > nic_model = "virtio" > > @@ -839,17 +782,17 @@ class KVMHypervisor(hv_base.BaseHypervisor): > nic_model = nic_type > > for nic_seq, nic in enumerate(kvm_nics): > - script = self._WriteNetScriptFile(instance, nic_seq, nic) > - temp_files.append(script) > + tapname, tapfd = _OpenTap(vnet_hdr) > + tapfds.append(tapfd) > + taps.append(tapname) > if (v_major, v_min) >= (0, 12): > nic_val = "%s,mac=%s,netdev=netdev%s" % (nic_model, nic.mac, > nic_seq) > - tap_val = "type=tap,id=netdev%s,script=%s%s" % (nic_seq, > - script, tap_extra) > + tap_val = "type=tap,id=netdev%s,fd=%d%s" % (nic_seq, tapfd, > tap_extra) > kvm_cmd.extend(["-netdev", tap_val, "-device", nic_val]) > else: > nic_val = "nic,vlan=%s,macaddr=%s,model=%s" % (nic_seq, > nic.mac, nic_model) > - tap_val = "tap,vlan=%s,script=%s" % (nic_seq, script) > + tap_val = "tap,vlan=%s,fd=%d" % (nic_seq, tapfd) > kvm_cmd.extend(["-net", tap_val, "-net", nic_val]) > > if incoming: > @@ -880,7 +823,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): > try: > username = pwd.getpwuid(uid.GetUid()).pw_name > kvm_cmd.extend(["-runas", username]) > - self._RunKVMCmd(name, kvm_cmd) > + self._RunKVMCmd(name, kvm_cmd, tapfds) > except: > uidpool.ReleaseUid(uid) > raise > @@ -888,7 +831,17 @@ class KVMHypervisor(hv_base.BaseHypervisor): > uid.Unlock() > utils.WriteFile(self._InstanceUidFile(name), data=str(uid)) > else: > - self._RunKVMCmd(name, kvm_cmd) > + self._RunKVMCmd(name, kvm_cmd, tapfds) > + > + for nic_seq, tap in enumerate(taps): > + utils.WriteFile(self._InstanceNICFile(instance.name, nic_seq), > + data=tap) > + > + if not incoming: > + # Configure the network now for starting instances, during > + # FinalizeMigration for incoming instances > + for nic_seq, nic in enumerate(kvm_nics): > + self._ConfigureNIC(instance, nic_seq, nic, taps[nic_seq]) > > if vnc_pwd: > change_cmd = 'change vnc password %s' % vnc_pwd > @@ -1025,6 +978,18 @@ class KVMHypervisor(hv_base.BaseHypervisor): > > """ > if success: > + kvm_runtime = self._LoadKVMRuntime(instance, serialized_runtime=info) > + kvm_nics = kvm_runtime[1] > + > + for nic_seq, nic in enumerate(kvm_nics): > + try: > + tap = utils.ReadFile(self._InstanceNICFile(instance.name, nic_seq)) > + except: > + logging.warning("Failed to find host interface for %s NIC #%d" % > + (instance.name, nic_seq)) > + continue > + self._ConfigureNIC(instance, nic_seq, nic, tap) > + > self._WriteKVMRuntime(instance.name, info) > else: > self.StopInstance(instance, force=True) In general a very welcome change, once we sort out the issues above! :) Thanks a lot for improving this! Guido
