Hello Jose, Here is the patch rebased on master. If possible, I would like to see this patch in stable-2.12 as well; it will apply cleanly there too.
Thanks, Luka Dana petak, 6. lipnja 2014. 12:58:32 UTC+2, korisnik Luka Blašković napisao je: > > This patch adds support for multiqueue virtio-net[1] which sets a number > of > queues (file descriptors) from virtio_net_queues parameter for tap device > to parallelize packets sending or receiving. Tap devices will be created > with > MULTI_QUEUE (IFF_MULTI_QUEUE) support. > > KVM paravirtual nics (virtio-net) are only one which supports this > feature. > Number of queues are limited by kernel tap implementation (currently to > 8). > Instances must manually set number of queues, on Linux using: > > ethtool -L ethX combined $queues > > Network device options logic is moved to separate method > _GetNetworkDeviceFeatures which is now properly reused in hotplugging > part. > This also fixes unreported bug when vhost_net parameter is set to true, > hotplugged network device will be created without "vhost=on" parameter. > > [1] http://www.linux-kvm.org/page/Multiqueue > > Signed-off-by: Luka Blaskovic <[email protected]> > --- > lib/hypervisor/hv_base.py | 6 ++ > lib/hypervisor/hv_kvm/__init__.py | 118 > ++++++++++++++++++++++++++------------ > lib/hypervisor/hv_kvm/netdev.py | 77 +++++++++++++++++++------ > man/gnt-instance.rst | 19 ++++++ > src/Ganeti/Constants.hs | 5 ++ > 5 files changed, 169 insertions(+), 56 deletions(-) > > diff --git a/lib/hypervisor/hv_base.py b/lib/hypervisor/hv_base.py > index f7317fc..491156c 100644 > --- a/lib/hypervisor/hv_base.py > +++ b/lib/hypervisor/hv_base.py > @@ -109,6 +109,10 @@ _MULTI_CPU_MASK_CHECK = (_IsMultiCpuMaskWellFormed, > _NET_PORT_CHECK = (lambda x: 0 < x < 65535, "invalid port number", > None, None) > > +# Check if number of queues is in safe range > +_VIRTIO_NET_QUEUES_CHECK = (lambda x: 0 < x < 9, "invalid number of > queues", > + None, None) > + > # Check that an integer is non negative > _NONNEGATIVE_INT_CHECK = (lambda x: x >= 0, "cannot be negative", None, > None) > > @@ -121,6 +125,8 @@ REQ_DIR_CHECK = (True, ) + 
_DIR_CHECK > OPT_DIR_CHECK = (False, ) + _DIR_CHECK > REQ_NET_PORT_CHECK = (True, ) + _NET_PORT_CHECK > OPT_NET_PORT_CHECK = (False, ) + _NET_PORT_CHECK > +REQ_VIRTIO_NET_QUEUES_CHECK = (True, ) + _VIRTIO_NET_QUEUES_CHECK > +OPT_VIRTIO_NET_QUEUES_CHECK = (False, ) + _VIRTIO_NET_QUEUES_CHECK > REQ_CPU_MASK_CHECK = (True, ) + _CPU_MASK_CHECK > OPT_CPU_MASK_CHECK = (False, ) + _CPU_MASK_CHECK > REQ_MULTI_CPU_MASK_CHECK = (True, ) + _MULTI_CPU_MASK_CHECK > diff --git a/lib/hypervisor/hv_kvm/__init__.py > b/lib/hypervisor/hv_kvm/__init__.py > index 5617ca7..f192c82 100644 > --- a/lib/hypervisor/hv_kvm/__init__.py > +++ b/lib/hypervisor/hv_kvm/__init__.py > @@ -339,6 +339,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): > constants.HV_KVM_FLAG: > hv_base.ParamInSet(False, constants.HT_KVM_FLAG_VALUES), > constants.HV_VHOST_NET: hv_base.NO_CHECK, > + constants.HV_VIRTIO_NET_QUEUES: hv_base.OPT_VIRTIO_NET_QUEUES_CHECK, > constants.HV_KVM_USE_CHROOT: hv_base.NO_CHECK, > constants.HV_KVM_USER_SHUTDOWN: hv_base.NO_CHECK, > constants.HV_MEM_PATH: hv_base.OPT_DIR_CHECK, > @@ -385,6 +386,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): > _QMP_RE = re.compile(r"^-qmp\s", re.M) > _SPICE_RE = re.compile(r"^-spice\s", re.M) > _VHOST_RE = re.compile(r"^-net\s.*,vhost=on|off", re.M) > + _VIRTIO_NET_QUEUES_RE = re.compile(r"^-net\s.*,fds=x:y:...:z", re.M) > _ENABLE_KVM_RE = re.compile(r"^-enable-kvm\s", re.M) > _DISABLE_KVM_RE = re.compile(r"^-disable-kvm\s", re.M) > _NETDEV_RE = re.compile(r"^-netdev\s", re.M) > @@ -1466,6 +1468,55 @@ class KVMHypervisor(hv_base.BaseHypervisor): > > return hv_base.GenerateTapName() > > + def _GetNetworkDeviceFeatures(self, up_hvp, devlist, kvmhelp): > + """Get network device options to properly enable supported features. > + > + Return tuple of supported and enabled tap features with nic_model. > + This function is called before opening a new tap device. 
> + > + @return: (nic_model, vnet_hdr, virtio_net_queues, tap_extra, > nic_extra) > + @rtype: tuple > + > + """ > + virtio_net_queues = 1 > + nic_extra = "" > + nic_type = up_hvp[constants.HV_NIC_TYPE] > + tap_extra = "" > + vnet_hdr = False > + if nic_type == constants.HT_NIC_PARAVIRTUAL: > + nic_model = self._VIRTIO > + try: > + if self._VIRTIO_NET_RE.search(devlist): > + nic_model = self._VIRTIO_NET_PCI > + vnet_hdr = up_hvp[constants.HV_VNET_HDR] > + except errors.HypervisorError, _: > + # Older versions of kvm don't support DEVICE_LIST, but they don't > + # have new virtio syntax either. > + pass > + > + if up_hvp[constants.HV_VHOST_NET]: > + # Check for vhost_net support. > + if self._VHOST_RE.search(kvmhelp): > + tap_extra = ",vhost=on" > + else: > + raise errors.HypervisorError("vhost_net is configured" > + " but it is not available") > + if up_hvp[constants.HV_VIRTIO_NET_QUEUES] > 1: > + # Check for multiqueue virtio-net support. > + if self._VIRTIO_NET_QUEUES_RE.search(kvmhelp): > + virtio_net_queues = up_hvp[constants.HV_VIRTIO_NET_QUEUES] > + # As advised at http://www.linux-kvm.org/page/Multiqueue > formula > + # for calculating vector size is: vectors=2*N+1 where N is > the > + # number of queues (HV_VIRTIO_NET_QUEUES). 
> + nic_extra = ",mq=on,vectors=%d" % (2 * virtio_net_queues + 1) > + else: > + raise errors.HypervisorError("virtio_net_queues is > configured" > + " but it is not available") > + else: > + nic_model = nic_type > + > + return (nic_model, vnet_hdr, virtio_net_queues, tap_extra, nic_extra) > + > # too many local variables > # pylint: disable=R0914 > def _ExecuteKVMRuntime(self, instance, kvm_runtime, kvmhelp, > incoming=None): > @@ -1524,37 +1575,18 @@ class KVMHypervisor(hv_base.BaseHypervisor): > if not kvm_nics: > kvm_cmd.extend(["-net", "none"]) > else: > - vnet_hdr = False > - tap_extra = "" > - nic_type = up_hvp[constants.HV_NIC_TYPE] > - if nic_type == constants.HT_NIC_PARAVIRTUAL: > - nic_model = self._VIRTIO > - try: > - if self._VIRTIO_NET_RE.search(devlist): > - nic_model = self._VIRTIO_NET_PCI > - vnet_hdr = up_hvp[constants.HV_VNET_HDR] > - except errors.HypervisorError, _: > - # Older versions of kvm don't support DEVICE_LIST, but they > don't > - # have new virtio syntax either. 
> - pass > - > - if up_hvp[constants.HV_VHOST_NET]: > - # check for vhost_net support > - if self._VHOST_RE.search(kvmhelp): > - tap_extra = ",vhost=on" > - else: > - raise errors.HypervisorError("vhost_net is configured" > - " but it is not available") > - else: > - nic_model = nic_type > - > + (nic_model, vnet_hdr, > + virtio_net_queues, tap_extra, > + nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, > kvmhelp) > kvm_supports_netdev = self._NETDEV_RE.search(kvmhelp) > - > for nic_seq, nic in enumerate(kvm_nics): > - tapname, tapfd = OpenTap(vnet_hdr=vnet_hdr, > - name=self._GenerateKvmTapName(nic)) > - tapfds.append(tapfd) > + tapname, nic_tapfds = OpenTap(vnet_hdr=vnet_hdr, > + > virtio_net_queues=virtio_net_queues, > + name=self._GenerateKvmTapName(nic)) > + tapfds.extend(nic_tapfds) > taps.append(tapname) > + tapfd = "%s%s" % ("fds=" if len(nic_tapfds) > 1 else "fd=", > + ":".join(str(fd) for fd in nic_tapfds)) > if kvm_supports_netdev: > nic_val = "%s,mac=%s" % (nic_model, nic.mac) > try: > @@ -1565,14 +1597,14 @@ class KVMHypervisor(hv_base.BaseHypervisor): > nic_val += (",id=%s,bus=pci.0,addr=%s" % (kvm_devid, > hex(nic.pci))) > except errors.HotplugError: > netdev = "netdev%d" % nic_seq > - nic_val += (",netdev=%s" % netdev) > - tap_val = ("type=tap,id=%s,fd=%d%s" % > + nic_val += (",netdev=%s%s" % (netdev, nic_extra)) > + tap_val = ("type=tap,id=%s,%s%s" % > (netdev, tapfd, tap_extra)) > kvm_cmd.extend(["-netdev", tap_val, "-device", nic_val]) > else: > nic_val = "nic,vlan=%s,macaddr=%s,model=%s" % (nic_seq, > nic.mac, > nic_model) > - tap_val = "tap,vlan=%s,fd=%d" % (nic_seq, tapfd) > + tap_val = "tap,vlan=%s,%s" % (nic_seq, tapfd) > kvm_cmd.extend(["-net", tap_val, "-net", nic_val]) > > if incoming: > @@ -1869,12 +1901,23 @@ class KVMHypervisor(hv_base.BaseHypervisor): > cmds += ["device_add > virtio-blk-pci,bus=pci.0,addr=%s,drive=%s,id=%s" % > (hex(device.pci), kvm_devid, kvm_devid)] > elif dev_type == constants.HOTPLUG_TARGET_NIC: > - 
(tap, fd) = OpenTap() > + kvmpath = instance.hvparams[constants.HV_KVM_PATH] > + kvmhelp = self._GetKVMOutput(kvmpath, self._KVMOPT_HELP) > + devlist = self._GetKVMOutput(kvmpath, self._KVMOPT_DEVICELIST) > + up_hvp = runtime[2] > + (_, vnet_hdr, > + virtio_net_queues, tap_extra, > + nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, > kvmhelp) > + (tap, fds) = OpenTap(vnet_hdr=vnet_hdr, > + virtio_net_queues=virtio_net_queues) > + # netdev_add don't support "fds=" when multiple fds are > + # requested, generate separate "fd=" string for every fd > + tapfd = ",".join(["fd=%s" % fd for fd in fds]) > self._ConfigureNIC(instance, seq, device, tap) > - self._PassTapFd(instance, fd, device) > - cmds = ["netdev_add tap,id=%s,fd=%s" % (kvm_devid, kvm_devid)] > - args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s" % > \ > - (hex(device.pci), device.mac, kvm_devid, kvm_devid) > + self._PassTapFd(instance, fds, device) > + cmds = ["netdev_add tap,id=%s,%s%s" % (kvm_devid, tapfd, > tap_extra)] > + args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s%s" > % \ > + (hex(device.pci), device.mac, kvm_devid, kvm_devid, > nic_extra) > cmds += ["device_add %s" % args] > utils.WriteFile(self._InstanceNICFile(instance.name, seq), > data=tap) > > @@ -1924,7 +1967,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): > device.pci = self.HotDelDevice(instance, dev_type, device, _, seq) > self.HotAddDevice(instance, dev_type, device, _, seq) > > - def _PassTapFd(self, instance, fd, nic): > + def _PassTapFd(self, instance, fds, nic): > """Pass file descriptor to kvm process via monitor socket using > SCM_RIGHTS > > """ > @@ -1932,7 +1975,6 @@ class KVMHypervisor(hv_base.BaseHypervisor): > # squash common parts between monitor and qmp > kvm_devid = _GenerateDeviceKVMId(constants.HOTPLUG_TARGET_NIC, nic) > command = "getfd %s\n" % kvm_devid > - fds = [fd] > logging.info("%s", fds) > try: > monsock = MonitorSocket(self._InstanceMonitor(instance.name)) > diff --git 
a/lib/hypervisor/hv_kvm/netdev.py > b/lib/hypervisor/hv_kvm/netdev.py > index a4f2b5d..f5e8067 100644 > --- a/lib/hypervisor/hv_kvm/netdev.py > +++ b/lib/hypervisor/hv_kvm/netdev.py > @@ -41,6 +41,7 @@ IFF_TAP = 0x0002 > IFF_NO_PI = 0x1000 > IFF_ONE_QUEUE = 0x2000 > IFF_VNET_HDR = 0x4000 > +IFF_MULTI_QUEUE = 0x0100 > > > def _GetTunFeatures(fd, _ioctl=fcntl.ioctl): > @@ -91,42 +92,82 @@ def _ProbeTapVnetHdr(fd, > _features_fn=_GetTunFeatures): > return result > > > -def OpenTap(vnet_hdr=True, name=""): > +def _ProbeTapMqVirtioNet(fd, _features_fn=_GetTunFeatures): > + """Check whether to enable the IFF_MULTI_QUEUE flag. > + > + This flag was introduced in Linux kernel 3.8. > + > + @type fd: int > + @param fd: the file descriptor of /dev/net/tun > + > + """ > + flags = _features_fn(fd) > + > + if flags is None: > + # Not supported > + return False > + > + result = bool(flags & IFF_MULTI_QUEUE) > + > + if not result: > + logging.warning("Kernel does not support IFF_MULTI_QUEUE, not > enabling") > + > + return result > + > + > +def OpenTap(vnet_hdr=True, virtio_net_queues=1, name=""): > """Open a new tap device and return its file descriptor. > > This is intended to be used by a qemu-type hypervisor together with the > -net > - tap,fd=<fd> command line parameter. > + tap,fd=<fd> or -net tap,fds=x:y:...:z command line parameter. > > @type vnet_hdr: boolean > @param vnet_hdr: Enable the VNET Header > > + @type virtio_net_queues: int > + @param virtio_net_queues: Set number of tap queues but not more than 8, > + queues only work with virtio-net device; > + disabled by default (one queue). 
> + > @type name: string > @param name: name for the TAP interface being created; if an empty > string is passed, the OS will generate a unique name > > - @return: (ifname, tapfd) > + @return: (ifname, [tapfds]) > @rtype: tuple > > """ > - try: > - tapfd = os.open("/dev/net/tun", os.O_RDWR) > - except EnvironmentError: > - raise errors.HypervisorError("Failed to open /dev/net/tun") > + tapfds = [] > > - flags = IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE > + for _ in range(virtio_net_queues): > + try: > + tapfd = os.open("/dev/net/tun", os.O_RDWR) > + except EnvironmentError: > + raise errors.HypervisorError("Failed to open /dev/net/tun") > > - if vnet_hdr and _ProbeTapVnetHdr(tapfd): > - flags |= IFF_VNET_HDR > + flags = IFF_TAP | IFF_NO_PI > > - # The struct ifreq ioctl request (see netdevice(7)) > - ifr = struct.pack("16sh", name, flags) > + if vnet_hdr and _ProbeTapVnetHdr(tapfd): > + flags |= IFF_VNET_HDR > > - try: > - res = fcntl.ioctl(tapfd, TUNSETIFF, ifr) > - except EnvironmentError, err: > - raise errors.HypervisorError("Failed to allocate a new TAP device: > %s" % > - err) > + # Check if it's ok to enable IFF_MULTI_QUEUE > + if virtio_net_queues > 1 and _ProbeTapMqVirtioNet(tapfd): > + flags |= IFF_MULTI_QUEUE > + else: > + flags |= IFF_ONE_QUEUE > + > + # The struct ifreq ioctl request (see netdevice(7)) > + ifr = struct.pack("16sh", name, flags) > + > + try: > + res = fcntl.ioctl(tapfd, TUNSETIFF, ifr) > + except EnvironmentError, err: > + raise errors.HypervisorError("Failed to allocate a new TAP device: > %s" % > + err) > + > + tapfds.append(tapfd) > > # Get the interface name from the ioctl > ifname = struct.unpack("16sh", res)[0].strip("\x00") > - return (ifname, tapfd) > + > + return (ifname, tapfds) > diff --git a/man/gnt-instance.rst b/man/gnt-instance.rst > index 28c0090..7ded2e0 100644 > --- a/man/gnt-instance.rst > +++ b/man/gnt-instance.rst > @@ -850,6 +850,25 @@ vnet\_hdr > > It is set to ``true`` by default. 
> > +virtio\_net\_queues > + Valid for the KVM hypervisor. > + > + Set a number of queues (file descriptors) for tap device to > + parallelize packets sending or receiving. Tap devices will be > + created with MULTI_QUEUE (IFF_MULTI_QUEUE) support. This only > + works with KVM paravirtual nics (virtio-net) and the maximum > + number of queues is limited to ``8``. Technically this is an > + extension of ``vnet_hdr`` which must be enabled for multiqueue > + support. > + > + If set to ``1`` queue, it effectively disables multiqueue support > + on the tap and virtio-net devices. > + > + For instances it is necessary to manually set number of queues (on > + Linux using: ``ethtool -L ethX combined $queues``). > + > + It is set to ``1`` by default. > + > The ``-O (--os-parameters)`` option allows customisation of the OS > parameters. The actual parameter names and values depends on the OS > being used, but the syntax is the same key=value. For example, setting > diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs > index 8a01d84..234cdee 100644 > --- a/src/Ganeti/Constants.hs > +++ b/src/Ganeti/Constants.hs > @@ -1701,6 +1701,9 @@ hvVga = "vga" > hvVhostNet :: String > hvVhostNet = "vhost_net" > > +hvVirtioNetQueues :: String > +hvVirtioNetQueues = "virtio_net_queues" > + > hvVifScript :: String > hvVifScript = "vif_script" > > @@ -1818,6 +1821,7 @@ hvsParameterTypes = Map.fromList > , (hvUseLocaltime, VTypeBool) > , (hvVga, VTypeString) > , (hvVhostNet, VTypeBool) > + , (hvVirtioNetQueues, VTypeInt) > , (hvVifScript, VTypeString) > , (hvVifType, VTypeString) > , (hvViridian, VTypeBool) > @@ -3839,6 +3843,7 @@ hvcDefaults = > , (hvSecurityDomain, PyValueEx "") > , (hvKvmFlag, PyValueEx "") > , (hvVhostNet, PyValueEx False) > + , (hvVirtioNetQueues, PyValueEx (1 :: Int)) > , (hvKvmUseChroot, PyValueEx False) > , (hvKvmUserShutdown, PyValueEx False) > , (hvMemPath, PyValueEx "") > -- > 1.8.1.2 > >
