This patch adds support for multiqueue virtio-net[1] which sets a number of queues (file descriptors) from virtio_net_queues parameter for tap device to parallelize packets sending or receiving. Tap devices will be created with MULTI_QUEUE (IFF_MULTI_QUEUE) support.
KVM paravirtual nics (virtio-net) are only one which supports this feature. Number of queues are limited by kernel tap implementation (currently to 8). Instances must manually set number of queues, on Linux using: ethtool -L ethX combined $queues Network device options logic is moved to separate method _GetNetworkDeviceFeatures which is now properly reused in hotplugging part. This also fixes unreported bug when vhost_net parameter is set to true, hotplugged network device will be created without "vhost=on" parameter. [1] http://www.linux-kvm.org/page/Multiqueue Signed-off-by: Luka Blaskovic <[email protected]> --- lib/hypervisor/hv_base.py | 6 ++ lib/hypervisor/hv_kvm/__init__.py | 118 ++++++++++++++++++++++++++------------ lib/hypervisor/hv_kvm/netdev.py | 77 +++++++++++++++++++------ man/gnt-instance.rst | 19 ++++++ src/Ganeti/Constants.hs | 5 ++ 5 files changed, 169 insertions(+), 56 deletions(-) diff --git a/lib/hypervisor/hv_base.py b/lib/hypervisor/hv_base.py index f7317fc..491156c 100644 --- a/lib/hypervisor/hv_base.py +++ b/lib/hypervisor/hv_base.py @@ -109,6 +109,10 @@ _MULTI_CPU_MASK_CHECK = (_IsMultiCpuMaskWellFormed, _NET_PORT_CHECK = (lambda x: 0 < x < 65535, "invalid port number", None, None) +# Check if number of queues is in safe range +_VIRTIO_NET_QUEUES_CHECK = (lambda x: 0 < x < 9, "invalid number of queues", + None, None) + # Check that an integer is non negative _NONNEGATIVE_INT_CHECK = (lambda x: x >= 0, "cannot be negative", None, None) @@ -121,6 +125,8 @@ REQ_DIR_CHECK = (True, ) + _DIR_CHECK OPT_DIR_CHECK = (False, ) + _DIR_CHECK REQ_NET_PORT_CHECK = (True, ) + _NET_PORT_CHECK OPT_NET_PORT_CHECK = (False, ) + _NET_PORT_CHECK +REQ_VIRTIO_NET_QUEUES_CHECK = (True, ) + _VIRTIO_NET_QUEUES_CHECK +OPT_VIRTIO_NET_QUEUES_CHECK = (False, ) + _VIRTIO_NET_QUEUES_CHECK REQ_CPU_MASK_CHECK = (True, ) + _CPU_MASK_CHECK OPT_CPU_MASK_CHECK = (False, ) + _CPU_MASK_CHECK REQ_MULTI_CPU_MASK_CHECK = (True, ) + _MULTI_CPU_MASK_CHECK diff --git a/lib/hypervisor/hv_kvm/__init__.py b/lib/hypervisor/hv_kvm/__init__.py index 5617ca7..f192c82 100644 --- a/lib/hypervisor/hv_kvm/__init__.py +++ b/lib/hypervisor/hv_kvm/__init__.py @@ -339,6 +339,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): constants.HV_KVM_FLAG: hv_base.ParamInSet(False, constants.HT_KVM_FLAG_VALUES), constants.HV_VHOST_NET: hv_base.NO_CHECK, + constants.HV_VIRTIO_NET_QUEUES: hv_base.OPT_VIRTIO_NET_QUEUES_CHECK, constants.HV_KVM_USE_CHROOT: hv_base.NO_CHECK, constants.HV_KVM_USER_SHUTDOWN: hv_base.NO_CHECK, constants.HV_MEM_PATH: hv_base.OPT_DIR_CHECK, @@ -385,6 +386,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): _QMP_RE = re.compile(r"^-qmp\s", re.M) _SPICE_RE = re.compile(r"^-spice\s", re.M) _VHOST_RE = re.compile(r"^-net\s.*,vhost=on|off", re.M) + _VIRTIO_NET_QUEUES_RE = re.compile(r"^-net\s.*,fds=x:y:...:z", re.M) _ENABLE_KVM_RE = re.compile(r"^-enable-kvm\s", re.M) _DISABLE_KVM_RE = re.compile(r"^-disable-kvm\s", re.M) _NETDEV_RE = re.compile(r"^-netdev\s", re.M) @@ -1466,6 +1468,55 @@ class KVMHypervisor(hv_base.BaseHypervisor): return hv_base.GenerateTapName() + def _GetNetworkDeviceFeatures(self, up_hvp, devlist, kvmhelp): + """Get network device options to properly enable supported features. + + Return tuple of supported and enabled tap features with nic_model. + This function is called before opening a new tap device. + + @return: (nic_model, vnet_hdr, virtio_net_queues, tap_extra, nic_extra) + @rtype: tuple + + """ + virtio_net_queues = 1 + nic_extra = "" + nic_type = up_hvp[constants.HV_NIC_TYPE] + tap_extra = "" + vnet_hdr = False + if nic_type == constants.HT_NIC_PARAVIRTUAL: + nic_model = self._VIRTIO + try: + if self._VIRTIO_NET_RE.search(devlist): + nic_model = self._VIRTIO_NET_PCI + vnet_hdr = up_hvp[constants.HV_VNET_HDR] + except errors.HypervisorError, _: + # Older versions of kvm don't support DEVICE_LIST, but they don't + # have new virtio syntax either. + pass + + if up_hvp[constants.HV_VHOST_NET]: + # Check for vhost_net support. + if self._VHOST_RE.search(kvmhelp): + tap_extra = ",vhost=on" + else: + raise errors.HypervisorError("vhost_net is configured" + " but it is not available") + if up_hvp[constants.HV_VIRTIO_NET_QUEUES] > 1: + # Check for multiqueue virtio-net support. + if self._VIRTIO_NET_QUEUES_RE.search(kvmhelp): + virtio_net_queues = up_hvp[constants.HV_VIRTIO_NET_QUEUES] + # As advised at http://www.linux-kvm.org/page/Multiqueue formula + # for calculating vector size is: vectors=2*N+1 where N is the + # number of queues (HV_VIRTIO_NET_QUEUES). + nic_extra = ",mq=on,vectors=%d" % (2 * virtio_net_queues + 1) + else: + raise errors.HypervisorError("virtio_net_queues is configured" + " but it is not available") + else: + nic_model = nic_type + + return (nic_model, vnet_hdr, virtio_net_queues, tap_extra, nic_extra) + # too many local variables # pylint: disable=R0914 def _ExecuteKVMRuntime(self, instance, kvm_runtime, kvmhelp, incoming=None): @@ -1524,37 +1575,18 @@ class KVMHypervisor(hv_base.BaseHypervisor): if not kvm_nics: kvm_cmd.extend(["-net", "none"]) else: - vnet_hdr = False - tap_extra = "" - nic_type = up_hvp[constants.HV_NIC_TYPE] - if nic_type == constants.HT_NIC_PARAVIRTUAL: - nic_model = self._VIRTIO - try: - if self._VIRTIO_NET_RE.search(devlist): - nic_model = self._VIRTIO_NET_PCI - vnet_hdr = up_hvp[constants.HV_VNET_HDR] - except errors.HypervisorError, _: - # Older versions of kvm don't support DEVICE_LIST, but they don't - # have new virtio syntax either. - pass - - if up_hvp[constants.HV_VHOST_NET]: - # check for vhost_net support - if self._VHOST_RE.search(kvmhelp): - tap_extra = ",vhost=on" - else: - raise errors.HypervisorError("vhost_net is configured" - " but it is not available") - else: - nic_model = nic_type - + (nic_model, vnet_hdr, + virtio_net_queues, tap_extra, + nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, kvmhelp) kvm_supports_netdev = self._NETDEV_RE.search(kvmhelp) - for nic_seq, nic in enumerate(kvm_nics): - tapname, tapfd = OpenTap(vnet_hdr=vnet_hdr, - name=self._GenerateKvmTapName(nic)) - tapfds.append(tapfd) + tapname, nic_tapfds = OpenTap(vnet_hdr=vnet_hdr, + virtio_net_queues=virtio_net_queues, + name=self._GenerateKvmTapName(nic)) + tapfds.extend(nic_tapfds) taps.append(tapname) + tapfd = "%s%s" % ("fds=" if len(nic_tapfds) > 1 else "fd=", + ":".join(str(fd) for fd in nic_tapfds)) if kvm_supports_netdev: nic_val = "%s,mac=%s" % (nic_model, nic.mac) try: @@ -1565,14 +1597,14 @@ class KVMHypervisor(hv_base.BaseHypervisor): nic_val += (",id=%s,bus=pci.0,addr=%s" % (kvm_devid, hex(nic.pci))) except errors.HotplugError: netdev = "netdev%d" % nic_seq - nic_val += (",netdev=%s" % netdev) - tap_val = ("type=tap,id=%s,fd=%d%s" % + nic_val += (",netdev=%s%s" % (netdev, nic_extra)) + tap_val = ("type=tap,id=%s,%s%s" % (netdev, tapfd, tap_extra)) kvm_cmd.extend(["-netdev", tap_val, "-device", nic_val]) else: nic_val = "nic,vlan=%s,macaddr=%s,model=%s" % (nic_seq, nic.mac, nic_model) - tap_val = "tap,vlan=%s,fd=%d" % (nic_seq, tapfd) + tap_val = "tap,vlan=%s,%s" % (nic_seq, tapfd) kvm_cmd.extend(["-net", tap_val, "-net", nic_val]) if incoming: @@ -1869,12 +1901,23 @@ class KVMHypervisor(hv_base.BaseHypervisor): cmds += ["device_add virtio-blk-pci,bus=pci.0,addr=%s,drive=%s,id=%s" % (hex(device.pci), kvm_devid, kvm_devid)] elif dev_type == constants.HOTPLUG_TARGET_NIC: - (tap, fd) = OpenTap() + kvmpath = instance.hvparams[constants.HV_KVM_PATH] + kvmhelp = self._GetKVMOutput(kvmpath, self._KVMOPT_HELP) + devlist = self._GetKVMOutput(kvmpath, self._KVMOPT_DEVICELIST) + up_hvp = runtime[2] + (_, vnet_hdr, + virtio_net_queues, tap_extra, + nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, kvmhelp) + (tap, fds) = OpenTap(vnet_hdr=vnet_hdr, + virtio_net_queues=virtio_net_queues) + # netdev_add don't support "fds=" when multiple fds are + # requested, generate separate "fd=" string for every fd + tapfd = ",".join(["fd=%s" % fd for fd in fds]) self._ConfigureNIC(instance, seq, device, tap) - self._PassTapFd(instance, fd, device) - cmds = ["netdev_add tap,id=%s,fd=%s" % (kvm_devid, kvm_devid)] - args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s" % \ - (hex(device.pci), device.mac, kvm_devid, kvm_devid) + self._PassTapFd(instance, fds, device) + cmds = ["netdev_add tap,id=%s,%s%s" % (kvm_devid, tapfd, tap_extra)] + args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s%s" % \ + (hex(device.pci), device.mac, kvm_devid, kvm_devid, nic_extra) cmds += ["device_add %s" % args] utils.WriteFile(self._InstanceNICFile(instance.name, seq), data=tap) @@ -1924,7 +1967,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): device.pci = self.HotDelDevice(instance, dev_type, device, _, seq) self.HotAddDevice(instance, dev_type, device, _, seq) - def _PassTapFd(self, instance, fd, nic): + def _PassTapFd(self, instance, fds, nic): """Pass file descriptor to kvm process via monitor socket using SCM_RIGHTS """ @@ -1932,7 +1975,6 @@ class KVMHypervisor(hv_base.BaseHypervisor): # squash common parts between monitor and qmp kvm_devid = _GenerateDeviceKVMId(constants.HOTPLUG_TARGET_NIC, nic) command = "getfd %s\n" % kvm_devid - fds = [fd] logging.info("%s", fds) try: monsock = MonitorSocket(self._InstanceMonitor(instance.name)) diff --git a/lib/hypervisor/hv_kvm/netdev.py b/lib/hypervisor/hv_kvm/netdev.py index a4f2b5d..f5e8067 100644 --- a/lib/hypervisor/hv_kvm/netdev.py +++ b/lib/hypervisor/hv_kvm/netdev.py @@ -41,6 +41,7 @@ IFF_TAP = 0x0002 IFF_NO_PI = 0x1000 IFF_ONE_QUEUE = 0x2000 IFF_VNET_HDR = 0x4000 +IFF_MULTI_QUEUE = 0x0100 def _GetTunFeatures(fd, _ioctl=fcntl.ioctl): @@ -91,42 +92,82 @@ def _ProbeTapVnetHdr(fd, _features_fn=_GetTunFeatures): return result -def OpenTap(vnet_hdr=True, name=""): +def _ProbeTapMqVirtioNet(fd, _features_fn=_GetTunFeatures): + """Check whether to enable the IFF_MULTI_QUEUE flag. + + This flag was introduced in Linux kernel 3.8. + + @type fd: int + @param fd: the file descriptor of /dev/net/tun + + """ + flags = _features_fn(fd) + + if flags is None: + # Not supported + return False + + result = bool(flags & IFF_MULTI_QUEUE) + + if not result: + logging.warning("Kernel does not support IFF_MULTI_QUEUE, not enabling") + + return result + + +def OpenTap(vnet_hdr=True, virtio_net_queues=1, name=""): """Open a new tap device and return its file descriptor. This is intended to be used by a qemu-type hypervisor together with the -net - tap,fd=<fd> command line parameter. + tap,fd=<fd> or -net tap,fds=x:y:...:z command line parameter. @type vnet_hdr: boolean @param vnet_hdr: Enable the VNET Header + @type virtio_net_queues: int + @param virtio_net_queues: Set number of tap queues but not more than 8, + queues only work with virtio-net device; + disabled by default (one queue). + @type name: string @param name: name for the TAP interface being created; if an empty string is passed, the OS will generate a unique name - @return: (ifname, tapfd) + @return: (ifname, [tapfds]) @rtype: tuple """ - try: - tapfd = os.open("/dev/net/tun", os.O_RDWR) - except EnvironmentError: - raise errors.HypervisorError("Failed to open /dev/net/tun") + tapfds = [] - flags = IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE + for _ in range(virtio_net_queues): + try: + tapfd = os.open("/dev/net/tun", os.O_RDWR) + except EnvironmentError: + raise errors.HypervisorError("Failed to open /dev/net/tun") - if vnet_hdr and _ProbeTapVnetHdr(tapfd): - flags |= IFF_VNET_HDR + flags = IFF_TAP | IFF_NO_PI - # The struct ifreq ioctl request (see netdevice(7)) - ifr = struct.pack("16sh", name, flags) + if vnet_hdr and _ProbeTapVnetHdr(tapfd): + flags |= IFF_VNET_HDR - try: - res = fcntl.ioctl(tapfd, TUNSETIFF, ifr) - except EnvironmentError, err: - raise errors.HypervisorError("Failed to allocate a new TAP device: %s" % - err) + # Check if it's ok to enable IFF_MULTI_QUEUE + if virtio_net_queues > 1 and _ProbeTapMqVirtioNet(tapfd): + flags |= IFF_MULTI_QUEUE + else: + flags |= IFF_ONE_QUEUE + + # The struct ifreq ioctl request (see netdevice(7)) + ifr = struct.pack("16sh", name, flags) + + try: + res = fcntl.ioctl(tapfd, TUNSETIFF, ifr) + except EnvironmentError, err: + raise errors.HypervisorError("Failed to allocate a new TAP device: %s" % + err) + + tapfds.append(tapfd) # Get the interface name from the ioctl ifname = struct.unpack("16sh", res)[0].strip("\x00") - return (ifname, tapfd) + + return (ifname, tapfds) diff --git a/man/gnt-instance.rst b/man/gnt-instance.rst index 28c0090..7ded2e0 100644 --- a/man/gnt-instance.rst +++ b/man/gnt-instance.rst @@ -850,6 +850,25 @@ vnet\_hdr It is set to ``true`` by default. +virtio\_net\_queues + Valid for the KVM hypervisor. + + Set a number of queues (file descriptors) for tap device to + parallelize packets sending or receiving. Tap devices will be + created with MULTI_QUEUE (IFF_MULTI_QUEUE) support. This only + works with KVM paravirtual nics (virtio-net) and the maximum + number of queues is limited to ``8``. Tehnically this is an + extension of ``vnet_hdr`` which must be enabled for multiqueue + support. + + If set to ``1`` queue, it effectively disables multiqueue support + on the tap and virio-net devices. + + For instances it is necessary to manually set number of queues (on + Linux using: ``ethtool -L ethX combined $queues``). + + It is set to ``1`` by default. + The ``-O (--os-parameters)`` option allows customisation of the OS parameters. The actual parameter names and values depends on the OS being used, but the syntax is the same key=value. For example, setting diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs index 8a01d84..234cdee 100644 --- a/src/Ganeti/Constants.hs +++ b/src/Ganeti/Constants.hs @@ -1701,6 +1701,9 @@ hvVga = "vga" hvVhostNet :: String hvVhostNet = "vhost_net" +hvVirtioNetQueues :: String +hvVirtioNetQueues = "virtio_net_queues" + hvVifScript :: String hvVifScript = "vif_script" @@ -1818,6 +1821,7 @@ hvsParameterTypes = Map.fromList , (hvUseLocaltime, VTypeBool) , (hvVga, VTypeString) , (hvVhostNet, VTypeBool) + , (hvVirtioNetQueues, VTypeInt) , (hvVifScript, VTypeString) , (hvVifType, VTypeString) , (hvViridian, VTypeBool) @@ -3839,6 +3843,7 @@ hvcDefaults = , (hvSecurityDomain, PyValueEx "") , (hvKvmFlag, PyValueEx "") , (hvVhostNet, PyValueEx False) + , (hvVirtioNetQueues, PyValueEx (1 :: Int)) , (hvKvmUseChroot, PyValueEx False) , (hvKvmUserShutdown, PyValueEx False) , (hvMemPath, PyValueEx "") -- 1.8.1.2
