Hello Jose,

Here is the patch rebased onto master. If possible, I would like to see
this patch in stable-2.12; it will apply cleanly there as well.

Thanks,
Luka

Dana petak, 6. lipnja 2014. 12:58:32 UTC+2, korisnik Luka Blašković napisao 
je:
>
> This patch adds support for multiqueue virtio-net[1]. The number of
> queues (file descriptors) opened for a tap device is taken from the
> virtio_net_queues parameter, which allows packet sending and receiving
> to be parallelized. Tap devices will be created with MULTI_QUEUE
> (IFF_MULTI_QUEUE) support.
>
> KVM paravirtual NICs (virtio-net) are the only ones that support this
> feature. The number of queues is limited by the kernel tap
> implementation (currently to 8). Instances must set the number of
> queues manually; on Linux, using:
>
> ethtool -L ethX combined $queues 
>
> The network device options logic is moved to a separate method,
> _GetNetworkDeviceFeatures, which is now properly reused in the
> hotplugging code. This also fixes an unreported bug: when the vhost_net
> parameter was set to true, a hotplugged network device was created
> without the "vhost=on" parameter.
>
> [1] http://www.linux-kvm.org/page/Multiqueue 
>
> Signed-off-by: Luka Blaskovic <[email protected]> 
> --- 
>  lib/hypervisor/hv_base.py         |   6 ++ 
>  lib/hypervisor/hv_kvm/__init__.py | 118 
> ++++++++++++++++++++++++++------------ 
>  lib/hypervisor/hv_kvm/netdev.py   |  77 +++++++++++++++++++------ 
>  man/gnt-instance.rst              |  19 ++++++ 
>  src/Ganeti/Constants.hs           |   5 ++ 
>  5 files changed, 169 insertions(+), 56 deletions(-) 
>
> diff --git a/lib/hypervisor/hv_base.py b/lib/hypervisor/hv_base.py 
> index f7317fc..491156c 100644 
> --- a/lib/hypervisor/hv_base.py 
> +++ b/lib/hypervisor/hv_base.py 
> @@ -109,6 +109,10 @@ _MULTI_CPU_MASK_CHECK = (_IsMultiCpuMaskWellFormed, 
>  _NET_PORT_CHECK = (lambda x: 0 < x < 65535, "invalid port number", 
>                     None, None) 
>   
> +# Check if number of queues is in safe range 
> +_VIRTIO_NET_QUEUES_CHECK = (lambda x: 0 < x < 9, "invalid number of 
> queues", 
> +                            None, None) 
> + 
>  # Check that an integer is non negative 
>  _NONNEGATIVE_INT_CHECK = (lambda x: x >= 0, "cannot be negative", None, 
> None) 
>   
> @@ -121,6 +125,8 @@ REQ_DIR_CHECK = (True, ) + _DIR_CHECK 
>  OPT_DIR_CHECK = (False, ) + _DIR_CHECK 
>  REQ_NET_PORT_CHECK = (True, ) + _NET_PORT_CHECK 
>  OPT_NET_PORT_CHECK = (False, ) + _NET_PORT_CHECK 
> +REQ_VIRTIO_NET_QUEUES_CHECK = (True, ) + _VIRTIO_NET_QUEUES_CHECK 
> +OPT_VIRTIO_NET_QUEUES_CHECK = (False, ) + _VIRTIO_NET_QUEUES_CHECK 
>  REQ_CPU_MASK_CHECK = (True, ) + _CPU_MASK_CHECK 
>  OPT_CPU_MASK_CHECK = (False, ) + _CPU_MASK_CHECK 
>  REQ_MULTI_CPU_MASK_CHECK = (True, ) + _MULTI_CPU_MASK_CHECK 
> diff --git a/lib/hypervisor/hv_kvm/__init__.py 
> b/lib/hypervisor/hv_kvm/__init__.py 
> index 5617ca7..f192c82 100644 
> --- a/lib/hypervisor/hv_kvm/__init__.py 
> +++ b/lib/hypervisor/hv_kvm/__init__.py 
> @@ -339,6 +339,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): 
>      constants.HV_KVM_FLAG: 
>        hv_base.ParamInSet(False, constants.HT_KVM_FLAG_VALUES), 
>      constants.HV_VHOST_NET: hv_base.NO_CHECK, 
> +    constants.HV_VIRTIO_NET_QUEUES: hv_base.OPT_VIRTIO_NET_QUEUES_CHECK, 
>      constants.HV_KVM_USE_CHROOT: hv_base.NO_CHECK, 
>      constants.HV_KVM_USER_SHUTDOWN: hv_base.NO_CHECK, 
>      constants.HV_MEM_PATH: hv_base.OPT_DIR_CHECK, 
> @@ -385,6 +386,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): 
>    _QMP_RE = re.compile(r"^-qmp\s", re.M) 
>    _SPICE_RE = re.compile(r"^-spice\s", re.M) 
>    _VHOST_RE = re.compile(r"^-net\s.*,vhost=on|off", re.M) 
> +  _VIRTIO_NET_QUEUES_RE = re.compile(r"^-net\s.*,fds=x:y:...:z", re.M) 
>    _ENABLE_KVM_RE = re.compile(r"^-enable-kvm\s", re.M) 
>    _DISABLE_KVM_RE = re.compile(r"^-disable-kvm\s", re.M) 
>    _NETDEV_RE = re.compile(r"^-netdev\s", re.M) 
> @@ -1466,6 +1468,55 @@ class KVMHypervisor(hv_base.BaseHypervisor): 
>   
>      return hv_base.GenerateTapName() 
>   
> +  def _GetNetworkDeviceFeatures(self, up_hvp, devlist, kvmhelp): 
> +    """Get network device options to properly enable supported features. 
> + 
> +    Return tuple of supported and enabled tap features with nic_model. 
> +    This function is called before opening a new tap device. 
> + 
> +    @return: (nic_model, vnet_hdr, virtio_net_queues, tap_extra, 
> nic_extra) 
> +    @rtype: tuple 
> + 
> +    """ 
> +    virtio_net_queues = 1 
> +    nic_extra = "" 
> +    nic_type = up_hvp[constants.HV_NIC_TYPE] 
> +    tap_extra = "" 
> +    vnet_hdr = False 
> +    if nic_type == constants.HT_NIC_PARAVIRTUAL: 
> +      nic_model = self._VIRTIO 
> +      try: 
> +        if self._VIRTIO_NET_RE.search(devlist): 
> +          nic_model = self._VIRTIO_NET_PCI 
> +          vnet_hdr = up_hvp[constants.HV_VNET_HDR] 
> +      except errors.HypervisorError, _: 
> +        # Older versions of kvm don't support DEVICE_LIST, but they don't 
> +        # have new virtio syntax either. 
> +        pass 
> + 
> +      if up_hvp[constants.HV_VHOST_NET]: 
> +        # Check for vhost_net support. 
> +        if self._VHOST_RE.search(kvmhelp): 
> +          tap_extra = ",vhost=on" 
> +        else: 
> +          raise errors.HypervisorError("vhost_net is configured" 
> +                                       " but it is not available") 
> +        if up_hvp[constants.HV_VIRTIO_NET_QUEUES] > 1: 
> +          # Check for multiqueue virtio-net support. 
> +          if self._VIRTIO_NET_QUEUES_RE.search(kvmhelp): 
> +            virtio_net_queues = up_hvp[constants.HV_VIRTIO_NET_QUEUES] 
> +            # As advised at http://www.linux-kvm.org/page/Multiqueue 
> formula 
> +            # for calculating vector size is: vectors=2*N+1 where N is 
> the 
> +            # number of queues (HV_VIRTIO_NET_QUEUES). 
> +            nic_extra = ",mq=on,vectors=%d" % (2 * virtio_net_queues + 1) 
> +          else: 
> +            raise errors.HypervisorError("virtio_net_queues is 
> configured" 
> +                                         " but it is not available") 
> +    else: 
> +      nic_model = nic_type 
> + 
> +    return (nic_model, vnet_hdr, virtio_net_queues, tap_extra, nic_extra) 
> + 
>    # too many local variables 
>    # pylint: disable=R0914 
>    def _ExecuteKVMRuntime(self, instance, kvm_runtime, kvmhelp, 
> incoming=None): 
> @@ -1524,37 +1575,18 @@ class KVMHypervisor(hv_base.BaseHypervisor): 
>      if not kvm_nics: 
>        kvm_cmd.extend(["-net", "none"]) 
>      else: 
> -      vnet_hdr = False 
> -      tap_extra = "" 
> -      nic_type = up_hvp[constants.HV_NIC_TYPE] 
> -      if nic_type == constants.HT_NIC_PARAVIRTUAL: 
> -        nic_model = self._VIRTIO 
> -        try: 
> -          if self._VIRTIO_NET_RE.search(devlist): 
> -            nic_model = self._VIRTIO_NET_PCI 
> -            vnet_hdr = up_hvp[constants.HV_VNET_HDR] 
> -        except errors.HypervisorError, _: 
> -          # Older versions of kvm don't support DEVICE_LIST, but they 
> don't 
> -          # have new virtio syntax either. 
> -          pass 
> - 
> -        if up_hvp[constants.HV_VHOST_NET]: 
> -          # check for vhost_net support 
> -          if self._VHOST_RE.search(kvmhelp): 
> -            tap_extra = ",vhost=on" 
> -          else: 
> -            raise errors.HypervisorError("vhost_net is configured" 
> -                                         " but it is not available") 
> -      else: 
> -        nic_model = nic_type 
> - 
> +      (nic_model, vnet_hdr, 
> +       virtio_net_queues, tap_extra, 
> +       nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, 
> kvmhelp) 
>        kvm_supports_netdev = self._NETDEV_RE.search(kvmhelp) 
> - 
>        for nic_seq, nic in enumerate(kvm_nics): 
> -        tapname, tapfd = OpenTap(vnet_hdr=vnet_hdr, 
> -                                 name=self._GenerateKvmTapName(nic)) 
> -        tapfds.append(tapfd) 
> +        tapname, nic_tapfds = OpenTap(vnet_hdr=vnet_hdr, 
> +                                     
>  virtio_net_queues=virtio_net_queues, 
> +                                      name=self._GenerateKvmTapName(nic)) 
> +        tapfds.extend(nic_tapfds) 
>          taps.append(tapname) 
> +        tapfd = "%s%s" % ("fds=" if len(nic_tapfds) > 1 else "fd=", 
> +                          ":".join(str(fd) for fd in nic_tapfds)) 
>          if kvm_supports_netdev: 
>            nic_val = "%s,mac=%s" % (nic_model, nic.mac) 
>            try: 
> @@ -1565,14 +1597,14 @@ class KVMHypervisor(hv_base.BaseHypervisor): 
>              nic_val += (",id=%s,bus=pci.0,addr=%s" % (kvm_devid, 
> hex(nic.pci))) 
>            except errors.HotplugError: 
>              netdev = "netdev%d" % nic_seq 
> -          nic_val += (",netdev=%s" % netdev) 
> -          tap_val = ("type=tap,id=%s,fd=%d%s" % 
> +          nic_val += (",netdev=%s%s" % (netdev, nic_extra)) 
> +          tap_val = ("type=tap,id=%s,%s%s" % 
>                       (netdev, tapfd, tap_extra)) 
>            kvm_cmd.extend(["-netdev", tap_val, "-device", nic_val]) 
>          else: 
>            nic_val = "nic,vlan=%s,macaddr=%s,model=%s" % (nic_seq, 
>                                                           nic.mac, 
> nic_model) 
> -          tap_val = "tap,vlan=%s,fd=%d" % (nic_seq, tapfd) 
> +          tap_val = "tap,vlan=%s,%s" % (nic_seq, tapfd) 
>            kvm_cmd.extend(["-net", tap_val, "-net", nic_val]) 
>   
>      if incoming: 
> @@ -1869,12 +1901,23 @@ class KVMHypervisor(hv_base.BaseHypervisor): 
>        cmds += ["device_add 
> virtio-blk-pci,bus=pci.0,addr=%s,drive=%s,id=%s" % 
>                  (hex(device.pci), kvm_devid, kvm_devid)] 
>      elif dev_type == constants.HOTPLUG_TARGET_NIC: 
> -      (tap, fd) = OpenTap() 
> +      kvmpath = instance.hvparams[constants.HV_KVM_PATH] 
> +      kvmhelp = self._GetKVMOutput(kvmpath, self._KVMOPT_HELP) 
> +      devlist = self._GetKVMOutput(kvmpath, self._KVMOPT_DEVICELIST) 
> +      up_hvp = runtime[2] 
> +      (_, vnet_hdr, 
> +       virtio_net_queues, tap_extra, 
> +       nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, 
> kvmhelp) 
> +      (tap, fds) = OpenTap(vnet_hdr=vnet_hdr, 
> +                           virtio_net_queues=virtio_net_queues) 
> +      # netdev_add doesn't support "fds=" when multiple fds are
> +      # requested, so generate a separate "fd=" string for every fd
> +      tapfd = ",".join(["fd=%s" % fd for fd in fds]) 
>        self._ConfigureNIC(instance, seq, device, tap) 
> -      self._PassTapFd(instance, fd, device) 
> -      cmds = ["netdev_add tap,id=%s,fd=%s" % (kvm_devid, kvm_devid)] 
> -      args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s" % 
> \ 
> -               (hex(device.pci), device.mac, kvm_devid, kvm_devid) 
> +      self._PassTapFd(instance, fds, device) 
> +      cmds = ["netdev_add tap,id=%s,%s%s" % (kvm_devid, tapfd, 
> tap_extra)] 
> +      args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s%s" 
> % \ 
> +               (hex(device.pci), device.mac, kvm_devid, kvm_devid, 
> nic_extra) 
>        cmds += ["device_add %s" % args] 
>        utils.WriteFile(self._InstanceNICFile(instance.name, seq), 
> data=tap) 
>   
> @@ -1924,7 +1967,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): 
>        device.pci = self.HotDelDevice(instance, dev_type, device, _, seq) 
>        self.HotAddDevice(instance, dev_type, device, _, seq) 
>   
> -  def _PassTapFd(self, instance, fd, nic): 
> +  def _PassTapFd(self, instance, fds, nic): 
>      """Pass file descriptor to kvm process via monitor socket using 
> SCM_RIGHTS 
>   
>      """ 
> @@ -1932,7 +1975,6 @@ class KVMHypervisor(hv_base.BaseHypervisor): 
>      #       squash common parts between monitor and qmp 
>      kvm_devid = _GenerateDeviceKVMId(constants.HOTPLUG_TARGET_NIC, nic) 
>      command = "getfd %s\n" % kvm_devid 
> -    fds = [fd] 
>      logging.info("%s", fds) 
>      try: 
>        monsock = MonitorSocket(self._InstanceMonitor(instance.name)) 
> diff --git a/lib/hypervisor/hv_kvm/netdev.py 
> b/lib/hypervisor/hv_kvm/netdev.py 
> index a4f2b5d..f5e8067 100644 
> --- a/lib/hypervisor/hv_kvm/netdev.py 
> +++ b/lib/hypervisor/hv_kvm/netdev.py 
> @@ -41,6 +41,7 @@ IFF_TAP = 0x0002 
>  IFF_NO_PI = 0x1000 
>  IFF_ONE_QUEUE = 0x2000 
>  IFF_VNET_HDR = 0x4000 
> +IFF_MULTI_QUEUE = 0x0100 
>   
>   
>  def _GetTunFeatures(fd, _ioctl=fcntl.ioctl): 
> @@ -91,42 +92,82 @@ def _ProbeTapVnetHdr(fd, 
> _features_fn=_GetTunFeatures): 
>    return result 
>   
>   
> -def OpenTap(vnet_hdr=True, name=""): 
> +def _ProbeTapMqVirtioNet(fd, _features_fn=_GetTunFeatures): 
> +  """Check whether to enable the IFF_MULTI_QUEUE flag. 
> + 
> +  This flag was introduced in Linux kernel 3.8. 
> + 
> +   @type fd: int 
> +   @param fd: the file descriptor of /dev/net/tun 
> + 
> +  """ 
> +  flags = _features_fn(fd) 
> + 
> +  if flags is None: 
> +    # Not supported 
> +    return False 
> + 
> +  result = bool(flags & IFF_MULTI_QUEUE) 
> + 
> +  if not result: 
> +    logging.warning("Kernel does not support IFF_MULTI_QUEUE, not 
> enabling") 
> + 
> +  return result 
> + 
> + 
> +def OpenTap(vnet_hdr=True, virtio_net_queues=1, name=""): 
>    """Open a new tap device and return its file descriptor. 
>   
>    This is intended to be used by a qemu-type hypervisor together with the 
> -net 
> -  tap,fd=<fd> command line parameter. 
> +  tap,fd=<fd> or -net tap,fds=x:y:...:z command line parameter. 
>   
>    @type vnet_hdr: boolean 
>    @param vnet_hdr: Enable the VNET Header 
>   
> +  @type virtio_net_queues: int 
> +  @param virtio_net_queues: Set number of tap queues but not more than 8, 
> +                            queues only work with virtio-net device; 
> +                            disabled by default (one queue). 
> + 
>    @type name: string 
>    @param name: name for the TAP interface being created; if an empty 
>                 string is passed, the OS will generate a unique name 
>   
> -  @return: (ifname, tapfd) 
> +  @return: (ifname, [tapfds]) 
>    @rtype: tuple 
>   
>    """ 
> -  try: 
> -    tapfd = os.open("/dev/net/tun", os.O_RDWR) 
> -  except EnvironmentError: 
> -    raise errors.HypervisorError("Failed to open /dev/net/tun") 
> +  tapfds = [] 
>   
> -  flags = IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE 
> +  for _ in range(virtio_net_queues): 
> +    try: 
> +      tapfd = os.open("/dev/net/tun", os.O_RDWR) 
> +    except EnvironmentError: 
> +      raise errors.HypervisorError("Failed to open /dev/net/tun") 
>   
> -  if vnet_hdr and _ProbeTapVnetHdr(tapfd): 
> -    flags |= IFF_VNET_HDR 
> +    flags = IFF_TAP | IFF_NO_PI 
>   
> -  # The struct ifreq ioctl request (see netdevice(7)) 
> -  ifr = struct.pack("16sh", name, flags) 
> +    if vnet_hdr and _ProbeTapVnetHdr(tapfd): 
> +      flags |= IFF_VNET_HDR 
>   
> -  try: 
> -    res = fcntl.ioctl(tapfd, TUNSETIFF, ifr) 
> -  except EnvironmentError, err: 
> -    raise errors.HypervisorError("Failed to allocate a new TAP device: 
> %s" % 
> -                                 err) 
> +    # Check if it's ok to enable IFF_MULTI_QUEUE 
> +    if virtio_net_queues > 1 and _ProbeTapMqVirtioNet(tapfd): 
> +      flags |= IFF_MULTI_QUEUE 
> +    else: 
> +      flags |= IFF_ONE_QUEUE 
> + 
> +    # The struct ifreq ioctl request (see netdevice(7)) 
> +    ifr = struct.pack("16sh", name, flags) 
> + 
> +    try: 
> +      res = fcntl.ioctl(tapfd, TUNSETIFF, ifr) 
> +    except EnvironmentError, err: 
> +      raise errors.HypervisorError("Failed to allocate a new TAP device: 
> %s" % 
> +                                   err) 
> + 
> +    tapfds.append(tapfd) 
>   
>    # Get the interface name from the ioctl 
>    ifname = struct.unpack("16sh", res)[0].strip("\x00") 
> -  return (ifname, tapfd) 
> + 
> +  return (ifname, tapfds) 
> diff --git a/man/gnt-instance.rst b/man/gnt-instance.rst 
> index 28c0090..7ded2e0 100644 
> --- a/man/gnt-instance.rst 
> +++ b/man/gnt-instance.rst 
> @@ -850,6 +850,25 @@ vnet\_hdr 
>   
>      It is set to ``true`` by default. 
>   
> +virtio\_net\_queues 
> +    Valid for the KVM hypervisor. 
> + 
> +    Set a number of queues (file descriptors) for tap device to 
> +    parallelize packets sending or receiving. Tap devices will be 
> +    created with MULTI_QUEUE (IFF_MULTI_QUEUE) support. This only 
> +    works with KVM paravirtual nics (virtio-net) and the maximum 
> +    number of queues is limited to ``8``. Technically this is an
> +    extension of ``vnet_hdr`` which must be enabled for multiqueue 
> +    support. 
> + 
> +    If set to ``1`` queue, it effectively disables multiqueue support 
> +    on the tap and virtio-net devices.
> + 
> +    For instances it is necessary to manually set number of queues (on 
> +    Linux using: ``ethtool -L ethX combined $queues``). 
> + 
> +    It is set to ``1`` by default. 
> + 
>  The ``-O (--os-parameters)`` option allows customisation of the OS 
>  parameters. The actual parameter names and values depends on the OS 
>  being used, but the syntax is the same key=value. For example, setting 
> diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs 
> index 8a01d84..234cdee 100644 
> --- a/src/Ganeti/Constants.hs 
> +++ b/src/Ganeti/Constants.hs 
> @@ -1701,6 +1701,9 @@ hvVga = "vga" 
>  hvVhostNet :: String 
>  hvVhostNet = "vhost_net" 
>   
> +hvVirtioNetQueues :: String 
> +hvVirtioNetQueues = "virtio_net_queues" 
> + 
>  hvVifScript :: String 
>  hvVifScript = "vif_script" 
>   
> @@ -1818,6 +1821,7 @@ hvsParameterTypes = Map.fromList 
>    , (hvUseLocaltime,                    VTypeBool) 
>    , (hvVga,                             VTypeString) 
>    , (hvVhostNet,                        VTypeBool) 
> +  , (hvVirtioNetQueues,                 VTypeInt) 
>    , (hvVifScript,                       VTypeString) 
>    , (hvVifType,                         VTypeString) 
>    , (hvViridian,                        VTypeBool) 
> @@ -3839,6 +3843,7 @@ hvcDefaults = 
>            , (hvSecurityDomain,                  PyValueEx "") 
>            , (hvKvmFlag,                         PyValueEx "") 
>            , (hvVhostNet,                        PyValueEx False) 
> +          , (hvVirtioNetQueues,                 PyValueEx (1 :: Int)) 
>            , (hvKvmUseChroot,                    PyValueEx False) 
>            , (hvKvmUserShutdown,                 PyValueEx False) 
>            , (hvMemPath,                         PyValueEx "") 
> -- 
> 1.8.1.2 
>
>

Reply via email to