Add the new uio_msi kernel driver as a supported driver model. Signed-off-by: Stephen Hemminger <stephen at networkplumber.org> --- lib/librte_eal/common/include/rte_pci.h | 1 + lib/librte_eal/linuxapp/eal/eal_interrupts.c | 94 +++++++++++++++++++--- lib/librte_eal/linuxapp/eal/eal_pci.c | 4 + lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 59 ++++++++++++-- lib/librte_eal/linuxapp/eal/eal_uio_msi.h | 26 ++++++ .../linuxapp/eal/include/exec-env/rte_interrupts.h | 1 + lib/librte_ether/rte_ethdev.c | 1 + tools/dpdk_nic_bind.py | 2 +- 8 files changed, 166 insertions(+), 22 deletions(-) create mode 100644 lib/librte_eal/linuxapp/eal/eal_uio_msi.h
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h index 223d3cd..106f4f7 100644 --- a/lib/librte_eal/common/include/rte_pci.h +++ b/lib/librte_eal/common/include/rte_pci.h @@ -147,6 +147,7 @@ enum rte_kernel_driver { RTE_KDRV_IGB_UIO, RTE_KDRV_VFIO, RTE_KDRV_UIO_GENERIC, + RTE_KDRV_UIO_MSIX, }; /** diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index fd97fc4..8cdab58 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -66,6 +66,7 @@ #include "eal_private.h" #include "eal_vfio.h" +#include "eal_uio_msi.h" #define EAL_INTR_EPOLL_WAIT_FOREVER (-1) @@ -89,9 +90,7 @@ union intr_pipefds{ */ union rte_intr_read_buffer { int uio_intr_count; /* for uio device */ -#ifdef VFIO_PRESENT - uint64_t vfio_intr_count; /* for vfio device */ -#endif + uint64_t eventfd_count; /* for vfio and uio-msi */ uint64_t timerfd_num; /* for timerfd */ char charbuf[16]; /* for others */ }; @@ -356,6 +355,67 @@ vfio_disable_msix(struct rte_intr_handle *intr_handle) { } #endif +/* enable MSI-X interrupts */ +static int +uio_msix_enable(struct rte_intr_handle *intr_handle) +{ + int i, max_intr; + + if (!intr_handle->max_intr || + intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID) + max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1; + else + max_intr = intr_handle->max_intr; + + /* Actual number of MSI-X interrupts might be less than requested */ + for (i = 0; i < max_intr; i++) { + struct uio_msi_irq_set irqs = { + .vec = i, + .fd = intr_handle->efds[i], + }; + + if (i == max_intr - 1) + irqs.fd = intr_handle->fd; + + if (ioctl(intr_handle->vfio_dev_fd, UIO_MSI_IRQ_SET, &irqs) < 0) { + RTE_LOG(ERR, EAL, + "Error enabling MSI-X event %u fd %d (%s)\n", + irqs.vec, irqs.fd, strerror(errno)); + return -1; + } + } + + return 0; +} + +/* disable MSI-X interrupts */ +static int +uio_msix_disable(struct rte_intr_handle *intr_handle) +{ + int i, max_intr; + 
+ if (!intr_handle->max_intr || + intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID) + max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1; + else + max_intr = intr_handle->max_intr; + + for (i = 0; i < max_intr; i++) { + struct uio_msi_irq_set irqs = { + .vec = i, + .fd = -1, + }; + + if (ioctl(intr_handle->vfio_dev_fd, UIO_MSI_IRQ_SET, &irqs) < 0) { + RTE_LOG(ERR, EAL, + "Error disabling MSI-X event %u (%s)\n", + i, strerror(errno)); + return -1; + } + } + return 0; +} + static int uio_intx_intr_disable(struct rte_intr_handle *intr_handle) { @@ -584,6 +644,10 @@ rte_intr_enable(struct rte_intr_handle *intr_handle) if (uio_intx_intr_enable(intr_handle)) return -1; break; + case RTE_INTR_HANDLE_UIO_MSIX: + if (uio_msix_enable(intr_handle)) + return -1; + break; /* not used at this moment */ case RTE_INTR_HANDLE_ALARM: return -1; @@ -628,6 +692,10 @@ rte_intr_disable(struct rte_intr_handle *intr_handle) if (uio_intx_intr_disable(intr_handle)) return -1; break; + case RTE_INTR_HANDLE_UIO_MSIX: + if (uio_msix_disable(intr_handle)) + return -1; + break; /* not used at this moment */ case RTE_INTR_HANDLE_ALARM: return -1; @@ -696,16 +764,19 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) case RTE_INTR_HANDLE_UIO: bytes_read = sizeof(buf.uio_intr_count); break; + case RTE_INTR_HANDLE_ALARM: bytes_read = sizeof(buf.timerfd_num); break; -#ifdef VFIO_PRESENT + + case RTE_INTR_HANDLE_UIO_MSIX: +#ifdef RTE_EAL_VFIO case RTE_INTR_HANDLE_VFIO_MSIX: case RTE_INTR_HANDLE_VFIO_MSI: case RTE_INTR_HANDLE_VFIO_LEGACY: - bytes_read = sizeof(buf.vfio_intr_count); - break; #endif + bytes_read = sizeof(buf.eventfd_count); + break; default: bytes_read = 1; break; @@ -895,17 +966,14 @@ static void eal_intr_proc_rxtx_intr(int fd, struct rte_intr_handle *intr_handle) { union rte_intr_read_buffer buf; - int bytes_read = 1; + int bytes_read = sizeof(buf.eventfd_count); - if (intr_handle->type != RTE_INTR_HANDLE_VFIO_MSIX) { - RTE_LOG(ERR, EAL, "intr type should be VFIO_MSIX\n"); + if 
(intr_handle->type != RTE_INTR_HANDLE_VFIO_MSIX && + intr_handle->type != RTE_INTR_HANDLE_UIO_MSIX) { + RTE_LOG(ERR, EAL, "intr type should be VFIO_MSIX or UIO_MSIX\n"); return; } -#ifdef VFIO_PRESENT - bytes_read = sizeof(buf.vfio_intr_count); -#endif - /** * read out to clear the ready-to-be-read flag * for epoll_wait. diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index d2adc66..814dc7c 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -345,6 +345,8 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dev->kdrv = RTE_KDRV_IGB_UIO; else if (!strcmp(driver, "uio_pci_generic")) dev->kdrv = RTE_KDRV_UIO_GENERIC; + else if (!strcmp(driver, "uio_msi")) + dev->kdrv = RTE_KDRV_UIO_MSIX; else dev->kdrv = RTE_KDRV_UNKNOWN; } else if (ret < 0) { @@ -576,6 +578,7 @@ pci_map_device(struct rte_pci_device *dev) ret = pci_vfio_map_resource(dev); #endif break; + case RTE_KDRV_UIO_MSIX: case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: /* map resources for devices that use uio */ @@ -603,6 +606,7 @@ pci_unmap_device(struct rte_pci_device *dev) case RTE_KDRV_VFIO: RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n"); break; + case RTE_KDRV_UIO_MSIX: case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: /* unmap resources for devices that use uio */ diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index b5116a7..7eee828 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -38,6 +38,7 @@ #include <sys/stat.h> #include <sys/mman.h> #include <linux/pci_regs.h> +#include <sys/eventfd.h> #include <rte_log.h> #include <rte_pci.h> @@ -259,13 +260,42 @@ pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, return uio_num; } +static int +pci_uio_msix_init(struct rte_pci_device *dev) +{ + int i, fd; + + /* set up an eventfd for interrupts */ + fd = eventfd(0, EFD_NONBLOCK | 
EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up irq eventfd (%s)\n", + strerror(errno)); + return -1; + } + dev->intr_handle.fd = fd; + + /* an additional eventfd for each vector */ + for (i = 0; i < RTE_MAX_RXTX_INTR_VEC_ID; i++) { + fd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, + " cannot set up eventfd (%s)\n", + strerror(errno)); + return -1; + } + + dev->intr_handle.efds[i] = fd; + } + + return 0; +} + /* map the PCI resource of a PCI device in virtual memory */ int pci_uio_map_resource(struct rte_pci_device *dev) { - int i, map_idx; + int i, fd, map_idx; char dirname[PATH_MAX]; - char cfgname[PATH_MAX]; char devname[PATH_MAX]; /* contains the /dev/uioX */ void *mapaddr; int uio_num; @@ -274,11 +304,15 @@ pci_uio_map_resource(struct rte_pci_device *dev) struct mapped_pci_resource *uio_res; struct mapped_pci_res_list *uio_res_list = RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); struct pci_map *maps; + char cfgname[PATH_MAX]; dev->intr_handle.fd = -1; - dev->intr_handle.uio_cfg_fd = -1; + dev->intr_handle.vfio_dev_fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + for (i = 0; i < RTE_MAX_RXTX_INTR_VEC_ID; i++) + dev->intr_handle.efds[i] = -1; + /* secondary processes - use already recorded details */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) return pci_uio_map_secondary(dev); @@ -293,15 +327,15 @@ pci_uio_map_resource(struct rte_pci_device *dev) snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); /* save fd if in primary process */ - dev->intr_handle.fd = open(devname, O_RDWR); - if (dev->intr_handle.fd < 0) { + fd = open(devname, O_RDWR); + if (fd < 0) { RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", devname, strerror(errno)); return -1; } snprintf(cfgname, sizeof(cfgname), - "/sys/class/uio/uio%u/device/config", uio_num); + "/sys/class/uio/uio%u/device/config", uio_num); dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR); if (dev->intr_handle.uio_cfg_fd < 0) { RTE_LOG(ERR, EAL, 
"Cannot open %s: %s\n", @@ -309,9 +343,17 @@ pci_uio_map_resource(struct rte_pci_device *dev) return -1; } - if (dev->kdrv == RTE_KDRV_IGB_UIO) + if (dev->kdrv == RTE_KDRV_UIO_MSIX) { + dev->intr_handle.vfio_dev_fd = fd; + dev->intr_handle.type = RTE_INTR_HANDLE_UIO_MSIX; + if (pci_uio_msix_init(dev) < 0) + return -1; + } else if (dev->kdrv == RTE_KDRV_IGB_UIO) { + dev->intr_handle.fd = fd; dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - else { + } else { + + dev->intr_handle.fd = fd; dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; /* set bus master that is not done by uio_pci_generic */ @@ -460,6 +502,7 @@ pci_uio_unmap_resource(struct rte_pci_device *dev) /* close fd if in primary process */ close(dev->intr_handle.fd); + close(dev->intr_handle.uio_cfg_fd); dev->intr_handle.fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; diff --git a/lib/librte_eal/linuxapp/eal/eal_uio_msi.h b/lib/librte_eal/linuxapp/eal/eal_uio_msi.h new file mode 100644 index 0000000..f01f302 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_uio_msi.h @@ -0,0 +1,26 @@ +/* + * UIO_MSI API definition + * + * Copyright (c) 2015 by Brocade Communications Systems, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef EAL_UIO_MSI_H +#define EAL_UIO_MSI_H + +/* Driver is not upstream yet. 
*/ + +#include <sys/ioctl.h> + +struct uio_msi_irq_set { + uint32_t vec; + int fd; +}; + +#define UIO_MSI_BASE 0x86 +#define UIO_MSI_IRQ_SET _IOW('I', UIO_MSI_BASE+1, struct uio_msi_irq_set) + +#endif diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h index 9843001..d3cf680 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h @@ -44,6 +44,7 @@ enum rte_intr_handle_type { RTE_INTR_HANDLE_UNKNOWN = 0, RTE_INTR_HANDLE_UIO, /**< uio device handle */ RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */ + RTE_INTR_HANDLE_UIO_MSIX, /**< uio with MSI-X support */ RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */ RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */ diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index cf9a79a..3fbc4a1 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -515,6 +515,7 @@ rte_eth_dev_is_detachable(uint8_t port_id) switch (rte_eth_devices[port_id].pci_dev->kdrv) { case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: + case RTE_KDRV_UIO_MSIX: break; case RTE_KDRV_VFIO: default: diff --git a/tools/dpdk_nic_bind.py b/tools/dpdk_nic_bind.py index 8523f82..20b4b06 100755 --- a/tools/dpdk_nic_bind.py +++ b/tools/dpdk_nic_bind.py @@ -43,7 +43,7 @@ ETHERNET_CLASS = "0200" # Each device within this is itself a dictionary of device properties devices = {} # list of supported DPDK drivers -dpdk_drivers = [ "igb_uio", "vfio-pci", "uio_pci_generic" ] +dpdk_drivers = [ "igb_uio", "vfio-pci", "uio_pci_generic", "uio_msi" ] # command-line arg flags b_flag = None -- 2.1.4