[dpdk-dev] [PATCH] maintainers: claim responsibility for xen
As some users are still using xen as the hypervisor, I suggest to continue support for xen in DPDK. And from 16.11, I will be the maintainer of all xen-related files. Signed-off-by: Jianfeng Tan --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ba12d1b..d2fa2db 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -190,6 +190,7 @@ M: Anatoly Burakov F: lib/librte_eal/linuxapp/eal/*vfio* Linux Xen +M: Jianfeng Tan F: lib/librte_eal/linuxapp/xen_dom0/ F: lib/librte_eal/linuxapp/eal/*xen* F: lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h -- 2.7.4
[dpdk-dev] [RFC] igb_uio: deprecate iomem and ioport mapping
Previously in igb_uio, iomem was mapped, and both ioport and iomem were recorded into the uio framework, which is redundant and makes the code too complex. For iomem, DPDK user space code never opens or reads files under /sys/pci/bus/devices/:xx:xx.x/uio/uioY/maps/. Instead, the /sys/pci/bus/devices/:xx:xx.x/resourceY files are used to map device memory. For ioport, non-x86 platforms cannot read from files under /sys/pci/bus/devices/:xx:xx.x/uio/uioY/portio/ directly, because non-x86 platforms need to map the port region for access in user space, see the non-x86 version of pci_uio_ioport_map(). x86 platforms can use the same approach as uio_pci_generic. This patch deprecates iomem and ioport mapping in the igb_uio kernel module, and adjusts the iomem implementation in both igb_uio and uio_pci_generic: - for x86 platform, get ports info from /proc/ioports; - for non-x86 platform, map and get ports info by pci_uio_ioport_map(). Note: this will affect those applications that use files under /sys/pci/bus/devices/:xx:xx.x/uio/uioY/maps/ and /sys/pci/bus/devices/:xx:xx.x/uio/uioY/portio/.
Signed-off-by: Jianfeng Tan --- lib/librte_eal/linuxapp/eal/eal_pci.c | 4 - lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 56 +- lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 119 ++ 3 files changed, 9 insertions(+), 170 deletions(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index cd9de7c..f23e99d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -629,8 +629,6 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, break; #endif case RTE_KDRV_IGB_UIO: - ret = pci_uio_ioport_map(dev, bar, p); - break; case RTE_KDRV_UIO_GENERIC: #if defined(RTE_ARCH_X86) ret = pci_ioport_map(dev, bar, p); @@ -718,8 +716,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) break; #endif case RTE_KDRV_IGB_UIO: - ret = pci_uio_ioport_unmap(p); - break; case RTE_KDRV_UIO_GENERIC: #if defined(RTE_ARCH_X86) ret = 0; diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index 1786b75..28d09ed 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -370,53 +370,7 @@ error: return -1; } -#if defined(RTE_ARCH_X86) -int -pci_uio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - char dirname[PATH_MAX]; - char filename[PATH_MAX]; - int uio_num; - unsigned long start; - - uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0); - if (uio_num < 0) - return -1; - - /* get portio start */ - snprintf(filename, sizeof(filename), -"%s/portio/port%d/start", dirname, bar); - if (eal_parse_sysfs_value(filename, ) < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n", - __func__); - return -1; - } - /* ensure we don't get anything funny here, read/write will cast to -* uin16_t */ - if (start > UINT16_MAX) - return -1; - - /* FIXME only for primary process ? 
*/ - if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) { - - snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); - dev->intr_handle.fd = open(filename, O_RDWR); - if (dev->intr_handle.fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - filename, strerror(errno)); - return -1; - } - dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - } - - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start); - - p->base = start; - p->len = 0; - return 0; -} -#else +#if !defined(RTE_ARCH_X86) int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) @@ -553,14 +507,10 @@ pci_uio_ioport_write(struct rte_pci_ioport *p, } } +#if !defined(RTE_ARCH_X86) int pci_uio_ioport_unmap(struct rte_pci_ioport *p) { -#if defined(RTE_ARCH_X86) - RTE_SET_USED(p); - /* FIXME close intr fd ? */ - return 0; -#else return munmap((void *)(uintptr_t)p->base, p->len); -#endif } +#endif diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index df41e45..e9d78fb 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -216,107 +216,6 @@ igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma) } #endif -/* Remap pci resources desc
[dpdk-dev] [PATCH v3] eal: restrict cores detection
This patch uses pthread_getaffinity_np() to narrow down detected cores before parsing coremask (-c), corelist (-l), and coremap (--lcores). The purpose of this patch is to leave out these core related options when DPDK applications are deployed under container env, so that users only specify core restriction when starting the instance. Note: previously, some users are using isolated CPUs, which could be excluded by default. Please add commands like taskset to use those cores. Test example: $ taskset 0xc ./examples/helloworld/build/helloworld -m 1024 Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- v3: - Choose a more descriptive variable name, and remove comments as suggested by Stephen Hemminger. v2: - Make it as default instead of adding the new options. lib/librte_eal/common/eal_common_lcore.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c index 2cd4132..71c575c 100644 --- a/lib/librte_eal/common/eal_common_lcore.c +++ b/lib/librte_eal/common/eal_common_lcore.c @@ -57,6 +57,12 @@ rte_eal_cpu_init(void) struct rte_config *config = rte_eal_get_configuration(); unsigned lcore_id; unsigned count = 0; + rte_cpuset_t affinity_set; + pthread_t tid = pthread_self(); + + if (pthread_getaffinity_np(tid, sizeof(rte_cpuset_t), + &affinity_set) < 0) + CPU_ZERO(&affinity_set); /* * Parse the maximum set of logical cores, detect the subset of running @@ -70,7 +76,8 @@ rte_eal_cpu_init(void) /* in 1:1 mapping, record related cpu detected state */ lcore_config[lcore_id].detected = eal_cpu_detected(lcore_id); - if (lcore_config[lcore_id].detected == 0) { + if (lcore_config[lcore_id].detected == 0 || + !CPU_ISSET(lcore_id, &affinity_set)) { config->lcore_role[lcore_id] = ROLE_OFF; lcore_config[lcore_id].core_index = -1; continue; -- 2.7.4
[dpdk-dev] [PATCH v2] eal: restrict cores detection
This patch uses pthread_getaffinity_np() to narrow down detected cores before parsing coremask (-c), corelist (-l), and coremap (--lcores). The purpose of this patch is to leave out these core related options when DPDK applications are deployed under container env, so that users only specify core restriction when starting the instance. Note: previously, some users are using isolated CPUs, which could be excluded by default. Please add commands like taskset to use those cores. Test example: $ taskset 0xc ./examples/helloworld/build/helloworld -m 1024 Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- v2: - Make it as default instead of adding the new options. lib/librte_eal/common/eal_common_lcore.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c index 2cd4132..62e4f67 100644 --- a/lib/librte_eal/common/eal_common_lcore.c +++ b/lib/librte_eal/common/eal_common_lcore.c @@ -57,6 +57,14 @@ rte_eal_cpu_init(void) struct rte_config *config = rte_eal_get_configuration(); unsigned lcore_id; unsigned count = 0; + rte_cpuset_t cs; + pthread_t tid = pthread_self(); + + /* Add below method to obtain core restrictions, like ulimit, +* cgroup.cpuset, etc. Will not use those cores, which are rebuffed. +*/ + if (pthread_getaffinity_np(tid, sizeof(rte_cpuset_t), &cs) < 0) + CPU_ZERO(&cs); /* * Parse the maximum set of logical cores, detect the subset of running @@ -70,7 +78,8 @@ rte_eal_cpu_init(void) /* in 1:1 mapping, record related cpu detected state */ lcore_config[lcore_id].detected = eal_cpu_detected(lcore_id); - if (lcore_config[lcore_id].detected == 0) { + if (lcore_config[lcore_id].detected == 0 || + !CPU_ISSET(lcore_id, &cs)) { config->lcore_role[lcore_id] = ROLE_OFF; lcore_config[lcore_id].core_index = -1; continue; -- 2.7.4
[dpdk-dev] [PATCH 3/3] net/virtio_user: fix dev not freed after init error
Currently, when a virtio_user device fails to start (e.g., the vhost unix socket does not exist), the init function does not return struct rte_eth_dev (and some other structs) back to the ether layer. What's more, it does not report the error to the upper layer. The fix is to free those structs and report an error when failing to start virtio_user devices. Fixes: ce2eabdd43ec ("net/virtio-user: add virtual device") Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user_ethdev.c | 23 +++ 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index daef09b..62ccb0b 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -313,6 +313,17 @@ virtio_user_eth_dev_alloc(const char *name) return eth_dev; } +static void +virtio_user_eth_dev_free(struct rte_eth_dev *eth_dev) +{ + struct rte_eth_dev_data *data = eth_dev->data; + struct virtio_hw *hw = data->dev_private; + + rte_free(hw->virtio_user_dev); + rte_free(hw); + rte_eth_dev_release_port(eth_dev); +} + /* Dev initialization routine. Invoked once for each virtio vdev at * EAL init time, see rte_eal_dev_init(). * Returns 0 on success.
@@ -328,7 +339,7 @@ virtio_user_pmd_devinit(const char *name, const char *params) uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ; char *path = NULL; char *mac_addr = NULL; - int ret = -1; + int result = -1, ret; if (!params || params[0] == '\0') { PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user", @@ -411,15 +422,19 @@ virtio_user_pmd_devinit(const char *name, const char *params) hw = eth_dev->data->dev_private; if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq, -queue_size, mac_addr) < 0) +queue_size, mac_addr) < 0) { + PMD_INIT_LOG(ERR, "virtio_user_dev_init fails"); + virtio_user_eth_dev_free(eth_dev); goto end; + } /* previously called by rte_eal_pci_probe() for physical dev */ if (eth_virtio_dev_init(eth_dev) < 0) { PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails"); + virtio_user_eth_dev_free(eth_dev); goto end; } - ret = 0; + result = 0; end: if (kvlist) @@ -428,7 +443,7 @@ end: free(path); if (mac_addr) free(mac_addr); - return ret; + return result; } /** Called by rte_eth_dev_detach() */ -- 2.7.4
[dpdk-dev] [PATCH 2/3] net/virtio_user: fix wrong sequence of messages
When virtio_user is used with VPP's native vhost user, it cannot send/receive any packets. The root cause is that vpp-vhost-user interprets the message VHOST_USER_SET_FEATURES as putting this device into the init state, i.e., zeroing all related structures. However, the previous code sends this message last in the whole initialization process, which causes all previously sent information to be zeroed. To fix this issue, we rearrange the sequence of those messages. - step 0, send VHOST_USER_SET_VRING_CALL so that vhost allocates virtqueue structures; - step 1, send VHOST_USER_SET_FEATURES to confirm the features; - step 2, send VHOST_USER_SET_MEM_TABLE to share mem regions; - step 3, send VHOST_USER_SET_VRING_NUM, VHOST_USER_SET_VRING_BASE, VHOST_USER_SET_VRING_ADDR, VHOST_USER_SET_VRING_KICK for each queue; - ... Fixes: 37a7eb2ae816 ("net/virtio-user: add device emulation layer") Reported-by: Zhihong Wang Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/virtio_user_dev.c | 120 ++- 1 file changed, 72 insertions(+), 48 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 2c4e999..afdf721 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -45,20 +45,14 @@ #include "../virtio_ethdev.h" static int -virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel) { - int callfd, kickfd; + /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come +* firstly because vhost depends on this msg to allocate virtqueue +* pair.
+*/ + int callfd; struct vhost_vring_file file; - struct vhost_vring_state state; - struct vring *vring = >vrings[queue_sel]; - struct vhost_vring_addr addr = { - .index = queue_sel, - .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, - .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, - .used_user_addr = (uint64_t)(uintptr_t)vring->used, - .log_guest_addr = 0, - .flags = 0, /* disable log */ - }; /* May use invalid flag, but some backend leverages kickfd and callfd as * criteria to judge if dev is alive. so finally we use real event_fd. @@ -68,22 +62,30 @@ virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) PMD_DRV_LOG(ERR, "callfd error, %s\n", strerror(errno)); return -1; } - kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); - if (kickfd < 0) { - close(callfd); - PMD_DRV_LOG(ERR, "kickfd error, %s\n", strerror(errno)); - return -1; - } - - /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come -* firstly because vhost depends on this msg to allocate virtqueue -* pair. -*/ file.index = queue_sel; file.fd = callfd; vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_CALL, ); dev->callfds[queue_sel] = callfd; + return 0; +} + +static int +virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +{ + int kickfd; + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vring *vring = >vrings[queue_sel]; + struct vhost_vring_addr addr = { + .index = queue_sel, + .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, + .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, + .used_user_addr = (uint64_t)(uintptr_t)vring->used, + .log_guest_addr = 0, + .flags = 0, /* disable log */ + }; + state.index = queue_sel; state.num = vring->num; vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_NUM, ); @@ -97,6 +99,12 @@ virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) * lastly because vhost depends on this msg to judge if * virtio is ready. 
*/ + kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (kickfd < 0) { + PMD_DRV_LOG(ERR, "kickfd error, %s\n", strerror(errno)); + return -1; + } + file.index = queue_sel; file.fd = kickfd; vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_KICK, ); dev->kickfds[queue_sel] = kickfd; @@ -104,44 +112,43 @@ virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) return 0; } -int -virtio_user_start_device(struct virtio_user_dev *dev) +static int +virtio_user_queue_setup(struct virtio_user_dev *dev, + int (*fn)(struct virtio_user_dev *, uint32_t)) { - uint64_t features
[dpdk-dev] [PATCH 1/3] net/virtio_user: fix queue pair not enabled
When virtio_user is used with OVS-DPDK (with mq disabled), it cannot receive any packets. It's because when vhost provides VHOST_USER_GET_PROTOCOL_FEATURES, all queue pairs are initialized in the disabled state. Quote QEMU/docs/specs/vhost-user.txt: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated, the ring is initialized in an enabled state. If VHOST_USER_F_PROTOCOL_FEATURES has been negotiated, the ring is initialized in a disabled state. In OVS-DPDK, all queue pairs are in the disabled state by default. When used with QEMU, QEMU will set it as enabled in the process of initialization. So the fix here is to include similar logic in virtio_user. Fixes: 37a7eb2ae816 ("net/virtio-user: add device emulation layer") Reported-by: Ning Li Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/virtio_user_dev.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 376c9cf..2c4e999 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -131,6 +131,13 @@ virtio_user_start_device(struct virtio_user_dev *dev) } } + /* As this feature is negotiated from the vhost, all queues are +* initialized in the disabled state. For non-mq case, we enable +* the 1st queue pair by default. +*/ + if (dev->features & (1ull << VHOST_USER_GET_PROTOCOL_FEATURES)) + vhost_user_enable_queue_pair(dev->vhostfd, 0, 1); + /* After setup all virtqueues, we need to set_features so that these * features can be set into each virtqueue in vhost side. And before * that, make sure VHOST_USER_F_PROTOCOL_FEATURES is added if mq is -- 2.7.4
[dpdk-dev] [PATCH 0/3] fix virtio_user issues
Patch 1: fix issue when using virtio_user with OVS-DPDK. Patch 2: fix issue when using virtio_user with VPP. Patch 3: fix issue when failing to start virtio_user devices. Signed-off-by: Jianfeng Tan Jianfeng Tan (3): net/virtio_user: fix queue pair not enabled net/virtio_user: fix wrong sequence of messages net/virtio_user: fix dev not freed after init error drivers/net/virtio/virtio_user/virtio_user_dev.c | 115 ++- drivers/net/virtio/virtio_user_ethdev.c | 23 - 2 files changed, 92 insertions(+), 46 deletions(-) -- 2.7.4
[dpdk-dev] [PATCH 2/2] examples/tep_term: fix inner L4 checksum failure
When sending packets from a virtual machine that need TSO performed by the hardware NIC, the inner L4 checksum is not correct on the other side of the cable. This is because get_psd_sum() depends on PKT_TX_TCP_SEG to calculate the pseudo-header checksum, but currently this bit is set after the function get_psd_sum() is called. The fix is straightforward: move the bit setting before get_psd_sum() is called. Fixes: a50245ede72a ("examples/tep_term: initialize VXLAN sample") Signed-off-by: Jianfeng Tan --- examples/tep_termination/vxlan.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/tep_termination/vxlan.c b/examples/tep_termination/vxlan.c index 4bad33d..155415c 100644 --- a/examples/tep_termination/vxlan.c +++ b/examples/tep_termination/vxlan.c @@ -141,14 +141,17 @@ process_inner_cksums(struct ether_hdr *eth_hdr, union tunnel_offload_info *info) ethertype, ol_flags); } else if (l4_proto == IPPROTO_TCP) { tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len); - ol_flags |= PKT_TX_TCP_CKSUM; - tcp_hdr->cksum = get_psd_sum(l3_hdr, ethertype, - ol_flags); + /* Put PKT_TX_TCP_SEG bit setting before get_psd_sum(), because +* it depends on PKT_TX_TCP_SEG to calculate pseudo-header +* checksum. +*/ if (tso_segsz != 0) { ol_flags |= PKT_TX_TCP_SEG; info->tso_segsz = tso_segsz; info->l4_len = sizeof(struct tcp_hdr); } + ol_flags |= PKT_TX_TCP_CKSUM; + tcp_hdr->cksum = get_psd_sum(l3_hdr, ethertype, ol_flags); } else if (l4_proto == IPPROTO_SCTP) { sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len); -- 2.7.4
[dpdk-dev] [PATCH 1/2] examples/tep_term: fix offload on VXLAN failure
Based on previous fix of offload on VXLAN using i40e, applications need to set proper tunneling type on ol_flags so that i40e driver can pass it to NIC. Fixes: a50245ede72a ("examples/tep_term: initialize VXLAN sample") Signed-off-by: Jianfeng Tan --- examples/tep_termination/vxlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/tep_termination/vxlan.c b/examples/tep_termination/vxlan.c index 5ee1f95..4bad33d 100644 --- a/examples/tep_termination/vxlan.c +++ b/examples/tep_termination/vxlan.c @@ -237,6 +237,8 @@ encapsulation(struct rte_mbuf *m, uint8_t queue_id) m->outer_l2_len = sizeof(struct ether_hdr); m->outer_l3_len = sizeof(struct ipv4_hdr); + ol_flags |= PKT_TX_TUNNEL_VXLAN; + m->ol_flags |= ol_flags; m->tso_segsz = tx_offload.tso_segsz; -- 2.7.4
[dpdk-dev] [PATCH 0/2] Two offloading issues of tep_term
This patch set depends on: - http://dpdk.org/ml/archives/dev/2016-August/044924.html Patch 1: fill tunneling type. Patch 2: inner L4 checksum error. Signed-off-by: Jianfeng Tan Jianfeng Tan (2): examples/tep_term: fix offload on VXLAN failure examples/tep_term: fix inner L4 checksum failure examples/tep_termination/vxlan.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) -- 2.7.4
[dpdk-dev] [PATCH v4 3/3] app/testpmd: fix Tx offload on tunneling packet
Tx offload on tunneling packet now requires applications to correctly set tunneling type. Without setting it, i40e driver does not parse tunneling parameters. Besides that, add a check to see if NIC supports TSO on tunneling packet when executing "csum parse_tunnel on _port" after "tso set _size _port" or the other way around. Fixes: b51c47536a9e ("app/testpmd: support TSO in checksum forward engine") Signed-off-by: Zhe Tao Signed-off-by: Jianfeng Tan --- app/test-pmd/cmdline.c | 42 -- app/test-pmd/csumonly.c | 37 + 2 files changed, 65 insertions(+), 14 deletions(-) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index f90befc..561839f 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -3426,6 +3426,26 @@ struct cmd_csum_tunnel_result { }; static void +check_tunnel_tso_support(uint8_t port_id) +{ + struct rte_eth_dev_info dev_info; + + rte_eth_dev_info_get(port_id, _info); + if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)) + printf("Warning: TSO enabled but VXLAN TUNNEL TSO not " + "supported by port %d\n", port_id); + if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GRE_TNL_TSO)) + printf("Warning: TSO enabled but GRE TUNNEL TSO not " + "supported by port %d\n", port_id); + if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPIP_TNL_TSO)) + printf("Warning: TSO enabled but IPIP TUNNEL TSO not " + "supported by port %d\n", port_id); + if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) + printf("Warning: TSO enabled but GENEVE TUNNEL TSO not " + "supported by port %d\n", port_id); +} + +static void cmd_csum_tunnel_parsed(void *parsed_result, __attribute__((unused)) struct cmdline *cl, __attribute__((unused)) void *data) @@ -3435,10 +3455,13 @@ cmd_csum_tunnel_parsed(void *parsed_result, if (port_id_is_invalid(res->port_id, ENABLED_WARN)) return; - if (!strcmp(res->onoff, "on")) + if (!strcmp(res->onoff, "on")) { ports[res->port_id].tx_ol_flags |= TESTPMD_TX_OFFLOAD_PARSE_TUNNEL; - else + + if 
(ports[res->port_id].tso_segsz != 0) + check_tunnel_tso_support(res->port_id); + } else ports[res->port_id].tx_ol_flags &= (~TESTPMD_TX_OFFLOAD_PARSE_TUNNEL); @@ -3502,10 +3525,17 @@ cmd_tso_set_parsed(void *parsed_result, /* display warnings if configuration is not supported by the NIC */ rte_eth_dev_info_get(res->port_id, _info); - if ((ports[res->port_id].tso_segsz != 0) && - (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) == 0) { - printf("Warning: TSO enabled but not " - "supported by port %d\n", res->port_id); + if (ports[res->port_id].tso_segsz != 0) { + if (ports[res->port_id].tx_ol_flags & + TESTPMD_TX_OFFLOAD_PARSE_TUNNEL) + check_tunnel_tso_support(res->port_id); + /* For packets, +* (1) when tnl parse is disabled; +* (2) when tnl parse is enabled but not deemed as tnl pkts +*/ + if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)) + printf("Warning: TSO enabled but not " + "supported by port %d\n", res->port_id); } } diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index ac4bd8f..0a1f95d 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -412,12 +412,10 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, return ol_flags; } -/* Calculate the checksum of outer header (only vxlan is supported, - * meaning IP + UDP). The caller already checked that it's a vxlan - * packet */ +/* Calculate the checksum of outer header */ static uint64_t process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info, - uint16_t testpmd_ol_flags) + uint16_t testpmd_ol_flags, int tso_enabled) { struct ipv4_hdr *ipv4_hdr = outer_l3_hdr; struct ipv6_hdr *ipv6_hdr = outer_l3_hdr; @@ -438,10 +436,20 @@ process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info, if (info->outer_l4_proto != IPPROTO_UDP) return ol_flags; - /* outer UDP checksum is always done in software as we have no -* hardware supporting it
[dpdk-dev] [PATCH v4 2/3] net/i40e: add TSO support on tunneling packet
To enable Tx side offload on tunneling packet, driver should set correct tunneling parameters: (1) EIPT, External IP header type; (2) EIPLEN, External IP; (3) L4TUNT; (4) L4TUNLEN. This parsing behavior is based on (ol_flag & PKT_TX_TUNNEL_MASK). And when it's a tunneling packet, MACLEN defines the outer L2 header. Also, we define TSO on each kind of tunneling type as a capabilities. Now only i40e declares to support them. Signed-off-by: Zhe Tao Signed-off-by: Jianfeng Tan --- drivers/net/i40e/i40e_ethdev.c | 6 ++- drivers/net/i40e/i40e_rxtx.c | 90 +- lib/librte_ether/rte_ethdev.h | 4 ++ 3 files changed, 72 insertions(+), 28 deletions(-) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index d0aeb70..64ba570 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -2576,7 +2576,11 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) DEV_TX_OFFLOAD_TCP_CKSUM | DEV_TX_OFFLOAD_SCTP_CKSUM | DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | - DEV_TX_OFFLOAD_TCP_TSO; + DEV_TX_OFFLOAD_TCP_TSO | + DEV_TX_OFFLOAD_VXLAN_TNL_TSO | + DEV_TX_OFFLOAD_GRE_TNL_TSO | + DEV_TX_OFFLOAD_IPIP_TNL_TSO | + DEV_TX_OFFLOAD_GENEVE_TNL_TSO; dev_info->hash_key_size = (I40E_PFQF_HKEY_MAX_INDEX + 1) * sizeof(uint32_t); dev_info->reta_size = pf->hash_lut_size; diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 554d167..4eac713 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -779,33 +779,65 @@ i40e_rxd_build_fdir(volatile union i40e_rx_desc *rxdp, struct rte_mbuf *mb) #endif return flags; } + +static inline void +i40e_parse_tunneling_params(uint64_t ol_flags, + union i40e_tx_offload tx_offload, + uint32_t *cd_tunneling) +{ + /* EIPT: External (outer) IP header type */ + if (ol_flags & PKT_TX_OUTER_IP_CKSUM) + *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; + else if (ol_flags & PKT_TX_OUTER_IPV4) + *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; + else if (ol_flags & 
PKT_TX_OUTER_IPV6) + *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; + + /* EIPLEN: External (outer) IP header length, in DWords */ + *cd_tunneling |= (tx_offload.outer_l3_len >> 2) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* L4TUNT: L4 Tunneling Type */ + switch (ol_flags & PKT_TX_TUNNEL_MASK) { + case PKT_TX_TUNNEL_IPIP: + /* for non UDP / GRE tunneling, set to 00b */ + break; + case PKT_TX_TUNNEL_VXLAN: + case PKT_TX_TUNNEL_GENEVE: + *cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING; + break; + case PKT_TX_TUNNEL_GRE: + *cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING; + break; + default: + PMD_TX_LOG(ERR, "Tunnel type not supported\n"); + return; + } + + /* L4TUNLEN: L4 Tunneling Length, in Words +* +* We depend on app to set rte_mbuf.l2_len correctly. +* For IP in GRE it should be set to the length of the GRE +* header; +* for MAC in GRE or MAC in UDP it should be set to the length +* of the GRE or UDP headers plus the inner MAC up to including +* its last Ethertype. +*/ + *cd_tunneling |= (tx_offload.l2_len >> 1) << + I40E_TXD_CTX_QW0_NATLEN_SHIFT; +} + static inline void i40e_txd_enable_checksum(uint64_t ol_flags, uint32_t *td_cmd, uint32_t *td_offset, - union i40e_tx_offload tx_offload, - uint32_t *cd_tunneling) + union i40e_tx_offload tx_offload) { - /* UDP tunneling packet TX checksum offload */ - if (ol_flags & PKT_TX_OUTER_IP_CKSUM) { - + /* Set MACLEN */ + if (ol_flags & PKT_TX_TUNNEL_MASK) *td_offset |= (tx_offload.outer_l2_len >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; - - if (ol_flags & PKT_TX_OUTER_IP_CKSUM) - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; - else if (ol_flags & PKT_TX_OUTER_IPV4) - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - else if (ol_flags & PKT_TX_OUTER_IPV6) - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; - - /* Now set the ctx descriptor fields */ - *cd_tunneling |= (tx_offload.outer_l3_len >> 2) << -
[dpdk-dev] [PATCH v4 1/3] mbuf: add Tx side tunneling type
To support tunneling packet offload capabilities on Tx side, PMDs (e.g., i40e) need to know what kind of tunneling type of this packet. Instead of analyzing the packet itself, we depend on applications to correctly set the tunneling type. These flags are defined inside rte_mbuf.ol_flags. Signed-off-by: Zhe Tao Signed-off-by: Jianfeng Tan --- lib/librte_mbuf/rte_mbuf.c | 4 lib/librte_mbuf/rte_mbuf.h | 17 - 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c index 4846b89..4505abb 100644 --- a/lib/librte_mbuf/rte_mbuf.c +++ b/lib/librte_mbuf/rte_mbuf.c @@ -302,6 +302,10 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask) case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM"; case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4"; case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6"; + case PKT_TX_TUNNEL_VXLAN: return "PKT_TX_TUNNEL_VXLAN"; + case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE"; + case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP"; + case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE"; default: return NULL; } } diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index 101485f..0eec112 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -129,6 +129,18 @@ extern "C" { /* add new TX flags here */ /** + * Bits 45:48 used for the tunnel type. + * When doing Tx offload like TSO or checksum, the HW needs to configure the + * tunnel type into the HW descriptors. + */ +#define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45) +#define PKT_TX_TUNNEL_GRE (0x2ULL << 45) +#define PKT_TX_TUNNEL_IPIP(0x3ULL << 45) +#define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45) +/* add new TX TUNNEL type here */ +#define PKT_TX_TUNNEL_MASK(0xFULL << 45) + +/** * Second VLAN insertion (QinQ) flag. */ #define PKT_TX_QINQ_PKT(1ULL << 49) /**< TX packet with double VLAN inserted. 
*/ @@ -863,7 +875,10 @@ struct rte_mbuf { union { uint64_t tx_offload; /**< combined for easy fetch */ struct { - uint64_t l2_len:7; /**< L2 (MAC) Header Length. */ + uint64_t l2_len:7; + /**< L2 (MAC) Header Length for non-tunneling pkt. +* Outer_L4_len + ... + Inner_L2_len for tunneling pkt. +*/ uint64_t l3_len:9; /**< L3 (IP) Header Length. */ uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */ uint64_t tso_segsz:16; /**< TCP TSO segment size */ -- 2.7.4
[dpdk-dev] [PATCH v4 0/3] Add TSO on tunneling packet
Patch 1: mbuf: add Tx side tunneling type Patch 2: net/i40e: add TSO support on tunneling packet Patch 3: app/testpmd: fix Tx offload on tunneling packet v4: - Parse tunneling parameters according to the tunnel type flag. - Add new capabilities to indicate support of TSO on tunneling packets. - Add check to see if TSO on tunneling packets is supported for the specified NIC. - Add support for geneve (as i40e does not differentiate UDP tunneling). - Split into three patches. v3: - added external IP offload flag when TSO is enabled for tunnelling packets v2: - edited the comments Signed-off-by: Zhe Tao Signed-off-by: Jianfeng Tan Jianfeng Tan (3): mbuf: add Tx side tunneling type net/i40e: add TSO support on tunneling packet app/testpmd: fix Tx offload on tunneling packet app/test-pmd/cmdline.c | 42 +--- app/test-pmd/csumonly.c| 37 + drivers/net/i40e/i40e_ethdev.c | 6 ++- drivers/net/i40e/i40e_rxtx.c | 90 +- lib/librte_ether/rte_ethdev.h | 4 ++ lib/librte_mbuf/rte_mbuf.c | 4 ++ lib/librte_mbuf/rte_mbuf.h | 17 +++- 7 files changed, 157 insertions(+), 43 deletions(-) -- 2.7.4
[dpdk-dev] [PATCH] net/virtio_user: fix inconsistent name
The commit cb6696d22023 ("drivers: update registration macro usage") changes the name from virtio-user to virtio_user, because hyphen cannot be used in a C symbol name. However, this commit does not update the strings in docs and source code, which could lead to failure to start this device as per the docs. This patch updates related strings in the docs and source code. Fixes: cb6696d22023 ("drivers: update registration macro usage") Reported-by: Tiwei Bie Signed-off-by: Jianfeng Tan --- doc/guides/rel_notes/release_16_07.rst | 2 +- doc/guides/sample_app_ug/vhost.rst | 12 ++-- drivers/net/virtio/virtio_ethdev.c | 4 ++-- drivers/net/virtio/virtio_user_ethdev.c | 6 +++--- drivers/net/virtio/virtqueue.h | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst index d3a144f..0740d4f 100644 --- a/doc/guides/rel_notes/release_16_07.rst +++ b/doc/guides/rel_notes/release_16_07.rst @@ -90,7 +90,7 @@ New Features * **Virtio support for containers.** - Add a new virtual device, named virtio-user, to support virtio for containers. + Add a new virtual device, named virtio_user, to support virtio for containers. Known limitations: diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst index a93e54d..2b7defc 100644 --- a/doc/guides/sample_app_ug/vhost.rst +++ b/doc/guides/sample_app_ug/vhost.rst @@ -834,19 +834,19 @@ The above message indicates that device 0 has been registered with MAC address c Any packets received on the NIC with these values is placed on the devices receive queue. When a virtio-net device transmits packets, the VLAN tag is added to the packet by the DPDK vhost sample code. -Running virtio-user with vhost-switch +Running virtio_user with vhost-switch - -We can also use virtio-user with vhost-switch now. 
-Virtio-user is a virtual device that can be run in a application (container) parallelly with vhost in the same OS, +We can also use virtio_user with vhost-switch now. +Virtio_user is a virtual device that can be run in a application (container) parallelly with vhost in the same OS, aka, there is no need to start a VM. We just run it with a different --file-prefix to avoid startup failure. .. code-block:: console cd ${RTE_SDK}/x86_64-native-linuxapp-gcc/app -./testpmd -c 0x3 -n 4 --socket-mem 1024 --no-pci --file-prefix=virtio-user-testpmd \ ---vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost \ +./testpmd -c 0x3 -n 4 --socket-mem 1024 --no-pci --file-prefix=virtio_user-testpmd \ +--vdev=virtio_user0,mac=00:01:02:03:04:05,path=$path_vhost \ -- -i --txqflags=0xf01 --disable-hw-vlan There is no difference on the vhost side. -Pleae note that there are some limitations (see release note for more information) in the usage of virtio-user. +Pleae note that there are some limitations (see release note for more information) in the usage of virtio_user. diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 850e3ba..c1ff2ac 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -452,7 +452,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, *pvq = cvq; } - /* For virtio-user case (that is when dev->pci_dev is NULL), we use + /* For virtio_user case (that is when dev->pci_dev is NULL), we use * virtual address. And we need properly set _offset_, please see * MBUF_DATA_DMA_ADDR in virtqueue.h for more information. 
*/ @@ -1541,7 +1541,7 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) if (dev->pci_dev) dev_info->driver_name = dev->driver->pci_drv.name; else - dev_info->driver_name = "virtio-user PMD"; + dev_info->driver_name = "virtio_user PMD"; dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues; dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues; dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index 6b4f66e..daef09b 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -331,7 +331,7 @@ virtio_user_pmd_devinit(const char *name, const char *params) int ret = -1; if (!params || params[0] == '\0') { - PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio-user", + PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user", VIRTIO_USER_ARG_QUEUE_SIZE); goto end; } @@ -351,7 +351,7 @@ virtio_user_pmd_devinit(const char *name, const char *params)
[dpdk-dev] [PATCH v3] examples/vhost: fix perf regression
We find a significant performance drop introduced by the commit below, when the vhost example is started with --mergeable 0 and, inside the VM, the kernel virtio-net driver is used to do IP-based forwarding. The commit, 859b480d5afd ("vhost: add guest offload setting"), adds support for VIRTIO_NET_F_GUEST_TSO4 and VIRTIO_NET_F_GUEST_TSO6, in vhost lib. But inside vhost example, the way to disable tso only excludes the direction from virtio to vhost, but not the opposite direction. When mergeable is disabled, it triggers big_packets path of virtio-net driver to prepare to receive possible big packets with size of 64K. Because mergeable is off, for each entry of avail ring, virtio driver uses 19 desc chained together, with one desc pointing to header, other 18 desc pointing to 4K-sized pages. But QEMU only creates 256 desc entries for each vq, so only 13 packets can be received. VM kernel can quickly handle those packets and go to sleep (HLT). As QEMU has no option to set the number of desc entries of a vq, we disable VIRTIO_NET_F_GUEST_TSO4 and VIRTIO_NET_F_GUEST_TSO6 together with VIRTIO_NET_F_HOST_TSO4 and VIRTIO_NET_F_HOST_TSO6 when we disable tso of vhost example, to keep the VM kernel virtio driver from going into the big_packets path. Fixes: 9fd72e3cbd29 ("examples/vhost: add virtio offload") Reported-by: Qian Xu Signed-off-by: Jianfeng Tan --- v3: reword commit log. v2: change the Fixes line to point to proper commit to fix. examples/vhost/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/vhost/main.c b/examples/vhost/main.c index 3b98f42..92a9823 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -327,6 +327,8 @@ port_init(uint8_t port) if (enable_tso == 0) { rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4); rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6); + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_GUEST_TSO4); + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_GUEST_TSO6); } rx_rings = (uint16_t)dev_info.max_rx_queues; -- 2.7.4
[dpdk-dev] [PATCH v2] examples/vhost: fix perf regression
We find a significant performance drop introduced by the commit below, when the vhost example is started with --mergeable 0 and, inside the VM, the kernel virtio-net driver is used to do IP-based forwarding. The root cause is that below commit adds support for VIRTIO_NET_F_GUEST_TSO4 and VIRTIO_NET_F_GUEST_TSO6, and when mergeable is disabled, it triggers big_packets path of virtio-net driver. In this path, virtio driver uses 19 desc with 18 4K-sized pages to receive each packet, so that it can receive a big packet with size of 64K. But QEMU only creates 256 desc entries for each vq, so only 13 packets can be received. VM kernel can quickly handle those packets and go to sleep (HLT). As QEMU has no option to set the number of desc entries of a vq, we disable VIRTIO_NET_F_GUEST_TSO4 and VIRTIO_NET_F_GUEST_TSO6 together with VIRTIO_NET_F_HOST_TSO4 and VIRTIO_NET_F_HOST_TSO6 when we disable tso of vhost example, to keep the VM kernel virtio driver from going into the big_packets path. Fixes: 9fd72e3cbd29 ("examples/vhost: add virtio offload") Reported-by: Qian Xu Signed-off-by: Jianfeng Tan --- v2: change the Fixes line to point to proper commit to fix. examples/vhost/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/vhost/main.c b/examples/vhost/main.c index 3b98f42..92a9823 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -327,6 +327,8 @@ port_init(uint8_t port) if (enable_tso == 0) { rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4); rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6); + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_GUEST_TSO4); + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_GUEST_TSO6); } rx_rings = (uint16_t)dev_info.max_rx_queues; -- 2.7.4
[dpdk-dev] [PATCH] examples/vhost: fix perf regression
We find a significant performance drop introduced by the commit below, when the vhost example is started with --mergeable 0 and, inside the VM, the kernel virtio-net driver is used to do IP-based forwarding. The root cause is that below commit adds support for VIRTIO_NET_F_GUEST_TSO4 and VIRTIO_NET_F_GUEST_TSO6, and when mergeable is disabled, it triggers big_packets path of virtio-net driver. In this path, virtio driver uses 19 desc with 18 4K-sized pages to receive each packet, so that it can receive a big packet with size of 64K. But QEMU only creates 256 desc entries for each vq, so only 13 packets can be received. VM kernel can quickly handle those packets and go to sleep (HLT). As QEMU has no option to set the number of desc entries of a vq, we disable VIRTIO_NET_F_GUEST_TSO4 and VIRTIO_NET_F_GUEST_TSO6 together with VIRTIO_NET_F_HOST_TSO4 and VIRTIO_NET_F_HOST_TSO6 when we disable tso of vhost example, to keep the VM kernel virtio driver from going into the big_packets path. Fixes: 859b480d5afd ("vhost: add guest offload setting") Reported-by: Qian Xu Signed-off-by: Jianfeng Tan --- examples/vhost/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/vhost/main.c b/examples/vhost/main.c index 3b98f42..92a9823 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -327,6 +327,8 @@ port_init(uint8_t port) if (enable_tso == 0) { rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4); rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6); + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_GUEST_TSO4); + rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_GUEST_TSO6); } rx_rings = (uint16_t)dev_info.max_rx_queues; -- 2.7.4
[dpdk-dev] [PATCH] net/virtio: fix null pointer dereference
There is a logic bug in this code, that could lead to null pointer dereference when cvq is NULL. Fix this problem by changing logic _and_ to logic _or_. >> CID 127480: Null pointer dereferences (FORWARD_NULL) >> Dereferencing null pointer "cvq". if (!cvq && !cvq->vq) { ... } Coverity issue: 127480 Fixes: 01ad44fd374f ("net/virtio: split Rx/Tx queue") Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_ethdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 480daa3..828afef 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -166,7 +166,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, ctrl->status = status; - if (!cvq && !cvq->vq) { + if (!cvq || !cvq->vq) { PMD_INIT_LOG(ERR, "Control queue is not supported."); return -1; } -- 2.1.4
[dpdk-dev] [PATCH] examples/vhost: fix failure without hints
When the specified cores and memory lie on a different NUMA socket from the physical NIC, vhost fails to set up the rx queue, and exits without any hints. This could lead to confusion for users. This patch fixes it by adding some error messages when calls to ether APIs return errors. Fixes: 4796ad63ba1f ("examples/vhost: import userspace vhost application") Reported-by: Yulong Pei Signed-off-by: Jianfeng Tan --- examples/vhost/main.c | 20 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/examples/vhost/main.c b/examples/vhost/main.c index 3aff2cc..3b98f42 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -332,8 +332,11 @@ port_init(uint8_t port) rx_rings = (uint16_t)dev_info.max_rx_queues; /* Configure ethernet device. */ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, _conf); - if (retval != 0) + if (retval != 0) { + RTE_LOG(ERR, VHOST_PORT, "Failed to configure port %u: %s.\n", + port, strerror(-retval)); return retval; + } /* Setup the queues. */ for (q = 0; q < rx_rings; q ++) { @@ -341,21 +344,30 @@ port_init(uint8_t port) rte_eth_dev_socket_id(port), rxconf, mbuf_pool); - if (retval < 0) + if (retval < 0) { + RTE_LOG(ERR, VHOST_PORT, + "Failed to setup rx queue %u of port %u: %s.\n", + q, port, strerror(-retval)); return retval; + } } for (q = 0; q < tx_rings; q ++) { retval = rte_eth_tx_queue_setup(port, q, tx_ring_size, rte_eth_dev_socket_id(port), txconf); - if (retval < 0) + if (retval < 0) { + RTE_LOG(ERR, VHOST_PORT, + "Failed to setup tx queue %u of port %u: %s.\n", + q, port, strerror(-retval)); return retval; + } } /* Start the device. */ retval = rte_eth_dev_start(port); if (retval < 0) { - RTE_LOG(ERR, VHOST_DATA, "Failed to start the device.\n"); + RTE_LOG(ERR, VHOST_PORT, "Failed to start port %u: %s\n", + port, strerror(-retval)); return retval; } -- 2.1.4
[dpdk-dev] [PATCH 4/4] net/virtio-user: fix string unterminated
When strncpy() is used to copy a string whose length reaches or exceeds its last parameter, the destination string may be left unterminated. We replace strncpy() with snprintf() to make sure it is NUL-terminated. Coverity issue: 127476 Fixes: ce2eabdd43ec ("net/virtio-user: add virtual device") Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/virtio_user_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 1b1e5bf..376c9cf 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -181,7 +181,7 @@ int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, int cq, int queue_size, const char *mac) { - strncpy(dev->path, path, PATH_MAX); + snprintf(dev->path, PATH_MAX, "%s", path); dev->max_queue_pairs = queues; dev->queue_pairs = 1; /* mq disabled by default */ dev->queue_size = queue_size; -- 2.1.4
[dpdk-dev] [PATCH 3/4] net/virtio-user: fix resource leaks
The return value by rte_kvargs_parse is not free(d), which leads to memory leak. Coverity issue: 127482 Fixes: ce2eabdd43ec ("net/virtio-user: add virtual device") Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user_ethdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index 8429b2e..8e39adf 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -320,7 +320,7 @@ virtio_user_eth_dev_alloc(const char *name) static int virtio_user_pmd_devinit(const char *name, const char *params) { - struct rte_kvargs *kvlist; + struct rte_kvargs *kvlist = NULL; struct rte_eth_dev *eth_dev; struct virtio_hw *hw; uint64_t queues = VIRTIO_USER_DEF_Q_NUM; @@ -422,6 +422,8 @@ virtio_user_pmd_devinit(const char *name, const char *params) ret = 0; end: + if (kvlist) + rte_kvargs_free(kvlist); if (path) free(path); if (mac_addr) -- 2.1.4
[dpdk-dev] [PATCH 2/4] net/virtio-user: fix string overflow
When parsing /proc/self/maps to get hugepage information, the string was being copied with strcpy(), which could theoretically, though not in practice, overflow the destination buffer. Anyway, to avoid the false alarm, we replace strcpy() with snprintf() to copy the string safely. Coverity issue: 127484 Fixes: 6a84c37e3975 ("net/virtio-user: add vhost-user adapter layer") Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/vhost_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c index a159ece..082e821 100644 --- a/drivers/net/virtio/virtio_user/vhost_user.c +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -181,7 +181,7 @@ get_hugepage_file_info(struct hugepage_file_info huges[], int max) } huges[idx].addr = v_start; huges[idx].size = v_end - v_start; - strcpy(huges[idx].path, tmp); + snprintf(huges[idx].path, PATH_MAX, "%s", tmp); idx++; } -- 2.1.4
[dpdk-dev] [PATCH 1/4] net/virtio-user: fix return value not checked
When return values of function calls are not checked, Coverity will report errors like: if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_PATH) == 1) >>> CID 127477:(CHECKED_RETURN) >>> Calling "rte_kvargs_process" without checking return value (as is done elsewhere 25 out of 30 times). rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH, _string_arg, ); Coverity issue: 127344, 127478 Fixes: ce2eabdd43ec ("net/virtio-user: add virtual device") Fixes: 6a84c37e3975 ("net/virtio-user: add vhost-user adapter layer") Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/vhost_user.c | 3 +- drivers/net/virtio/virtio_user_ethdev.c | 57 ++--- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c index a2b0687..a159ece 100644 --- a/drivers/net/virtio/virtio_user/vhost_user.c +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -392,7 +392,8 @@ vhost_user_setup(const char *path) } flag = fcntl(fd, F_GETFD); - fcntl(fd, F_SETFD, flag | FD_CLOEXEC); + if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) + PMD_DRV_LOG(WARNING, "fcntl failed, %s", strerror(errno)); memset(, 0, sizeof(un)); un.sun_family = AF_UNIX; diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index 5ab2471..8429b2e 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -343,29 +343,58 @@ virtio_user_pmd_devinit(const char *name, const char *params) } if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_PATH) == 1) - rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH, - _string_arg, ); + ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH, +_string_arg, ); + if (ret < 0) { + PMD_INIT_LOG(ERR, "error to parse %s", +VIRTIO_USER_ARG_PATH); + goto end; + } else { PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio-user\n", VIRTIO_USER_ARG_QUEUE_SIZE); goto end; } - if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_MAC) == 1) - 
rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC, - _string_arg, _addr); + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_MAC) == 1) { + ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC, +_string_arg, _addr); + if (ret < 0) { + PMD_INIT_LOG(ERR, "error to parse %s", +VIRTIO_USER_ARG_MAC); + goto end; + } + } - if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE) == 1) - rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE, - _integer_arg, _size); + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE) == 1) { + ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE, +_integer_arg, _size); + if (ret < 0) { + PMD_INIT_LOG(ERR, "error to parse %s", +VIRTIO_USER_ARG_QUEUE_SIZE); + goto end; + } + } - if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUES_NUM) == 1) - rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM, - _integer_arg, ); + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUES_NUM) == 1) { + ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM, +_integer_arg, ); + if (ret < 0) { + PMD_INIT_LOG(ERR, "error to parse %s", +VIRTIO_USER_ARG_QUEUES_NUM); + goto end; + } + } - if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) - rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM, - _integer_arg, ); + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) { + ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM, +_integer_arg, ); + if (ret < 0) { + PMD_INIT_LOG(ERR, "error to parse %s", +VIRTIO_USER_ARG_CQ_NUM); + goto end; + } + } else if (queues > 1) cq = 1; -- 2.1.4
[dpdk-dev] [PATCH 0/4] net/virtio-user: fix coverity issues
Patch 1: fix return value not checked, Coverity issue: 127344, 127478 Patch 2: fix string overflow, Coverity issue: 127484 Patch 3: fix resource leaks, Coverity issue: 127482 Patch 4: fix string unterminated, Coverity issue: 127476 Jianfeng Tan (4): net/virtio-user: fix return value not checked net/virtio-user: fix string overflow net/virtio-user: fix resource leaks net/virtio-user: fix string unterminated drivers/net/virtio/virtio_user/vhost_user.c | 5 +- drivers/net/virtio/virtio_user/virtio_user_dev.c | 2 +- drivers/net/virtio/virtio_user_ethdev.c | 61 ++-- 3 files changed, 50 insertions(+), 18 deletions(-) -- 2.1.4
[dpdk-dev] [PATCH v2] net/virtio-user: fix build error in SUSE 11
On some older systems, such as SUSE 11, the compile error shows as: .../dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c:67:22: error: 'O_CLOEXEC' undeclared (first use in this function) The fix is to use EFD_CLOEXEC, which is defined in sys/eventfd.h, instead of O_CLOEXEC which needs _GNU_SOURCE defined on some old systems. Fixes: 37a7eb2ae816 ("net/virtio-user: add device emulation layer") Signed-off-by: Jianfeng Tan --- v2: - Change the way to fix this issue. drivers/net/virtio/virtio_user/virtio_user_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 3d12a32..1b1e5bf 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -63,12 +63,12 @@ virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) /* May use invalid flag, but some backend leverages kickfd and callfd as * criteria to judge if dev is alive. so finally we use real event_fd. */ - callfd = eventfd(0, O_CLOEXEC | O_NONBLOCK); + callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); if (callfd < 0) { PMD_DRV_LOG(ERR, "callfd error, %s\n", strerror(errno)); return -1; } - kickfd = eventfd(0, O_CLOEXEC | O_NONBLOCK); + kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); if (kickfd < 0) { close(callfd); PMD_DRV_LOG(ERR, "kickfd error, %s\n", strerror(errno)); -- 2.1.4
[dpdk-dev] [PATCH] net/virtio-user: fix missing default macro
With the current config structure, all configuration parameters are put into common_base with a default value and overwritten in the environment file if required; however, CONFIG_RTE_VIRTIO_USER is missing in common_base. The fix is simple: add CONFIG_RTE_VIRTIO_USER=n as the default macro value. Fixes: ce2eabdd43ec ("net/virtio-user: add virtual device") Reported-by: Ferruh Yigit Signed-off-by: Jianfeng Tan --- config/common_base | 5 + 1 file changed, 5 insertions(+) diff --git a/config/common_base b/config/common_base index 5d9daf5..0368d59 100644 --- a/config/common_base +++ b/config/common_base @@ -269,6 +269,11 @@ CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n # +# Compile software VIRTIO-USER PMD driver +# +CONFIG_RTE_VIRTIO_USER=n + +# # Compile burst-oriented VMXNET3 PMD driver # CONFIG_RTE_LIBRTE_VMXNET3_PMD=y -- 2.1.4
[dpdk-dev] [PATCH] net/virtio-user: fix O_CLOEXEC undeclared error
On some older systems, such as SUSE 11, the compile error shows as: .../dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c:67:22: error: 'O_CLOEXEC' undeclared (first use in this function) The fix is to define the _GNU_SOURCE macro before including fcntl.h. Fixes: 37a7eb2ae816 ("net/virtio-user: add device emulation layer") Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/virtio_user_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 3d12a32..180f824 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -31,6 +31,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#define _GNU_SOURCE #include #include #include -- 2.1.4
[dpdk-dev] [PATCH v2] net/virtio-user: fix implicit int to enum conversion
Implicit int to enum conversion is not allowed when icc is used as the compiler. It raises the compiling error like, /.../dpdk/drivers/net/virtio/virtio_user/vhost_user.c(257): error #188: enumerated type mixed with another type msg.request = req; ^ The fix is simple, change the type of parameter req to enum vhost_user_request. Fixes: 6a84c37e3975 ("net/virtio-user: add vhost-user adapter layer") Suggested-by: Stephen Hemminger Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/vhost.h | 2 +- drivers/net/virtio/virtio_user/vhost_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h index 897042f..7adb55f 100644 --- a/drivers/net/virtio/virtio_user/vhost.h +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -139,7 +139,7 @@ struct vhost_user_msg { #define VHOST_USER_F_PROTOCOL_FEATURES 30 #define VHOST_USER_MQ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) -int vhost_user_sock(int vhostfd, uint64_t req, void *arg); +int vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg); int vhost_user_setup(const char *path); int vhost_user_enable_queue_pair(int vhostfd, uint16_t pair_idx, int enable); diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c index 95e80f8..a2b0687 100644 --- a/drivers/net/virtio/virtio_user/vhost_user.c +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -240,7 +240,7 @@ static const char * const vhost_msg_strings[] = { }; int -vhost_user_sock(int vhostfd, uint64_t req, void *arg) +vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg) { struct vhost_user_msg msg; struct vhost_vring_file *file = 0; -- 2.1.4
[dpdk-dev] [PATCH] net/virtio-user: fix implicit int to enum conversion
Implicit int to enum conversion is not allowed when icc is used as the compiler. It raises the compiling error like, /.../dpdk/drivers/net/virtio/virtio_user/vhost_user.c(257): error #188: enumerated type mixed with another type msg.request = req; ^ The fix is simple, aka make such conversion explicit. Fixes: 6a84c37e3975 ("net/virtio-user: add vhost-user adapter layer") Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/vhost_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c index 95e80f8..e2772d5 100644 --- a/drivers/net/virtio/virtio_user/vhost_user.c +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -254,7 +254,7 @@ vhost_user_sock(int vhostfd, uint64_t req, void *arg) PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); - msg.request = req; + msg.request = (enum vhost_user_request)req; msg.flags = VHOST_USER_VERSION; msg.size = 0; -- 2.1.4
[dpdk-dev] [PATCH v3 0/3] virtio-user: handle ctrl-q in driver
In virtio-user driver, when notify ctrl-queue, invoke API of virtio-user device emulation to handle ctrl-q command. Besides, multi-queue requires ctrl-queue and ctrl-queue will be enabled automatically when multi-queue is specified. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user_ethdev.c | 13 + 1 file changed, 13 insertions(+) diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index 8f401a3..4c9279e 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -42,6 +42,7 @@ #include "virtio_logs.h" #include "virtio_pci.h" #include "virtqueue.h" +#include "virtio_rxtx.h" #include "virtio_user/virtio_user_dev.h" #define virtio_user_get_dev(hw) \ @@ -200,6 +201,11 @@ virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) uint64_t buf = 1; struct virtio_user_dev *dev = virtio_user_get_dev(hw); + if (hw->cvq && (hw->cvq->vq == vq)) { + virtio_user_handle_cq(dev, vq->vq_queue_index); + return; + } + if (write(dev->kickfds[vq->vq_queue_index], , sizeof(buf)) < 0) PMD_DRV_LOG(ERR, "failed to kick backend: %s\n", strerror(errno)); @@ -360,6 +366,13 @@ virtio_user_pmd_devinit(const char *name, const char *params) if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM, _integer_arg, ); + else if (queues > 1) + cq = 1; + + if (queues > 1 && cq == 0) { + PMD_INIT_LOG(ERR, "multi-q requires ctrl-q"); + goto end; + } eth_dev = virtio_user_eth_dev_alloc(name); if (!eth_dev) { -- 2.1.4
[dpdk-dev] [PATCH v3 2/3] virtio-user: add ctrl-q and mq in device emulation
The main purpose of this patch is to enable multi-queue. But multi-queue requires ctrl-queue so that driver can send how many queues will be enabled through ctrl-queue messages. So we partially implement ctrl-queue to handle control command with class of VIRTIO_NET_CTRL_MQ and with cmd of VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET to handle mq support. This patch provides an API, virtio_user_handle_cq(), for driver to handle ctrl-queue messages. Besides, multi-queue requires VIRTIO_NET_F_MQ and VIRTIO_NET_F_CTRL_VQ are enabled when we do feature negotiation. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/virtio_user_dev.c | 124 +-- drivers/net/virtio/virtio_user/virtio_user_dev.h | 2 +- 2 files changed, 116 insertions(+), 10 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 93cb758..3d12a32 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -131,11 +131,14 @@ virtio_user_start_device(struct virtio_user_dev *dev) } } - /* After setup all virtqueues, we need to set_features so that -* these features can be set into each virtqueue in vhost side. -* And before that, make sure VIRTIO_NET_F_MAC is stripped. + /* After setup all virtqueues, we need to set_features so that these +* features can be set into each virtqueue in vhost side. And before +* that, make sure VHOST_USER_F_PROTOCOL_FEATURES is added if mq is +* enabled, and VIRTIO_NET_F_MAC is stripped. 
*/ features = dev->features; + if (dev->max_queue_pairs > 1) + features |= VHOST_USER_MQ; features &= ~(1ull << VIRTIO_NET_F_MAC); ret = vhost_user_sock(dev->vhostfd, VHOST_USER_SET_FEATURES, ); if (ret < 0) @@ -185,8 +188,6 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, dev->mac_specified = 0; parse_mac(dev, mac); dev->vhostfd = -1; - /* TODO: cq */ - RTE_SET_USED(cq); dev->vhostfd = vhost_user_setup(dev->path); if (dev->vhostfd < 0) { @@ -205,12 +206,33 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, } if (dev->mac_specified) dev->features |= (1ull << VIRTIO_NET_F_MAC); - /* disable it until we support CQ */ - dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); - dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_RX); - return 0; + if (!cq) { + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); + /* Also disable features depends on VIRTIO_NET_F_CTRL_VQ */ + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_RX); + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN); + dev->features &= ~(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE); + dev->features &= ~(1ull << VIRTIO_NET_F_MQ); + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR); + } else { + /* vhost user backend does not need to know ctrl-q, so +* actually we need add this bit into features. However, +* DPDK vhost-user does send features with this bit, so we +* check it instead of OR it for now. 
+*/ + if (!(dev->features & (1ull << VIRTIO_NET_F_CTRL_VQ))) + PMD_INIT_LOG(INFO, "vhost does not support ctrl-q"); + } + + if (dev->max_queue_pairs > 1) { + if (!(dev->features & VHOST_USER_MQ)) { + PMD_INIT_LOG(ERR, "MQ not supported by the backend"); + return -1; + } + } + return 0; } void @@ -225,3 +247,87 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev) close(dev->vhostfd); } + +static uint8_t +virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) +{ + uint16_t i; + uint8_t ret = 0; + + if (q_pairs > dev->max_queue_pairs) { + PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported", +q_pairs, dev->max_queue_pairs); + return -1; + } + + for (i = 0; i < q_pairs; ++i) + ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 1); + for (i = q_pairs; i < dev->max_queue_pairs; ++i) + ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 0); + + dev->queue_pairs = q_pairs; + + return ret; +} + +static uint32_t +virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring, + uint16_t id
[dpdk-dev] [PATCH v3 1/3] virtio-user: add mq in vhost user adapter
This patch mainly adds method in vhost user adapter to communicate enable/disable queues messages with vhost user backend, aka, VHOST_USER_SET_VRING_ENABLE. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/vhost.h | 5 + drivers/net/virtio/virtio_user/vhost_user.c | 22 ++ 2 files changed, 27 insertions(+) diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h index 4e04ede..8d1e505 100644 --- a/drivers/net/virtio/virtio_user/vhost.h +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -136,6 +136,11 @@ struct vhost_user_msg { /* The version of the protocol we support */ #define VHOST_USER_VERSION0x1 +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VHOST_USER_MQ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) + int vhost_user_sock(int vhostfd, uint64_t req, void *arg); int vhost_user_setup(const char *path); +int vhost_user_enable_queue_pair(int vhostfd, unsigned pair_idx, int enable); + #endif diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c index 47bbf74..98d98b6 100644 --- a/drivers/net/virtio/virtio_user/vhost_user.c +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -234,6 +234,7 @@ static const char * const vhost_msg_strings[] = { [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", NULL, }; @@ -286,6 +287,7 @@ vhost_user_sock(int vhostfd, uint64_t req, void *arg) case VHOST_USER_SET_VRING_NUM: case VHOST_USER_SET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: memcpy(, arg, sizeof(msg.payload.state)); msg.size = sizeof(m.payload.state); break; @@ -402,3 +404,23 @@ vhost_user_setup(const char *path) return fd; } + +int +vhost_user_enable_queue_pair(int vhostfd, unsigned pair_idx, int enable) +{ + int i; + + for (i = 0; i < 2; ++i) { + struct vhost_vring_state state = { + .index = 
pair_idx * 2 + i, + .num = enable, + }; + + if (vhost_user_sock(vhostfd, + VHOST_USER_SET_VRING_ENABLE, )) + return -1; + } + + return 0; + +} -- 2.1.4
[dpdk-dev] [PATCH v3 0/3] add multi queue support for virtio-user
v3: - Fix a compilation issue on 32-bit systems. - Fix a segmentation fault when sending VHOST_USER_SET_VRING_ENABLE. - Squash the patch (use virtual address in cq) into the "virtio for container" patch series. v2: - Move the ctrl queue handler and multi queue logic into device emulation. - Rebase on newest code. This patch set depends on the patch sets below: - http://dpdk.org/ml/archives/dev/2016-April/038111.html - http://dpdk.org/ml/archives/dev/2016-April/038118.html - http://dpdk.org/ml/archives/dev/2016-April/038121.html Add multi queue support for virtio-user virtual port. Patch 1 adds vhost user adapter communications for enable/disable queues. Patch 2 adds features check for multi queue and provides a method for virtio-user driver to enable/disable queues. Patch 3 partially implements ctrl-q to handle VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command from PMD. Test case: 1. start testpmd with a vhost-user port: $ TESTPMD -c 0x7 -n 4 --socket-mem 1024,0 --no-pci \ --vdev 'eth_vhost0,iface=/tmp/sock0,queues=2' \ -- -i --rxq=2 --txq=2 --nb-cores=2 2. start testpmd with a virtio-user port: $ TESTPMD -c 0x70 -n 4 --socket-mem 1024,0 --no-pci --file-prefix=testpmd \ --vdev=virtio-user0,mac=00:01:02:03:04:05,path=/tmp/sock0,queues=2 \ -- -i --rxq=2 --txq=2 --nb-cores=2 --txqflags=0xf01 --disable-hw-vlan 3. use the command below to see if all queues are working: testpmd> show port xstats all Jianfeng Tan (3): virtio-user: add mq in vhost user adapter virtio-user: add ctrl-q and mq in device emulation virtio-user: handle ctrl-q in driver drivers/net/virtio/virtio_user/vhost.h | 5 + drivers/net/virtio/virtio_user/vhost_user.c | 22 drivers/net/virtio/virtio_user/virtio_user_dev.c | 124 +-- drivers/net/virtio/virtio_user/virtio_user_dev.h | 2 +- drivers/net/virtio/virtio_user_ethdev.c | 13 +++ 5 files changed, 156 insertions(+), 10 deletions(-) -- 2.1.4
[dpdk-dev] [PATCH v9 6/6] virtio-user: add a new vdev named virtio-user
Add a new virtual device named virtio-user, which can be used just like eth_ring, eth_null, etc. To reuse the code of the original virtio driver, we make some adjustments in virtio_ethdev.c, such as removing the _static_ keyword from eth_virtio_dev_init() so that it can be reused by the virtual device; and we add some checks to make sure it will not crash. Configured parameters include: - queues (optional, 1 by default), number of queue pairs, multi-queue not supported for now. - cq (optional, 0 by default), not supported for now. - mac (optional), random value will be given if not specified. - queue_size (optional, 256 by default), size of virtqueues. - path (mandatory), path of vhost user. When CONFIG_RTE_VIRTIO_USER is enabled (it is enabled by default), the compiled library can be used in both VM and container environments. Examples: path_vhost= # use vhost-user as a backend sudo ./examples/l2fwd/build/l2fwd -c 0x10 -n 4 \ --socket-mem 0,1024 --no-pci --file-prefix=l2fwd \ --vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost -- -p 0x1 Known issues: - Control queue and multi-queue are not supported yet. - Cannot work with --huge-unlink. - Cannot work with --no-huge. - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. - Root privilege is a must (mainly because of sorting hugepages according to physical address). - Applications should not use file names like HUGEFILE_FMT ("%smap_%d"). - Cannot work with vhost-net backend. 
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman Acked-by: Yuanhan Liu --- doc/guides/rel_notes/release_16_07.rst | 12 ++ doc/guides/sample_app_ug/vhost.rst | 17 +++ drivers/net/virtio/virtio_ethdev.c | 19 +-- drivers/net/virtio/virtio_ethdev.h | 2 + drivers/net/virtio/virtio_user_ethdev.c | 209 5 files changed, 252 insertions(+), 7 deletions(-) diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst index 13df729..681235a 100644 --- a/doc/guides/rel_notes/release_16_07.rst +++ b/doc/guides/rel_notes/release_16_07.rst @@ -68,6 +68,18 @@ New Features It can be turned off if flag ``RTE_VHOST_USER_NO_RECONNECT`` is set. +* **Virtio support for containers.** + + Add a new virtual device, named virtio-user, to support virtio for containers. + + Known limitations: + + * Control queue and multi-queue are not supported yet. + * Cannot work with --huge-unlink. + * Cannot work with --no-huge. + * Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. + * Root privilege is a must for sorting hugepages by physical address. + * Can only be used with vhost user backend. Resolved Issues --- diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst index 5f81802..a93e54d 100644 --- a/doc/guides/sample_app_ug/vhost.rst +++ b/doc/guides/sample_app_ug/vhost.rst @@ -833,3 +833,20 @@ For example: The above message indicates that device 0 has been registered with MAC address cc:bb:bb:bb:bb:bb and VLAN tag 1000. Any packets received on the NIC with these values is placed on the devices receive queue. When a virtio-net device transmits packets, the VLAN tag is added to the packet by the DPDK vhost sample code. + +Running virtio-user with vhost-switch +- + +We can also use virtio-user with vhost-switch now. +Virtio-user is a virtual device that can be run in a application (container) parallelly with vhost in the same OS, +aka, there is no need to start a VM. 
We just run it with a different --file-prefix to avoid startup failure. + +.. code-block:: console + +cd ${RTE_SDK}/x86_64-native-linuxapp-gcc/app +./testpmd -c 0x3 -n 4 --socket-mem 1024 --no-pci --file-prefix=virtio-user-testpmd \ +--vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost \ +-- -i --txqflags=0xf01 --disable-hw-vlan + +There is no difference on the vhost side. +Pleae note that there are some limitations (see release note for more information) in the usage of virtio-user. diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 5fd9f51..026b8a1 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -59,7 +59,6 @@ #include "virtqueue.h" #include "virtio_rxtx.h" -static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); static int virtio_dev_configure(struct rte_eth_dev *dev); static int virtio_dev_start(struct rte_eth_dev *dev); @@ -1084,7 +1083,7 @@ rx_func_get(struct rte_eth_dev *eth_dev) * This function is based on probe() function in virtio_pci.c * It returns 0 on success. */ -static int +int eth_virtio_dev_init(struct rte_eth_dev *eth_dev) { struct virtio_hw *hw = eth_dev->data->dev_private; @@ -1115,9 +1114,11 @@ eth_virtio_dev_init(struct rt
[dpdk-dev] [PATCH v9 5/6] virtio-user: add new virtual PCI driver for virtio
This patch implements another new instance of struct virtio_pci_ops to drive the virtio-user virtual device. Instead of rd/wr ioport or PCI configuration space, this virtual pci driver will rd/wr the virtual device struct virtio_user_hw, and when necessary, invokes APIs provided by device emulation later to start/stop the device. -- | -- | | | virtio driver | |> (virtio_user_ethdev.c) | -- | | | | | -- | --> virtio-user PMD | | device emulate | | | || | | | vhost adapter | | | -- | -- | | | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman Acked-by: Yuanhan Liu --- drivers/net/virtio/Makefile | 1 + drivers/net/virtio/virtio_pci.h | 1 + drivers/net/virtio/virtio_user_ethdev.c | 218 3 files changed, 220 insertions(+) create mode 100644 drivers/net/virtio/virtio_user_ethdev.c diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index d37b83d..43de46c 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -58,6 +58,7 @@ endif ifeq ($(CONFIG_RTE_VIRTIO_USER),y) SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user_ethdev.c endif # this lib depends upon: diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index 6c7f8d7..dd7693f 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -261,6 +261,7 @@ struct virtio_hw { struct virtio_pci_common_cfg *common_cfg; struct virtio_net_config *dev_cfg; const struct virtio_pci_ops *vtpci_ops; + void*virtio_user_dev; }; /* diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c new file mode 100644 index 000..7814514 --- /dev/null +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -0,0 +1,218 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#include "virtio_logs.h" +#include "virtio_pci.h" +#include "virtqueue.h" +#include "virtio_user/virtio_user_dev.h" + +#define virtio_user_get_dev(hw) \ + ((struct virtio_user_dev *)(hw)->virtio_user_dev); + +static void +virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset, +void *dst, int length) +{ + int i; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (offset == offsetof(struct virtio_net_config, mac) && + length == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + ((uint8_t *)dst)[i] = dev->mac_addr[i]; + return; + } + + if (offset == offsetof(struct virtio_net_config, status)) + *(uint16_t *)dst = dev->status; + + if (offset == offsetof(struct virtio_net_config, max_virtqueue_pairs)) + *(uint16_t *)dst =
[dpdk-dev] [PATCH v9 4/6] virtio-user: add device emulation layer APIs
Two device emulation layer APIs are added for virtio driver to call: - virtio_user_start_device() - virtio_user_stop_device() - virtio_user_dev_init() - virtio_user_dev_uninit() These APIs will get called by virtio driver, and they call vhost adapter layer APIs to implement the functionality. Besides, this patch defines a struct named virtio_user_dev to help manage the data stands for this kind of virtual device. -- | -- | | | virtio driver | | | -- | | | | | -- | --> virtio-user PMD | | device emulate |-|> (virtio_user_dev.c, virtio_user_dev.h) | || | | | vhost adapter | | | -- | -- | | | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/Makefile | 1 + drivers/net/virtio/virtio_user/virtio_user_dev.c | 227 +++ drivers/net/virtio/virtio_user/virtio_user_dev.h | 62 +++ 3 files changed, 290 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.h diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index c2ed0fa..d37b83d 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -57,6 +57,7 @@ endif ifeq ($(CONFIG_RTE_VIRTIO_USER),y) SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c endif # this lib depends upon: diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c new file mode 100644 index 000..93cb758 --- /dev/null +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -0,0 +1,227 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "../virtio_ethdev.h" + +static int +virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +{ + int callfd, kickfd; + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vring *vring = >vrings[queue_sel]; + struct vhost_vring_addr addr = { + .index = queue_sel, + .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, + .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, + .used_user_addr = (uint64_t)(uintptr_t)vring->used, + .log_guest_addr = 0, + .flags = 0, /* disable log */ + }; + + /* May use invalid flag, but some backend leverages kickfd and callfd as +* criteria to judge if dev is alive. so finally we use real event_fd. +*/ + callfd = eventfd(0, O_CLOEXEC | O_NONBLOCK); + if (callfd < 0) { + PMD_DRV_LOG(ERR, "callfd error, %s\n", strerror(errno)); + return -1; + } + kickfd = eventfd(0, O_CLOEXE
[dpdk-dev] [PATCH v9 1/6] virtio: hide phys addr check inside PCI ops
This patch moves the phys addr check from virtio_dev_queue_setup to the PCI ops. To make that happen, make sure virtio_ops.setup_queue returns whether the check passed. Signed-off-by: Jianfeng Tan Signed-off-by: Huawei Xie Acked-by: Yuanhan Liu --- drivers/net/virtio/virtio_ethdev.c | 17 ++--- drivers/net/virtio/virtio_pci.c| 30 -- drivers/net/virtio/virtio_pci.h| 2 +- 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index ad0f5a6..53faa46 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -388,16 +388,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } } - /* -* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, -* and only accepts 32 bit page frame number. -* Check if the allocated physical memory exceeds 16TB. -*/ - if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { - PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); - ret = -ENOMEM; - goto fail_q_alloc; - } memset(mz->addr, 0, sizeof(mz->len)); vq->vq_ring_mem = mz->phys_addr; @@ -482,7 +472,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, *pvq = cvq; } - hw->vtpci_ops->setup_queue(hw, vq); + if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { + PMD_INIT_LOG(ERR, "setup_queue failed"); + virtio_dev_queue_release(vq); + return -EINVAL; + } + vq->configured = 1; return 0; diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c index 60ec4da..02f605d 100644 --- a/drivers/net/virtio/virtio_pci.c +++ b/drivers/net/virtio/virtio_pci.c @@ -55,6 +55,22 @@ */ #define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20) +static inline int +check_vq_phys_addr_ok(struct virtqueue *vq) +{ + /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, +* and only accepts 32 bit page frame number. +* Check if the allocated physical memory exceeds 16TB. 
+*/ + if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >> + (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); + return 0; + } + + return 1; +} + static void legacy_read_dev_config(struct virtio_hw *hw, size_t offset, void *dst, int length) @@ -143,15 +159,20 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return dst; } -static void +static int legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint32_t src; + if (!check_vq_phys_addr_ok(vq)) + return -1; + rte_eal_pci_ioport_write(>io, >vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; rte_eal_pci_ioport_write(>io, , 4, VIRTIO_PCI_QUEUE_PFN); + + return 0; } static void @@ -367,12 +388,15 @@ modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return io_read16(>common_cfg->queue_size); } -static void +static int modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint64_t desc_addr, avail_addr, used_addr; uint16_t notify_off; + if (!check_vq_phys_addr_ok(vq)) + return -1; + desc_addr = vq->vq_ring_mem; avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, @@ -400,6 +424,8 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr); PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)", vq->notify_addr, notify_off); + + return 0; } static void diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index f20468a..6c7f8d7 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -235,7 +235,7 @@ struct virtio_pci_ops { uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); - void (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); + int (*setup_queue)(struct virtio_hw *hw, struct virtqueue 
*vq); void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq); }; -- 2.1.4
[dpdk-dev] [PATCH v2 4/4] virtio-user: handle ctrl-q in driver
In virtio-user driver, when notify ctrl-queue, invoke API of virtio-user device emulation to handle ctrl-q command. Besides, multi-queue requires ctrl-queue and ctrl-queue will be enabled automatically when multi-queue is specified. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user_ethdev.c | 13 + 1 file changed, 13 insertions(+) diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index 8f401a3..4c9279e 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -42,6 +42,7 @@ #include "virtio_logs.h" #include "virtio_pci.h" #include "virtqueue.h" +#include "virtio_rxtx.h" #include "virtio_user/virtio_user_dev.h" #define virtio_user_get_dev(hw) \ @@ -200,6 +201,11 @@ virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) uint64_t buf = 1; struct virtio_user_dev *dev = virtio_user_get_dev(hw); + if (hw->cvq && (hw->cvq->vq == vq)) { + virtio_user_handle_cq(dev, vq->vq_queue_index); + return; + } + if (write(dev->kickfds[vq->vq_queue_index], , sizeof(buf)) < 0) PMD_DRV_LOG(ERR, "failed to kick backend: %s\n", strerror(errno)); @@ -360,6 +366,13 @@ virtio_user_pmd_devinit(const char *name, const char *params) if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM, _integer_arg, ); + else if (queues > 1) + cq = 1; + + if (queues > 1 && cq == 0) { + PMD_INIT_LOG(ERR, "multi-q requires ctrl-q"); + goto end; + } eth_dev = virtio_user_eth_dev_alloc(name); if (!eth_dev) { -- 2.1.4
[dpdk-dev] [PATCH v2 3/4] virtio-user: add ctrl-q and mq in device emulation
The main purpose of this patch is to enable multi-queue. But multi-queue requires ctrl-queue so that driver can send how many queues will be enabled through ctrl-queue messages. So we partially implement ctrl-queue to handle control command with class of VIRTIO_NET_CTRL_MQ and with cmd of VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET to handle mq support. This patch provides an API, virtio_user_handle_cq(), for driver to handle ctrl-queue messages. Besides, multi-queue requires VIRTIO_NET_F_MQ and VIRTIO_NET_F_CTRL_VQ are enabled when we do feature negotiation. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/virtio_user_dev.c | 124 +-- drivers/net/virtio/virtio_user/virtio_user_dev.h | 2 +- 2 files changed, 116 insertions(+), 10 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 93cb758..3c72a2a 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -131,11 +131,14 @@ virtio_user_start_device(struct virtio_user_dev *dev) } } - /* After setup all virtqueues, we need to set_features so that -* these features can be set into each virtqueue in vhost side. -* And before that, make sure VIRTIO_NET_F_MAC is stripped. + /* After setup all virtqueues, we need to set_features so that these +* features can be set into each virtqueue in vhost side. And before +* that, make sure VHOST_USER_F_PROTOCOL_FEATURES is added if mq is +* enabled, and VIRTIO_NET_F_MAC is stripped. 
*/ features = dev->features; + if (dev->max_queue_pairs > 1) + features |= VHOST_USER_MQ; features &= ~(1ull << VIRTIO_NET_F_MAC); ret = vhost_user_sock(dev->vhostfd, VHOST_USER_SET_FEATURES, ); if (ret < 0) @@ -185,8 +188,6 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, dev->mac_specified = 0; parse_mac(dev, mac); dev->vhostfd = -1; - /* TODO: cq */ - RTE_SET_USED(cq); dev->vhostfd = vhost_user_setup(dev->path); if (dev->vhostfd < 0) { @@ -205,12 +206,33 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, } if (dev->mac_specified) dev->features |= (1ull << VIRTIO_NET_F_MAC); - /* disable it until we support CQ */ - dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); - dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_RX); - return 0; + if (!cq) { + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); + /* Also disable features depends on VIRTIO_NET_F_CTRL_VQ */ + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_RX); + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN); + dev->features &= ~(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE); + dev->features &= ~(1ull << VIRTIO_NET_F_MQ); + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR); + } else { + /* vhost user backend does not need to know ctrl-q, so +* actually we need add this bit into features. However, +* DPDK vhost-user does send features with this bit, so we +* check it instead of OR it for now. 
+*/ + if (!(dev->features & (1ull << VIRTIO_NET_F_CTRL_VQ))) + PMD_INIT_LOG(INFO, "vhost does not support ctrl-q"); + } + + if (dev->max_queue_pairs > 1) { + if (!(dev->features & VHOST_USER_MQ)) { + PMD_INIT_LOG(ERR, "MQ not supported by the backend"); + return -1; + } + } + return 0; } void @@ -225,3 +247,87 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev) close(dev->vhostfd); } + +static uint8_t +virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) +{ + uint16_t i; + uint8_t ret = 0; + + if (q_pairs > dev->max_queue_pairs) { + PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported", +q_pairs, dev->max_queue_pairs); + return -1; + } + + for (i = 0; i < q_pairs; ++i) + ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 1); + for (i = q_pairs; i < dev->max_queue_pairs; ++i) + ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 0); + + dev->queue_pairs = q_pairs; + + return ret; +} + +static uint32_t +virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring, +
[dpdk-dev] [PATCH v2 2/4] virtio-user: add mq in vhost user adapter
This patch mainly adds method in vhost user adapter to communicate enable/disable queues messages with vhost user backend. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/vhost.h | 5 + drivers/net/virtio/virtio_user/vhost_user.c | 21 + 2 files changed, 26 insertions(+) diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h index 4e04ede..8d1e505 100644 --- a/drivers/net/virtio/virtio_user/vhost.h +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -136,6 +136,11 @@ struct vhost_user_msg { /* The version of the protocol we support */ #define VHOST_USER_VERSION0x1 +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VHOST_USER_MQ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) + int vhost_user_sock(int vhostfd, uint64_t req, void *arg); int vhost_user_setup(const char *path); +int vhost_user_enable_queue_pair(int vhostfd, unsigned pair_idx, int enable); + #endif diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c index 8e5c0a9..1df532f 100644 --- a/drivers/net/virtio/virtio_user/vhost_user.c +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -286,6 +286,7 @@ vhost_user_sock(int vhostfd, uint64_t req, void *arg) case VHOST_USER_SET_VRING_NUM: case VHOST_USER_SET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: memcpy(, arg, sizeof(msg.payload.state)); msg.size = sizeof(m.payload.state); break; @@ -402,3 +403,23 @@ vhost_user_setup(const char *path) return fd; } + +int +vhost_user_enable_queue_pair(int vhostfd, unsigned pair_idx, int enable) +{ + int i; + + for (i = 0; i < 2; ++i) { + struct vhost_vring_state state = { + .index = pair_idx * 2 + i, + .num = enable, + }; + + if (vhost_user_sock(vhostfd, + VHOST_USER_SET_VRING_ENABLE, )) + return -1; + } + + return 0; + +} -- 2.1.4
[dpdk-dev] [PATCH v2 1/4] virtio-user: use virtual address in cq
Change to use virtio_net_hdr_mem instead of the physical addr of the memzone when sending control queue commands. The virtio_net_hdr_mem has been initialized to use the virtual address in the case of virtio-user. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_ethdev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 4523ceb..86ea07a 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -187,14 +187,14 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, * One RX packet for ACK. */ vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[head].addr = cvq->virtio_net_hdr_mz->phys_addr; + vq->vq_ring.desc[head].addr = cvq->virtio_net_hdr_mem; vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); vq->vq_free_cnt--; i = vq->vq_ring.desc[head].next; for (k = 0; k < pkt_num; k++) { vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr) + sizeof(ctrl->status) + sizeof(uint8_t)*sum; vq->vq_ring.desc[i].len = dlen[k]; @@ -204,7 +204,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, } vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; - vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr); vq->vq_ring.desc[i].len = sizeof(ctrl->status); vq->vq_free_cnt--; -- 2.1.4
[dpdk-dev] [PATCH v2 0/4] add multi queue support for virtio-user
v2: - Move the ctrl queue handler and multi queue logic into device emulation. - Rebase on newest code. This patch set depends on below patch sets: - http://dpdk.org/ml/archives/dev/2016-June/040979.html - http://dpdk.org/ml/archives/dev/2016-June/040954.html Add multi queue support for virtio-user virtual port. Patch 1 adds vhost user adapter communications for enable/disable queues. Patch 2 adds features check for multi queue and provides a method for virtio-user driver to enable/disable queues. Patch 3 partially implements ctrl-q to handle VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command from PMD. Test case: 1. start testpmd with a vhost-user port: $ TESTPMD -c 0x7 -n 4 --socket-mem 1024,0 --no-pci \ --vdev 'eth_vhost0,iface=/tmp/sock0,queues=2' \ -- -i --rxq=2 --txq=2 --nb-cores=2 2. start testpmd with a virtio-user port: $ TESTPMD -c 0x70 -n 4 --socket-mem 1024,0 --no-pci --file-prefix=testpmd \ --vdev=virtio-user0,mac=00:01:02:03:04:05,path=/tmp/sock0,queues=2 \ -- -i --rxq=2 --txq=2 --nb-cores=2 --txqflags=0xf01 --disable-hw-vlan 3. use below commands to see if all queues are working: testpmd> show port xstats all Jianfeng Tan (4): virtio-user: use virtual address in cq virtio-user: add mq in vhost user adapter virtio-user: add ctrl-q and mq in device emulation virtio-user: handle ctrl-q in driver drivers/net/virtio/virtio_ethdev.c | 6 +- drivers/net/virtio/virtio_user/vhost.h | 5 + drivers/net/virtio/virtio_user/vhost_user.c | 21 drivers/net/virtio/virtio_user/virtio_user_dev.c | 124 +-- drivers/net/virtio/virtio_user/virtio_user_dev.h | 2 +- drivers/net/virtio/virtio_user_ethdev.c | 13 +++ 6 files changed, 158 insertions(+), 13 deletions(-) -- 2.1.4
[dpdk-dev] [PATCH v8 6/6] virtio-user: add a new vdev named virtio-user
Add a new virtual device named virtio-user, which can be used just like eth_ring, eth_null, etc. To reuse the code of original virtio, we make some adjustments in virtio_ethdev.c, such as removing the _static_ keyword from eth_virtio_dev_init() so that it can be reused by the virtual device; and we add some checks to make sure it will not crash. Configured parameters include: - queues (optional, 1 by default), number of queue pairs, multi-queue not supported for now. - cq (optional, 0 by default), not supported for now. - mac (optional), random value will be given if not specified. - queue_size (optional, 256 by default), size of virtqueues. - path (mandatory), path of vhost user. When CONFIG_RTE_VIRTIO_USER is enabled (it is by default), the compiled library can be used in both VM and container environment. Examples: path_vhost= # use vhost-user as a backend sudo ./examples/l2fwd/build/l2fwd -c 0x10 -n 4 \ --socket-mem 0,1024 --no-pci --file-prefix=l2fwd \ --vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost -- -p 0x1 Known issues: - Control queue and multi-queue are not supported yet. - Cannot work with --huge-unlink. - Cannot work with --no-huge. - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. - Root privilege is a must (mainly because of sorting hugepages according to physical address). - Applications should not use file name like HUGEFILE_FMT ("%smap_%d"). - Cannot work with vhost-net backend.
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- doc/guides/rel_notes/release_16_07.rst | 12 ++ doc/guides/sample_app_ug/vhost.rst | 17 +++ drivers/net/virtio/virtio_ethdev.c | 19 +-- drivers/net/virtio/virtio_ethdev.h | 2 + drivers/net/virtio/virtio_user_ethdev.c | 209 5 files changed, 252 insertions(+), 7 deletions(-) diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst index 30e78d4..90bdcd4 100644 --- a/doc/guides/rel_notes/release_16_07.rst +++ b/doc/guides/rel_notes/release_16_07.rst @@ -47,6 +47,18 @@ New Features * Dropped specific Xen Dom0 code. * Dropped specific anonymous mempool code in testpmd. +* **Virtio support for containers.** + + Add a new virtual device, named virtio-user, to support virtio for containers. + + Known limitations: + + * Control queue and multi-queue are not supported yet. + * Cannot work with --huge-unlink. + * Cannot work with --no-huge. + * Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. + * Root privilege is a must for sorting hugepages by physical address. + * Can only be used with vhost user backend. Resolved Issues --- diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst index 5f81802..a93e54d 100644 --- a/doc/guides/sample_app_ug/vhost.rst +++ b/doc/guides/sample_app_ug/vhost.rst @@ -833,3 +833,20 @@ For example: The above message indicates that device 0 has been registered with MAC address cc:bb:bb:bb:bb:bb and VLAN tag 1000. Any packets received on the NIC with these values is placed on the devices receive queue. When a virtio-net device transmits packets, the VLAN tag is added to the packet by the DPDK vhost sample code. + +Running virtio-user with vhost-switch +- + +We can also use virtio-user with vhost-switch now. +Virtio-user is a virtual device that can be run in a application (container) parallelly with vhost in the same OS, +aka, there is no need to start a VM. 
We just run it with a different --file-prefix to avoid startup failure. + +.. code-block:: console + +cd ${RTE_SDK}/x86_64-native-linuxapp-gcc/app +./testpmd -c 0x3 -n 4 --socket-mem 1024 --no-pci --file-prefix=virtio-user-testpmd \ +--vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost \ +-- -i --txqflags=0xf01 --disable-hw-vlan + +There is no difference on the vhost side. +Pleae note that there are some limitations (see release note for more information) in the usage of virtio-user. diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 9ccce79..4523ceb 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -59,7 +59,6 @@ #include "virtqueue.h" #include "virtio_rxtx.h" -static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); static int virtio_dev_configure(struct rte_eth_dev *dev); static int virtio_dev_start(struct rte_eth_dev *dev); @@ -1081,7 +1080,7 @@ rx_func_get(struct rte_eth_dev *eth_dev) * This function is based on probe() function in virtio_pci.c * It returns 0 on success. */ -static int +int eth_virtio_dev_init(struct rte_eth_dev *eth_dev) { struct virtio_hw *hw = eth_dev->data->dev_private; @@ -1112,9 +,11 @@ eth_virtio_dev_init(struct rt
[dpdk-dev] [PATCH v8 5/6] virtio-user: add new virtual pci driver for virtio
This patch implements another new instance of struct virtio_pci_ops to drive the virtio-user virtual device. Instead of rd/wr ioport or PCI configuration space, this virtual pci driver will rd/wr the virtual device struct virtio_user_hw, and when necessary, invokes APIs provided by device emulation later to start/stop the device. -- | -- | | | virtio driver | |> (virtio_user_ethdev.c) | -- | | | | | -- | --> virtio-user PMD | | device emulate | | | || | | | vhost adapter | | | -- | -- | | | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/Makefile | 1 + drivers/net/virtio/virtio_pci.h | 1 + drivers/net/virtio/virtio_user_ethdev.c | 218 3 files changed, 220 insertions(+) create mode 100644 drivers/net/virtio/virtio_user_ethdev.c diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index d37b83d..43de46c 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -58,6 +58,7 @@ endif ifeq ($(CONFIG_RTE_VIRTIO_USER),y) SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user_ethdev.c endif # this lib depends upon: diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index 6c7f8d7..dd7693f 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -261,6 +261,7 @@ struct virtio_hw { struct virtio_pci_common_cfg *common_cfg; struct virtio_net_config *dev_cfg; const struct virtio_pci_ops *vtpci_ops; + void*virtio_user_dev; }; /* diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c new file mode 100644 index 000..7814514 --- /dev/null +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -0,0 +1,218 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#include "virtio_logs.h" +#include "virtio_pci.h" +#include "virtqueue.h" +#include "virtio_user/virtio_user_dev.h" + +#define virtio_user_get_dev(hw) \ + ((struct virtio_user_dev *)(hw)->virtio_user_dev); + +static void +virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset, +void *dst, int length) +{ + int i; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (offset == offsetof(struct virtio_net_config, mac) && + length == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + ((uint8_t *)dst)[i] = dev->mac_addr[i]; + return; + } + + if (offset == offsetof(struct virtio_net_config, status)) + *(uint16_t *)dst = dev->status; + + if (offset == offsetof(struct virtio_net_config, max_virtqueue_pairs)) + *(uint16_t *)dst = dev->max_queue_pairs; +} +
[dpdk-dev] [PATCH v8 4/6] virtio-user: add device emulation layer APIs
Four device emulation layer APIs are added for virtio driver to call: - virtio_user_start_device() - virtio_user_stop_device() - virtio_user_dev_init() - virtio_user_dev_uninit() These APIs will get called by virtio driver, and they call vhost adapter layer APIs to implement the functionality. Besides, this patch defines a struct named virtio_user_dev to help manage the data that stands for this kind of virtual device. -- | -- | | | virtio driver | | | -- | | | | | -- | --> virtio-user PMD | | device emulate |-|> (virtio_user_dev.c, virtio_user_dev.h) | || | | | vhost adapter | | | -- | -- | | | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/Makefile | 1 + drivers/net/virtio/virtio_user/virtio_user_dev.c | 227 +++ drivers/net/virtio/virtio_user/virtio_user_dev.h | 62 +++ 3 files changed, 290 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.h diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index c2ed0fa..d37b83d 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -57,6 +57,7 @@ endif ifeq ($(CONFIG_RTE_VIRTIO_USER),y) SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c endif # this lib depends upon: diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c new file mode 100644 index 000..93cb758 --- /dev/null +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -0,0 +1,227 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "../virtio_ethdev.h" + +static int +virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +{ + int callfd, kickfd; + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vring *vring = >vrings[queue_sel]; + struct vhost_vring_addr addr = { + .index = queue_sel, + .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, + .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, + .used_user_addr = (uint64_t)(uintptr_t)vring->used, + .log_guest_addr = 0, + .flags = 0, /* disable log */ + }; + + /* May use invalid flag, but some backend leverages kickfd and callfd as +* criteria to judge if dev is alive. so finally we use real event_fd. +*/ + callfd = eventfd(0, O_CLOEXEC | O_NONBLOCK); + if (callfd < 0) { + PMD_DRV_LOG(ERR, "callfd error, %s\n", strerror(errno)); + return -1; + } + kickfd = eventfd(0, O_CLOEXE
[dpdk-dev] [PATCH v8 3/6] virtio-user: add vhost user adapter layer
This patch is to provide vhost adapter layer implementations. Instead of relying on a hypervisor to translate between device emulation and vhost backend, here we directly talk with vhost backend through the vhost file. Here three main APIs are provided to upper layer (device emulation): - vhost_user_setup(), to set up vhost user backend; - vhost_user_sock(), to talk with vhost user backend. -- | -- | | | virtio driver | | | -- | | | | | -- | --> virtio-user PMD | | device emulate | | | || | | | vhost adapter |-|> (vhost_user.c) | -- | -- | | -- --> (vhost-user protocol) | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- config/common_linuxapp | 1 + drivers/net/virtio/Makefile | 4 + drivers/net/virtio/virtio_user/vhost.h | 141 ++ drivers/net/virtio/virtio_user/vhost_user.c | 404 4 files changed, 550 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/vhost.h create mode 100644 drivers/net/virtio/virtio_user/vhost_user.c diff --git a/config/common_linuxapp b/config/common_linuxapp index 7e698e2..2483dfa 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -43,3 +43,4 @@ CONFIG_RTE_LIBRTE_VHOST=y CONFIG_RTE_LIBRTE_PMD_VHOST=y CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y CONFIG_RTE_LIBRTE_POWER=y +CONFIG_RTE_VIRTIO_USER=y diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index ef84f60..c2ed0fa 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -55,6 +55,10 @@ ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c endif +ifeq ($(CONFIG_RTE_VIRTIO_USER),y) +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c +endif + # this lib depends upon: DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf diff --git a/drivers/net/virtio/virtio_user/vhost.h 
b/drivers/net/virtio/virtio_user/vhost.h new file mode 100644 index 000..4e04ede --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -0,0 +1,141 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include +#include +#include + +#include "../virtio_pci.h" +#include "../virtio_logs.h" +#include "../virtqueue.h" + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + uint64_t
[dpdk-dev] [PATCH v8 2/6] virtio: enable use virtual address to fill desc
This patch is related to how to calculate relative address for vhost backend. The principle is that: based on one or multiple shared memory regions, vhost maintains a reference system with the frontend start address, backend start address, and length for each segment, so that each frontend address (GPA, Guest Physical Address) can be translated into vhost-recognizable backend address. To make the address translation efficient, we need to maintain as few regions as possible. In the case of VM, GPA is always locally continuous. But for some other case, like virtio-user, we use virtual address here. It basically means: a. when set_base_addr, VA address is used; b. when preparing RX's descriptors, VA address is used; c. when transmitting packets, VA is filled in TX's descriptors; d. in TX and CQ's header, VA is used. Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/virtio_ethdev.c | 43 +++-- drivers/net/virtio/virtio_rxtx.c| 5 ++-- drivers/net/virtio/virtio_rxtx_simple.c | 13 +- drivers/net/virtio/virtqueue.h | 10 4 files changed, 49 insertions(+), 22 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 841949b..9ccce79 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -431,9 +431,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, rxvq->mz = mz; *pvq = rxvq; } else if (queue_type == VTNET_TQ) { - struct virtio_tx_region *txr; - unsigned int i; - txvq = (struct virtnet_tx *)RTE_PTR_ADD(vq, sz_vq); txvq->vq = vq; txvq->port_id = dev->data->port_id; @@ -442,6 +439,36 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, txvq->virtio_net_hdr_mz = hdr_mz; txvq->virtio_net_hdr_mem = hdr_mz->phys_addr; + *pvq = txvq; + } else if (queue_type == VTNET_CQ) { + cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq); + cvq->vq = vq; + cvq->mz = mz; + cvq->virtio_net_hdr_mz = hdr_mz; + cvq->virtio_net_hdr_mem = hdr_mz->phys_addr; + 
memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); + *pvq = cvq; + } + + /* For virtio-user case (that is when dev->pci_dev is NULL), we use +* virtual address. And we need properly set _offset_, please see +* MBUF_DATA_DMA_ADDR in virtqueue.h for more information. +*/ + if (dev->pci_dev) + vq->offset = offsetof(struct rte_mbuf, buf_physaddr); + else { + vq->vq_ring_mem = (uintptr_t)mz->addr; + vq->offset = offsetof(struct rte_mbuf, buf_addr); + if (queue_type == VTNET_TQ) + txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; + else if (queue_type == VTNET_CQ) + cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; + } + + if (queue_type == VTNET_TQ) { + struct virtio_tx_region *txr; + unsigned int i; + txr = hdr_mz->addr; memset(txr, 0, vq_size * sizeof(*txr)); for (i = 0; i < vq_size; i++) { @@ -457,16 +484,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, start_dp->len = hw->vtnet_hdr_size; start_dp->flags = VRING_DESC_F_NEXT; } - - *pvq = txvq; - } else if (queue_type == VTNET_CQ) { - cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq); - cvq->vq = vq; - cvq->mz = mz; - cvq->virtio_net_hdr_mz = hdr_mz; - cvq->virtio_net_hdr_mem = hdr_mz->phys_addr; - memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); - *pvq = cvq; } if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index 28266d2..b96d0cb 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -193,8 +193,7 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) start_dp = vq->vq_ring.desc; start_dp[idx].addr = - (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM - - hw->vtnet_hdr_size); + MBUF_DATA_DMA_ADDR(cookie, vq->offset) - hw->vtnet_hdr_size; start_dp[idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_WRITE; @@ -266,7 +265,7 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, str
[dpdk-dev] [PATCH v8 1/6] virtio: hide phys addr check inside pci ops
This patch is to move phys addr check from virtio_dev_queue_setup to pci ops. To make that happen, make virtio_ops.setup_queue return whether the check passed. Signed-off-by: Jianfeng Tan Signed-off-by: Huawei Xie Acked-by: Yuanhan Liu --- drivers/net/virtio/virtio_ethdev.c | 17 ++--- drivers/net/virtio/virtio_pci.c| 30 -- drivers/net/virtio/virtio_pci.h| 2 +- 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index a995520..841949b 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -385,16 +385,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } } - /* -* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, -* and only accepts 32 bit page frame number. -* Check if the allocated physical memory exceeds 16TB. -*/ - if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { - PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); - ret = -ENOMEM; - goto fail_q_alloc; - } memset(mz->addr, 0, sizeof(mz->len)); vq->vq_ring_mem = mz->phys_addr; @@ -479,7 +469,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, *pvq = cvq; } - hw->vtpci_ops->setup_queue(hw, vq); + if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { + PMD_INIT_LOG(ERR, "setup_queue failed"); + virtio_dev_queue_release(vq); + return -EINVAL; + } + vq->configured = 1; return 0; diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c index d0f2428..8d0c983 100644 --- a/drivers/net/virtio/virtio_pci.c +++ b/drivers/net/virtio/virtio_pci.c @@ -55,6 +55,22 @@ */ #define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20) +static inline int +check_vq_phys_addr_ok(struct virtqueue *vq) +{ + /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, +* and only accepts 32 bit page frame number. +* Check if the allocated physical memory exceeds 16TB.
+*/ + if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >> + (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); + return 0; + } + + return 1; +} + static void legacy_read_dev_config(struct virtio_hw *hw, size_t offset, void *dst, int length) @@ -143,15 +159,20 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return dst; } -static void +static int legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint32_t src; + if (!check_vq_phys_addr_ok(vq)) + return -1; + rte_eal_pci_ioport_write(>io, >vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; rte_eal_pci_ioport_write(>io, , 4, VIRTIO_PCI_QUEUE_PFN); + + return 0; } static void @@ -367,12 +388,15 @@ modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return io_read16(>common_cfg->queue_size); } -static void +static int modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint64_t desc_addr, avail_addr, used_addr; uint16_t notify_off; + if (!check_vq_phys_addr_ok(vq)) + return -1; + desc_addr = vq->vq_ring_mem; avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, @@ -400,6 +424,8 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr); PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)", vq->notify_addr, notify_off); + + return 0; } static void diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index f20468a..6c7f8d7 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -235,7 +235,7 @@ struct virtio_pci_ops { uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); - void (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); + int (*setup_queue)(struct virtio_hw *hw, struct virtqueue 
*vq); void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq); }; -- 2.1.4
[dpdk-dev] [PATCH v8 0/6] virtio support for container
Code layout and functionality of each module: -- | -- | | | virtio driver | |> (virtio_user_ethdev.c) | -- | | | | | -- | --> virtio-user PMD | | device emulate |-|> (virtio_user_dev.c) | || | | | vhost adapter |-|> (vhost_user.c, vhost_kernel.c, vhost.c) | -- | -- | | -- --> (vhost-user protocol) | -- | vhost backend | -- How to share memory? In VM's case, qemu always shares all physical layout to backend. But it's not feasible for a container, as a process, to share all virtual memory regions to backend. So only specified virtual memory regions (with type of shared) are sent to backend. It's a limitation that only addresses in these areas can be used to transmit or receive packets. Known issues: - Control queue and multi-queue are not supported yet. - Cannot work with --huge-unlink. - Cannot work with --no-huge. - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. - Root privilege is a must (mainly because of sorting hugepages according to physical address). - Applications should not use file name like HUGEFILE_FMT ("%smap_%d"). - Cannot work with vhost kernel. How to use? a. Apply this patchset. b. To compile container apps: $: make config RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make install RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make -C examples/l2fwd RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make -C examples/vhost RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc c. To build a docker image using Dockerfile below. $: cat ./Dockerfile FROM ubuntu:latest WORKDIR /usr/src/dpdk COPY . /usr/src/dpdk ENV PATH "$PATH:/usr/src/dpdk/examples/l2fwd/build/" $: docker build -t dpdk-app-l2fwd . d. Used with vhost-user $: ./examples/vhost/build/vhost-switch -c 3 -n 4 \ --socket-mem 1024,1024 -- -p 0x1 --stats 1 $: docker run -i -t -v :/var/run/usvhost \ -v /dev/hugepages:/dev/hugepages \ dpdk-app-l2fwd l2fwd -c 0x4 -n 4 -m 1024 --no-pci \ --vdev=virtio-user0,path=/var/run/usvhost -- -p 0x1 By the way, it's not necessary to run in a container.
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Jianfeng Tan (6): virtio: hide phys addr check inside pci ops virtio: enable use virtual address to fill desc virtio-user: add vhost user adapter layer virtio-user: add device emulation layer APIs virtio-user: add new virtual pci driver for virtio virtio-user: add a new vdev named virtio-user config/common_linuxapp | 1 + doc/guides/rel_notes/release_16_07.rst | 12 + doc/guides/sample_app_ug/vhost.rst | 17 + drivers/net/virtio/Makefile | 6 + drivers/net/virtio/virtio_ethdev.c | 77 ++-- drivers/net/virtio/virtio_ethdev.h | 2 + drivers/net/virtio/virtio_pci.c | 30 +- drivers/net/virtio/virtio_pci.h | 3 +- drivers/net/virtio/virtio_rxtx.c | 5 +- drivers/net/virtio/virtio_rxtx_simple.c | 13 +- drivers/net/virtio/virtio_user/vhost.h | 141 drivers/net/virtio/virtio_user/vhost_user.c | 404 + drivers/net/virtio/virtio_user/virtio_user_dev.c | 227 drivers/net/virtio/virtio_user/virtio_user_dev.h | 62 drivers/net/virtio/virtio_user_ethdev.c | 427 +++ drivers/net/virtio/virtqueue.h | 10 + 16 files changed, 1395 insertions(+), 42 deletions(-) create mode 100644 drivers/net/virtio/virtio_user/vhost.h create mode 100644 drivers/net/virtio/virtio_user/vhost_user.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.h create mode 100644 drivers/net/virtio/virtio_user_ethdev.c -- 2.1.4
[dpdk-dev] [PATCH] virtio: fix allocating virtnet_rx not mem aligned
When DPDK is compiled with clang, the line below in virtio_rxtx.c may be optimized into four "VMOVAPS ymm, m256" instructions: memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); This instruction requires the memory address to be 32-byte aligned; otherwise it leads to a segfault. Although only tested with Clang 3.6.0, it can be reproduced with any compiler that does aggressive optimization, i.e., changes a memset of known length into VMOVAPS. The fact that struct rte_mbuf is cache line aligned can only make sure fake_mbuf is aligned relative to the start address of struct virtnet_rx. Unfortunately, this address is not necessarily aligned because it's allocated by: rxvq = (struct virtnet_rx *)RTE_PTR_ADD(vq, sz_vq); When sz_vq is not aligned, then rxvq cannot be allocated with an aligned address, and then rxvq->fake_mbuf (addr of rxvq + cache line size) is not an aligned address. The fix is very simple: make sz_vq 32-byte aligned. Here we make it cache line aligned for future optimization. Fixes: a900472aedef ("virtio: split virtio Rx/Tx queue") Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_ethdev.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index a995520..ad0f5a6 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -337,7 +337,10 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, snprintf(vq_name, sizeof(vq_name), "port%d_%s%d", dev->data->port_id, queue_names[queue_type], queue_idx); - sz_vq = sizeof(*vq) + vq_size * sizeof(struct vq_desc_extra); + + sz_vq = RTE_ALIGN_CEIL(sizeof(*vq) + + vq_size * sizeof(struct vq_desc_extra), + RTE_CACHE_LINE_SIZE); if (queue_type == VTNET_RQ) { sz_q = sz_vq + sizeof(*rxvq); } else if (queue_type == VTNET_TQ) { -- 2.1.4
[dpdk-dev] [PATCH] virtio: fix reuse index in nested loop
This patch fixes the problem of reusing the index of the outermost loop in nested loops. This bug leads to failure when starting a multi-queue virtio device: rx queues (except for the first one) cannot be started; in particular, their vq_ring cannot be initialized, so that a segmentation fault happens when the rx function is invoked on these queues. Fixes: a900472aedef ("virtio: split virtio Rx/Tx queue") Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_rxtx.c | 36 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index 2e7205b..b96d0cb 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -331,7 +331,7 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev) * -Allocate blank mbufs for the each rx descriptor * */ - int i; + int i, j; PMD_INIT_FUNC_TRACE(); @@ -352,15 +352,18 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev) error = ENOSPC; #ifdef RTE_MACHINE_CPUFLAG_SSSE3 - if (use_simple_rxtx) - for (i = 0; i < vq->vq_nentries; i++) { - vq->vq_ring.avail->ring[i] = i; - vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; + if (use_simple_rxtx) { + uint16_t k; + + for (k = 0; k < vq->vq_nentries; k++) { + vq->vq_ring.avail->ring[k] = k; + vq->vq_ring.desc[k].flags = VRING_DESC_F_WRITE; } + } #endif memset(>fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); - for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++) - vq->sw_ring[vq->vq_nentries + i] = >fake_mbuf; + for (j = 0; j < RTE_PMD_VIRTIO_RX_MAX_BURST; j++) + vq->sw_ring[vq->vq_nentries + j] = >fake_mbuf; while (!virtqueue_full(vq)) { m = rte_mbuf_raw_alloc(rxvq->mpool); @@ -399,20 +402,21 @@ virtio_dev_rxtx_start(struct rte_eth_dev *dev) #ifdef RTE_MACHINE_CPUFLAG_SSSE3 if (use_simple_rxtx) { int mid_idx = vq->vq_nentries >> 1; - for (i = 0; i < mid_idx; i++) { - vq->vq_ring.avail->ring[i] = i + mid_idx; - vq->vq_ring.desc[i + mid_idx].next = i; - vq->vq_ring.desc[i + mid_idx].addr = + + for (j = 0; j < mid_idx; j++) { +
vq->vq_ring.avail->ring[j] = j + mid_idx; + vq->vq_ring.desc[j + mid_idx].next = j; + vq->vq_ring.desc[j + mid_idx].addr = txvq->virtio_net_hdr_mem + offsetof(struct virtio_tx_region, tx_hdr); - vq->vq_ring.desc[i + mid_idx].len = + vq->vq_ring.desc[j + mid_idx].len = vq->hw->vtnet_hdr_size; - vq->vq_ring.desc[i + mid_idx].flags = + vq->vq_ring.desc[j + mid_idx].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[i].flags = 0; + vq->vq_ring.desc[j].flags = 0; } - for (i = mid_idx; i < vq->vq_nentries; i++) - vq->vq_ring.avail->ring[i] = i; + for (j = mid_idx; j < vq->vq_nentries; j++) + vq->vq_ring.avail->ring[j] = j; } #endif VIRTQUEUE_DUMP(vq); -- 2.1.4
[dpdk-dev] [PATCH v7 6/6] virtio-user: add a new vdev named virtio-user
Add a new virtual device named virtio-user, which can be used just like eth_ring, eth_null, etc. To reuse the code of the original virtio driver, we make some adjustments in virtio_ethdev.c, such as removing the _static_ keyword from eth_virtio_dev_init() so that it can be reused by the virtual device; and we add some checks to make sure it will not crash. Configured parameters include: - queues (optional, 1 by default), number of queue pairs, multi-queue not supported for now. - cq (optional, 0 by default), not supported for now. - mac (optional), random value will be given if not specified. - queue_size (optional, 256 by default), size of virtqueues. - path (mandatory), path of vhost, depends on the file type: vhost-user if the given path points to a unix socket; vhost-net if the given path points to a char device. - ifname (optional), specify the name of backend tap device; only valid when backend is vhost-net. When CONFIG_RTE_VIRTIO_USER is enabled (it is enabled by default), the compiled library can be used in both VM and container environments. Examples: path_vhost=/dev/vhost-net # use vhost-net as a backend path_vhost=<path_to_vhost_user> # use vhost-user as a backend sudo ./examples/l2fwd/build/l2fwd -c 0x10 -n 4 \ --socket-mem 0,1024 --no-pci --file-prefix=l2fwd \ --vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost -- -p 0x1 Known issues: - Control queue and multi-queue are not supported yet. - Cannot work with --huge-unlink. - Cannot work with --no-huge. - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. - Root privilege is a must (mainly because of sorting hugepages according to physical address). - Applications should not use file names like HUGEFILE_FMT ("%smap_%d"). 
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- doc/guides/rel_notes/release_16_07.rst | 11 ++ doc/guides/sample_app_ug/vhost.rst | 17 +++ drivers/net/virtio/virtio_ethdev.c | 19 ++- drivers/net/virtio/virtio_ethdev.h | 2 + drivers/net/virtio/virtio_user_ethdev.c | 228 +++- 5 files changed, 265 insertions(+), 12 deletions(-) diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst index 30e78d4..4ecca7e 100644 --- a/doc/guides/rel_notes/release_16_07.rst +++ b/doc/guides/rel_notes/release_16_07.rst @@ -47,6 +47,17 @@ New Features * Dropped specific Xen Dom0 code. * Dropped specific anonymous mempool code in testpmd. +* **Virtio support for containers.** + + Add a new virtual device, named virtio-user, to support virtio for containers. + + Known limitations: + + * Control queue and multi-queue are not supported yet. + * Cannot work with --huge-unlink. + * Cannot work with --no-huge. + * Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. + * Root privilege is a must for sorting hugepages by physical address. Resolved Issues --- diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst index 5f81802..a93e54d 100644 --- a/doc/guides/sample_app_ug/vhost.rst +++ b/doc/guides/sample_app_ug/vhost.rst @@ -833,3 +833,20 @@ For example: The above message indicates that device 0 has been registered with MAC address cc:bb:bb:bb:bb:bb and VLAN tag 1000. Any packets received on the NIC with these values is placed on the devices receive queue. When a virtio-net device transmits packets, the VLAN tag is added to the packet by the DPDK vhost sample code. + +Running virtio-user with vhost-switch +- + +We can also use virtio-user with vhost-switch now. +Virtio-user is a virtual device that can be run in a application (container) parallelly with vhost in the same OS, +aka, there is no need to start a VM. 
We just run it with a different --file-prefix to avoid startup failure. + +.. code-block:: console + +cd ${RTE_SDK}/x86_64-native-linuxapp-gcc/app +./testpmd -c 0x3 -n 4 --socket-mem 1024 --no-pci --file-prefix=virtio-user-testpmd \ +--vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost \ +-- -i --txqflags=0xf01 --disable-hw-vlan + +There is no difference on the vhost side. +Please note that there are some limitations (see release notes for more information) in the usage of virtio-user. diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 9ccce79..4523ceb 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -59,7 +59,6 @@ #include "virtqueue.h" #include "virtio_rxtx.h" -static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); static int virtio_dev_configure(struct rte_eth_dev *dev); static int virtio_dev_start(struct rte_eth_dev *dev); @@ -1081,7 +1080,7 @@ rx_func_get(struct rte_eth_dev *eth_dev) * This function is based on probe() function in virtio_pci.c * It retu
[dpdk-dev] [PATCH v7 5/6] virtio-user: add new virtual pci driver for virtio
This patch implements another new instance of struct virtio_pci_ops to drive the virtio-user virtual device. Instead of rd/wr ioport or PCI configuration space, this virtual pci driver will rd/wr the virtual device struct virtio_user_hw, and when necessary, invokes APIs provided by device emulation later to start/stop the device. -- | -- | | | virtio driver | |> (virtio_user_ethdev.c) | -- | | | | | -- | --> virtio-user PMD | | device emulate | | | || | | | vhost adapter | | | -- | -- | | | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/Makefile | 1 + drivers/net/virtio/virtio_pci.h | 1 + drivers/net/virtio/virtio_user_ethdev.c | 218 3 files changed, 220 insertions(+) create mode 100644 drivers/net/virtio/virtio_user_ethdev.c diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index 88a634a..459260b 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -60,6 +60,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user_ethdev.c endif # this lib depends upon: diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index 6c7f8d7..dd7693f 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -261,6 +261,7 @@ struct virtio_hw { struct virtio_pci_common_cfg *common_cfg; struct virtio_net_config *dev_cfg; const struct virtio_pci_ops *vtpci_ops; + void*virtio_user_dev; }; /* diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c new file mode 100644 index 000..e390242 --- /dev/null +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -0,0 +1,218 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel 
Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#include "virtio_logs.h" +#include "virtio_pci.h" +#include "virtqueue.h" +#include "virtio_user/virtio_user_dev.h" + +#define virtio_user_get_dev(hw) \ + ((struct virtio_user_dev *)(hw)->virtio_user_dev); + +static void +virtio_user_read_dev_config(struct virtio_hw *hw, uint64_t offset, +void *dst, int length) +{ + int i; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (offset == offsetof(struct virtio_net_config, mac) && + length == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + ((uint8_t *)dst)[i] = dev->mac_addr[i]; + return; + } + + if (offset == offsetof(struct virtio_net_config, status)) + *(uint16_t *)dst = dev->status; + + if (offset == offsetof(struct vir
[dpdk-dev] [PATCH v7 4/6] virtio-user: add device emulation layer APIs
Two device emulation layer APIs are added for virtio driver to call: - virtio_user_start_device() - virtio_user_stop_device() - virtio_user_dev_init() - virtio_user_dev_uninit() These APIs will get called by virtio driver, and they call vhost adapter layer APIs to implement the functionality. Besides, this patch defines a struct named virtio_user_dev to help manage the data stands for this kind of virtual device. -- | -- | | | virtio driver | | | -- | | | | | -- | --> virtio-user PMD | | device emulate |-|> (virtio_user_dev.c, virtio_user_dev.h) | || | | | vhost adapter | | | -- | -- | | | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/Makefile | 1 + drivers/net/virtio/virtio_user/virtio_user_dev.c | 263 +++ drivers/net/virtio/virtio_user/virtio_user_dev.h | 64 ++ 3 files changed, 328 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.h diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index 892c2ae..88a634a 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -59,6 +59,7 @@ ifeq ($(CONFIG_RTE_VIRTIO_USER),y) SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c endif # this lib depends upon: diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c new file mode 100644 index 000..b4e53a8 --- /dev/null +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -0,0 +1,263 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "../virtio_ethdev.h" + +static int +virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +{ + int callfd, kickfd; + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vring *vring = >vrings[queue_sel]; + struct vhost_vring_addr addr = { + .index = queue_sel, + .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, + .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, + .used_user_addr = (uint64_t)(uintptr_t)vring->used, + .log_guest_addr = 0, + .flags = 0, /* disable log */ + }; + + /* May use invalid flag, but some backend leverages kickfd and callfd as +* criteria to judge if dev is alive. so finally we use real event_fd. +*/ + callfd = eventfd(0, O_CLOEXEC | O_NONBLOCK); + if (callfd < 0) { + PMD_DRV_LOG(ERR, &quo
[dpdk-dev] [PATCH v7 3/6] virtio-user: add vhost adapter layer
This patch is to provide vhost adapter layer implementations. Instead of relying on a hypervisor to translate between device emulation and vhost backend, here we directly talk with vhost backend through the vhost file. Depending on the type of vhost file, - vhost-user is used if the given path points to a unix socket; - vhost-kernel is used if the given path points to a char device. Here three main APIs are provided to upper layer (device emulation): - vhost_user_setup(), to set up env to talk to a vhost user backend; - vhost_kernel_setup(), to set up env to talk to a vhost kernel backend. - vhost_call(), to provide a unified interface to communicate with vhost backend. -- | -- | | | virtio driver | | | -- | | | | | -- | --> virtio-user PMD | | device emulate | | | || | | | vhost adapter |-|> (vhost_user.c, vhost_kernel.c, vhost.c) | -- | -- | | -- --> (vhost-user protocol or vhost-net ioctls) | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- config/common_linuxapp| 1 + drivers/net/virtio/Makefile | 6 + drivers/net/virtio/virtio_user/vhost.c| 105 +++ drivers/net/virtio/virtio_user/vhost.h| 222 +++ drivers/net/virtio/virtio_user/vhost_kernel.c | 254 + drivers/net/virtio/virtio_user/vhost_user.c | 378 ++ 6 files changed, 966 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/vhost.c create mode 100644 drivers/net/virtio/virtio_user/vhost.h create mode 100644 drivers/net/virtio/virtio_user/vhost_kernel.c create mode 100644 drivers/net/virtio/virtio_user/vhost_user.c diff --git a/config/common_linuxapp b/config/common_linuxapp index 7e698e2..2483dfa 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -43,3 +43,4 @@ CONFIG_RTE_LIBRTE_VHOST=y CONFIG_RTE_LIBRTE_PMD_VHOST=y CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y CONFIG_RTE_LIBRTE_POWER=y +CONFIG_RTE_VIRTIO_USER=y diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index ef84f60..892c2ae 100644 --- 
a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -55,6 +55,12 @@ ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c endif +ifeq ($(CONFIG_RTE_VIRTIO_USER),y) +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c +endif + # this lib depends upon: DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf diff --git a/drivers/net/virtio/virtio_user/vhost.c b/drivers/net/virtio/virtio_user/vhost.c new file mode 100644 index 000..1944a97 --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost.c @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAG
[dpdk-dev] [PATCH v7 2/6] virtio: enable use virtual address to fill desc
This patch is related to how to calculate relative address for vhost backend. The principle is that: based on one or multiple shared memory regions, vhost maintains a reference system with the frontend start address, backend start address, and length for each segment, so that each frontend address (GPA, Guest Physical Address) can be translated into vhost-recognizable backend address. To make the address translation efficient, we need to maintain as few regions as possible. In the case of VM, GPA is always locally continuous. But for some other case, like virtio-user, we use virtual address here. It basically means: a. when set_base_addr, VA address is used; b. when preparing RX's descriptors, VA address is used; c. when transmitting packets, VA is filled in TX's descriptors; d. in TX and CQ's header, VA is used. Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/virtio_ethdev.c | 43 +++-- drivers/net/virtio/virtio_rxtx.c| 5 ++-- drivers/net/virtio/virtio_rxtx_simple.c | 13 +- drivers/net/virtio/virtqueue.h | 10 4 files changed, 49 insertions(+), 22 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 841949b..9ccce79 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -431,9 +431,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, rxvq->mz = mz; *pvq = rxvq; } else if (queue_type == VTNET_TQ) { - struct virtio_tx_region *txr; - unsigned int i; - txvq = (struct virtnet_tx *)RTE_PTR_ADD(vq, sz_vq); txvq->vq = vq; txvq->port_id = dev->data->port_id; @@ -442,6 +439,36 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, txvq->virtio_net_hdr_mz = hdr_mz; txvq->virtio_net_hdr_mem = hdr_mz->phys_addr; + *pvq = txvq; + } else if (queue_type == VTNET_CQ) { + cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq); + cvq->vq = vq; + cvq->mz = mz; + cvq->virtio_net_hdr_mz = hdr_mz; + cvq->virtio_net_hdr_mem = hdr_mz->phys_addr; + 
memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); + *pvq = cvq; + } + + /* For virtio-user case (that is when dev->pci_dev is NULL), we use +* virtual address. And we need properly set _offset_, please see +* MBUF_DATA_DMA_ADDR in virtqueue.h for more information. +*/ + if (dev->pci_dev) + vq->offset = offsetof(struct rte_mbuf, buf_physaddr); + else { + vq->vq_ring_mem = (uintptr_t)mz->addr; + vq->offset = offsetof(struct rte_mbuf, buf_addr); + if (queue_type == VTNET_TQ) + txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; + else if (queue_type == VTNET_CQ) + cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; + } + + if (queue_type == VTNET_TQ) { + struct virtio_tx_region *txr; + unsigned int i; + txr = hdr_mz->addr; memset(txr, 0, vq_size * sizeof(*txr)); for (i = 0; i < vq_size; i++) { @@ -457,16 +484,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, start_dp->len = hw->vtnet_hdr_size; start_dp->flags = VRING_DESC_F_NEXT; } - - *pvq = txvq; - } else if (queue_type == VTNET_CQ) { - cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq); - cvq->vq = vq; - cvq->mz = mz; - cvq->virtio_net_hdr_mz = hdr_mz; - cvq->virtio_net_hdr_mem = hdr_mz->phys_addr; - memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); - *pvq = cvq; } if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index f371423..2e7205b 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -193,8 +193,7 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) start_dp = vq->vq_ring.desc; start_dp[idx].addr = - (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM - - hw->vtnet_hdr_size); + MBUF_DATA_DMA_ADDR(cookie, vq->offset) - hw->vtnet_hdr_size; start_dp[idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_WRITE; @@ -266,7 +265,7 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, str
[dpdk-dev] [PATCH v7 1/6] virtio: hide phys addr check inside pci ops
This patch moves the phys addr check from virtio_dev_queue_setup to pci ops. To make that happen, make sure virtio_ops.setup_queue returns the result of the check. Signed-off-by: Jianfeng Tan Signed-off-by: Huawei Xie Acked-by: Yuanhan Liu --- drivers/net/virtio/virtio_ethdev.c | 17 ++--- drivers/net/virtio/virtio_pci.c| 30 -- drivers/net/virtio/virtio_pci.h| 2 +- 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index a995520..841949b 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -385,16 +385,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } } - /* -* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, -* and only accepts 32 bit page frame number. -* Check if the allocated physical memory exceeds 16TB. -*/ - if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { - PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); - ret = -ENOMEM; - goto fail_q_alloc; - } memset(mz->addr, 0, sizeof(mz->len)); vq->vq_ring_mem = mz->phys_addr; @@ -479,7 +469,12 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, *pvq = cvq; } - hw->vtpci_ops->setup_queue(hw, vq); + if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { + PMD_INIT_LOG(ERR, "setup_queue failed"); + virtio_dev_queue_release(vq); + return -EINVAL; + } + vq->configured = 1; return 0; diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c index d0f2428..8d0c983 100644 --- a/drivers/net/virtio/virtio_pci.c +++ b/drivers/net/virtio/virtio_pci.c @@ -55,6 +55,22 @@ */ #define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20) +static inline int +check_vq_phys_addr_ok(struct virtqueue *vq) +{ + /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, +* and only accepts 32 bit page frame number. +* Check if the allocated physical memory exceeds 16TB. 
+*/ + if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >> + (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); + return 0; + } + + return 1; +} + static void legacy_read_dev_config(struct virtio_hw *hw, size_t offset, void *dst, int length) @@ -143,15 +159,20 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return dst; } -static void +static int legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint32_t src; + if (!check_vq_phys_addr_ok(vq)) + return -1; + rte_eal_pci_ioport_write(>io, >vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; rte_eal_pci_ioport_write(>io, , 4, VIRTIO_PCI_QUEUE_PFN); + + return 0; } static void @@ -367,12 +388,15 @@ modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return io_read16(>common_cfg->queue_size); } -static void +static int modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint64_t desc_addr, avail_addr, used_addr; uint16_t notify_off; + if (!check_vq_phys_addr_ok(vq)) + return -1; + desc_addr = vq->vq_ring_mem; avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, @@ -400,6 +424,8 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr); PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)", vq->notify_addr, notify_off); + + return 0; } static void diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index f20468a..6c7f8d7 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -235,7 +235,7 @@ struct virtio_pci_ops { uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); - void (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); + int (*setup_queue)(struct virtio_hw *hw, struct virtqueue 
*vq); void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq); }; -- 2.1.4
[dpdk-dev] [PATCH v7 0/6] virtio support for container
) | -- | | | | | -- | --> virtio-user PMD | | device emulate |-|> (virtio_user_dev.c) | || | | | vhost adapter |-|> (vhost_user.c, vhost_kernel.c, vhost.c) | -- | -- | | -- --> (vhost-user protocol or vhost-net ioctls) | -- | vhost backend | -- How to share memory? In VM's case, qemu always shares all physical layout to backend. But it's not feasible for a container, as a process, to share all virtual memory regions to backend. So only specified virtual memory regions (with type of shared) are sent to backend. It's a limitation that only addresses in these areas can be used to transmit or receive packets. Known issues: - Control queue and multi-queue are not supported yet. - Cannot work with --huge-unlink. - Cannot work with no-huge. - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. - Root privilege is a must (mainly becase of sorting hugepages according to physical address). - Applications should not use file name like HUGEFILE_FMT ("%smap_%d"). How to use? a. Apply this patchset. b. To compile container apps: $: make config RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make install RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make -C examples/l2fwd RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make -C examples/vhost RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc c. To build a docker image using Dockerfile below. $: cat ./Dockerfile FROM ubuntu:latest WORKDIR /usr/src/dpdk COPY . /usr/src/dpdk ENV PATH "$PATH:/usr/src/dpdk/examples/l2fwd/build/" $: docker build -t dpdk-app-l2fwd . d. Used with vhost-user $: ./examples/vhost/build/vhost-switch -c 3 -n 4 \ --socket-mem 1024,1024 -- -p 0x1 --stats 1 $: docker run -i -t -v :/var/run/usvhost \ -v /dev/hugepages:/dev/hugepages \ dpdk-app-l2fwd l2fwd -c 0x4 -n 4 -m 1024 --no-pci \ --vdev=virtio-user0,path=/var/run/usvhost -- -p 0x1 f. 
Used with vhost-net $: modprobe vhost $: modprobe vhost-net $: docker run -i -t --privileged \ -v /dev/vhost-net:/dev/vhost-net \ -v /dev/net/tun:/dev/net/tun \ -v /dev/hugepages:/dev/hugepages \ dpdk-app-l2fwd l2fwd -c 0x4 -n 4 -m 1024 --no-pci \ --vdev=virtio-user0,path=/dev/vhost-net -- -p 0x1 By the way, it's not necessary to run in a container. Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Jianfeng Tan (6): virtio: hide phys addr check inside pci ops virtio: enable use virtual address to fill desc virtio-user: add vhost adapter layer virtio-user: add device emulation layer APIs virtio-user: add new virtual pci driver for virtio virtio-user: add a new vdev named virtio-user config/common_linuxapp | 1 + doc/guides/rel_notes/release_16_07.rst | 11 + doc/guides/sample_app_ug/vhost.rst | 17 + drivers/net/virtio/Makefile | 8 + drivers/net/virtio/virtio_ethdev.c | 77 ++-- drivers/net/virtio/virtio_ethdev.h | 2 + drivers/net/virtio/virtio_pci.c | 30 +- drivers/net/virtio/virtio_pci.h | 3 +- drivers/net/virtio/virtio_rxtx.c | 5 +- drivers/net/virtio/virtio_rxtx_simple.c | 13 +- drivers/net/virtio/virtio_user/vhost.c | 105 ++ drivers/net/virtio/virtio_user/vhost.h | 222 drivers/net/virtio/virtio_user/vhost_kernel.c| 254 + drivers/net/virtio/virtio_user/vhost_user.c | 378 drivers/net/virtio/virtio_user/virtio_user_dev.c | 263 ++ drivers/net/virtio/virtio_user/virtio_user_dev.h | 64 drivers/net/virtio/virtio_user_ethdev.c | 436 +++ drivers/net/virtio/virtqueue.h | 10 + 18 files changed, 1857 insertions(+), 42 deletions(-) create mode 100644 drivers/net/virtio/virtio_user/vhost.c create mode 100644 drivers/net/virtio/virtio_user/vhost.h create mode 100644 drivers/net/virtio/virtio_user/vhost_kernel.c create mode 100644 drivers/net/virtio/virtio_user/vhost_user.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.h create mode 100644 drivers/net/virtio/virtio_user_ethdev.c -- 2.1.4
[dpdk-dev] [PATCH v6 7/7] virtio-user: add a new vdev named virtio-user
Add a new virtual device named virtio-user, which can be used just like eth_ring, eth_null, etc. To reuse the code of the original virtio driver, we make some adjustments in virtio_ethdev.c, such as removing the _static_ keyword from eth_virtio_dev_init() so that it can be reused by the virtual device; and we add some checks to make sure it will not crash. Configured parameters include: - queues (optional, 1 by default), number of queue pairs, multi-queue not supported for now. - cq (optional, 0 by default), not supported for now. - mac (optional), random value will be given if not specified. - queue_size (optional, 256 by default), size of virtqueues. - path (mandatory), path of vhost, depends on the file type: vhost-user if the given path points to a unix socket; vhost-net if the given path points to a char device. - ifname (optional), specify the name of backend tap device; only valid when backend is vhost-net. When CONFIG_RTE_VIRTIO_VDEV is enabled (it is enabled by default), the compiled library can be used in both VM and container environments. Examples: path_vhost=/dev/vhost-net # use vhost-net as a backend path_vhost=<path_to_vhost_user> # use vhost-user as a backend sudo ./examples/l2fwd/build/l2fwd -c 0x10 -n 4 \ --socket-mem 0,1024 --no-pci --file-prefix=l2fwd \ --vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost -- -p 0x1 Known issues: - Control queue and multi-queue are not supported yet. - Cannot work with --huge-unlink. - Cannot work with --no-huge. - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. - Root privilege is a must (mainly because of sorting hugepages according to physical address). - Applications should not use file names like HUGEFILE_FMT ("%smap_%d"). 
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- doc/guides/rel_notes/release_16_07.rst | 11 ++ doc/guides/sample_app_ug/vhost.rst | 17 +++ drivers/net/virtio/virtio_ethdev.c | 19 ++- drivers/net/virtio/virtio_ethdev.h | 2 + drivers/net/virtio/virtio_user_ethdev.c | 218 5 files changed, 260 insertions(+), 7 deletions(-) diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst index f6d543c..78787ca 100644 --- a/doc/guides/rel_notes/release_16_07.rst +++ b/doc/guides/rel_notes/release_16_07.rst @@ -34,6 +34,17 @@ This section should contain new features added in this release. Sample format: Refer to the previous release notes for examples. +* **Virtio support for containers.** + + Add a new virtual device, named virtio-user, to support virtio for containers. + + Known limitations: + + * Control queue and multi-queue are not supported yet. + * Cannot work with --huge-unlink. + * Cannot work with --no-huge. + * Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. + * Root privilege is a must for sorting hugepages by physical address. Resolved Issues --- diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst index 5f81802..a93e54d 100644 --- a/doc/guides/sample_app_ug/vhost.rst +++ b/doc/guides/sample_app_ug/vhost.rst @@ -833,3 +833,20 @@ For example: The above message indicates that device 0 has been registered with MAC address cc:bb:bb:bb:bb:bb and VLAN tag 1000. Any packets received on the NIC with these values is placed on the devices receive queue. When a virtio-net device transmits packets, the VLAN tag is added to the packet by the DPDK vhost sample code. + +Running virtio-user with vhost-switch +- + +We can also use virtio-user with vhost-switch now. +Virtio-user is a virtual device that can be run in a application (container) parallelly with vhost in the same OS, +aka, there is no need to start a VM. 
We just run it with a different --file-prefix to avoid startup failure. + +.. code-block:: console + +cd ${RTE_SDK}/x86_64-native-linuxapp-gcc/app +./testpmd -c 0x3 -n 4 --socket-mem 1024 --no-pci --file-prefix=virtio-user-testpmd \ +--vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost \ +-- -i --txqflags=0xf01 --disable-hw-vlan + +There is no difference on the vhost side. +Pleae note that there are some limitations (see release note for more information) in the usage of virtio-user. diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 90f1a28..e1d5f0b 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -59,7 +59,6 @@ #include "virtqueue.h" #include "virtio_rxtx.h" -static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); static int virtio_dev_configure(struct rte_eth_dev *dev); static int virtio_dev_start(struct rte_eth_dev *dev); @@ -1042,7 +1041,7 @@ rx_func_get(struct rte_eth_dev *eth_dev) * This function is based on probe() function in
[dpdk-dev] [PATCH v6 6/7] virtio-user: add new virtual pci driver for virtio
This patch implements another new instance of struct virtio_pci_ops to drive the virtio-user virtual device. Instead of rd/wr ioport or PCI configuration space, this virtual pci driver will rd/wr the virtual device struct virtio_user_hw, and when necessary, invokes APIs provided by device emulation later to start/stop the device. -- | -- | | | virtio driver | |> (virtio_user_ethdev.c) | -- | | | | | -- | --> virtio-user PMD | | device emulate | | | || | | | vhost adapter | | | -- | -- | | | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/Makefile | 1 + drivers/net/virtio/virtio_pci.h | 1 + drivers/net/virtio/virtio_user_ethdev.c | 218 3 files changed, 220 insertions(+) create mode 100644 drivers/net/virtio/virtio_user_ethdev.c diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index 68068bd..d913df0 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -60,6 +60,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user_ethdev.c endif # this lib depends upon: diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index a76daf7..d10d013 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -260,6 +260,7 @@ struct virtio_hw { struct virtio_pci_common_cfg *common_cfg; struct virtio_net_config *dev_cfg; const struct virtio_pci_ops *vtpci_ops; + void*virtio_user_dev; }; /* diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c new file mode 100644 index 000..0ea3f23 --- /dev/null +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -0,0 +1,218 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel 
Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#include "virtio_logs.h" +#include "virtio_pci.h" +#include "virtqueue.h" +#include "virtio_user/virtio_user_dev.h" + +#define virtio_user_get_dev(hw) \ + ((struct virtio_user_dev *)(hw)->virtio_user_dev); + +static void +virtio_user_read_dev_config(struct virtio_hw *hw, uint64_t offset, +void *dst, int length) +{ + int i; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (offset == offsetof(struct virtio_net_config, mac) && + length == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + ((uint8_t *)dst)[i] = dev->mac_addr[i]; + return; + } + + if (offset == offsetof(struct virtio_net_config, status)) + *(uint16_t *)dst = dev->status; + + if (offset == offsetof(struct vir
[dpdk-dev] [PATCH v6 4/7] virtio-user: add vhost adapter layer
This patch is to provide vhost adapter layer implementations. Instead of relying on a hypervisor to translate between device emulation and vhost backend, here we directly talk with vhost backend through the vhost file. Depending on the type of vhost file, - vhost-user is used if the given path points to a unix socket; - vhost-kernel is used if the given path points to a char device. Here three main APIs are provided to upper layer (device emulation): - vhost_user_setup(), to set up env to talk to a vhost user backend; - vhost_kernel_setup(), to set up env to talk to a vhost kernel backend. - vhost_call(), to provide a unified interface to communicate with vhost backend. -- | -- | | | virtio driver | | | -- | | | | | -- | --> virtio-user PMD | | device emulate | | | || | | | vhost adapter |-|> (vhost_user.c, vhost_kernel.c, vhost.c) | -- | -- | | -- --> (vhost-user protocol or vhost-net ioctls) | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- config/common_linuxapp| 3 + drivers/net/virtio/Makefile | 6 + drivers/net/virtio/virtio_user/vhost.c| 105 +++ drivers/net/virtio/virtio_user/vhost.h| 222 +++ drivers/net/virtio/virtio_user/vhost_kernel.c | 254 + drivers/net/virtio/virtio_user/vhost_user.c | 378 ++ 6 files changed, 968 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/vhost.c create mode 100644 drivers/net/virtio/virtio_user/vhost.h create mode 100644 drivers/net/virtio/virtio_user/vhost_kernel.c create mode 100644 drivers/net/virtio/virtio_user/vhost_user.c diff --git a/config/common_linuxapp b/config/common_linuxapp index 7e698e2..946a6d4 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -43,3 +43,6 @@ CONFIG_RTE_LIBRTE_VHOST=y CONFIG_RTE_LIBRTE_PMD_VHOST=y CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y CONFIG_RTE_LIBRTE_POWER=y + +# Enable virtio-user +CONFIG_RTE_VIRTIO_VDEV=y diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index ef84f60..c9f2bc0 100644 --- 
a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -55,6 +55,12 @@ ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c endif +ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y) +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c +endif + # this lib depends upon: DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf diff --git a/drivers/net/virtio/virtio_user/vhost.c b/drivers/net/virtio/virtio_user/vhost.c new file mode 100644 index 000..1944a97 --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost.c @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED
[dpdk-dev] [PATCH v6 3/7] virtio: enable use virtual address to fill desc
This patch is related to how to calculate relative address for vhost backend. The principle is that: based on one or multiple shared memory regions, vhost maintains a reference system with the frontend start address, backend start address, and length for each segment, so that each frontend address (GPA, Guest Physical Address) can be translated into vhost-recognizable backend address. To make the address translation efficient, we need to maintain as few regions as possible. In the case of VM, GPA is always locally continuous. But for some other case, like virtio-user, we use virtual address here. It basically means: a. when set_base_addr, VA address is used; b. when preparing RX's descriptors, VA address is used; c. when transmitting packets, VA is filled in TX's descriptors; d. in TX and CQ's header, VA is used. Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/virtio_ethdev.c | 25 - drivers/net/virtio/virtio_rxtx.c| 5 ++--- drivers/net/virtio/virtio_rxtx_simple.c | 13 +++-- drivers/net/virtio/virtqueue.h | 13 - 4 files changed, 41 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 781886d..90f1a28 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -167,14 +167,14 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, * One RX packet for ACK. 
*/ vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr; + vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mem; vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); vq->vq_free_cnt--; i = vq->vq_ring.desc[head].next; for (k = 0; k < pkt_num; k++) { vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr) + sizeof(ctrl->status) + sizeof(uint8_t)*sum; vq->vq_ring.desc[i].len = dlen[k]; @@ -184,7 +184,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, } vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; - vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr); vq->vq_ring.desc[i].len = sizeof(ctrl->status); vq->vq_free_cnt--; @@ -419,8 +419,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, vq->virtio_net_hdr_mem = hdr_mz->phys_addr; memset(hdr_mz->addr, 0, hdr_mz_sz); - vring_hdr_desc_init(vq); - } else if (queue_type == VTNET_CQ) { /* Allocate a page for control vq command, data and status */ snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone", @@ -441,6 +439,23 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); } + /* For virtio-user case (that is when dev->pci_dev is NULL), we use +* virtual address. And we need properly set _offset_, please see +* MBUF_DATA_DMA_ADDR in virtqueue.h for more information. 
+*/ + if (dev->pci_dev) + vq->offset = offsetof(struct rte_mbuf, buf_physaddr); + else { + vq->vq_ring_mem = (phys_addr_t)vq->mz->addr; + vq->offset = offsetof(struct rte_mbuf, buf_addr); + if (vq->virtio_net_hdr_mz) + vq->virtio_net_hdr_mem = + (phys_addr_t)vq->virtio_net_hdr_mz->addr; + } + + if (queue_type == VTNET_TQ) + vring_hdr_desc_init(vq); + if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { PMD_INIT_LOG(ERR, "setup_queue failed"); virtio_dev_queue_release(vq); diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index f326222..5b0c3df 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -193,8 +193,7 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) start_dp = vq->vq_ring.desc; start_dp[idx].addr = - (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM - - hw->vtnet_hdr_size); + MBUF_DATA_DMA_ADDR(cookie, vq->offset) - hw->vtnet_hdr_size; start_dp[idx].len = cookie->buf_len - RTE_PKTMB
[dpdk-dev] [PATCH v6 2/7] virtio: clean up virtio_dev_queue_setup
Abstract vring hdr desc init as an inline method. Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_ethdev.c | 42 ++ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index a3031e4..781886d 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -278,6 +278,26 @@ virtio_dev_queue_release(struct virtqueue *vq) } } +static void +vring_hdr_desc_init(struct virtqueue *vq) +{ + int i; + struct virtio_tx_region *txr = vq->virtio_net_hdr_mz->addr; + + for (i = 0; i < vq->vq_nentries; i++) { + struct vring_desc *start_dp = txr[i].tx_indir; + + vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); + + /* first indirect descriptor is always the tx header */ + start_dp->addr = vq->virtio_net_hdr_mem + i * sizeof(*txr) + +offsetof(struct virtio_tx_region, tx_hdr); + + start_dp->len = vq->hw->vtnet_hdr_size; + start_dp->flags = VRING_DESC_F_NEXT; + } +} + int virtio_dev_queue_setup(struct rte_eth_dev *dev, int queue_type, uint16_t queue_idx, @@ -375,8 +395,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, if (queue_type == VTNET_TQ) { const struct rte_memzone *hdr_mz; - struct virtio_tx_region *txr; - unsigned int i; + size_t hdr_mz_sz = vq_size * sizeof(struct virtio_tx_region); /* * For each xmit packet, allocate a virtio_net_hdr @@ -385,7 +404,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone", dev->data->port_id, queue_idx); hdr_mz = rte_memzone_reserve_aligned(vq_name, -vq_size * sizeof(*txr), +hdr_mz_sz, socket_id, 0, RTE_CACHE_LINE_SIZE); if (hdr_mz == NULL) { @@ -399,21 +418,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, vq->virtio_net_hdr_mz = hdr_mz; vq->virtio_net_hdr_mem = hdr_mz->phys_addr; - txr = hdr_mz->addr; - memset(txr, 0, vq_size * sizeof(*txr)); - for (i = 0; i < vq_size; i++) { - struct vring_desc *start_dp = txr[i].tx_indir; 
- - vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); - - /* first indirect descriptor is always the tx header */ - start_dp->addr = vq->virtio_net_hdr_mem - + i * sizeof(*txr) - + offsetof(struct virtio_tx_region, tx_hdr); - - start_dp->len = vq->hw->vtnet_hdr_size; - start_dp->flags = VRING_DESC_F_NEXT; - } + memset(hdr_mz->addr, 0, hdr_mz_sz); + vring_hdr_desc_init(vq); } else if (queue_type == VTNET_CQ) { /* Allocate a page for control vq command, data and status */ -- 2.1.4
[dpdk-dev] [PATCH v6 1/7] virtio: hide phys addr check inside pci ops
This patch is to move phys addr check from virtio_dev_queue_setup to pci ops. To makt that happen, make sure virtio_ops.setup_queue return the result if we pass through the check. Signed-off-by: Jianfeng Tan Signed-off-by: Huawei Xie Acked-by: Yuanhan Liu --- drivers/net/virtio/virtio_ethdev.c | 17 + drivers/net/virtio/virtio_pci.c| 30 -- drivers/net/virtio/virtio_pci.h| 2 +- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index c3fb628..a3031e4 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -364,17 +364,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } } - /* -* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, -* and only accepts 32 bit page frame number. -* Check if the allocated physical memory exceeds 16TB. -*/ - if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { - PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); - virtio_dev_queue_release(vq); - return -ENOMEM; - } - memset(mz->addr, 0, sizeof(mz->len)); vq->mz = mz; vq->vq_ring_mem = mz->phys_addr; @@ -446,7 +435,11 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); } - hw->vtpci_ops->setup_queue(hw, vq); + if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { + PMD_INIT_LOG(ERR, "setup_queue failed"); + virtio_dev_queue_release(vq); + return -EINVAL; + } vq->configured = 1; *pvq = vq; diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c index 9cdca06..6bd239c 100644 --- a/drivers/net/virtio/virtio_pci.c +++ b/drivers/net/virtio/virtio_pci.c @@ -55,6 +55,22 @@ */ #define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20) +static inline int +check_vq_phys_addr_ok(struct virtqueue *vq) +{ + /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, +* and only accepts 32 bit page frame number. 
+* Check if the allocated physical memory exceeds 16TB. +*/ + if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >> + (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); + return 0; + } + + return 1; +} + static void legacy_read_dev_config(struct virtio_hw *hw, size_t offset, void *dst, int length) @@ -143,15 +159,20 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return dst; } -static void +static int legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint32_t src; + if (!check_vq_phys_addr_ok(vq)) + return -1; + rte_eal_pci_ioport_write(>io, >vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); src = vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; rte_eal_pci_ioport_write(>io, , 4, VIRTIO_PCI_QUEUE_PFN); + + return 0; } static void @@ -367,12 +388,15 @@ modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return io_read16(>common_cfg->queue_size); } -static void +static int modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint64_t desc_addr, avail_addr, used_addr; uint16_t notify_off; + if (!check_vq_phys_addr_ok(vq)) + return -1; + desc_addr = vq->mz->phys_addr; avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, @@ -400,6 +424,8 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr); PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)", vq->notify_addr, notify_off); + + return 0; } static void diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index 554efea..a76daf7 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -234,7 +234,7 @@ struct virtio_pci_ops { uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); - void (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); 
+ int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq); }; -- 2.1.4
[dpdk-dev] [PATCH v6 0/7] virtio support for container
rotocol or vhost-net ioctls) | -- | vhost backend | -- How to share memory? In VM's case, qemu always shares all physical layout to backend. But it's not feasible for a container, as a process, to share all virtual memory regions to backend. So only specified virtual memory regions (with type of shared) are sent to backend. It's a limitation that only addresses in these areas can be used to transmit or receive packets. Known issues: - Control queue and multi-queue are not supported yet. - Cannot work with --huge-unlink. - Cannot work with --no-huge. - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. - Root privilege is a must (mainly because of sorting hugepages according to physical address). - Applications should not use file name like HUGEFILE_FMT ("%smap_%d"). How to use? a. Apply this patchset. b. To compile container apps: $: make config RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make install RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make -C examples/l2fwd RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make -C examples/vhost RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc c. To build a docker image using the Dockerfile below. $: cat ./Dockerfile FROM ubuntu:latest WORKDIR /usr/src/dpdk COPY . /usr/src/dpdk ENV PATH "$PATH:/usr/src/dpdk/examples/l2fwd/build/" $: docker build -t dpdk-app-l2fwd . d. Used with vhost-user $: ./examples/vhost/build/vhost-switch -c 3 -n 4 \ --socket-mem 1024,1024 -- -p 0x1 --stats 1 $: docker run -i -t -v :/var/run/usvhost \ -v /dev/hugepages:/dev/hugepages \ dpdk-app-l2fwd l2fwd -c 0x4 -n 4 -m 1024 --no-pci \ --vdev=virtio-user0,path=/var/run/usvhost -- -p 0x1 e. Used with vhost-net $: modprobe vhost $: modprobe vhost-net $: docker run -i -t --privileged \ -v /dev/vhost-net:/dev/vhost-net \ -v /dev/net/tun:/dev/net/tun \ -v /dev/hugepages:/dev/hugepages \ dpdk-app-l2fwd l2fwd -c 0x4 -n 4 -m 1024 --no-pci \ --vdev=virtio-user0,path=/dev/vhost-net -- -p 0x1 By the way, it's not necessary to run in a container. 
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Jianfeng Tan (7): virtio: hide phys addr check inside pci ops virtio: clean up virtio_dev_queue_setup virtio: enable use virtual address to fill desc virtio-user: add vhost adapter layer virtio-user: add device emulation layer APIs virtio-user: add new virtual pci driver for virtio virtio-user: add a new vdev named virtio-user config/common_linuxapp | 3 + doc/guides/rel_notes/release_16_07.rst | 11 + doc/guides/sample_app_ug/vhost.rst | 17 + drivers/net/virtio/Makefile | 8 + drivers/net/virtio/virtio_ethdev.c | 101 +++--- drivers/net/virtio/virtio_ethdev.h | 2 + drivers/net/virtio/virtio_pci.c | 30 +- drivers/net/virtio/virtio_pci.h | 3 +- drivers/net/virtio/virtio_rxtx.c | 5 +- drivers/net/virtio/virtio_rxtx_simple.c | 13 +- drivers/net/virtio/virtio_user/vhost.c | 105 ++ drivers/net/virtio/virtio_user/vhost.h | 222 drivers/net/virtio/virtio_user/vhost_kernel.c| 254 + drivers/net/virtio/virtio_user/vhost_user.c | 378 drivers/net/virtio/virtio_user/virtio_user_dev.c | 263 ++ drivers/net/virtio/virtio_user/virtio_user_dev.h | 64 drivers/net/virtio/virtio_user_ethdev.c | 436 +++ drivers/net/virtio/virtqueue.h | 13 +- 18 files changed, 1874 insertions(+), 54 deletions(-) create mode 100644 drivers/net/virtio/virtio_user/vhost.c create mode 100644 drivers/net/virtio/virtio_user/vhost.h create mode 100644 drivers/net/virtio/virtio_user/vhost_kernel.c create mode 100644 drivers/net/virtio/virtio_user/vhost_user.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.h create mode 100644 drivers/net/virtio/virtio_user_ethdev.c -- 2.1.4
[dpdk-dev] [PATCH v5] eal: fix allocating all free hugepages
EAL memory init allocates all free hugepages of the whole system, as seen from sysfs, even when applications do not ask for so many. When there is a limitation on how many hugepages an application can use (such as cgroup.hugetlb), or hugetlbfs is specified with an option of size (exceeding the quota of the fs), it just fails to start even if there are enough hugepages allocated. To fix the above issue, this patch: - Changes the logic to continue memory init to see if the hugetlb requirement of the application can be addressed by already allocated hugepages. - To make sure each hugepage is allocated successfully, we add a recovery mechanism, which relies on a memory access to fault in hugepages, and if it fails with SIGBUS, recovers to the previously saved stack environment with siglongjmp(). For the case of CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS (enabled by default when compiling IVSHMEM target), it's indispensable to map all free hugepages in the system. In this case, it fails to start when allocation fails. Test example: a. cgcreate -g hugetlb:/test-subgroup b. cgset -r hugetlb.1GB.limit_in_bytes=2147483648 test-subgroup c. cgexec -g hugetlb:test-subgroup \ ./examples/helloworld/build/helloworld -c 0x2 -n 4 Fixes: af75078fece ("first public release") Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- v5: - Make this method the default instead of using an option. - When SIGBUS is triggered in the case of RTE_EAL_SINGLE_FILE_SEGMENTS, just return error. - Add prefix "huge_" to newly added function and static variables. - Move the internal_config.memory assignment after the page allocations. v4: - Change map_all_hugepages to return unsigned instead of int. v3: - Reword commit message to include it fixes the hugetlbfs quota issue. - setjmp -> sigsetjmp. - Fix RTE_LOG complaint from ERR to DEBUG as it does not mean init error so far. - Fix the second map_all_hugepages's return value check. v2: - Address the compiling error by moving setjmp into a wrapper method. 
lib/librte_eal/linuxapp/eal/eal.c| 20 - lib/librte_eal/linuxapp/eal/eal_memory.c | 138 --- 2 files changed, 125 insertions(+), 33 deletions(-) diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 8aafd51..4a8dfbd 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -465,24 +465,6 @@ eal_parse_vfio_intr(const char *mode) return -1; } -static inline size_t -eal_get_hugepage_mem_size(void) -{ - uint64_t size = 0; - unsigned i, j; - - for (i = 0; i < internal_config.num_hugepage_sizes; i++) { - struct hugepage_info *hpi = _config.hugepage_info[i]; - if (hpi->hugedir != NULL) { - for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { - size += hpi->hugepage_sz * hpi->num_pages[j]; - } - } - } - - return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX; -} - /* Parse the arguments for --log-level only */ static void eal_log_level_parse(int argc, char **argv) @@ -766,8 +748,6 @@ rte_eal_init(int argc, char **argv) if (internal_config.memory == 0 && internal_config.force_sockets == 0) { if (internal_config.no_hugetlbfs) internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE; - else - internal_config.memory = eal_get_hugepage_mem_size(); } if (internal_config.vmware_tsc_map == 1) { diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 5b9132c..dc6f49b 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -80,6 +80,8 @@ #include #include #include +#include +#include #include #include @@ -309,6 +311,21 @@ get_virtual_area(size_t *size, size_t hugepage_sz) return addr; } +static sigjmp_buf huge_jmpenv; + +static void huge_sigbus_handler(int signo __rte_unused) +{ + siglongjmp(huge_jmpenv, 1); +} + +/* Put setjmp into a wrap method to avoid compiling error. Any non-volatile, + * non-static local variable in the stack frame calling sigsetjmp might be + * clobbered by a call to longjmp. 
+ */ +static int huge_wrap_sigsetjmp(void) +{ + return sigsetjmp(huge_jmpenv, 1); +} /* * Mmap all hugepages of hugepage table: it first open a file in * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the @@ -316,7 +333,7 @@ get_virtual_area(size_t *size, size_t hugepage_sz) * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to * map continguous physical blocks in contiguous virtual blocks. */ -static int +static unsigned map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, int orig) { @@ -394,9 +411,9 @@ map_all_hugepages(struct
[dpdk-dev] [PATCH v5 8/8] doc: update doc for virtio-user
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan --- doc/guides/rel_notes/release_16_07.rst | 4 1 file changed, 4 insertions(+) diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst index f6d543c..b1054b6 100644 --- a/doc/guides/rel_notes/release_16_07.rst +++ b/doc/guides/rel_notes/release_16_07.rst @@ -34,6 +34,10 @@ This section should contain new features added in this release. Sample format: Refer to the previous release notes for examples. +* **Virtio support for containers.** + + Add a new virtual device, named virtio-user, to support virtio for containers. + Resolved Issues --- -- 2.1.4
[dpdk-dev] [PATCH v5 7/8] virtio-user: add a new vdev named virtio-user
Add a new virtual device named virtio-user, which can be used just like eth_ring, eth_null, etc. To reuse the code of the original virtio driver, we make some adjustments in virtio_ethdev.c, such as removing the _static_ keyword from eth_virtio_dev_init() so that it can be reused by the virtual device; and we add some checks to make sure it will not crash. Configured parameters include: - queues (optional, 1 by default), number of queue pairs, multi-queue not supported for now. - cq (optional, 0 by default), not supported for now. - mac (optional), random value will be given if not specified. - queue_size (optional, 256 by default), size of virtqueues. - path (mandatory), path of vhost, depends on the file type, vhost user if the given path points to a unix socket; vhost-net if the given path points to a char device. - ifname (optional), specify the name of backend tap device; only valid when backend is vhost-net. When CONFIG_RTE_VIRTIO_VDEV is enabled (it is by default), the compiled library can be used in both VM and container environments. Examples: path_vhost=/dev/vhost-net # use vhost-net as a backend path_vhost= # use vhost-user as a backend sudo ./examples/l2fwd/build/l2fwd -c 0x10 -n 4 \ --socket-mem 0,1024 --no-pci --file-prefix=l2fwd \ --vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost -- -p 0x1 Known issues: - Control queue and multi-queue are not supported yet. - Cannot work with --huge-unlink. - Cannot work with no-huge. - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. - Root privilege is a must (mainly because of sorting hugepages according to physical address). - Applications should not use file names like HUGEFILE_FMT ("%smap_%d"). 
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/virtio_ethdev.c | 19 +- drivers/net/virtio/virtio_ethdev.h | 2 + drivers/net/virtio/virtio_user/virtio_user_dev.c | 309 +++ 3 files changed, 323 insertions(+), 7 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 1866afd..f8972f2 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -59,7 +59,6 @@ #include "virtqueue.h" #include "virtio_rxtx.h" -static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); static int virtio_dev_configure(struct rte_eth_dev *dev); static int virtio_dev_start(struct rte_eth_dev *dev); @@ -1038,7 +1037,7 @@ rx_func_get(struct rte_eth_dev *eth_dev) * This function is based on probe() function in virtio_pci.c * It returns 0 on success. */ -static int +int eth_virtio_dev_init(struct rte_eth_dev *eth_dev) { struct virtio_hw *hw = eth_dev->data->dev_private; @@ -1069,9 +1068,11 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) pci_dev = eth_dev->pci_dev; - ret = vtpci_init(pci_dev, hw, _flags); - if (ret) - return ret; + if (pci_dev) { + ret = vtpci_init(pci_dev, hw, _flags); + if (ret) + return ret; + } /* Reset the device although not necessary at startup */ vtpci_reset(hw); @@ -1163,7 +1164,8 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d hw->max_tx_queues=%d", hw->max_rx_queues, hw->max_tx_queues); - PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", + if (pci_dev) + PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); @@ -1442,7 +1444,10 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct virtio_hw *hw = dev->data->dev_private; - dev_info->driver_name = dev->driver->pci_drv.name; + if (dev->pci_dev) + 
dev_info->driver_name = dev->driver->pci_drv.name; + else + dev_info->driver_name = "virtio-user PMD"; dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues; dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues; dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h index 66423a0..284afaa 100644 --- a/drivers/net/virtio/virtio_ethdev.h +++ b/drivers/net/virtio/virtio_ethdev.h @@ -113,6 +113,8 @@ uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); + /* * The VIRTI
[dpdk-dev] [PATCH v5 6/8] virtio-user: add new virtual pci driver for virtio
This patch implements another new instance of struct virtio_pci_ops to drive the virtio-user virtual device. Instead of rd/wr ioport or PCI configuration space, this virtual pci driver will rd/wr the virtual device struct virtio_user_hw, and when necessary, invokes APIs provided by device emulation later to start/stop the device. -- | -- | | | virtio driver | |> (virtio_user_pci.c) | -- | | | | | -- | --> virtio-user PMD | | device emulate | | | || | | | vhost adapter | | | -- | -- | | | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/Makefile | 1 + drivers/net/virtio/virtio_pci.h | 1 + drivers/net/virtio/virtio_user/virtio_user_dev.h | 2 + drivers/net/virtio/virtio_user/virtio_user_pci.c | 218 +++ 4 files changed, 222 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/virtio_user_pci.c diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index 68068bd..13b2b75 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -60,6 +60,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_pci.c endif # this lib depends upon: diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index a76daf7..d10d013 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -260,6 +260,7 @@ struct virtio_hw { struct virtio_pci_common_cfg *common_cfg; struct virtio_net_config *dev_cfg; const struct virtio_pci_ops *vtpci_ops; + void*virtio_user_dev; }; /* diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h index 8ca0095..9ebe440 100644 --- 
a/drivers/net/virtio/virtio_user/virtio_user_dev.h +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -57,4 +57,6 @@ struct virtio_user_dev { int virtio_user_start_device(struct virtio_user_dev *dev); int virtio_user_stop_device(struct virtio_user_dev *dev); +const struct virtio_pci_ops vdev_ops; + #endif diff --git a/drivers/net/virtio/virtio_user/virtio_user_pci.c b/drivers/net/virtio/virtio_user/virtio_user_pci.c new file mode 100644 index 000..b56419b --- /dev/null +++ b/drivers/net/virtio/virtio_user/virtio_user_pci.c @@ -0,0 +1,218 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include "../virtio_logs.h" +#include "../virtio_pci.h" +#include "../virtqueue.h" +#include "virtio_user_dev.h" + +#define virtio_user_get_dev(hw) \ + ((struct virtio_user_dev *)(hw)->virtio_user_dev); + +static void +vdev_read_dev_config(struct virtio_hw *hw, uint64_t offset, +
[dpdk-dev] [PATCH v5 5/8] virtio-user: add device emulation layer APIs
Two device emulation layer APIs are added for virtio driver to call: - virtio_user_start_device() - virtio_user_stop_device() These APIs will get called by virtio driver, and they call vhost adapter layer APIs to implement the functionality. Besides, this patch defines a struct named virtio_user_dev to help manage the data stands for this kind of virtual device. -- | -- | | | virtio driver | | | -- | | | | | -- | --> virtio-user PMD | | device emulate |-|> (virtio_user_dev.c, virtio_user_dev.h) | || | | | vhost adapter | | | -- | -- | | | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/Makefile | 1 + drivers/net/virtio/virtio_user/virtio_user_dev.c | 168 +++ drivers/net/virtio/virtio_user/virtio_user_dev.h | 60 3 files changed, 229 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.h diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index c9f2bc0..68068bd 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -59,6 +59,7 @@ ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y) SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c endif # this lib depends upon: diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c new file mode 100644 index 000..41d8ad1 --- /dev/null +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -0,0 +1,168 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "../virtio_ethdev.h" + +static int +virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +{ + int callfd, kickfd; + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vring *vring = >vrings[queue_sel]; + struct vhost_vring_addr addr = { + .index = queue_sel, + .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, + .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, + .used_user_addr = (uint64_t)(uintptr_t)vring->used, + .log_guest_addr = 0, + .flags = 0, /* disable log */ + }; + + /* May use invalid flag, but some backend leverages kickfd and callfd as +* criteria to judge if dev is alive. so finally we use real event_fd. +*/ + callfd = eventfd(0, O_CLOEXEC | O_NONBLOCK); + if (callfd < 0) { + PMD_DRV_LOG(ERR, "callfd error, %s\n", strerror(e
[dpdk-dev] [PATCH v5 4/8] virtio-user: add vhost adapter layer
This patch is to provide vhost adapter layer implementations. Instead of relying on a hypervisor to translate between device emulation and vhost backend, here we directly talk with vhost backend through the vhost file. Depending on the type of vhost file, - vhost-user is used if the given path points to a unix socket; - vhost-kernel is used if the given path points to a char device. Here three main APIs are provided to upper layer (device emulation): - vhost_user_setup(), to set up env to talk to a vhost user backend; - vhost_kernel_setup(), to set up env to talk to a vhost kernel backend. - vhost_call(), to provide a unified interface to communicate with vhost backend. -- | -- | | | virtio driver | | | -- | | | | | -- | --> virtio-user PMD | | device emulate | | | || | | | vhost adapter |-|> (vhost_user.c, vhost_kernel.c, vhost.c) | -- | -- | | -- --> (vhost-user protocol or vhost-net ioctls) | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- config/common_linuxapp| 3 + drivers/net/virtio/Makefile | 6 + drivers/net/virtio/virtio_user/vhost.c| 105 +++ drivers/net/virtio/virtio_user/vhost.h| 222 +++ drivers/net/virtio/virtio_user/vhost_kernel.c | 254 + drivers/net/virtio/virtio_user/vhost_user.c | 378 ++ 6 files changed, 968 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/vhost.c create mode 100644 drivers/net/virtio/virtio_user/vhost.h create mode 100644 drivers/net/virtio/virtio_user/vhost_kernel.c create mode 100644 drivers/net/virtio/virtio_user/vhost_user.c diff --git a/config/common_linuxapp b/config/common_linuxapp index 7e698e2..946a6d4 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -43,3 +43,6 @@ CONFIG_RTE_LIBRTE_VHOST=y CONFIG_RTE_LIBRTE_PMD_VHOST=y CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y CONFIG_RTE_LIBRTE_POWER=y + +# Enable virtio-user +CONFIG_RTE_VIRTIO_VDEV=y diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index ef84f60..c9f2bc0 100644 --- 
a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -55,6 +55,12 @@ ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c endif +ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y) +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c +endif + # this lib depends upon: DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf diff --git a/drivers/net/virtio/virtio_user/vhost.c b/drivers/net/virtio/virtio_user/vhost.c new file mode 100644 index 000..1944a97 --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost.c @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED
[dpdk-dev] [PATCH v5 3/8] virtio: enable use virtual address to fill desc
This patch is related to how to calculate relative address for vhost backend. The principle is that: based on one or multiple shared memory regions, vhost maintains a reference system with the frontend start address, backend start address, and length for each segment, so that each frontend address (GPA, Guest Physical Address) can be translated into vhost-recognizable backend address. To make the address translation efficient, we need to maintain as few regions as possible. In the case of VM, GPA is always locally continuous. But for some other case, like virtio-user, we use virtual address here. It basically means: a. when set_base_addr, VA address is used; b. when preparing RX's descriptors, VA address is used; c. when transmitting packets, VA is filled in TX's descriptors; d. in TX and CQ's header, VA is used. Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- drivers/net/virtio/virtio_ethdev.c | 21 - drivers/net/virtio/virtio_rxtx.c| 5 ++--- drivers/net/virtio/virtio_rxtx_simple.c | 13 +++-- drivers/net/virtio/virtqueue.h | 13 - 4 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 781886d..1866afd 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -167,14 +167,14 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, * One RX packet for ACK. 
*/ vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr; + vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mem; vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); vq->vq_free_cnt--; i = vq->vq_ring.desc[head].next; for (k = 0; k < pkt_num; k++) { vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr) + sizeof(ctrl->status) + sizeof(uint8_t)*sum; vq->vq_ring.desc[i].len = dlen[k]; @@ -184,7 +184,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, } vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; - vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr); vq->vq_ring.desc[i].len = sizeof(ctrl->status); vq->vq_free_cnt--; @@ -419,8 +419,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, vq->virtio_net_hdr_mem = hdr_mz->phys_addr; memset(hdr_mz->addr, 0, hdr_mz_sz); - vring_hdr_desc_init(vq); - } else if (queue_type == VTNET_CQ) { /* Allocate a page for control vq command, data and status */ snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone", @@ -441,6 +439,19 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); } + if (dev->pci_dev) + vq->offset = offsetof(struct rte_mbuf, buf_physaddr); + else { + vq->vq_ring_mem = (phys_addr_t)vq->mz->addr; + vq->offset = offsetof(struct rte_mbuf, buf_addr); + if (vq->virtio_net_hdr_mz) + vq->virtio_net_hdr_mem = + (phys_addr_t)vq->virtio_net_hdr_mz->addr; + } + + if (queue_type == VTNET_TQ) + vring_hdr_desc_init(vq); + if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { PMD_INIT_LOG(ERR, "setup_queue failed"); virtio_dev_queue_release(vq); diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index f326222..5b0c3df 
100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -193,8 +193,7 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) start_dp = vq->vq_ring.desc; start_dp[idx].addr = - (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM - - hw->vtnet_hdr_size); + MBUF_DATA_DMA_ADDR(cookie, vq->offset) - hw->vtnet_hdr_size; start_dp[idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_WRITE; @@ -265,7 +264,7 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie, } do { - start_dp[idx
[dpdk-dev] [PATCH v5 2/8] virtio: clean up virtio_dev_queue_setup
Abstract vring hdr desc init as an inline method. Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_ethdev.c | 42 ++ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index a3031e4..781886d 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -278,6 +278,26 @@ virtio_dev_queue_release(struct virtqueue *vq) } } +static void +vring_hdr_desc_init(struct virtqueue *vq) +{ + int i; + struct virtio_tx_region *txr = vq->virtio_net_hdr_mz->addr; + + for (i = 0; i < vq->vq_nentries; i++) { + struct vring_desc *start_dp = txr[i].tx_indir; + + vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); + + /* first indirect descriptor is always the tx header */ + start_dp->addr = vq->virtio_net_hdr_mem + i * sizeof(*txr) + +offsetof(struct virtio_tx_region, tx_hdr); + + start_dp->len = vq->hw->vtnet_hdr_size; + start_dp->flags = VRING_DESC_F_NEXT; + } +} + int virtio_dev_queue_setup(struct rte_eth_dev *dev, int queue_type, uint16_t queue_idx, @@ -375,8 +395,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, if (queue_type == VTNET_TQ) { const struct rte_memzone *hdr_mz; - struct virtio_tx_region *txr; - unsigned int i; + size_t hdr_mz_sz = vq_size * sizeof(struct virtio_tx_region); /* * For each xmit packet, allocate a virtio_net_hdr @@ -385,7 +404,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone", dev->data->port_id, queue_idx); hdr_mz = rte_memzone_reserve_aligned(vq_name, -vq_size * sizeof(*txr), +hdr_mz_sz, socket_id, 0, RTE_CACHE_LINE_SIZE); if (hdr_mz == NULL) { @@ -399,21 +418,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, vq->virtio_net_hdr_mz = hdr_mz; vq->virtio_net_hdr_mem = hdr_mz->phys_addr; - txr = hdr_mz->addr; - memset(txr, 0, vq_size * sizeof(*txr)); - for (i = 0; i < vq_size; i++) { - struct vring_desc *start_dp = txr[i].tx_indir; 
- - vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); - - /* first indirect descriptor is always the tx header */ - start_dp->addr = vq->virtio_net_hdr_mem - + i * sizeof(*txr) - + offsetof(struct virtio_tx_region, tx_hdr); - - start_dp->len = vq->hw->vtnet_hdr_size; - start_dp->flags = VRING_DESC_F_NEXT; - } + memset(hdr_mz->addr, 0, hdr_mz_sz); + vring_hdr_desc_init(vq); } else if (queue_type == VTNET_CQ) { /* Allocate a page for control vq command, data and status */ -- 2.1.4
[dpdk-dev] [PATCH v5 1/8] virtio: hide phys addr check inside pci ops
This patch is to move phys addr check from virtio_dev_queue_setup to pci ops. To makt that happen, make sure virtio_ops.setup_queue return the result if we pass through the check. Signed-off-by: Jianfeng Tan Signed-off-by: Huawei Xie Acked-by: Yuanhan Liu --- drivers/net/virtio/virtio_ethdev.c | 17 + drivers/net/virtio/virtio_pci.c| 30 -- drivers/net/virtio/virtio_pci.h| 2 +- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index c3fb628..a3031e4 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -364,17 +364,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } } - /* -* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, -* and only accepts 32 bit page frame number. -* Check if the allocated physical memory exceeds 16TB. -*/ - if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { - PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); - virtio_dev_queue_release(vq); - return -ENOMEM; - } - memset(mz->addr, 0, sizeof(mz->len)); vq->mz = mz; vq->vq_ring_mem = mz->phys_addr; @@ -446,7 +435,11 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); } - hw->vtpci_ops->setup_queue(hw, vq); + if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { + PMD_INIT_LOG(ERR, "setup_queue failed"); + virtio_dev_queue_release(vq); + return -EINVAL; + } vq->configured = 1; *pvq = vq; diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c index 9cdca06..6bd239c 100644 --- a/drivers/net/virtio/virtio_pci.c +++ b/drivers/net/virtio/virtio_pci.c @@ -55,6 +55,22 @@ */ #define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20) +static inline int +check_vq_phys_addr_ok(struct virtqueue *vq) +{ + /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, +* and only accepts 32 bit page frame number. 
+* Check if the allocated physical memory exceeds 16TB. +*/ + if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >> + (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); + return 0; + } + + return 1; +} + static void legacy_read_dev_config(struct virtio_hw *hw, size_t offset, void *dst, int length) @@ -143,15 +159,20 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return dst; } -static void +static int legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint32_t src; + if (!check_vq_phys_addr_ok(vq)) + return -1; + rte_eal_pci_ioport_write(>io, >vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); src = vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; rte_eal_pci_ioport_write(>io, , 4, VIRTIO_PCI_QUEUE_PFN); + + return 0; } static void @@ -367,12 +388,15 @@ modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return io_read16(>common_cfg->queue_size); } -static void +static int modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint64_t desc_addr, avail_addr, used_addr; uint16_t notify_off; + if (!check_vq_phys_addr_ok(vq)) + return -1; + desc_addr = vq->mz->phys_addr; avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, @@ -400,6 +424,8 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr); PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)", vq->notify_addr, notify_off); + + return 0; } static void diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index 554efea..a76daf7 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -234,7 +234,7 @@ struct virtio_pci_ops { uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); - void (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); 
+ int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq); }; -- 2.1.4
[dpdk-dev] [PATCH v5 0/8] virtio support for container
packets. Known issues: - Control queue and multi-queue are not supported yet. - Cannot work with --huge-unlink. - Cannot work with no-huge. - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. - Root privilege is a must (mainly because of sorting hugepages according to physical address). - Applications should not use file names like HUGEFILE_FMT ("%smap_%d"). How to use? a. Apply this patchset. b. To compile container apps: $: make config RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make install RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make -C examples/l2fwd RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc $: make -C examples/vhost RTE_SDK=`pwd` T=x86_64-native-linuxapp-gcc c. To build a docker image using the Dockerfile below: $: cat ./Dockerfile FROM ubuntu:latest WORKDIR /usr/src/dpdk COPY . /usr/src/dpdk ENV PATH "$PATH:/usr/src/dpdk/examples/l2fwd/build/" $: docker build -t dpdk-app-l2fwd . d. Used with vhost-user $: ./examples/vhost/build/vhost-switch -c 3 -n 4 \ --socket-mem 1024,1024 -- -p 0x1 --stats 1 $: docker run -i -t -v :/var/run/usvhost \ -v /dev/hugepages:/dev/hugepages \ dpdk-app-l2fwd l2fwd -c 0x4 -n 4 -m 1024 --no-pci \ --vdev=virtio-user0,path=/var/run/usvhost -- -p 0x1 e. Used with vhost-net $: modprobe vhost $: modprobe vhost-net $: docker run -i -t --privileged \ -v /dev/vhost-net:/dev/vhost-net \ -v /dev/net/tun:/dev/net/tun \ -v /dev/hugepages:/dev/hugepages \ dpdk-app-l2fwd l2fwd -c 0x4 -n 4 -m 1024 --no-pci \ --vdev=virtio-user0,path=/dev/vhost-net -- -p 0x1 By the way, it's not necessary to run in a container. 
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Jianfeng Tan (8): virtio: hide phys addr check inside pci ops virtio: clean up virtio_dev_queue_setup virtio: enable use virtual address to fill desc virtio-user: add vhost adapter layer virtio-user: add device emulation layer APIs virtio-user: add new virtual pci driver for virtio virtio-user: add a new vdev named virtio-user doc: update doc for virtio-user config/common_linuxapp | 3 + doc/guides/rel_notes/release_16_07.rst | 4 + drivers/net/virtio/Makefile | 8 + drivers/net/virtio/virtio_ethdev.c | 97 +++-- drivers/net/virtio/virtio_ethdev.h | 2 + drivers/net/virtio/virtio_pci.c | 30 +- drivers/net/virtio/virtio_pci.h | 3 +- drivers/net/virtio/virtio_rxtx.c | 5 +- drivers/net/virtio/virtio_rxtx_simple.c | 13 +- drivers/net/virtio/virtio_user/vhost.c | 105 + drivers/net/virtio/virtio_user/vhost.h | 222 +++ drivers/net/virtio/virtio_user/vhost_kernel.c| 254 drivers/net/virtio/virtio_user/vhost_user.c | 378 ++ drivers/net/virtio/virtio_user/virtio_user_dev.c | 477 +++ drivers/net/virtio/virtio_user/virtio_user_dev.h | 62 +++ drivers/net/virtio/virtio_user/virtio_user_pci.c | 218 +++ drivers/net/virtio/virtqueue.h | 13 +- 17 files changed, 1840 insertions(+), 54 deletions(-) create mode 100644 drivers/net/virtio/virtio_user/vhost.c create mode 100644 drivers/net/virtio/virtio_user/vhost.h create mode 100644 drivers/net/virtio/virtio_user/vhost_kernel.c create mode 100644 drivers/net/virtio/virtio_user/vhost_user.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.h create mode 100644 drivers/net/virtio/virtio_user/virtio_user_pci.c -- 2.1.4
[dpdk-dev] [PATCH v4] eal: make hugetlb initialization more robust
This patch adds an option, --huge-trybest, which uses a recovery mechanism for the case where fewer hugepages can actually be used than are declared in sysfs. It relies on a memory access to fault in hugepages and, if that fails with SIGBUS, recovers to the previously saved stack environment with siglongjmp(). Besides, this solution fixes an issue when hugetlbfs is specified with an option of size. Currently DPDK does not respect the quota of a hugetlbfs mount. It fails to init the EAL because it tries to map the number of free hugepages in the system rather than using the number specified in the quota for that mount. It's still an open issue with CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS. In this case (such as IVSHMEM target), having hugetlbfs mounts with quota will fail to remap hugepages as it relies on having mapped all free hugepages in the system. Test example: a. cgcreate -g hugetlb:/test-subgroup b. cgset -r hugetlb.1GB.limit_in_bytes=2147483648 test-subgroup c. cgexec -g hugetlb:test-subgroup \ ./examples/helloworld/build/helloworld -c 0x2 -n 4 --huge-trybest Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- v4: - Change map_all_hugepages to return unsigned instead of int. v3: - Reword commit message to include it fixes the hugetlbfs quota issue. - setjmp -> sigsetjmp. - Fix RTE_LOG complaint from ERR to DEBUG as it does not mean init error so far. - Fix the second map_all_hugepages's return value check. v2: - Address the compiling error by moving setjmp into a wrapper method. 
lib/librte_eal/common/eal_common_options.c | 4 + lib/librte_eal/common/eal_internal_cfg.h | 1 + lib/librte_eal/common/eal_options.h| 2 + lib/librte_eal/linuxapp/eal/eal.c | 1 + lib/librte_eal/linuxapp/eal/eal_memory.c | 118 + 5 files changed, 112 insertions(+), 14 deletions(-) diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 3efc90f..e9a111d 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -95,6 +95,7 @@ eal_long_options[] = { {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM}, {OPT_VMWARE_TSC_MAP,0, NULL, OPT_VMWARE_TSC_MAP_NUM }, {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM }, + {OPT_HUGE_TRYBEST, 0, NULL, OPT_HUGE_TRYBEST_NUM }, {0, 0, NULL, 0} }; @@ -899,6 +900,9 @@ eal_parse_common_option(int opt, const char *optarg, return -1; } break; + case OPT_HUGE_TRYBEST_NUM: + internal_config.huge_trybest = 1; + break; /* don't know what to do, leave this to caller */ default: diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 5f1367e..90a3533 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -64,6 +64,7 @@ struct internal_config { volatile unsigned force_nchannel; /**< force number of channels */ volatile unsigned force_nrank;/**< force number of ranks */ volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */ + volatile unsigned huge_trybest; /**< try best to allocate hugepages */ unsigned hugepage_unlink; /**< true to unlink backing files */ volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/ volatile unsigned no_pci; /**< true to disable PCI */ diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index a881c62..02397c5 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -83,6 +83,8 @@ enum { OPT_VMWARE_TSC_MAP_NUM, #define OPT_XEN_DOM0 "xen-dom0" 
OPT_XEN_DOM0_NUM, +#define OPT_HUGE_TRYBEST "huge-trybest" + OPT_HUGE_TRYBEST_NUM, OPT_LONG_MAX_NUM }; diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 8aafd51..eeb1d4e 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -343,6 +343,7 @@ eal_usage(const char *prgname) " --"OPT_CREATE_UIO_DEV"Create /dev/uioX (usually done by hotplug)\n" " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n" + " --"OPT_HUGE_TRYBEST" Try best to accommodate hugepages\n" "\n"); /* Allow the application to print its usage message too if hook is set */ if ( rte_application_usage_hook ) { diff --git a/lib/librte_eal/linuxapp/eal/eal_memor
[dpdk-dev] [PATCH v3] eal: make hugetlb initialization more robust
This patch adds an option, --huge-trybest, which uses a recovery mechanism for the case where fewer hugepages can actually be used than are declared in sysfs. It relies on a memory access to fault in hugepages and, if that fails with SIGBUS, recovers to the previously saved stack environment with siglongjmp(). Besides, this solution fixes an issue when hugetlbfs is specified with an option of size. Currently DPDK does not respect the quota of a hugetlbfs mount. It fails to init the EAL because it tries to map the number of free hugepages in the system rather than using the number specified in the quota for that mount. It's still an open issue with CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS. In this case (such as IVSHMEM target), having hugetlbfs mounts with quota will fail to remap hugepages as it relies on having mapped all free hugepages in the system. Test example: a. cgcreate -g hugetlb:/test-subgroup b. cgset -r hugetlb.1GB.limit_in_bytes=2147483648 test-subgroup c. cgexec -g hugetlb:test-subgroup \ ./examples/helloworld/build/helloworld -c 0x2 -n 4 --huge-trybest Signed-off-by: Jianfeng Tan Acked-by: Neil Horman --- v3: - Reword commit message to include it fixes the hugetlbfs quota issue. - setjmp -> sigsetjmp. - Fix RTE_LOG complaint from ERR to DEBUG as it does not mean init error so far. - Fix the second map_all_hugepages's return value check. v2: - Address the compiling error by moving setjmp into a wrapper method. 
lib/librte_eal/common/eal_common_options.c | 4 + lib/librte_eal/common/eal_internal_cfg.h | 1 + lib/librte_eal/common/eal_options.h| 2 + lib/librte_eal/linuxapp/eal/eal.c | 1 + lib/librte_eal/linuxapp/eal/eal_memory.c | 115 + 5 files changed, 110 insertions(+), 13 deletions(-) diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 3efc90f..e9a111d 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -95,6 +95,7 @@ eal_long_options[] = { {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM}, {OPT_VMWARE_TSC_MAP,0, NULL, OPT_VMWARE_TSC_MAP_NUM }, {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM }, + {OPT_HUGE_TRYBEST, 0, NULL, OPT_HUGE_TRYBEST_NUM }, {0, 0, NULL, 0} }; @@ -899,6 +900,9 @@ eal_parse_common_option(int opt, const char *optarg, return -1; } break; + case OPT_HUGE_TRYBEST_NUM: + internal_config.huge_trybest = 1; + break; /* don't know what to do, leave this to caller */ default: diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 5f1367e..90a3533 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -64,6 +64,7 @@ struct internal_config { volatile unsigned force_nchannel; /**< force number of channels */ volatile unsigned force_nrank;/**< force number of ranks */ volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */ + volatile unsigned huge_trybest; /**< try best to allocate hugepages */ unsigned hugepage_unlink; /**< true to unlink backing files */ volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/ volatile unsigned no_pci; /**< true to disable PCI */ diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index a881c62..02397c5 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -83,6 +83,8 @@ enum { OPT_VMWARE_TSC_MAP_NUM, #define OPT_XEN_DOM0 "xen-dom0" 
OPT_XEN_DOM0_NUM, +#define OPT_HUGE_TRYBEST "huge-trybest" + OPT_HUGE_TRYBEST_NUM, OPT_LONG_MAX_NUM }; diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 8aafd51..eeb1d4e 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -343,6 +343,7 @@ eal_usage(const char *prgname) " --"OPT_CREATE_UIO_DEV"Create /dev/uioX (usually done by hotplug)\n" " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n" + " --"OPT_HUGE_TRYBEST" Try best to accommodate hugepages\n" "\n"); /* Allow the application to print its usage message too if hook is set */ if ( rte_application_usage_hook ) { diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 5b9132c..cb0df76 10064
[dpdk-dev] [PATCH 3/3] virtio-user: add mq in virtual pci driver
Partially implement ctrl-queue to handle control command with class of VIRTIO_NET_CTRL_MQ and with cmd of VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET to handle mq support. After filling the command into ctrl-queue, we dequeue it when notify_queue(), and invoke method from device emulation to enable/disable queues. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/virtio_user_pci.c | 89 +++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_pci.c b/drivers/net/virtio/virtio_user/virtio_user_pci.c index 873e619..aa02c60 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_pci.c +++ b/drivers/net/virtio/virtio_user/virtio_user_pci.c @@ -38,6 +38,7 @@ #include "../virtio_logs.h" #include "../virtio_pci.h" #include "../virtqueue.h" +#include "../virtio_ring.h" #include "virtio_user_dev.h" static void @@ -157,8 +158,10 @@ vdev_setup_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq) if (vq->virtio_net_hdr_mz) { vq->virtio_net_hdr_mem = (phys_addr_t)vq->virtio_net_hdr_mz->addr; - /* Do it one more time after we reset virtio_net_hdr_mem */ - vring_hdr_desc_init(vq); + + /* Do it again after we reset virtio_net_hdr_mem for tx */ + if ((vq->vq_queue_index % VTNET_CQ) == VTNET_TQ) + vring_hdr_desc_init(vq); } vq->offset = offsetof(struct rte_mbuf, buf_addr); return 0; @@ -182,11 +185,93 @@ vdev_del_queue(struct virtio_hw *hw, struct virtqueue *vq) close(uhw->kickfds[vq->vq_queue_index]); } +static uint8_t +handle_mq(struct virtqueue *vq, uint16_t queues) +{ + struct virtio_hw *hw = vq->hw; + struct virtio_user_hw *uhw = (struct virtio_user_hw *)hw->vdev_private; + uint32_t i; + uint8_t ret = 0; + + if (queues > uhw->max_queue_pairs) { + PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported", +queues, uhw->max_queue_pairs); + return -1; + } + + for (i = 0; i < queues; ++i) + ret |= virtio_user_enable_queue_pair(uhw, i, 1); + for (i = queues; i < uhw->max_queue_pairs; ++i) + ret |= 
virtio_user_enable_queue_pair(uhw, i, 0); + + return ret; +} + +static uint32_t +handle_ctrl(struct virtqueue *vq, uint16_t desc_idx_hdr) +{ + struct virtio_net_ctrl_hdr *hdr; + virtio_net_ctrl_ack status = ~0; + uint16_t i, desc_idx_data, desc_idx_status; + uint32_t num_of_descs = 0; + + /* locate desc for header, data, and status */ + desc_idx_data = vq->vq_ring.desc[desc_idx_hdr].next; + num_of_descs++; + + + i = desc_idx_data; + while (vq->vq_ring.desc[i].flags == VRING_DESC_F_NEXT) { + i = vq->vq_ring.desc[i].next; + num_of_descs++; + } + + /* locate desc for status */ + desc_idx_status = i; + num_of_descs++; + + hdr = (struct virtio_net_ctrl_hdr *)vq->vq_ring.desc[desc_idx_hdr].addr; + if (hdr->class == VIRTIO_NET_CTRL_MQ && + hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { + uint16_t queues; + + queues = *(uint16_t *)vq->vq_ring.desc[desc_idx_data].addr; + status = handle_mq(vq, queues); + } + + /* Update status */ + *(virtio_net_ctrl_ack *)vq->vq_ring.desc[desc_idx_status].addr = status; + + return num_of_descs; +} + static void vdev_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint64_t buf = 1; struct virtio_user_hw *uhw = (struct virtio_user_hw *)hw->vdev_private; + uint16_t avail_idx, desc_idx; + struct vring_used_elem *uep; + uint32_t num_of_descs; + + if (vq == hw->cvq) { + /* Consume avail ring, using used ring idx as first one */ + while (vq->vq_ring.used->idx != vq->vq_ring.avail->idx) { + avail_idx = (vq->vq_ring.used->idx) & + (vq->vq_nentries - 1); + desc_idx = vq->vq_ring.avail->ring[avail_idx]; + + num_of_descs = handle_ctrl(vq, desc_idx); + + /* Update used ring */ + uep = >vq_ring.used->ring[avail_idx]; + uep->id = avail_idx; + uep->len = num_of_descs; + + vq->vq_ring.used->idx++; + } + return; + } if (write(uhw->kickfds[vq->vq_queue_index], , sizeof(buf)) < 0) PMD_DRV_LOG(ERR, "failed to kick backend: %s\n", strerror(errno)); -- 2.1.4
[dpdk-dev] [PATCH 2/3] virtio-user: add mq in device emulation
Multi-queue requires VIRTIO_NET_F_MQ and VIRTIO_NET_F_CTRL_VQ in feature negotiation. Mainly two changes in virtio-user device emulation layer. - Multi-queue requires ctrl-queue. So ctrl-queue will be enabled automatically when multi-queue is specified. - Provide a method virtio_user_enable_queue_pair() for virtio-user driver to enable/disable queues. Note: Multiple queues are not supported for the vhost kernel backend. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/virtio_user_dev.c | 65 drivers/net/virtio/virtio_user/virtio_user_dev.h | 2 + 2 files changed, 58 insertions(+), 9 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index a8e58c0..ea0d4c4 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -136,11 +136,14 @@ virtio_user_start_device(struct virtio_user_hw *hw) } } - /* After setup all virtqueues, we need to set_features so that -* these features can be set into each virtqueue in vhost side. -* And before that, make sure VIRTIO_NET_F_MAC is stripped. + /* After setup all virtqueues, we need to set_features so that these +* features can be set into each virtqueue in vhost side. And before +* that, make sure VHOST_USER_F_PROTOCOL_FEATURES is added if mq is +* enabled, and VIRTIO_NET_F_MAC is stripped. 
*/ features = hw->features; + if (hw->type == VHOST_USER && hw->max_queue_pairs > 1) + features |= VHOST_USER_MQ; features &= ~(1ull << VIRTIO_NET_F_MAC); ret = vhost_call(hw->vhostfd, hw->type, VHOST_MSG_SET_FEATURES, ); @@ -161,6 +164,18 @@ error: return -1; } +int +virtio_user_enable_queue_pair(struct virtio_user_hw *hw, + unsigned pair_idx, int enable) +{ + int r = -1; + + if (hw->type == VHOST_USER) + r = vhost_user_enable_queue_pair(hw->vhostfd, pair_idx, enable); + + return r; +} + int virtio_user_stop_device(struct virtio_user_hw *hw) { return vhost_call(hw->vhostfd, hw->type, VHOST_MSG_RESET_OWNER, NULL); @@ -188,7 +203,7 @@ static inline void parse_mac(struct virtio_user_hw *hw, const char *mac) static int virtio_vdev_init(struct rte_eth_dev_data *data, char *path, -int queues, int nb_cq __rte_unused, +int queues, int enable_ctrl_q, int queue_size, const char *mac, char *ifname) { struct stat s; @@ -204,8 +219,6 @@ virtio_vdev_init(struct rte_eth_dev_data *data, char *path, uhw->vhostfd = -1; uhw->tapfd = -1; - /* TODO: cq */ - if (stat(uhw->path, ) < 0) { PMD_INIT_LOG(ERR, "stat: %s failed, %s", uhw->path, strerror(errno)); @@ -243,9 +256,36 @@ virtio_vdev_init(struct rte_eth_dev_data *data, char *path, } if (uhw->mac_specified) uhw->features |= (1ull << VIRTIO_NET_F_MAC); - /* disable it until we support CQ */ - uhw->features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); - uhw->features &= ~(1ull << VIRTIO_NET_F_CTRL_RX); + + if (!enable_ctrl_q) { + uhw->features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); + /* Also disable features depends on VIRTIO_NET_F_CTRL_VQ */ + uhw->features &= ~(1ull << VIRTIO_NET_F_CTRL_RX); + uhw->features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN); + uhw->features &= ~(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE); + uhw->features &= ~(1ull << VIRTIO_NET_F_MQ); + uhw->features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR); + } else { + /* vhost user backend does not need to know ctrl-q, so +* actually we need add this bit into features. 
However, +* DPDK vhost-user does send features with this bit, so we +* check it instead of OR it for now. +*/ + if (!(uhw->features & (1ull << VIRTIO_NET_F_CTRL_VQ))) + PMD_INIT_LOG(INFO, "vhost does not support ctrl-q"); + } + + if (uhw->max_queue_pairs > 1) { + if (uhw->type == VHOST_KERNEL) { + PMD_INIT_LOG(ERR, "MQ not supported for vhost kernel"); + return -1; + } + + if (!(uhw->features & VHOST_USER_MQ)) { + PMD_INIT_LOG(ERR, "MQ not supported by the backend"); + return -1; + } + } return 0; @@ -411,6 +451,13 @@ rte_v
[dpdk-dev] [PATCH 1/3] virtio-user: add mq in vhost user adapter
This patch mainly adds method in vhost user adapter to communicate enable/disable queues messages with vhost user backend. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_user/vhost.h | 4 drivers/net/virtio/virtio_user/vhost_user.c | 21 + 2 files changed, 25 insertions(+) diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h index 5cd3543..fe236a7 100644 --- a/drivers/net/virtio/virtio_user/vhost.h +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -209,8 +209,12 @@ enum { #define VHOST_KERNEL 0 #define VHOST_USER 1 +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VHOST_USER_MQ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) + int vhost_user_sock(int vhostfd, unsigned long int req, void *arg); int vhost_user_setup(const char *path); +int vhost_user_enable_queue_pair(int vhostfd, unsigned pair_idx, int enable); int vhost_kernel_ioctl(int vhostfd, unsigned long int req, void *arg); int vhost_kernel_setup(const char *path, const char *ifname, int *p_tapfd); diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c index 6fd648c..d8f7996 100644 --- a/drivers/net/virtio/virtio_user/vhost_user.c +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -263,6 +263,7 @@ vhost_user_sock(int vhostfd, unsigned long int req, void *arg) case VHOST_USER_SET_VRING_NUM: case VHOST_USER_SET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: memcpy(, arg, sizeof(msg.payload.state)); msg.size = sizeof(m.payload.state); break; @@ -373,3 +374,23 @@ vhost_user_setup(const char *path) return fd; } + +int +vhost_user_enable_queue_pair(int vhostfd, unsigned pair_idx, int enable) +{ + int i; + + for (i = 0; i < 2; ++i) { + struct vhost_vring_state state = { + .index = pair_idx * 2 + i, + .num = enable, + }; + + if (vhost_user_sock(vhostfd, + VHOST_USER_SET_VRING_ENABLE, )) + return -1; + } + + return 0; + +} -- 2.1.4
[dpdk-dev] [PATCH 0/3] add multi queue support for virtio-user
This patch set depends on below patch sets: - http://dpdk.org/ml/archives/dev/2016-April/038111.html - http://dpdk.org/ml/archives/dev/2016-April/038118.html - http://dpdk.org/ml/archives/dev/2016-April/038121.html Add multi queue support for virtio-user virtual port. Patch 1 adds vhost user adapter communications for enable/disable queues. Patch 2 adds features check for multi queue and provides a method for virtio-user driver to enable/disable queues. Patch 3 partially implements ctrl-q to handle VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET command from PMD. Test case: 1. start testpmd with a vhost-user port: $ TESTPMD -c 0x7 -n 4 --socket-mem 1024,0 --no-pci \ --vdev 'eth_vhost0,iface=/tmp/sock0,queues=2' \ -- -i --rxq=2 --txq=2 --nb-cores=2 2. start testpmd with a virtio-user port: $ TESTPMD -c 0x70 -n 4 --socket-mem 1024,0 --no-pci --file-prefix=testpmd \ --vdev=virtio-user0,mac=00:01:02:03:04:05,path=/tmp/sock0,queues=2 \ -- -i --rxq=2 --txq=2 --nb-cores=2 --txqflags=0xf01 --disable-hw-vlan 3. use below commands to see if all queues are working: testpmd> show port xstats all Jianfeng Tan (3): virtio-user: add mq in vhost user adapter virtio-user: add mq in device emulation virtio-user: add mq in virtual pci driver drivers/net/virtio/virtio_user/vhost.h | 4 ++ drivers/net/virtio/virtio_user/vhost_user.c | 21 ++ drivers/net/virtio/virtio_user/virtio_user_dev.c | 65 ++--- drivers/net/virtio/virtio_user/virtio_user_dev.h | 2 + drivers/net/virtio/virtio_user/virtio_user_pci.c | 89 +++- 5 files changed, 170 insertions(+), 11 deletions(-) -- 2.1.4
[dpdk-dev] [PATCH v4 8/8] doc: update doc for virtio-user
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-By: Neil Horman --- doc/guides/nics/overview.rst | 64 +- doc/guides/rel_notes/release_16_07.rst | 4 +++ 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/doc/guides/nics/overview.rst b/doc/guides/nics/overview.rst index f08039e..92e7468 100644 --- a/doc/guides/nics/overview.rst +++ b/doc/guides/nics/overview.rst @@ -74,40 +74,40 @@ Most of these differences are summarized below. .. table:: Features availability in networking drivers - = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - Feature a b b b c e e e i i i i i i i i i i f f f f m m m n n p r s v v v v x -f n n o x 1 n n 4 4 4 4 g g x x x x m m m m l l p f u c i z h i i m e -p x x n g 0 a i 0 0 0 0 b b g g g g 1 1 1 1 x x i p l a n e o r r x n -a 2 2 d b 0 c e e e e v b b b b 0 0 0 0 4 5 p l p g d s t t n v -c x x i e 0 . v v f e e e e k k k k e a t i i e i -k v n . f f . v v . v v t o o t r -e f g . . . f f . f f a . 3 t -t v v v v v v 2 v - e e e e e e e - c c c c c c c - = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + Feature a b b b c e e e i i i i i i i i i i f f f f m m m n n p r s v v v v v x +f n n o x 1 n n 4 4 4 4 g g x x x x m m m m l l p f u c i z h i i i m e +p x x n g 0 a i 0 0 0 0 b b g g g g 1 1 1 1 x x i p l a n e o r r r x n +a 2 2 d b 0 c e e e e v b b b b 0 0 0 0 4 5 p l p g d s t t t n v +c x x i e 0 . v v f e e e e k k k k e a t i i i e i +k v n . f f . v v . v v t o o o t r +e f g . . . f f . f f a . 
u 3 t +t v v v v v v 2 v s + e e e e e e e e + c c c c c c c r + = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = Speed capabilities - Link statusY Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y + Link statusY Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Link status event Y Y Y Y Y Y Y Y Y Y Y Queue status event Y Rx interrupt Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y - Queue start/stop Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y + Queue start/stop Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y MTU update Y Y Y Y Y Y Y Y Y Y Jumbo frame Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y - Scattered Rx Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y + Scattered Rx Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y LRO Y Y Y Y TSO Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y - Promiscuous mode Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y - Allmulticast modeY Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y - Unicast MAC filter Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y - Multicast MAC filter Y Y Y Y Y Y Y Y Y Y Y Y Y + Promiscuous mode Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y + Allmulticast modeY Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y + Unicast MAC filter Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y + Multicast MAC filter Y Y Y Y Y Y Y Y Y Y Y Y Y Y RSS hash Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y RSS key update Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y RSS reta update Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y VMDq Y Y Y Y Y
[dpdk-dev] [PATCH v4 7/8] virtio-user: add a new virtual device named virtio-user
Add a new virtual device named virtio-user, which can be used just like eth_ring, eth_null, etc. To reuse the code of the original virtio driver, we make some adjustments in virtio_ethdev.c, such as removing the _static_ keyword from eth_virtio_dev_init() so that it can be reused by the virtual device; and we add some checks to make sure it will not crash. Configured parameters include: - queues (optional, 1 by default), number of rx, multi-queue not supported for now. - cq (optional, 0 by default), not supported for now. - mac (optional), random value will be given if not specified. - queue_size (optional, 256 by default), size of virtqueues. - path (mandatory), path of vhost, depends on the file type, vhost user if the given path points to a unix socket; vhost-net if the given path points to a char device. - ifname (optional), specify the name of backend tap device; only valid when backend is vhost-net. When CONFIG_RTE_VIRTIO_VDEV is enabled (enabled by default), the compiled library can be used in both VM and container environment. Examples: path_vhost=/dev/vhost-net # use vhost-net as a backend path_vhost= # use vhost-user as a backend sudo ./examples/l2fwd/build/l2fwd -c 0x10 -n 4 \ --socket-mem 0,1024 --no-pci --file-prefix=l2fwd \ --vdev=virtio-user0,mac=00:01:02:03:04:05,path=$path_vhost -- -p 0x1 Known issues: - Control queue and multi-queue are not supported yet. - Cannot work with --huge-unlink. - Cannot work with no-huge. - Cannot work when there are more than VHOST_MEMORY_MAX_NREGIONS(8) hugepages. - Root privilege is a must (mainly because of sorting hugepages according to physical address). - Applications should not use file name like HUGEFILE_FMT ("%smap_%d"). 
Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-By: Neil Horman --- drivers/net/virtio/virtio_ethdev.c | 19 +- drivers/net/virtio/virtio_ethdev.h | 2 + drivers/net/virtio/virtio_user/virtio_user_dev.c | 307 +++ 3 files changed, 321 insertions(+), 7 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 16b324d..54462a3 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -59,7 +59,6 @@ #include "virtqueue.h" #include "virtio_rxtx.h" -static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); static int virtio_dev_configure(struct rte_eth_dev *dev); static int virtio_dev_start(struct rte_eth_dev *dev); @@ -1017,7 +1016,7 @@ rx_func_get(struct rte_eth_dev *eth_dev) * This function is based on probe() function in virtio_pci.c * It returns 0 on success. */ -static int +int eth_virtio_dev_init(struct rte_eth_dev *eth_dev) { struct virtio_hw *hw = eth_dev->data->dev_private; @@ -1048,9 +1047,11 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) pci_dev = eth_dev->pci_dev; - ret = vtpci_init(pci_dev, hw, _flags); - if (ret) - return ret; + if (pci_dev) { + ret = vtpci_init(pci_dev, hw, _flags); + if (ret) + return ret; + } /* Reset the device although not necessary at startup */ vtpci_reset(hw); @@ -1147,7 +1148,8 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d hw->max_tx_queues=%d", hw->max_rx_queues, hw->max_tx_queues); - PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", + if (pci_dev) + PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); @@ -1426,7 +1428,10 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct virtio_hw *hw = dev->data->dev_private; - dev_info->driver_name = dev->driver->pci_drv.name; + if (dev->pci_dev) + 
dev_info->driver_name = dev->driver->pci_drv.name; + else + dev_info->driver_name = "virtio-user PMD"; dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues; dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues; dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h index 66423a0..284afaa 100644 --- a/drivers/net/virtio/virtio_ethdev.h +++ b/drivers/net/virtio/virtio_ethdev.h @@ -113,6 +113,8 @@ uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); + /* * The VIRTIO_NE
[dpdk-dev] [PATCH v4 6/8] virtio-user: add new virtual pci driver for virtio
This patch implements another new instance of struct virtio_pci_ops to drive the virtio-user virtual device. Instead of rd/wr ioport or PCI configuration space, this virtual pci driver will rd/wr the virtual device struct virtio_user_hw, and when necessary, invokes APIs provided by device emulation layer to start/stop the device. -- | -- | | | virtio driver | |> (virtio_user_pci.c) | -- | | | | | -- | --> virtio-user PMD | | device emulate | | | || | | | vhost adapter | | | -- | -- | | | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-By: Neil Horman --- drivers/net/virtio/Makefile | 1 + drivers/net/virtio/virtio_user/virtio_user_dev.h | 2 + drivers/net/virtio/virtio_user/virtio_user_pci.c | 209 +++ 3 files changed, 212 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/virtio_user_pci.c diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index 68068bd..13b2b75 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -60,6 +60,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_pci.c endif # this lib depends upon: diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h index 76250f0..bc4dc1a 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -56,4 +56,6 @@ struct virtio_user_hw { int virtio_user_start_device(struct virtio_user_hw *hw); int virtio_user_stop_device(struct virtio_user_hw *hw); +const struct virtio_pci_ops vdev_ops; + #endif diff --git a/drivers/net/virtio/virtio_user/virtio_user_pci.c b/drivers/net/virtio/virtio_user/virtio_user_pci.c new file mode 100644 index 
000..60351d9 --- /dev/null +++ b/drivers/net/virtio/virtio_user/virtio_user_pci.c @@ -0,0 +1,209 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#include "../virtio_logs.h" +#include "../virtio_pci.h" +#include "../virtqueue.h" +#include "virtio_user_dev.h" + +static void +vdev_read_dev_config(struct virtio_hw *hw, uint64_t offset, +void *dst, int length) +{ + int i; + struct virtio_user_hw *uhw = (struct virtio_user_hw *)hw->vdev_private; + + if (offset == offsetof(struct virtio_net_config, mac) && + length == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + ((uint8_t *)dst)[i] = uhw->mac_addr[i]; + return; + } + + if (offset == offsetof(struct virtio_net_config, status)) + *(uint16_t *)dst = uhw->status; + + if (offset == offsetof(stru
[dpdk-dev] [PATCH v4 5/8] virtio-user: add device emulation layer APIs
Two device emulation layer APIs are added for the virtio driver to call: - virtio_user_start_device() - virtio_user_stop_device() These APIs are called by the virtio driver, and they in turn call the vhost adapter layer APIs to implement the functionality. In addition, this patch defines a struct named virtio_user_hw to hold the data that represents this kind of virtual device. -- | -- | | | virtio driver | | | -- | | | | | -- | --> virtio-user PMD | | device emulate |-|> (virtio_user_dev.c, virtio_user_dev.h) | || | | | vhost adapter | | | -- | -- | | | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-By: Neil Horman --- drivers/net/virtio/Makefile | 1 + drivers/net/virtio/virtio_user/virtio_user_dev.c | 168 +++ drivers/net/virtio/virtio_user/virtio_user_dev.h | 59 3 files changed, 228 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.h diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index c9f2bc0..68068bd 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -59,6 +59,7 @@ ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y) SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c endif # this lib depends upon: diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c new file mode 100644 index 000..81f7f03 --- /dev/null +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -0,0 +1,168 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "../virtio_ethdev.h" + +static int +kick_one_vq(struct virtio_user_hw *hw, struct virtqueue *vq, + unsigned queue_sel) +{ + int callfd, kickfd; + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vhost_vring_addr addr = { + .index = queue_sel, + .desc_user_addr = (uint64_t)(uintptr_t)vq->vq_ring.desc, + .avail_user_addr = (uint64_t)(uintptr_t)vq->vq_ring.avail, + .used_user_addr = (uint64_t)(uintptr_t)vq->vq_ring.used, + .log_guest_addr = 0, + .flags = 0, /* disable log */ + }; + + /* May use invalid flag, but some backend leverages kickfd and callfd as +* criteria to judge if dev is alive. so finally we use real event_fd. +*/ + callfd = eventfd(0, O_CLOEXEC | O_NONBLOCK); + if (callfd < 0) { + PMD_DRV_LOG(ERR, "callfd error, %s\n", strerror(errno)); +
[dpdk-dev] [PATCH v4 4/8] virtio-user: add vhost adapter layer
This patch is to provide vhost adapter layer implementations. Instead of relying on a hypervisor to translate between device emulation and vhost backend, here we directly talk with vhost backend through the vhost file. Depending on the type of vhost file, - vhost-user is used if the given path points to a unix socket; - vhost-kernel is used if the given path points to a char device. Here three main APIs are provided to upper layer (device emulation): - vhost_user_setup(), to set up env to talk to a vhost user backend; - vhost_kernel_setup(), to set up env to talk to a vhost kernel backend. - vhost_call(), to provide a unified interface to communicate with vhost backend. -- | -- | | | virtio driver | | | -- | | | | | -- | --> virtio-user PMD | | device emulate | | | || | | | vhost adapter |-|> (vhost_user.c, vhost_kernel.c, vhost.c) | -- | -- | | -- --> (vhost-user protocol or vhost-net ioctls) | -- | vhost backend | -- Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-By: Neil Horman --- config/common_linuxapp| 3 + drivers/net/virtio/Makefile | 6 + drivers/net/virtio/virtio_pci.h | 1 + drivers/net/virtio/virtio_user/vhost.c| 105 drivers/net/virtio/virtio_user/vhost.h| 221 +++ drivers/net/virtio/virtio_user/vhost_kernel.c | 254 + drivers/net/virtio/virtio_user/vhost_user.c | 375 ++ 7 files changed, 965 insertions(+) create mode 100644 drivers/net/virtio/virtio_user/vhost.c create mode 100644 drivers/net/virtio/virtio_user/vhost.h create mode 100644 drivers/net/virtio/virtio_user/vhost_kernel.c create mode 100644 drivers/net/virtio/virtio_user/vhost_user.c diff --git a/config/common_linuxapp b/config/common_linuxapp index 7e698e2..946a6d4 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -43,3 +43,6 @@ CONFIG_RTE_LIBRTE_VHOST=y CONFIG_RTE_LIBRTE_PMD_VHOST=y CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y CONFIG_RTE_LIBRTE_POWER=y + +# Enable virtio-user +CONFIG_RTE_VIRTIO_VDEV=y diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile 
index ef84f60..c9f2bc0 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -55,6 +55,12 @@ ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c endif +ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y) +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c +endif + # this lib depends upon: DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index a76daf7..b9f1ee5 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -260,6 +260,7 @@ struct virtio_hw { struct virtio_pci_common_cfg *common_cfg; struct virtio_net_config *dev_cfg; const struct virtio_pci_ops *vtpci_ops; + void *vdev_private; }; /* diff --git a/drivers/net/virtio/virtio_user/vhost.c b/drivers/net/virtio/virtio_user/vhost.c new file mode 100644 index 000..ff76658 --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost.c @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR A
[dpdk-dev] [PATCH v4 3/8] virtio: enable use virtual address to fill desc
This patch is related to how to calculate the relative address for the vhost backend. The principle is as follows: based on one or multiple shared memory segments, vhost maintains a reference system with the base address and length of each segment, so that an address coming from the VM (usually a GPA, Guest Physical Address) can be translated into a vhost-recognizable address (named VVA, Vhost Virtual Address). In the VM case, GPA is always locally contiguous. In other cases, such as virtio-user, virtual addresses can be used instead. It basically means: a. when set_base_addr, VA address is used; b. when preparing RX's descriptors, VA address is used; c. when transmitting packets, VA is filled in TX's descriptors; d. in TX and CQ's header, VA is used. Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-By: Neil Horman --- drivers/net/virtio/virtio_ethdev.c | 11 --- drivers/net/virtio/virtio_rxtx.c| 5 ++--- drivers/net/virtio/virtio_rxtx_simple.c | 13 +++-- drivers/net/virtio/virtqueue.h | 13 - 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 0c20fb9..16b324d 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -167,14 +167,14 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, * One RX packet for ACK. 
*/ vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr; + vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mem; vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); vq->vq_free_cnt--; i = vq->vq_ring.desc[head].next; for (k = 0; k < pkt_num; k++) { vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr) + sizeof(ctrl->status) + sizeof(uint8_t)*sum; vq->vq_ring.desc[i].len = dlen[k]; @@ -184,7 +184,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, } vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; - vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr); vq->vq_ring.desc[i].len = sizeof(ctrl->status); vq->vq_free_cnt--; @@ -426,6 +426,11 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); } + /* Use physical address to fill desc.addr by default, +* and will be changed to use virtual address for vdev. 
+*/ + vq->offset = offsetof(struct rte_mbuf, buf_physaddr); + if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { PMD_INIT_LOG(ERR, "setup_queue failed"); virtio_dev_queue_release(vq); diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index ef21d8e..9d7e537 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -193,8 +193,7 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) start_dp = vq->vq_ring.desc; start_dp[idx].addr = - (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM - - hw->vtnet_hdr_size); + MBUF_DATA_DMA_ADDR(cookie, vq->offset) - hw->vtnet_hdr_size; start_dp[idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_WRITE; @@ -265,7 +264,7 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie, } do { - start_dp[idx].addr = rte_mbuf_data_dma_addr(cookie); + start_dp[idx].addr = MBUF_DATA_DMA_ADDR(cookie, txvq->offset); start_dp[idx].len = cookie->data_len; start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0; idx = start_dp[idx].next; diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c index 8f5293d..83a794e 100644 --- a/drivers/net/virtio/virtio_rxtx_simple.c +++ b/drivers/net/virtio/virtio_rxtx_simple.c @@ -80,8 +80,8 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, vq->sw_ring[desc_idx] = cookie; start_dp = vq->vq_ring.desc; - start_dp[desc_idx].addr = (uint64_t)((uintptr_t)cookie->buf_physaddr + - RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size); + start_dp[desc_idx].
[dpdk-dev] [PATCH v4 2/8] virtio: abstract vring hdr desc init as a method
To make it reusable, here we abstract the initialization of vring header into a method. Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-By: Neil Horman --- drivers/net/virtio/virtio_ethdev.c | 22 -- drivers/net/virtio/virtqueue.h | 20 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 534f0e6..0c20fb9 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -380,8 +380,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, if (queue_type == VTNET_TQ) { const struct rte_memzone *hdr_mz; - struct virtio_tx_region *txr; - unsigned int i; + size_t hdr_mz_sz = vq_size * sizeof(struct virtio_tx_region); /* * For each xmit packet, allocate a virtio_net_hdr @@ -390,7 +389,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone", dev->data->port_id, queue_idx); hdr_mz = rte_memzone_reserve_aligned(vq_name, -vq_size * sizeof(*txr), +hdr_mz_sz, socket_id, 0, RTE_CACHE_LINE_SIZE); if (hdr_mz == NULL) { @@ -404,21 +403,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, vq->virtio_net_hdr_mz = hdr_mz; vq->virtio_net_hdr_mem = hdr_mz->phys_addr; - txr = hdr_mz->addr; - memset(txr, 0, vq_size * sizeof(*txr)); - for (i = 0; i < vq_size; i++) { - struct vring_desc *start_dp = txr[i].tx_indir; - - vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); - - /* first indirect descriptor is always the tx header */ - start_dp->addr = vq->virtio_net_hdr_mem - + i * sizeof(*txr) - + offsetof(struct virtio_tx_region, tx_hdr); - - start_dp->len = vq->hw->vtnet_hdr_size; - start_dp->flags = VRING_DESC_F_NEXT; - } + memset(hdr_mz->addr, 0, hdr_mz_sz); + vring_hdr_desc_init(vq); } else if (queue_type == VTNET_CQ) { /* Allocate a page for control vq command, data and status */ diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index 83d89ca..3b19fd1 100644 --- 
a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -264,6 +264,26 @@ vring_desc_init(struct vring_desc *dp, uint16_t n) dp[i].next = VQ_RING_DESC_CHAIN_END; } +static inline void +vring_hdr_desc_init(struct virtqueue *vq) +{ + int i; + struct virtio_tx_region *txr = vq->virtio_net_hdr_mz->addr; + + for (i = 0; i < vq->vq_nentries; i++) { + struct vring_desc *start_dp = txr[i].tx_indir; + + vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); + + /* first indirect descriptor is always the tx header */ + start_dp->addr = vq->virtio_net_hdr_mem + i * sizeof(*txr) + +offsetof(struct virtio_tx_region, tx_hdr); + + start_dp->len = vq->hw->vtnet_hdr_size; + start_dp->flags = VRING_DESC_F_NEXT; + } +} + /** * Tell the backend not to interrupt us. */ -- 2.1.4
[dpdk-dev] [PATCH v4 1/8] virtio: hide phys addr check inside pci ops
This patch moves the phys addr check from virtio_dev_queue_setup to pci ops. To make that happen, virtio_ops.setup_queue now returns a result indicating whether the check passed. Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-By: Neil Horman --- drivers/net/virtio/virtio_ethdev.c | 17 + drivers/net/virtio/virtio_pci.c| 30 -- drivers/net/virtio/virtio_pci.h| 2 +- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index bd990ff..534f0e6 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -369,17 +369,6 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } } - /* -* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, -* and only accepts 32 bit page frame number. -* Check if the allocated physical memory exceeds 16TB. -*/ - if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { - PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); - virtio_dev_queue_release(vq); - return -ENOMEM; - } - memset(mz->addr, 0, sizeof(mz->len)); vq->mz = mz; vq->vq_ring_mem = mz->phys_addr; @@ -451,7 +440,11 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); } - hw->vtpci_ops->setup_queue(hw, vq); + if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { + PMD_INIT_LOG(ERR, "setup_queue failed"); + virtio_dev_queue_release(vq); + return -EINVAL; + } vq->started = 1; *pvq = vq; diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c index 9cdca06..6bd239c 100644 --- a/drivers/net/virtio/virtio_pci.c +++ b/drivers/net/virtio/virtio_pci.c @@ -55,6 +55,22 @@ */ #define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20) +static inline int +check_vq_phys_addr_ok(struct virtqueue *vq) +{ + /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, +* and only accepts 32 bit page frame number. +* Check if the allocated physical memory exceeds 16TB. 
+*/ + if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >> + (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); + return 0; + } + + return 1; +} + static void legacy_read_dev_config(struct virtio_hw *hw, size_t offset, void *dst, int length) @@ -143,15 +159,20 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return dst; } -static void +static int legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint32_t src; + if (!check_vq_phys_addr_ok(vq)) + return -1; + rte_eal_pci_ioport_write(>io, >vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); src = vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; rte_eal_pci_ioport_write(>io, , 4, VIRTIO_PCI_QUEUE_PFN); + + return 0; } static void @@ -367,12 +388,15 @@ modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return io_read16(>common_cfg->queue_size); } -static void +static int modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint64_t desc_addr, avail_addr, used_addr; uint16_t notify_off; + if (!check_vq_phys_addr_ok(vq)) + return -1; + desc_addr = vq->mz->phys_addr; avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, @@ -400,6 +424,8 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr); PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)", vq->notify_addr, notify_off); + + return 0; } static void diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index 554efea..a76daf7 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -234,7 +234,7 @@ struct virtio_pci_ops { uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); - void (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); + int (*setup_queue)(struct virtio_hw *hw, struct 
virtqueue *vq); void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq); }; -- 2.1.4
[dpdk-dev] [PATCH v4 0/8] virtio support for container
ive-linuxapp-gcc c. To build a docker image using Dockerfile below. $: cat ./Dockerfile FROM ubuntu:latest WORKDIR /usr/src/dpdk COPY . /usr/src/dpdk ENV PATH "$PATH:/usr/src/dpdk/examples/l2fwd/build/" $: docker build -t dpdk-app-l2fwd . d. Used with vhost-user $: ./examples/vhost/build/vhost-switch -c 3 -n 4 \ --socket-mem 1024,1024 -- -p 0x1 --stats 1 $: docker run -i -t -v :/var/run/usvhost \ -v /dev/hugepages:/dev/hugepages \ dpdk-app-l2fwd l2fwd -c 0x4 -n 4 -m 1024 --no-pci \ --vdev=virtio-user0,path=/var/run/usvhost -- -p 0x1 f. Used with vhost-net $: modprobe vhost $: modprobe vhost-net $: docker run -i -t --privileged \ -v /dev/vhost-net:/dev/vhost-net \ -v /dev/net/tun:/dev/net/tun \ -v /dev/hugepages:/dev/hugepages \ dpdk-app-l2fwd l2fwd -c 0x4 -n 4 -m 1024 --no-pci \ --vdev=virtio-user0,path=/dev/vhost-net -- -p 0x1 By the way, it's not necessary to run in a container. Signed-off-by: Huawei Xie Signed-off-by: Jianfeng Tan Acked-By: Neil Horman Jianfeng Tan (8): virtio: hide phys addr check inside pci ops virtio: abstract vring hdr desc init as a method virtio: enable use virtual address to fill desc virtio-user: add vhost adapter layer virtio-user: add device emulation layer APIs virtio-user: add new virtual pci driver for virtio virtio-user: add a new virtual device named virtio-user doc: update doc for virtio-user config/common_linuxapp | 3 + doc/guides/nics/overview.rst | 64 +-- doc/guides/rel_notes/release_16_07.rst | 4 + drivers/net/virtio/Makefile | 8 + drivers/net/virtio/virtio_ethdev.c | 69 ++-- drivers/net/virtio/virtio_ethdev.h | 2 + drivers/net/virtio/virtio_pci.c | 30 +- drivers/net/virtio/virtio_pci.h | 3 +- drivers/net/virtio/virtio_rxtx.c | 5 +- drivers/net/virtio/virtio_rxtx_simple.c | 13 +- drivers/net/virtio/virtio_user/vhost.c | 105 + drivers/net/virtio/virtio_user/vhost.h | 221 +++ drivers/net/virtio/virtio_user/vhost_kernel.c| 254 drivers/net/virtio/virtio_user/vhost_user.c | 375 ++ drivers/net/virtio/virtio_user/virtio_user_dev.c | 
475 +++ drivers/net/virtio/virtio_user/virtio_user_dev.h | 61 +++ drivers/net/virtio/virtio_user/virtio_user_pci.c | 209 ++ drivers/net/virtio/virtqueue.h | 33 +- 18 files changed, 1849 insertions(+), 85 deletions(-) create mode 100644 drivers/net/virtio/virtio_user/vhost.c create mode 100644 drivers/net/virtio/virtio_user/vhost.h create mode 100644 drivers/net/virtio/virtio_user/vhost_kernel.c create mode 100644 drivers/net/virtio/virtio_user/vhost_user.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.c create mode 100644 drivers/net/virtio/virtio_user/virtio_user_dev.h create mode 100644 drivers/net/virtio/virtio_user/virtio_user_pci.c -- 2.1.4
[dpdk-dev] [PATCH v3 2/2] virtio: fix memory leak of virtqueue memzones
Issue: When virtio was first proposed in DPDK, there was no API to free memzones. This changed when rte_memzone_free() was implemented by commit ff909fe21f0a ("mem: introduce memzone freeing"). This patch makes sure that memzones in struct virtqueue, such as mz and virtio_net_hdr_mz, are freed when a queue is released or its setup fails. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_ethdev.c | 21 ++--- drivers/net/virtio/virtqueue.h | 2 ++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index b3f4158..bd990ff 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -260,12 +260,18 @@ virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues) } void -virtio_dev_queue_release(struct virtqueue *vq) { +virtio_dev_queue_release(struct virtqueue *vq) +{ struct virtio_hw *hw; if (vq) { hw = vq->hw; - hw->vtpci_ops->del_queue(hw, vq); + if (vq->started) + hw->vtpci_ops->del_queue(hw, vq); + + rte_memzone_free(vq->mz); + if (vq->virtio_net_hdr_mz) + rte_memzone_free(vq->virtio_net_hdr_mz); rte_free(vq->sw_ring); rte_free(vq); @@ -330,7 +336,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, socket_id); if (!vq->sw_ring) { PMD_INIT_LOG(ERR, "Can not allocate RX soft ring"); - rte_free(vq); + virtio_dev_queue_release(vq); return -ENOMEM; } } @@ -358,7 +364,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, if (rte_errno == EEXIST) mz = rte_memzone_lookup(vq_name); if (mz == NULL) { - rte_free(vq); + virtio_dev_queue_release(vq); return -ENOMEM; } } @@ -370,7 +376,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, */ if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); - rte_free(vq); + virtio_dev_queue_release(vq); return -ENOMEM; } @@ -402,7 +408,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, if (rte_errno == 
EEXIST) hdr_mz = rte_memzone_lookup(vq_name); if (hdr_mz == NULL) { - rte_free(vq); + virtio_dev_queue_release(vq); return -ENOMEM; } } @@ -436,7 +442,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, vq->virtio_net_hdr_mz = rte_memzone_lookup(vq_name); if (vq->virtio_net_hdr_mz == NULL) { - rte_free(vq); + virtio_dev_queue_release(vq); return -ENOMEM; } } @@ -447,6 +453,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, hw->vtpci_ops->setup_queue(hw, vq); + vq->started = 1; *pvq = vq; return 0; } diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index 4e9239e..83d89ca 100644 --- a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -201,6 +201,8 @@ struct virtqueue { uint16_t*notify_addr; + int started; + struct vq_desc_extra { void *cookie; uint16_t ndescs; -- 2.1.4
[dpdk-dev] [PATCH v3 1/2] virtio: cleanup virtio_dev_queue_setup()
Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_ethdev.c | 47 +++--- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 1fe90ae..b3f4158 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -285,6 +285,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, unsigned int vq_size, size; struct virtio_hw *hw = dev->data->dev_private; struct virtqueue *vq = NULL; + const char *queue_names[] = {"rvq", "txq", "cvq"}; PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx); @@ -304,34 +305,34 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } - if (queue_type == VTNET_RQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d", - dev->data->port_id, queue_idx); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); - vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring", - (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * - sizeof(vq->sw_ring[0]), RTE_CACHE_LINE_SIZE, socket_id); - } else if (queue_type == VTNET_TQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d", - dev->data->port_id, queue_idx); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); - } else if (queue_type == VTNET_CQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_cvq", - dev->data->port_id); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), - RTE_CACHE_LINE_SIZE); + if (queue_type < VTNET_RQ || queue_type > VTNET_CQ) { + PMD_INIT_LOG(ERR, "invalid queue type: %d", queue_type); + return -EINVAL; } + + snprintf(vq_name, sizeof(vq_name), "port%d_%s%d", +dev->data->port_id, queue_names[queue_type], queue_idx); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + +vq_size * sizeof(struct vq_desc_extra), +RTE_CACHE_LINE_SIZE); if (vq == NULL) { PMD_INIT_LOG(ERR, "Can not allocate 
virtqueue"); return -ENOMEM; } - if (queue_type == VTNET_RQ && vq->sw_ring == NULL) { - PMD_INIT_LOG(ERR, "Can not allocate RX soft ring"); - rte_free(vq); - return -ENOMEM; + + if (queue_type == VTNET_RQ) { + size_t sz_sw; + + sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * + sizeof(vq->sw_ring[0]); + vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring", sz_sw, +RTE_CACHE_LINE_SIZE, +socket_id); + if (!vq->sw_ring) { + PMD_INIT_LOG(ERR, "Can not allocate RX soft ring"); + rte_free(vq); + return -ENOMEM; + } } vq->hw = hw; -- 2.1.4
[dpdk-dev] [PATCH v3 0/2] virtio: fix memory leak of virtqueue memzones
Patch 1: Do some cleanup in virtio_dev_queue_setup(); Patch 2: Fix the memory leak bug. Jianfeng Tan (2): v3: Fix a typo in the queue_type check. v2: split cleanup and fix into two patches. virtio: cleanup virtio_dev_queue_setup() virtio: fix memory leak of virtqueue memzones drivers/net/virtio/virtio_ethdev.c | 66 +- drivers/net/virtio/virtqueue.h | 2 ++ 2 files changed, 39 insertions(+), 29 deletions(-) -- 2.1.4
[dpdk-dev] [PATCH v2 1/2] virtio: cleanup virtio_dev_queue_setup()
Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_ethdev.c | 47 +++--- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 1fe90ae..0553b67 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -285,6 +285,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, unsigned int vq_size, size; struct virtio_hw *hw = dev->data->dev_private; struct virtqueue *vq = NULL; + const char *queue_names[] = {"rvq", "txq", "cvq"}; PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx); @@ -304,34 +305,34 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } - if (queue_type == VTNET_RQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d", - dev->data->port_id, queue_idx); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); - vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring", - (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * - sizeof(vq->sw_ring[0]), RTE_CACHE_LINE_SIZE, socket_id); - } else if (queue_type == VTNET_TQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d", - dev->data->port_id, queue_idx); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); - } else if (queue_type == VTNET_CQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_cvq", - dev->data->port_id); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), - RTE_CACHE_LINE_SIZE); + if (queue_type < VTNET_RQ || queue_type > VTNET_RQ) { + PMD_INIT_LOG(ERR, "invalid queue type: %d", queue_type); + return -EINVAL; } + + snprintf(vq_name, sizeof(vq_name), "port%d_%s%d", +dev->data->port_id, queue_names[queue_type], queue_idx); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + +vq_size * sizeof(struct vq_desc_extra), +RTE_CACHE_LINE_SIZE); if (vq == NULL) { PMD_INIT_LOG(ERR, "Can not allocate 
virtqueue"); return -ENOMEM; } - if (queue_type == VTNET_RQ && vq->sw_ring == NULL) { - PMD_INIT_LOG(ERR, "Can not allocate RX soft ring"); - rte_free(vq); - return -ENOMEM; + + if (queue_type == VTNET_RQ) { + size_t sz_sw; + + sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * + sizeof(vq->sw_ring[0]); + vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring", sz_sw, +RTE_CACHE_LINE_SIZE, +socket_id); + if (!vq->sw_ring) { + PMD_INIT_LOG(ERR, "Can not allocate RX soft ring"); + rte_free(vq); + return -ENOMEM; + } } vq->hw = hw; -- 2.1.4
[dpdk-dev] [PATCH v2 0/2] virtio: fix memory leak of virtqueue memzones
Commit 1: Do some cleanup in virtio_dev_queue_setup(); Commit 2: Fix the memory leak bug. Signed-off-by: Jianfeng Tan Jianfeng Tan (2): virtio: cleanup virtio_dev_queue_setup() virtio: fix memory leak of virtqueue memzones drivers/net/virtio/virtio_ethdev.c | 66 +- drivers/net/virtio/virtqueue.h | 2 ++ 2 files changed, 39 insertions(+), 29 deletions(-) -- 2.1.4
[dpdk-dev] [PATCH v2] virtio: fix modify drv_flags for specific device
Issue: virtio's drv_flags are decided by the device type (modern vs legacy), by which kernel driver is used, and by the features negotiated with the backend (especially VIRTIO_NET_F_STATUS), which makes it possible for multiple virtio devices to need different versions of drv_flags, but this variable is currently shared by all virtio devices. How to fix: use dev_flags, which is a device-specific variable, to store this info. Fixes: da978dfdc43 ("virtio: use port IO to get PCI resource") Reported-by: David Marchand Suggested-by: David Marchand Signed-off-by: Jianfeng Tan --- v2: RTE_PCI_DRV_INTR_LSC -> RTE_ETH_DEV_INTR_LSC. drivers/net/virtio/virtio_ethdev.c | 25 ++--- drivers/net/virtio/virtio_pci.c| 13 +++-- drivers/net/virtio/virtio_pci.h| 3 ++- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 63a368a..1fe90ae 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -59,7 +59,6 @@ #include "virtqueue.h" #include "virtio_rxtx.h" - static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); static int virtio_dev_configure(struct rte_eth_dev *dev); @@ -491,7 +490,6 @@ static void virtio_dev_close(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; - struct rte_pci_device *pci_dev = dev->pci_dev; PMD_INIT_LOG(DEBUG, "virtio_dev_close"); @@ -499,7 +497,7 @@ virtio_dev_close(struct rte_eth_dev *dev) virtio_dev_stop(dev); /* reset the NIC */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR); vtpci_reset(hw); virtio_dev_free_mbufs(dev); @@ -1034,6 +1032,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) struct virtio_net_config *config; struct virtio_net_config local_config; struct rte_pci_device *pci_dev; + uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE; int ret; 
RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr)); @@ -1057,7 +1056,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) pci_dev = eth_dev->pci_dev; - ret = vtpci_init(pci_dev, hw); + ret = vtpci_init(pci_dev, hw, &dev_flags); if (ret) return ret; @@ -1074,9 +1073,15 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) /* If host does not support status then disable LSC */ if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) - pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC; + dev_flags &= ~RTE_ETH_DEV_INTR_LSC; rte_eth_copy_pci_info(eth_dev, pci_dev); + /* For virtio devices, dev_flags are decided according to feature +* negotiation, aka if VIRTIO_NET_F_STATUS is set, and which kernel +* driver is used, dynamically. And we should keep drv_flags shared +* and unvaried. +*/ + eth_dev->data->dev_flags = dev_flags; rx_func_get(eth_dev); @@ -1155,7 +1160,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) pci_dev->id.device_id); /* Setup interrupt callback */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) rte_intr_callback_register(&pci_dev->intr_handle, virtio_interrupt_handler, eth_dev); @@ -1190,7 +1195,7 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->data->mac_addrs = NULL; /* reset interrupt callback */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) rte_intr_callback_unregister(&pci_dev->intr_handle, virtio_interrupt_handler, eth_dev); @@ -1240,7 +1245,6 @@ virtio_dev_configure(struct rte_eth_dev *dev) { const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; struct virtio_hw *hw = dev->data->dev_private; - struct rte_pci_device *pci_dev = dev->pci_dev; PMD_INIT_LOG(DEBUG, "configure"); @@ -1258,7 +1262,7 @@ virtio_dev_configure(struct rte_eth_dev *dev) return -ENOTSUP; } - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) if (vtpci_irq_config(hw, 
0) == VIRTIO_MSI_NO_VECTOR) { PMD_DRV_LOG(ERR, "failed to set config vector"); return
[dpdk-dev] [PATCH] virtio: fix memory leak of virtqueue memzones
Issue: When virtio was proposed in DPDK, there was no API to free memzones. But this has changed since rte_memzone_free() was implemented by commit ff909fe21f. This patch makes sure that the memzones in struct virtqueue, like mz and virtio_net_hdr_mz, are freed when a queue is released or its setup fails. Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_ethdev.c | 69 -- drivers/net/virtio/virtio_ethdev.h | 2 +- drivers/net/virtio/virtio_rxtx.c | 4 +-- 3 files changed, 40 insertions(+), 35 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 63a368a..54eacf6 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -261,12 +261,18 @@ virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues) } void -virtio_dev_queue_release(struct virtqueue *vq) { +virtio_dev_queue_release(struct virtqueue *vq, int io_related) +{ struct virtio_hw *hw; if (vq) { hw = vq->hw; - hw->vtpci_ops->del_queue(hw, vq); + if (io_related) + hw->vtpci_ops->del_queue(hw, vq); + + rte_memzone_free(vq->mz); + if (vq->virtio_net_hdr_mz) + rte_memzone_free(vq->virtio_net_hdr_mz); rte_free(vq->sw_ring); rte_free(vq); @@ -286,6 +292,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, unsigned int vq_size, size; struct virtio_hw *hw = dev->data->dev_private; struct virtqueue *vq = NULL; + const char *queue_names[] = {"rvq", "txq", "cvq"}; PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx); @@ -305,34 +312,34 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } - if (queue_type == VTNET_RQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d", - dev->data->port_id, queue_idx); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); - vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring", - (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * - sizeof(vq->sw_ring[0]), RTE_CACHE_LINE_SIZE, socket_id); - } else if (queue_type 
== VTNET_TQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d", - dev->data->port_id, queue_idx); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); - } else if (queue_type == VTNET_CQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_cvq", - dev->data->port_id); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), - RTE_CACHE_LINE_SIZE); + if (queue_type < VTNET_RQ || queue_type > VTNET_RQ) { + PMD_INIT_LOG(ERR, "invalid queue type: %d", queue_type); + return -EINVAL; } + + snprintf(vq_name, sizeof(vq_name), "port%d_%s%d", +dev->data->port_id, queue_names[queue_type], queue_idx); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + +vq_size * sizeof(struct vq_desc_extra), +RTE_CACHE_LINE_SIZE); if (vq == NULL) { PMD_INIT_LOG(ERR, "Can not allocate virtqueue"); return -ENOMEM; } - if (queue_type == VTNET_RQ && vq->sw_ring == NULL) { - PMD_INIT_LOG(ERR, "Can not allocate RX soft ring"); - rte_free(vq); - return -ENOMEM; + + if (queue_type == VTNET_RQ) { + size_t sz_sw; + + sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * + sizeof(vq->sw_ring[0]); + vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring", sz_sw, +RTE_CACHE_LINE_SIZE, +socket_id); + if (!vq->sw_ring) { + PMD_INIT_LOG(ERR, "Can not allocate RX soft ring"); + virtio_dev_queue_release(vq, 0); + return -ENOMEM; + } } vq->hw = hw; @@ -358,7 +365,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, if (rte_errno == EEXIST) mz = rte_memzone_lookup(vq_name); if (mz == NULL) { - rte_free(vq); +
[dpdk-dev] [PATCH] virtio: fix modify drv_flags for specific device
Issue: virtio's drv_flags are decided by the device type (modern vs legacy), by which kernel driver is used, and by the features negotiated with the backend (especially VIRTIO_NET_F_STATUS), which makes it possible for multiple virtio devices to need different versions of drv_flags, but this variable is currently shared by all virtio devices. How to fix: use dev_flags, which is a device-specific variable, to store this info. Fixes: da978dfdc43 ("virtio: use port IO to get PCI resource") Reported-by: David Marchand Suggested-by: David Marchand Signed-off-by: Jianfeng Tan --- drivers/net/virtio/virtio_ethdev.c | 27 --- drivers/net/virtio/virtio_pci.c| 13 +++-- drivers/net/virtio/virtio_pci.h| 3 ++- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 63a368a..b144a58 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -59,6 +59,7 @@ #include "virtqueue.h" #include "virtio_rxtx.h" +#define VIRTIO_DRV_FLAGS RTE_PCI_DRV_DETACHABLE static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); @@ -491,7 +492,6 @@ static void virtio_dev_close(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; - struct rte_pci_device *pci_dev = dev->pci_dev; PMD_INIT_LOG(DEBUG, "virtio_dev_close"); @@ -499,7 +499,7 @@ virtio_dev_close(struct rte_eth_dev *dev) virtio_dev_stop(dev); /* reset the NIC */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (dev->data->dev_flags & RTE_PCI_DRV_INTR_LSC) vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR); vtpci_reset(hw); virtio_dev_free_mbufs(dev); @@ -1034,6 +1034,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) struct virtio_net_config *config; struct virtio_net_config local_config; struct rte_pci_device *pci_dev; + uint32_t dev_flags = VIRTIO_DRV_FLAGS; int ret; RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr)); @@ -1057,7 +1058,7 
@@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) pci_dev = eth_dev->pci_dev; - ret = vtpci_init(pci_dev, hw); + ret = vtpci_init(pci_dev, hw, &dev_flags); if (ret) return ret; @@ -1074,9 +1075,15 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) /* If host does not support status then disable LSC */ if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) - pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC; + dev_flags &= ~RTE_PCI_DRV_INTR_LSC; rte_eth_copy_pci_info(eth_dev, pci_dev); + /* For virtio devices, dev_flags are decided according to feature +* negotiation, aka if VIRTIO_NET_F_STATUS is set, and which kernel +* driver is used, dynamically. And we should keep drv_flags shared +* and unvaried. +*/ + eth_dev->data->dev_flags = dev_flags; rx_func_get(eth_dev); @@ -1155,7 +1162,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) pci_dev->id.device_id); /* Setup interrupt callback */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (eth_dev->data->dev_flags & RTE_PCI_DRV_INTR_LSC) rte_intr_callback_register(&pci_dev->intr_handle, virtio_interrupt_handler, eth_dev); @@ -1190,7 +1197,7 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->data->mac_addrs = NULL; /* reset interrupt callback */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (eth_dev->data->dev_flags & RTE_PCI_DRV_INTR_LSC) rte_intr_callback_unregister(&pci_dev->intr_handle, virtio_interrupt_handler, eth_dev); @@ -1205,7 +1212,7 @@ static struct eth_driver rte_virtio_pmd = { .pci_drv = { .name = "rte_virtio_pmd", .id_table = pci_id_virtio_map, - .drv_flags = RTE_PCI_DRV_DETACHABLE, + .drv_flags = VIRTIO_DRV_FLAGS, }, .eth_dev_init = eth_virtio_dev_init, .eth_dev_uninit = eth_virtio_dev_uninit, @@ -1240,7 +1247,6 @@ virtio_dev_configure(struct rte_eth_dev *dev) { const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; struct virtio_hw *hw = dev->data->dev_private; - struct rte_pci_device *pci_dev = dev->pci_dev; PMD_INIT_LOG(DEBUG, "configure"); @@ -1258,7 +1264,7 @@ 
virtio_dev_configure(struct rte_eth_dev *dev) retur