>-----Original Message-----
>From: dev [mailto:[email protected]] On Behalf Of Ciara Loftus
>Sent: Tuesday, May 24, 2016 2:15 PM
>To: [email protected]
>Subject: [ovs-dev] [PATCH] netdev-dpdk: NUMA Aware vHost User
>
>This commit allows for vHost User memory from QEMU, DPDK and OVS, as
>well as the servicing PMD, to all come from the same socket.
>
>The socket id of a vhost-user port used to be set to that of the master lcore.
>Now it is possible to update the socket id if it is detected (during VM boot)
>that the vhost device memory is not on this node. If this is the case, a new
>mempool is created from the new node, and the PMD thread currently
>servicing the port will no longer do so, in favour of a thread from the new node (if
>enabled in the pmd-cpu-mask).
>
>To avail of this functionality, one must enable the
>CONFIG_RTE_LIBRTE_VHOST_NUMA DPDK configuration option.
>
>Signed-off-by: Ciara Loftus <[email protected]>
>---
> .travis.yml | 3 +++
> INSTALL.DPDK.md | 8 ++++++--
> NEWS | 3 +++
> acinclude.m4 | 2 +-
> lib/netdev-dpdk.c | 37 ++++++++++++++++++++++++++++++++++---
> rhel/openvswitch-fedora.spec.in | 1 +
> 6 files changed, 48 insertions(+), 6 deletions(-)
>
>diff --git a/.travis.yml b/.travis.yml
>index ee2cf21..faba325 100644
>--- a/.travis.yml
>+++ b/.travis.yml
>@@ -11,10 +11,13 @@ addons:
> packages:
> - bc
> - gcc-multilib
>+ - libnuma1
>+ - libnuma-dev
> - libssl-dev
> - llvm-dev
> - libjemalloc1
> - libjemalloc-dev
>+ - numactl
>
> before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
>
>diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md index 93f92e4..bbe0234
>100644
>--- a/INSTALL.DPDK.md
>+++ b/INSTALL.DPDK.md
>@@ -16,7 +16,7 @@ OVS needs a system with 1GB hugepages support.
> Building and Installing:
> ------------------------
>
>-Required: DPDK 16.04
>+Required: DPDK 16.04, libnuma
The change above makes libnuma mandatory for building OVS with the DPDK datapath.
The config option CONFIG_RTE_LIBRTE_VHOST_NUMA is disabled by default in
DPDK 16.04, so the steps to enable this option and rebuild DPDK should be
documented in the "Configure build & Install DPDK" section of the install guide.
> Optional (if building with vhost-cuse): `fuse`, `fuse-devel` (`libfuse-dev`
> on
>Debian/Ubuntu)
>
>@@ -443,7 +443,11 @@ Performance Tuning:
>
> It is good practice to ensure that threads that are in the datapath are
> pinned to cores in the same NUMA area. e.g. pmd threads and QEMU
>vCPUs
>- responsible for forwarding.
>+ responsible for forwarding. If DPDK is built with
>+ CONFIG_RTE_LIBRTE_VHOST_NUMA=y, vHost User ports
>automatically
>+ detect the NUMA socket of the QEMU vCPUs and will be serviced by a
>PMD
>+ from the same node provided a core on this node is enabled in the
>+ pmd-cpu-mask.
>
> 9. Rx Mergeable buffers
>
>diff --git a/NEWS b/NEWS
>index 4e81cad..24ca39f 100644
>--- a/NEWS
>+++ b/NEWS
>@@ -32,6 +32,9 @@ Post-v2.5.0
> * DB entries have been added for many of the DPDK EAL command line
> arguments. Additional arguments can be passed via the dpdk-extra
> entry.
>+ * PMD threads servicing vHost User ports can now come from the NUMA
>+ node that device memory is located on if
>CONFIG_RTE_LIBRTE_VHOST_NUMA
>+ is enabled in DPDK.
> - ovs-benchmark: This utility has been removed due to lack of use and
> bitrot.
> - ovs-appctl:
>diff --git a/acinclude.m4 b/acinclude.m4 index f3de855..99ddf04 100644
>--- a/acinclude.m4
>+++ b/acinclude.m4
>@@ -218,7 +218,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [
> DPDKLIB_FOUND=false
> save_LIBS=$LIBS
> for extras in "" "-ldl"; do
>- LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB"
>+ LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB -lnuma"
The above change makes libnuma mandatory for configuring OVS with the DPDK
datapath, even though 'CONFIG_RTE_LIBRTE_VHOST_NUMA' is disabled by default.
IMHO, we could check whether LIBRTE_VHOST_NUMA is enabled (from rte_config.h)
and append "-lnuma" only when it is. This would be in line with how we handle
the VHOST CUSE case.
> AC_LINK_IFELSE(
> [AC_LANG_PROGRAM([#include <rte_config.h>
> #include <rte_eal.h>], diff --git
> a/lib/netdev-dpdk.c
>b/lib/netdev-dpdk.c index 0d1b8c9..ad6c4bb 100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -30,6 +30,7 @@
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <getopt.h>
>+#include <numaif.h>
>
> #include "dirs.h"
> #include "dp-packet.h"
>@@ -378,6 +379,9 @@ struct netdev_dpdk {
> * netdev_dpdk*_reconfigure() is called */
> int requested_n_txq;
> int requested_n_rxq;
>+
>+ /* Socket ID detected when vHost device is brought up */
>+ int requested_socket_id;
> };
>
> struct netdev_rxq_dpdk {
>@@ -747,6 +751,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
>port_no,
> }
>
> dev->socket_id = sid < 0 ? SOCKET0 : sid;
>+ dev->requested_socket_id = dev->socket_id;
> dev->port_id = port_no;
> dev->type = type;
> dev->flags = 0;
>@@ -2149,6 +2154,8 @@ new_device(struct virtio_net *virtio_dev) {
> struct netdev_dpdk *dev;
> bool exists = false;
>+ int newnode = 0;
>+ long err = 0;
>
> ovs_mutex_lock(&dpdk_mutex);
> /* Add device to the vhost port with the same name as that passed down.
>*/ @@ -2162,6 +2169,19 @@ new_device(struct virtio_net *virtio_dev)
> }
> ovsrcu_set(&dev->virtio_dev, virtio_dev);
> exists = true;
>+
>+ /* Get NUMA information */
>+ err = get_mempolicy(&newnode, NULL, 0, virtio_dev,
>+ MPOL_F_NODE | MPOL_F_ADDR);
>+ if (err) {
>+ VLOG_INFO("Error getting NUMA info for vHost Device '%s'",
>+ virtio_dev->ifname);
>+ newnode = dev->socket_id;
>+ } else if (newnode != dev->socket_id) {
>+ dev->requested_socket_id = newnode;
>+ netdev_request_reconfigure(&dev->up);
>+ }
>+
> virtio_dev->flags |= VIRTIO_DEV_RUNNING;
> /* Disable notifications. */
> set_irq_status(virtio_dev); @@ -2178,8 +2198,8 @@
> new_device(struct
>virtio_net *virtio_dev)
> return -1;
> }
>
>- VLOG_INFO("vHost Device '%s' %"PRIu64" has been added", virtio_dev-
>>ifname,
>- virtio_dev->device_fh);
>+ VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on socket %i",
>+ virtio_dev->ifname, virtio_dev->device_fh, newnode);
> return 0;
> }
>
>@@ -2760,6 +2780,7 @@ static int
> netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev) {
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>+ int err = 0;
>
> ovs_mutex_lock(&dpdk_mutex);
> ovs_mutex_lock(&dev->mutex);
>@@ -2767,10 +2788,20 @@ netdev_dpdk_vhost_user_reconfigure(struct
>netdev *netdev)
> netdev->n_txq = dev->requested_n_txq;
> netdev->n_rxq = dev->requested_n_rxq;
>
>+ if (dev->requested_socket_id != dev->socket_id) {
>+ dev->socket_id = dev->requested_socket_id;
>+ /* Change mempool to new NUMA Node */
>+ dpdk_mp_put(dev->dpdk_mp);
>+ dev->dpdk_mp = dpdk_mp_get(dev->socket_id, dev->mtu);
>+ if (!dev->dpdk_mp) {
>+ err = ENOMEM;
>+ }
>+ }
>+
> ovs_mutex_unlock(&dev->mutex);
> ovs_mutex_unlock(&dpdk_mutex);
>
>- return 0;
>+ return err;
> }
>
> static int
>diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-
>fedora.spec.in index 0759096..e360d4d 100644
>--- a/rhel/openvswitch-fedora.spec.in
>+++ b/rhel/openvswitch-fedora.spec.in
>@@ -54,6 +54,7 @@ BuildRequires: libcap-ng libcap-ng-devel %endif %if
>%{with dpdk}
> BuildRequires: dpdk-devel >= 2.2.0
>+BuildRequires: numactl numactl-devel numactl-libs
> Provides: %{name}-dpdk = %{version}-%{release} %endif
>
>--
>2.4.3
>
>_______________________________________________
>dev mailing list
>[email protected]
>http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev