On 17 Dec 2019, at 23:27, Yi-Hung Wei wrote:
Currently, the AF_XDP socket (XSK) related memory are allocated by
main
thread in the main thread's NUMA domain. With the patch that detects
netdev-linux's NUMA node id, the PMD thread of AF_XDP port will be run
on
the AF_XDP netdev's NUMA domain. If the net device's NUMA domain
is different from the main thread's NUMA domain, we will have two
cross-NUMA memory accesses (netdev <-> memory, memory <-> CPU).
This patch addresses the aforementioned issue by allocating
the memory in the net device's NUMA domain.
Did not try the patch yet, but it looks good. One quick comment below…
Signed-off-by: Yi-Hung Wei <[email protected]>
---
Documentation/intro/install/afxdp.rst | 2 +-
acinclude.m4 | 5 ++++-
include/sparse/automake.mk | 1 +
include/sparse/numa.h | 27
+++++++++++++++++++++++++++
lib/netdev-afxdp.c | 21 ++++++++++++++++++---
5 files changed, 51 insertions(+), 5 deletions(-)
create mode 100644 include/sparse/numa.h
diff --git a/Documentation/intro/install/afxdp.rst
b/Documentation/intro/install/afxdp.rst
index 7b0736c96114..c4685fa7ebac 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -164,7 +164,7 @@ If a test case fails, check the log at::
Setup AF_XDP netdev
-------------------
-Before running OVS with AF_XDP, make sure the libbpf and libelf are
+Before running OVS with AF_XDP, make sure the libbpf, libelf, and
libnuma are
set-up right::
ldd vswitchd/ovs-vswitchd
diff --git a/acinclude.m4 b/acinclude.m4
index 542637ac8cb8..73ed11d701aa 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -286,9 +286,12 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
AC_CHECK_FUNCS([pthread_spin_lock], [],
[AC_MSG_ERROR([unable to find pthread_spin_lock for AF_XDP
support])])
+ AC_CHECK_LIB(numa, numa_alloc_onnode, [],
+ [AC_MSG_ERROR([unable to find libnuma for AF_XDP support])])
+
AC_DEFINE([HAVE_AF_XDP], [1],
[Define to 1 if AF_XDP support is available and
enabled.])
- LIBBPF_LDADD=" -lbpf -lelf"
+ LIBBPF_LDADD=" -lbpf -lelf -lnuma"
AC_SUBST([LIBBPF_LDADD])
AC_CHECK_DECL([xsk_ring_prod__needs_wakeup], [
diff --git a/include/sparse/automake.mk b/include/sparse/automake.mk
index 073631e8c082..974ad3fe55f7 100644
--- a/include/sparse/automake.mk
+++ b/include/sparse/automake.mk
@@ -5,6 +5,7 @@ noinst_HEADERS += \
include/sparse/bits/floatn.h \
include/sparse/assert.h \
include/sparse/math.h \
+ include/sparse/numa.h \
include/sparse/netinet/in.h \
include/sparse/netinet/ip6.h \
include/sparse/netpacket/packet.h \
diff --git a/include/sparse/numa.h b/include/sparse/numa.h
new file mode 100644
index 000000000000..3691a0eaf729
--- /dev/null
+++ b/include/sparse/numa.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.
+ * See the License for the specific language governing permissions
and
+ * limitations under the License.
+ */
+
+#ifndef __CHECKER__
+#error "Use this header only with sparse. It is not a correct
implementation."
+#endif
+
+/* Avoid sparse warning: non-ANSI function declaration of function"
*/
+#define numa_get_membind_compat() numa_get_membind_compat(void)
+#define numa_get_interleave_mask_compat()
numa_get_interleave_mask_compat(void)
+#define numa_get_run_node_mask_compat()
numa_get_run_node_mask_compat(void)
+
+/* Get actual <numa.h> definitions for us to annotate and build on.
*/
+#include_next<numa.h>
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 4c1f9c68270a..2d6f739b4b67 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -26,6 +26,7 @@
#include <linux/rtnetlink.h>
#include <linux/if_xdp.h>
#include <net/if.h>
+#include <numa.h>
#include <poll.h>
#include <stdlib.h>
#include <sys/resource.h>
@@ -469,7 +470,7 @@ xsk_configure_all(struct netdev *netdev)
{
struct netdev_linux *dev = netdev_linux_cast(netdev);
int i, ifindex, n_rxq, n_txq;
- int qid = 0;
+ int qid = 0, err = 0;
ifindex = linux_get_ifindex(netdev_get_name(netdev));
@@ -477,6 +478,14 @@ xsk_configure_all(struct netdev *netdev)
ovs_assert(dev->tx_locks == NULL);
n_rxq = netdev_n_rxq(netdev);
+
+ /* Allocate all the xsk related memory in the netdev's NUMA
domain. */
+ struct bitmask *old_bm = numa_get_membind();
+ struct bitmask *new_bm = numa_allocate_nodemask();
+ netdev_get_numa_id(netdev);
+ numa_bitmask_setbit(new_bm, dev->numa_id);
Rather than access the device specific structure, why not use the return
value?
numa_bitmask_setbit(new_bm, netdev_get_numa_id(netdev));
+ numa_set_membind(new_bm);
+
dev->xsks = xcalloc(n_rxq, sizeof *dev->xsks);
if (dev->xdp_mode == OVS_AF_XDP_MODE_BEST_EFFORT) {
@@ -518,11 +527,17 @@ xsk_configure_all(struct netdev *netdev)
ovs_spin_init(&dev->tx_locks[i]);
}
- return 0;
+ goto out;
err:
xsk_destroy_all(netdev);
- return EINVAL;
+ err = EINVAL;
+
+out:
+ numa_set_membind(old_bm);
+ numa_bitmask_free(old_bm);
+ numa_bitmask_free(new_bm);
+ return err;
}
static void
--
2.17.1
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev