Using poll() internally in dpif_linux_recv(), instead of relying
on the results of the main loop poll() call, brings netperf CRR
performance back within 1% of par versus the code base before the
poll_fd_woke() optimizations were introduced. It also increases
the ovs-benchmark results by about 5% versus that baseline, too.
My theory is that this is because the main loop takes long enough
that a significant number of packets can arrive during the main
loop itself, so this reduces the time before OVS gets to those
packets.
---
lib/dpif-linux.c | 50 +++++++++++++++++++++++++++++++++++++++++---------
lib/netlink-socket.c | 13 +++++++++++++
lib/netlink-socket.h | 1 +
3 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 4b4ac55..75e4301 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -28,7 +28,9 @@
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/sockios.h>
+#include <poll.h>
#include <stdlib.h>
+#include <strings.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -63,6 +65,7 @@ BUILD_ASSERT_DECL(IS_POW2(LRU_MAX_PORTS));
enum { N_UPCALL_SOCKS = 16 };
BUILD_ASSERT_DECL(IS_POW2(N_UPCALL_SOCKS));
+BUILD_ASSERT_DECL(N_UPCALL_SOCKS <= 32); /* We use a 32-bit word as a mask. */
/* This ethtool flag was introduced in Linux 2.6.24, so it might be
* missing if we have old headers. */
@@ -135,7 +138,7 @@ struct dpif_linux {
/* Upcall messages. */
struct nl_sock *upcall_socks[N_UPCALL_SOCKS];
- int last_read_upcall;
+ uint32_t nonempty_socks;
unsigned int listen_mask;
/* Change notification. */
@@ -1009,6 +1012,8 @@ dpif_linux_recv_set_mask(struct dpif *dpif_, int
listen_mask)
return error;
}
}
+
+ dpif->nonempty_socks = 0;
}
dpif->listen_mask = listen_mask;
@@ -1088,24 +1093,51 @@ static int
dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall)
{
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
- int i;
int read_tries = 0;
if (!dpif->listen_mask) {
return EAGAIN;
}
- for (i = 0; i < N_UPCALL_SOCKS; i++) {
- struct nl_sock *upcall_sock;
- int dp_ifindex;
+ if (!dpif->nonempty_socks) {
+ struct pollfd pfds[N_UPCALL_SOCKS];
+ int retval;
+ int i;
- dpif->last_read_upcall = (dpif->last_read_upcall + 1) &
- (N_UPCALL_SOCKS - 1);
- upcall_sock = dpif->upcall_socks[dpif->last_read_upcall];
+ for (i = 0; i < N_UPCALL_SOCKS; i++) {
+ pfds[i].fd = nl_sock_fd(dpif->upcall_socks[i]);
+ pfds[i].events = POLLIN;
+ pfds[i].revents = 0;
+ }
+ do {
+ retval = poll(pfds, N_UPCALL_SOCKS, 0);
+ } while (retval < 0 && errno == EINTR);
+ if (retval < 0) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+ VLOG_WARN_RL(&rl, "poll failed (%s)", strerror(errno));
+ } else if (!retval) {
+ return EAGAIN;
+ }
+
+ for (i = 0; i < N_UPCALL_SOCKS; i++) {
+ if (pfds[i].revents) {
+ dpif->nonempty_socks |= 1u << i;
+ }
+ }
+
+ assert(dpif->nonempty_socks);
+ }
+
+ do {
+ int indx = ffs(dpif->nonempty_socks) - 1;
+ struct nl_sock *upcall_sock = dpif->upcall_socks[indx];
+
+ dpif->nonempty_socks &= ~(1u << indx);
for (;;) {
struct ofpbuf *buf;
+ int dp_ifindex;
int error;
if (++read_tries > 50) {
@@ -1131,7 +1163,7 @@ dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall
*upcall)
return error;
}
}
- }
+ } while (dpif->nonempty_socks);
return EAGAIN;
}
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index 3e64de3..7f49626 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -854,6 +854,19 @@ nl_sock_wait(const struct nl_sock *sock, short int events)
poll_fd_wait(sock->fd, events);
}
+/* Returns the underlying fd for 'sock', for use in "poll()"-like operations
+ * that can't use nl_sock_wait().
+ *
+ * It's a little tricky to use the returned fd correctly, because nl_sock does
+ * "copy on write" to allow a single nl_sock to be used for notifications,
+ * transactions, and dumps. If 'sock' is used only for notifications and
+ * transactions (and never for dump) then the usage is safe. */
+int
+nl_sock_fd(const struct nl_sock *sock)
+{
+ return sock->fd;
+}
+
/* Returns the PID associated with this socket. */
uint32_t
nl_sock_pid(const struct nl_sock *sock)
diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h
index b6356b9..f2a5d3f 100644
--- a/lib/netlink-socket.h
+++ b/lib/netlink-socket.h
@@ -60,6 +60,7 @@ int nl_sock_transact(struct nl_sock *, const struct ofpbuf
*request,
int nl_sock_drain(struct nl_sock *);
void nl_sock_wait(const struct nl_sock *, short int events);
+int nl_sock_fd(const struct nl_sock *);
uint32_t nl_sock_pid(const struct nl_sock *);
--
1.7.4.4
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev