Good catch. Thanks, I will push this soon.
On Wed, Jul 27, 2011 at 01:48:45PM -0700, Ethan Jackson wrote: > I think it should NULL out bufp in the beginning in case it errors out > earlier. Otherwise looks fine to me. > > Ethan > > On Wed, Jul 20, 2011 at 13:16, Ben Pfaff <[email protected]> wrote: > > Until now, each attempt to receive a message from a Netlink socket has > > taken at least two system calls, one to check the size of the message to > > be received and a second one to delete the message from the socket buffer. > > This commit switches to a new strategy that requires only one system call > > per message received. > > > > In my testing this increases the maximum flow setups per second by a little > > over 10%. > > --- > > ?lib/netlink-socket.c | ?110 > > +++++++++++++++++++++++-------------------------- > > ?1 files changed, 52 insertions(+), 58 deletions(-) > > > > diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c > > index 95b2401..7ad5ed9 100644 > > --- a/lib/netlink-socket.c > > +++ b/lib/netlink-socket.c > > @@ -38,7 +38,7 @@ VLOG_DEFINE_THIS_MODULE(netlink_socket); > > > > ?COVERAGE_DEFINE(netlink_overflow); > > ?COVERAGE_DEFINE(netlink_received); > > -COVERAGE_DEFINE(netlink_recv_retry); > > +COVERAGE_DEFINE(netlink_recv_jumbo); > > ?COVERAGE_DEFINE(netlink_send); > > ?COVERAGE_DEFINE(netlink_sent); > > > > @@ -257,73 +257,67 @@ STRESS_OPTION( > > ?static int > > ?nl_sock_recv__(struct nl_sock *sock, struct ofpbuf **bufp, bool wait) > > ?{ > > - ? ?uint8_t tmp; > > - ? ?ssize_t bufsize = 2048; > > - ? ?ssize_t nbytes, nbytes2; > > - ? ?struct ofpbuf *buf; > > + ? ?/* We can't accurately predict the size of the data to be received. > > ?Most > > + ? ? * received data will fit in a 2 kB buffer, so we allocate that much > > space. > > + ? ? * In case the data is actually bigger than that, we make available > > enough > > + ? ? * additional space to allow Netlink messages to be up to 64 kB long (a > > + ? ? * reasonable figure since that's the maximum length of a Netlink > > + ? ? * attribute). */ > > + ? ?enum { MAX_SIZE = 65536 }; > > + ? ?enum { HEAD_SIZE = 2048 }; > > + ? ?enum { TAIL_SIZE = MAX_SIZE - HEAD_SIZE }; > > + > > ? ? struct nlmsghdr *nlmsghdr; > > - ? ?struct iovec iov; > > - ? ?struct msghdr msg = { > > - ? ? ? ?.msg_name = NULL, > > - ? ? ? ?.msg_namelen = 0, > > - ? ? ? ?.msg_iov = &iov, > > - ? ? ? ?.msg_iovlen = 1, > > - ? ? ? ?.msg_control = NULL, > > - ? ? ? ?.msg_controllen = 0, > > - ? ? ? ?.msg_flags = 0 > > - ? ?}; > > + ? ?uint8_t tail[TAIL_SIZE]; > > + ? ?struct iovec iov[2]; > > + ? ?struct ofpbuf *buf; > > + ? ?struct msghdr msg; > > + ? ?ssize_t retval; > > + > > + ? ?buf = ofpbuf_new(HEAD_SIZE); > > + > > + ? ?iov[0].iov_base = buf->data; > > + ? ?iov[0].iov_len = HEAD_SIZE; > > + ? ?iov[1].iov_base = tail; > > + ? ?iov[1].iov_len = TAIL_SIZE; > > + > > + ? ?memset(&msg, 0, sizeof msg); > > + ? ?msg.msg_iov = iov; > > + ? ?msg.msg_iovlen = 2; > > > > - ? ?buf = ofpbuf_new(bufsize); > > - ? ?*bufp = NULL; > > - > > -try_again: > > - ? ?/* Attempt to read the message. ?We don't know the size of the data > > - ? ? * yet, so we take a guess at 2048. ?If we're wrong, we keep trying > > - ? ? * and doubling the buffer size each time. > > - ? ? */ > > - ? ?nlmsghdr = ofpbuf_put_uninit(buf, bufsize); > > - ? ?iov.iov_base = nlmsghdr; > > - ? ?iov.iov_len = bufsize; > > ? ? do { > > - ? ? ? ?nbytes = recvmsg(sock->fd, &msg, (wait ? 0 : MSG_DONTWAIT) | > > MSG_PEEK); > > - ? ?} while (nbytes < 0 && errno == EINTR); > > - ? ?if (nbytes < 0) { > > + ? ? ? ?retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT); > > + ? ?} while (retval < 0 && errno == EINTR); > > + > > + ? ?if (retval < 0) { > > + ? ? ? ?int error = errno; > > + ? ? ? ?if (error == ENOBUFS) { > > + ? ? ? ? ? ?/* Socket receive buffer overflow dropped one or more messages > > that > > + ? ? ? ? ? ? * the kernel tried to send to us. */ > > + ? ? ? ? ? ?COVERAGE_INC(netlink_overflow); > > + ? ? ? ?} > > ? ? ? ? ofpbuf_delete(buf); > > - ? ? ? ?return errno; > > + ? ? ? ?return error; > > ? ? } > > + > > ? ? if (msg.msg_flags & MSG_TRUNC) { > > - ? ? ? ?COVERAGE_INC(netlink_recv_retry); > > - ? ? ? ?bufsize *= 2; > > - ? ? ? ?ofpbuf_reinit(buf, bufsize); > > - ? ? ? ?goto try_again; > > + ? ? ? ?VLOG_ERR_RL(&rl, "truncated message (longer than %d bytes)", > > MAX_SIZE); > > + ? ? ? ?ofpbuf_delete(buf); > > + ? ? ? ?return E2BIG; > > ? ? } > > - ? ?buf->size = nbytes; > > > > - ? ?/* We successfully read the message, so recv again to clear the queue > > */ > > - ? ?iov.iov_base = &tmp; > > - ? ?iov.iov_len = 1; > > - ? ?do { > > - ? ? ? ?nbytes2 = recvmsg(sock->fd, &msg, MSG_DONTWAIT); > > - ? ?} while (nbytes2 < 0 && errno == EINTR); > > - ? ?if (nbytes2 < 0) { > > - ? ? ? ?if (errno == ENOBUFS) { > > - ? ? ? ? ? ?/* The kernel is notifying us that a message it tried to send > > to us > > - ? ? ? ? ? ? * was dropped. ?We have to pass this along to the caller in > > case > > - ? ? ? ? ? ? * it wants to retry a request. ?So kill the buffer, which we > > can > > - ? ? ? ? ? ? * re-read next time. */ > > - ? ? ? ? ? ?COVERAGE_INC(netlink_overflow); > > - ? ? ? ? ? ?ofpbuf_delete(buf); > > - ? ? ? ? ? ?return ENOBUFS; > > - ? ? ? ?} else { > > - ? ? ? ? ? ?VLOG_ERR_RL(&rl, "failed to remove nlmsg from socket: %s\n", > > - ? ? ? ? ? ? ? ? ? ? ? ?strerror(errno)); > > - ? ? ? ?} > > + ? ?ofpbuf_put_uninit(buf, MIN(retval, HEAD_SIZE)); > > + ? ?if (retval > HEAD_SIZE) { > > + ? ? ? ?COVERAGE_INC(netlink_recv_jumbo); > > + ? ? ? ?ofpbuf_put(buf, tail, retval - HEAD_SIZE); > > ? ? } > > - ? ?if (nbytes < sizeof *nlmsghdr > > + > > + ? ?nlmsghdr = buf->data; > > + ? ?if (retval < sizeof *nlmsghdr > > ? ? ? ? || nlmsghdr->nlmsg_len < sizeof *nlmsghdr > > - ? ? ? ?|| nlmsghdr->nlmsg_len > nbytes) { > > + ? ? ? ?|| nlmsghdr->nlmsg_len > retval) { > > ? ? ? ? VLOG_ERR_RL(&rl, "received invalid nlmsg (%zd bytes < %d)", > > - ? ? ? ? ? ? ? ? ? ?bufsize, NLMSG_HDRLEN); > > + ? ? ? ? ? ? ? ? ? ?retval, NLMSG_HDRLEN); > > ? ? ? ? ofpbuf_delete(buf); > > ? ? ? ? return EPROTO; > > ? ? } > > -- > > 1.7.4.4 > > > > _______________________________________________ > > dev mailing list > > [email protected] > > http://openvswitch.org/mailman/listinfo/dev > > _______________________________________________ dev mailing list [email protected] http://openvswitch.org/mailman/listinfo/dev
