Good catch.  Thanks, I will push this soon.

On Wed, Jul 27, 2011 at 01:48:45PM -0700, Ethan Jackson wrote:
> I think it should NULL out bufp in the beginning in case it errors out
> earlier.  Otherwise looks fine to me.
> 
> Ethan
> 
> On Wed, Jul 20, 2011 at 13:16, Ben Pfaff <[email protected]> wrote:
> > Until now, each attempt to receive a message from a Netlink socket has
> > taken at least two system calls, one to check the size of the message to
> > be received and a second one to delete the message from the socket buffer.
> > This commit switches to a new strategy that requires only one system call
> > per message received.
> >
> > In my testing this increases the maximum flow setups per second by a little
> > over 10%.
> > ---
> > ?lib/netlink-socket.c | ?110 
> > +++++++++++++++++++++++--------------------------
> > ?1 files changed, 52 insertions(+), 58 deletions(-)
> >
> > diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
> > index 95b2401..7ad5ed9 100644
> > --- a/lib/netlink-socket.c
> > +++ b/lib/netlink-socket.c
> > @@ -38,7 +38,7 @@ VLOG_DEFINE_THIS_MODULE(netlink_socket);
> >
> > ?COVERAGE_DEFINE(netlink_overflow);
> > ?COVERAGE_DEFINE(netlink_received);
> > -COVERAGE_DEFINE(netlink_recv_retry);
> > +COVERAGE_DEFINE(netlink_recv_jumbo);
> > ?COVERAGE_DEFINE(netlink_send);
> > ?COVERAGE_DEFINE(netlink_sent);
> >
> > @@ -257,73 +257,67 @@ STRESS_OPTION(
> > ?static int
> > ?nl_sock_recv__(struct nl_sock *sock, struct ofpbuf **bufp, bool wait)
> > ?{
> > - ? ?uint8_t tmp;
> > - ? ?ssize_t bufsize = 2048;
> > - ? ?ssize_t nbytes, nbytes2;
> > - ? ?struct ofpbuf *buf;
> > + ? ?/* We can't accurately predict the size of the data to be received. 
> > ?Most
> > + ? ? * received data will fit in a 2 kB buffer, so we allocate that much 
> > space.
> > + ? ? * In case the data is actually bigger than that, we make available 
> > enough
> > + ? ? * additional space to allow Netlink messages to be up to 64 kB long (a
> > + ? ? * reasonable figure since that's the maximum length of a Netlink
> > + ? ? * attribute). */
> > + ? ?enum { MAX_SIZE = 65536 };
> > + ? ?enum { HEAD_SIZE = 2048 };
> > + ? ?enum { TAIL_SIZE = MAX_SIZE - HEAD_SIZE };
> > +
> > ? ? struct nlmsghdr *nlmsghdr;
> > - ? ?struct iovec iov;
> > - ? ?struct msghdr msg = {
> > - ? ? ? ?.msg_name = NULL,
> > - ? ? ? ?.msg_namelen = 0,
> > - ? ? ? ?.msg_iov = &iov,
> > - ? ? ? ?.msg_iovlen = 1,
> > - ? ? ? ?.msg_control = NULL,
> > - ? ? ? ?.msg_controllen = 0,
> > - ? ? ? ?.msg_flags = 0
> > - ? ?};
> > + ? ?uint8_t tail[TAIL_SIZE];
> > + ? ?struct iovec iov[2];
> > + ? ?struct ofpbuf *buf;
> > + ? ?struct msghdr msg;
> > + ? ?ssize_t retval;
> > +
> > + ? ?buf = ofpbuf_new(HEAD_SIZE);
> > +
> > + ? ?iov[0].iov_base = buf->data;
> > + ? ?iov[0].iov_len = HEAD_SIZE;
> > + ? ?iov[1].iov_base = tail;
> > + ? ?iov[1].iov_len = TAIL_SIZE;
> > +
> > + ? ?memset(&msg, 0, sizeof msg);
> > + ? ?msg.msg_iov = iov;
> > + ? ?msg.msg_iovlen = 2;
> >
> > - ? ?buf = ofpbuf_new(bufsize);
> > - ? ?*bufp = NULL;
> > -
> > -try_again:
> > - ? ?/* Attempt to read the message. ?We don't know the size of the data
> > - ? ? * yet, so we take a guess at 2048. ?If we're wrong, we keep trying
> > - ? ? * and doubling the buffer size each time.
> > - ? ? */
> > - ? ?nlmsghdr = ofpbuf_put_uninit(buf, bufsize);
> > - ? ?iov.iov_base = nlmsghdr;
> > - ? ?iov.iov_len = bufsize;
> > ? ? do {
> > - ? ? ? ?nbytes = recvmsg(sock->fd, &msg, (wait ? 0 : MSG_DONTWAIT) | 
> > MSG_PEEK);
> > - ? ?} while (nbytes < 0 && errno == EINTR);
> > - ? ?if (nbytes < 0) {
> > + ? ? ? ?retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
> > + ? ?} while (retval < 0 && errno == EINTR);
> > +
> > + ? ?if (retval < 0) {
> > + ? ? ? ?int error = errno;
> > + ? ? ? ?if (error == ENOBUFS) {
> > + ? ? ? ? ? ?/* Socket receive buffer overflow dropped one or more messages 
> > that
> > + ? ? ? ? ? ? * the kernel tried to send to us. */
> > + ? ? ? ? ? ?COVERAGE_INC(netlink_overflow);
> > + ? ? ? ?}
> > ? ? ? ? ofpbuf_delete(buf);
> > - ? ? ? ?return errno;
> > + ? ? ? ?return error;
> > ? ? }
> > +
> > ? ? if (msg.msg_flags & MSG_TRUNC) {
> > - ? ? ? ?COVERAGE_INC(netlink_recv_retry);
> > - ? ? ? ?bufsize *= 2;
> > - ? ? ? ?ofpbuf_reinit(buf, bufsize);
> > - ? ? ? ?goto try_again;
> > + ? ? ? ?VLOG_ERR_RL(&rl, "truncated message (longer than %d bytes)", 
> > MAX_SIZE);
> > + ? ? ? ?ofpbuf_delete(buf);
> > + ? ? ? ?return E2BIG;
> > ? ? }
> > - ? ?buf->size = nbytes;
> >
> > - ? ?/* We successfully read the message, so recv again to clear the queue 
> > */
> > - ? ?iov.iov_base = &tmp;
> > - ? ?iov.iov_len = 1;
> > - ? ?do {
> > - ? ? ? ?nbytes2 = recvmsg(sock->fd, &msg, MSG_DONTWAIT);
> > - ? ?} while (nbytes2 < 0 && errno == EINTR);
> > - ? ?if (nbytes2 < 0) {
> > - ? ? ? ?if (errno == ENOBUFS) {
> > - ? ? ? ? ? ?/* The kernel is notifying us that a message it tried to send 
> > to us
> > - ? ? ? ? ? ? * was dropped. ?We have to pass this along to the caller in 
> > case
> > - ? ? ? ? ? ? * it wants to retry a request. ?So kill the buffer, which we 
> > can
> > - ? ? ? ? ? ? * re-read next time. */
> > - ? ? ? ? ? ?COVERAGE_INC(netlink_overflow);
> > - ? ? ? ? ? ?ofpbuf_delete(buf);
> > - ? ? ? ? ? ?return ENOBUFS;
> > - ? ? ? ?} else {
> > - ? ? ? ? ? ?VLOG_ERR_RL(&rl, "failed to remove nlmsg from socket: %s\n",
> > - ? ? ? ? ? ? ? ? ? ? ? ?strerror(errno));
> > - ? ? ? ?}
> > + ? ?ofpbuf_put_uninit(buf, MIN(retval, HEAD_SIZE));
> > + ? ?if (retval > HEAD_SIZE) {
> > + ? ? ? ?COVERAGE_INC(netlink_recv_jumbo);
> > + ? ? ? ?ofpbuf_put(buf, tail, retval - HEAD_SIZE);
> > ? ? }
> > - ? ?if (nbytes < sizeof *nlmsghdr
> > +
> > + ? ?nlmsghdr = buf->data;
> > + ? ?if (retval < sizeof *nlmsghdr
> > ? ? ? ? || nlmsghdr->nlmsg_len < sizeof *nlmsghdr
> > - ? ? ? ?|| nlmsghdr->nlmsg_len > nbytes) {
> > + ? ? ? ?|| nlmsghdr->nlmsg_len > retval) {
> > ? ? ? ? VLOG_ERR_RL(&rl, "received invalid nlmsg (%zd bytes < %d)",
> > - ? ? ? ? ? ? ? ? ? ?bufsize, NLMSG_HDRLEN);
> > + ? ? ? ? ? ? ? ? ? ?retval, NLMSG_HDRLEN);
> > ? ? ? ? ofpbuf_delete(buf);
> > ? ? ? ? return EPROTO;
> > ? ? }
> > --
> > 1.7.4.4
> >
> > _______________________________________________
> > dev mailing list
> > [email protected]
> > http://openvswitch.org/mailman/listinfo/dev
> >
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev

Reply via email to