Author: andre
Date: Mon Jun 22 23:08:05 2009
New Revision: 194672
URL: http://svn.freebsd.org/changeset/base/194672

Log:
  Add soreceive_stream(), an optimized version of soreceive() for
  stream (TCP) sockets.
  
  It is functionally identical to generic soreceive() but has a
  number of stream-specific optimizations:
  o does only one sockbuf unlock/lock per receive independent of
    the length of data to be moved into the uio compared to
    soreceive() which unlocks/locks per *mbuf*.
  o uses m_mbuftouio() instead of its own copy(out) variant.
  o much more compact code flow as a large number of special
    cases is removed.
  o much improved readability.
  
  It offers significantly reduced CPU usage and lock contention
  when receiving fast TCP streams.  Additional gains are obtained
  when the receiving application is using SO_RCVLOWAT to batch up
  some data before a read (and wakeup) is done.
  
  This function was written by "reverse engineering" and is not
  just a stripped down variant of soreceive().
  
  It is not yet enabled by default on TCP sockets.  Instead it is
  commented out in the protocol initialization in tcp_usrreq.c
  until more widespread testing has been done.
  
  Testers, especially with 10GigE gear, are welcome.
  
  MFP4: r164817 //depot/user/andre/soreceive_stream/

Modified:
  head/sys/kern/uipc_socket.c
  head/sys/netinet/tcp_usrreq.c
  head/sys/sys/socketvar.h

Modified: head/sys/kern/uipc_socket.c
==============================================================================
--- head/sys/kern/uipc_socket.c Mon Jun 22 22:54:44 2009        (r194671)
+++ head/sys/kern/uipc_socket.c Mon Jun 22 23:08:05 2009        (r194672)
@@ -1857,6 +1857,217 @@ release:
 }
 
 /*
+ * Optimized version of soreceive() for stream (TCP) sockets.
+ *
+ * Only SOCK_STREAM sockets without control data are handled; EINVAL is
+ * returned for any other socket type, for a non-NULL controlp and for a
+ * zero-length uio.  MSG_OOB requests are passed on to soreceive_rcvoob().
+ * If psa is supplied, *psa is set to NULL.
+ *
+ * With mp0 == NULL the data is copied out through uio.  Otherwise an mbuf
+ * chain is handed back in *mp0 and uio->uio_resid is only used as the byte
+ * limit, being reduced by the amount of data returned.
+ *
+ * Returns 0 on success or an errno value on failure.
+ */
+int
+soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
+    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
+{
+       int len = 0, error = 0, flags, oresid;
+       struct sockbuf *sb;
+       struct mbuf *m, *n = NULL;
+
+       /* We only do stream sockets. */
+       if (so->so_type != SOCK_STREAM)
+               return (EINVAL);
+       if (psa != NULL)
+               *psa = NULL;
+       if (controlp != NULL)
+               return (EINVAL);
+       if (flagsp != NULL)
+               flags = *flagsp &~ MSG_EOR;
+       else
+               flags = 0;
+       if (flags & MSG_OOB)
+               return (soreceive_rcvoob(so, uio, flags));
+       if (mp0 != NULL)
+               *mp0 = NULL;
+
+       sb = &so->so_rcv;
+
+       /*
+        * Prevent other readers from entering the socket.  On failure
+        * neither the sblock nor the sockbuf mutex is held, so return
+        * directly instead of going through the unlock path at "out".
+        */
+       error = sblock(sb, SBLOCKWAIT(flags));
+       if (error)
+               return (error);
+       SOCKBUF_LOCK(sb);
+
+       /* Easy one, no space to copyout anything. */
+       if (uio->uio_resid == 0) {
+               error = EINVAL;
+               goto out;
+       }
+       oresid = uio->uio_resid;
+
+       /* We will never ever get anything unless we are connected. */
+       if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
+               /* When disconnecting there may be still some data left. */
+               if (sb->sb_cc > 0)
+                       goto deliver;
+               if (!(so->so_state & SS_ISDISCONNECTED))
+                       error = ENOTCONN;
+               goto out;
+       }
+
+       /* Socket buffer is empty and we shall not block. */
+       if (sb->sb_cc == 0 &&
+           ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
+               error = EAGAIN;
+               goto out;
+       }
+
+restart:
+       SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+
+       /* Abort if socket has reported problems. */
+       if (so->so_error) {
+               if (sb->sb_cc > 0)
+                       goto deliver;
+               if (oresid > uio->uio_resid)
+                       goto out;
+               error = so->so_error;
+               if (!(flags & MSG_PEEK))
+                       so->so_error = 0;
+               goto out;
+       }
+
+       /* Door is closed.  Deliver what is left, if any. */
+       if (sb->sb_state & SBS_CANTRCVMORE) {
+               if (sb->sb_cc > 0)
+                       goto deliver;
+               else
+                       goto out;
+       }
+
+       /* Socket buffer got some data that we shall deliver now. */
+       if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
+           ((so->so_state & SS_NBIO) ||
+            (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
+            sb->sb_cc >= sb->sb_lowat ||
+            sb->sb_cc >= uio->uio_resid ||
+            sb->sb_cc >= sb->sb_hiwat) ) {
+               goto deliver;
+       }
+
+       /* On MSG_WAITALL we must wait until all data or error arrives. */
+       if ((flags & MSG_WAITALL) &&
+           (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat))
+               goto deliver;
+
+       /*
+        * Wait and block until (more) data comes in.
+        * NB: Drops the sockbuf lock during wait.
+        */
+       error = sbwait(sb);
+       if (error)
+               goto out;
+       goto restart;
+
+deliver:
+       SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+       KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__));
+       KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
+
+       /* Statistics. */
+       if (uio->uio_td)
+               uio->uio_td->td_ru.ru_msgrcv++;
+
+       /* Fill uio until full or current end of socket buffer is reached. */
+       len = min(uio->uio_resid, sb->sb_cc);
+       if (mp0 != NULL) {
+               /* Dequeue as many mbufs as possible. */
+               if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
+                       for (*mp0 = m = sb->sb_mb;
+                            m != NULL && m->m_len <= len;
+                            m = m->m_next) {
+                               len -= m->m_len;
+                               uio->uio_resid -= m->m_len;
+                               sbfree(sb, m);
+                               n = m;
+                       }
+                       sb->sb_mb = m;
+                       if (sb->sb_mb == NULL)
+                               SB_EMPTY_FIXUP(sb);
+                       n->m_next = NULL;
+               }
+               /* Copy the remainder. */
+               if (len > 0) {
+                       KASSERT(sb->sb_mb != NULL,
+                           ("%s: len > 0 && sb->sb_mb empty", __func__));
+
+                       m = m_copym(sb->sb_mb, 0, len, M_DONTWAIT);
+                       if (m == NULL)
+                               len = 0;        /* Don't flush data from sockbuf. */
+                       else
+                               uio->uio_resid -= len;
+                       if (*mp0 != NULL)
+                               n->m_next = m;
+                       else
+                               *mp0 = m;
+                       if (*mp0 == NULL) {
+                               error = ENOBUFS;
+                               goto out;
+                       }
+               }
+       } else {
+               /* NB: Must unlock socket buffer as uiomove may sleep. */
+               SOCKBUF_UNLOCK(sb);
+               error = m_mbuftouio(uio, sb->sb_mb, len);
+               SOCKBUF_LOCK(sb);
+               if (error)
+                       goto out;
+       }
+       SBLASTRECORDCHK(sb);
+       SBLASTMBUFCHK(sb);
+
+       /*
+        * Remove the delivered data from the socket buffer unless we
+        * were only peeking.
+        */
+       if (!(flags & MSG_PEEK)) {
+               if (len > 0)
+                       sbdrop_locked(sb, len);
+
+               /* Notify protocol that we drained some data. */
+               if ((so->so_proto->pr_flags & PR_WANTRCVD) &&
+                   (((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
+                    !(flags & MSG_SOCALLBCK))) {
+                       SOCKBUF_UNLOCK(sb);
+                       (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
+                       SOCKBUF_LOCK(sb);
+               }
+       }
+
+       /*
+        * For MSG_WAITALL we may have to loop again and wait for
+        * more data to come in.
+        */
+       if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
+               goto restart;
+out:
+       SOCKBUF_LOCK_ASSERT(sb);
+       SBLASTRECORDCHK(sb);
+       SBLASTMBUFCHK(sb);
+       SOCKBUF_UNLOCK(sb);
+       sbunlock(sb);
+       return (error);
+}
+
+/*
  * Optimized version of soreceive() for simple datagram cases from userspace.
  * Unlike in the stream case, we're able to drop a datagram if copyout()
  * fails, and because we handle datagrams atomically, we don't need to use a

Modified: head/sys/netinet/tcp_usrreq.c
==============================================================================
--- head/sys/netinet/tcp_usrreq.c       Mon Jun 22 22:54:44 2009        (r194671)
+++ head/sys/netinet/tcp_usrreq.c       Mon Jun 22 23:08:05 2009        (r194672)
@@ -1032,6 +1032,9 @@ struct pr_usrreqs tcp_usrreqs = {
        .pru_send =             tcp_usr_send,
        .pru_shutdown =         tcp_usr_shutdown,
        .pru_sockaddr =         in_getsockaddr,
+#if 0
+       .pru_soreceive =        soreceive_stream,
+#endif
        .pru_sosetlabel =       in_pcbsosetlabel,
        .pru_close =            tcp_usr_close,
 };
@@ -1053,6 +1056,9 @@ struct pr_usrreqs tcp6_usrreqs = {
        .pru_send =             tcp_usr_send,
        .pru_shutdown =         tcp_usr_shutdown,
        .pru_sockaddr =         in6_mapped_sockaddr,
+#if 0
+       .pru_soreceive =        soreceive_stream,
+#endif
        .pru_sosetlabel =       in_pcbsosetlabel,
        .pru_close =            tcp_usr_close,
 };

Modified: head/sys/sys/socketvar.h
==============================================================================
--- head/sys/sys/socketvar.h    Mon Jun 22 22:54:44 2009        (r194671)
+++ head/sys/sys/socketvar.h    Mon Jun 22 23:08:05 2009        (r194672)
@@ -345,6 +345,9 @@ int sopoll_generic(struct socket *so, in
            struct ucred *active_cred, struct thread *td);
 int    soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
            struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
+int    soreceive_stream(struct socket *so, struct sockaddr **paddr,
+           struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
+           int *flagsp);
 int    soreceive_dgram(struct socket *so, struct sockaddr **paddr,
            struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
            int *flagsp);
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to