Author: markj
Date: Tue Sep 15 19:23:22 2020
New Revision: 365764
URL: https://svnweb.freebsd.org/changeset/base/365764

Log:
  Simplify unix socket connection peer locking.
  
  unp_pcb_owned_lock2() has some sharp edges and forces callers to deal
  with a bunch of cases.  Simplify it:
  
  - Rename to unp_pcb_lock_peer().
  - Return the connected peer instead of forcing callers to load it
    beforehand.
  - Handle self-connected sockets.
  - In unp_connectat(), just lock the accept socket directly.  It should
    not be possible for the nascent socket to participate in any other
    lock orders.
  - Get rid of connect_internal().  It does not provide any useful
    checking anymore.
  - Block in unp_connectat() when a different thread is concurrently
    attempting to lock both sides of a connection.  This provides simpler
    semantics for callers of unp_pcb_lock_peer().
  - Make unp_connectat() return EISCONN if the socket is already
    connected.  This fixes a race[1] when multiple threads attempt to
    connect() to different addresses using the same datagram socket.
    Upper layers will disconnect a connected datagram socket before
    calling the protocol connect's method, but there is no synchronization
    between this and protocol-layer code.
  
  Reported by:  syzkaller [1]
  Tested by:    pho
  Sponsored by: The FreeBSD Foundation
  Differential Revision:        https://reviews.freebsd.org/D26299

Modified:
  head/sys/kern/uipc_usrreq.c
  head/sys/sys/unpcb.h

Modified: head/sys/kern/uipc_usrreq.c
==============================================================================
--- head/sys/kern/uipc_usrreq.c Tue Sep 15 19:23:01 2020        (r365763)
+++ head/sys/kern/uipc_usrreq.c Tue Sep 15 19:23:22 2020        (r365764)
@@ -279,6 +279,7 @@ static struct mtx   unp_defers_lock;
                                            "unp", "unp",       \
                                            MTX_DUPOK|MTX_DEF)
 #define        UNP_PCB_LOCK_DESTROY(unp)       mtx_destroy(&(unp)->unp_mtx)
+#define        UNP_PCB_LOCKPTR(unp)            (&(unp)->unp_mtx)
 #define        UNP_PCB_LOCK(unp)               mtx_lock(&(unp)->unp_mtx)
 #define        UNP_PCB_TRYLOCK(unp)            mtx_trylock(&(unp)->unp_mtx)
 #define        UNP_PCB_UNLOCK(unp)             mtx_unlock(&(unp)->unp_mtx)
@@ -368,35 +369,55 @@ unp_pcb_unlock_pair(struct unpcb *unp, struct unpcb *u
                UNP_PCB_UNLOCK(unp2);
 }
 
-static __noinline void
-unp_pcb_owned_lock2_slowpath(struct unpcb *unp, struct unpcb **unp2p,
-    int *freed)
+/*
+ * Try to lock the connected peer of an already locked socket.  In some cases
+ * this requires that we unlock the current socket.  The pairbusy counter is
+ * used to block concurrent connection attempts while the lock is dropped.  The
+ * caller must be careful to revalidate PCB state.
+ */
+static struct unpcb *
+unp_pcb_lock_peer(struct unpcb *unp)
 {
        struct unpcb *unp2;
 
-       unp2 = *unp2p;
+       UNP_PCB_LOCK_ASSERT(unp);
+       unp2 = unp->unp_conn;
+       if (__predict_false(unp2 == NULL))
+               return (NULL);
+       if (__predict_false(unp == unp2))
+               return (unp);
+
+       UNP_PCB_UNLOCK_ASSERT(unp2);
+
+       if (__predict_true(UNP_PCB_TRYLOCK(unp2)))
+               return (unp2);
+       if ((uintptr_t)unp2 > (uintptr_t)unp) {
+               UNP_PCB_LOCK(unp2);
+               return (unp2);
+       }
+       unp->unp_pairbusy++;
        unp_pcb_hold(unp2);
        UNP_PCB_UNLOCK(unp);
+
        UNP_PCB_LOCK(unp2);
        UNP_PCB_LOCK(unp);
-       *freed = unp_pcb_rele(unp2);
-       if (*freed)
-               *unp2p = NULL;
+       KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL,
+           ("%s: socket %p was reconnected", __func__, unp));
+       if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) {
+               unp->unp_flags &= ~UNP_WAITING;
+               wakeup(unp);
+       }
+       if (unp_pcb_rele(unp2)) {
+               /* unp2 is unlocked. */
+               return (NULL);
+       }
+       if (unp->unp_conn == NULL) {
+               UNP_PCB_UNLOCK(unp2);
+               return (NULL);
+       }
+       return (unp2);
 }
 
-#define unp_pcb_owned_lock2(unp, unp2, freed) do {                     \
-       freed = 0;                                                      \
-       UNP_PCB_LOCK_ASSERT(unp);                                       \
-       UNP_PCB_UNLOCK_ASSERT(unp2);                                    \
-       MPASS((unp) != (unp2));                                         \
-       if (__predict_true(UNP_PCB_TRYLOCK(unp2)))                      \
-               break;                                                  \
-       else if ((uintptr_t)(unp2) > (uintptr_t)(unp))                  \
-               UNP_PCB_LOCK(unp2);                                     \
-       else                                                            \
-               unp_pcb_owned_lock2_slowpath((unp), &(unp2), &freed);   \
-} while (0)
-
 /*
  * Definitions of protocols supported in the LOCAL domain.
  */
@@ -710,7 +731,7 @@ uipc_close(struct socket *so)
        struct unpcb *unp, *unp2;
        struct vnode *vp = NULL;
        struct mtx *vplock;
-       int freed;
+
        unp = sotounpcb(so);
        KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
 
@@ -728,15 +749,10 @@ uipc_close(struct socket *so)
                VOP_UNP_DETACH(vp);
                unp->unp_vnode = NULL;
        }
-       unp2 = unp->unp_conn;
-       if (__predict_false(unp == unp2)) {
+       if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
                unp_disconnect(unp, unp2);
-       } else if (unp2 != NULL) {
-               unp_pcb_owned_lock2(unp, unp2, freed);
-               unp_disconnect(unp, unp2);
-       } else {
+       else
                UNP_PCB_UNLOCK(unp);
-       }
        if (vp) {
                mtx_unlock(vplock);
                vrele(vp);
@@ -765,14 +781,13 @@ uipc_detach(struct socket *so)
        struct unpcb *unp, *unp2;
        struct mtx *vplock;
        struct vnode *vp;
-       int freeunp, local_unp_rights;
+       int local_unp_rights;
 
        unp = sotounpcb(so);
        KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
 
        vp = NULL;
        vplock = NULL;
-       local_unp_rights = 0;
 
        SOCK_LOCK(so);
        if (!SOLISTENING(so)) {
@@ -811,21 +826,11 @@ uipc_detach(struct socket *so)
                VOP_UNP_DETACH(vp);
                unp->unp_vnode = NULL;
        }
-       if (__predict_false(unp == unp->unp_conn)) {
-               unp_disconnect(unp, unp);
-               unp2 = NULL;
-       } else {
-               if ((unp2 = unp->unp_conn) != NULL) {
-                       unp_pcb_owned_lock2(unp, unp2, freeunp);
-                       if (freeunp)
-                               unp2 = NULL;
-               }
-               unp_pcb_hold(unp);
-               if (unp2 != NULL)
-                       unp_disconnect(unp, unp2);
-               else
-                       UNP_PCB_UNLOCK(unp);
-       }
+       if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
+               unp_disconnect(unp, unp2);
+       else
+               UNP_PCB_UNLOCK(unp);
+
        UNP_REF_LIST_LOCK();
        while (!LIST_EMPTY(&unp->unp_refs)) {
                struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
@@ -838,11 +843,9 @@ uipc_detach(struct socket *so)
                unp_drop(ref);
                UNP_REF_LIST_LOCK();
        }
-
        UNP_REF_LIST_UNLOCK();
+
        UNP_PCB_LOCK(unp);
-       freeunp = unp_pcb_rele(unp);
-       MPASS(freeunp == 0);
        local_unp_rights = unp_rights;
        unp->unp_socket->so_pcb = NULL;
        unp->unp_socket = NULL;
@@ -862,24 +865,15 @@ static int
 uipc_disconnect(struct socket *so)
 {
        struct unpcb *unp, *unp2;
-       int freed;
 
        unp = sotounpcb(so);
        KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
 
        UNP_PCB_LOCK(unp);
-       if ((unp2 = unp->unp_conn) == NULL) {
+       if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
+               unp_disconnect(unp, unp2);
+       else
                UNP_PCB_UNLOCK(unp);
-               return (0);
-       }
-       if (__predict_true(unp != unp2)) {
-               unp_pcb_owned_lock2(unp, unp2, freed);
-               if (__predict_false(freed)) {
-                       UNP_PCB_UNLOCK(unp);
-                       return (0);
-               }
-       }
-       unp_disconnect(unp, unp2);
        return (0);
 }
 
@@ -998,27 +992,6 @@ uipc_rcvd(struct socket *so, int flags)
 }
 
 static int
-connect_internal(struct socket *so, struct sockaddr *nam, struct thread *td)
-{
-       int error;
-       struct unpcb *unp;
-
-       unp = so->so_pcb;
-       if (unp->unp_conn != NULL)
-               return (EISCONN);
-       error = unp_connect(so, nam, td);
-       if (error)
-               return (error);
-       UNP_PCB_LOCK(unp);
-       if (unp->unp_conn == NULL) {
-               UNP_PCB_UNLOCK(unp);
-               if (error == 0)
-                       error = ENOTCONN;
-       }
-       return (error);
-}
-
-static int
 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
@@ -1048,57 +1021,26 @@ uipc_send(struct socket *so, int flags, struct mbuf *m
                const struct sockaddr *from;
 
                if (nam != NULL) {
-                       /*
-                        * We return with UNP_PCB_LOCK_HELD so we know that
-                        * the reference is live if the pointer is valid.
-                        */
-                       if ((error = connect_internal(so, nam, td)))
+                       error = unp_connect(so, nam, td);
+                       if (error != 0)
                                break;
-                       MPASS(unp->unp_conn != NULL);
-                       unp2 = unp->unp_conn;
-               } else  {
-                       UNP_PCB_LOCK(unp);
-
-                       /*
-                        * Because connect() and send() are non-atomic in a 
sendto()
-                        * with a target address, it's possible that the socket 
will
-                        * have disconnected before the send() can run.  In 
that case
-                        * return the slightly counter-intuitive but otherwise
-                        * correct error that the socket is not connected.
-                        */
-                       if ((unp2 = unp->unp_conn)  == NULL) {
-                               UNP_PCB_UNLOCK(unp);
-                               error = ENOTCONN;
-                               break;
-                       }
                }
-               if (__predict_false(unp == unp2)) {
-                       if (unp->unp_socket == NULL) {
-                               error = ENOTCONN;
-                               break;
-                       }
-                       goto connect_self;
-               }
-               unp_pcb_owned_lock2(unp, unp2, freed);
-               if (__predict_false(freed)) {
-                       UNP_PCB_UNLOCK(unp);
-                       error = ENOTCONN;
-                       break;
-               }
+               UNP_PCB_LOCK(unp);
+
                /*
-                * The socket referencing unp2 may have been closed
-                * or unp may have been disconnected if the unp lock
-                * was dropped to acquire unp2.
+                * Because connect() and send() are non-atomic in a sendto()
+                * with a target address, it's possible that the socket will
+                * have disconnected before the send() can run.  In that case
+                * return the slightly counter-intuitive but otherwise
+                * correct error that the socket is not connected.
                 */
-               if (__predict_false(unp->unp_conn == NULL) ||
-                       unp2->unp_socket == NULL) {
+               unp2 = unp_pcb_lock_peer(unp);
+               if (unp2 == NULL) {
                        UNP_PCB_UNLOCK(unp);
-                       if (unp_pcb_rele(unp2) == 0)
-                               UNP_PCB_UNLOCK(unp2);
                        error = ENOTCONN;
                        break;
                }
-       connect_self:
+
                if (unp2->unp_flags & UNP_WANTCRED)
                        control = unp_addsockcred(td, control);
                if (unp->unp_addr != NULL)
@@ -1127,36 +1069,26 @@ uipc_send(struct socket *so, int flags, struct mbuf *m
        case SOCK_STREAM:
                if ((so->so_state & SS_ISCONNECTED) == 0) {
                        if (nam != NULL) {
-                               error = connect_internal(so, nam, td);
+                               error = unp_connect(so, nam, td);
                                if (error != 0)
                                        break;
                        } else {
                                error = ENOTCONN;
                                break;
                        }
-               } else {
-                       UNP_PCB_LOCK(unp);
                }
 
-               if ((unp2 = unp->unp_conn) == NULL) {
+               UNP_PCB_LOCK(unp);
+               if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) {
                        UNP_PCB_UNLOCK(unp);
                        error = ENOTCONN;
                        break;
                } else if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
-                       UNP_PCB_UNLOCK(unp);
+                       unp_pcb_unlock_pair(unp, unp2);
                        error = EPIPE;
                        break;
-               } else if ((unp2 = unp->unp_conn) == NULL) {
-                       UNP_PCB_UNLOCK(unp);
-                       error = ENOTCONN;
-                       break;
                }
-               unp_pcb_owned_lock2(unp, unp2, freed);
                UNP_PCB_UNLOCK(unp);
-               if (__predict_false(freed)) {
-                       error = ENOTCONN;
-                       break;
-               }
                if ((so2 = unp2->unp_socket) == NULL) {
                        UNP_PCB_UNLOCK(unp2);
                        error = ENOTCONN;
@@ -1291,30 +1223,19 @@ uipc_ready(struct socket *so, struct mbuf *m, int coun
            ("%s: unexpected socket type for %p", __func__, so));
 
        UNP_PCB_LOCK(unp);
-       if ((unp2 = unp->unp_conn) == NULL) {
+       if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
                UNP_PCB_UNLOCK(unp);
-               goto search;
+               so2 = unp2->unp_socket;
+               SOCKBUF_LOCK(&so2->so_rcv);
+               if ((error = sbready(&so2->so_rcv, m, count)) == 0)
+                       sorwakeup_locked(so2);
+               else
+                       SOCKBUF_UNLOCK(&so2->so_rcv);
+               UNP_PCB_UNLOCK(unp2);
+               return (error);
        }
-       if (unp != unp2) {
-               if (UNP_PCB_TRYLOCK(unp2) == 0) {
-                       unp_pcb_hold(unp2);
-                       UNP_PCB_UNLOCK(unp);
-                       UNP_PCB_LOCK(unp2);
-                       if (unp_pcb_rele(unp2))
-                               goto search;
-               } else
-                       UNP_PCB_UNLOCK(unp);
-       }
-       so2 = unp2->unp_socket;
-       SOCKBUF_LOCK(&so2->so_rcv);
-       if ((error = sbready(&so2->so_rcv, m, count)) == 0)
-               sorwakeup_locked(so2);
-       else
-               SOCKBUF_UNLOCK(&so2->so_rcv);
-       UNP_PCB_UNLOCK(unp2);
-       return (error);
+       UNP_PCB_UNLOCK(unp);
 
-search:
        /*
         * The receiving socket has been disconnected, but may still be valid.
         * In this case, the now-ready mbufs are still present in its socket
@@ -1566,7 +1487,7 @@ unp_connectat(int fd, struct socket *so, struct sockad
        char buf[SOCK_MAXADDRLEN];
        struct sockaddr *sa;
        cap_rights_t rights;
-       int error, len, freed;
+       int error, len;
        bool connreq;
 
        if (nam->sa_family != AF_UNIX)
@@ -1582,9 +1503,33 @@ unp_connectat(int fd, struct socket *so, struct sockad
 
        unp = sotounpcb(so);
        UNP_PCB_LOCK(unp);
-       if (unp->unp_flags & UNP_CONNECTING) {
-               UNP_PCB_UNLOCK(unp);
-               return (EALREADY);
+       for (;;) {
+               /*
+                * Wait for connection state to stabilize.  If a connection
+                * already exists, give up.  For datagram sockets, which permit
+                * multiple consecutive connect(2) calls, upper layers are
+                * responsible for disconnecting in advance of a subsequent
+                * connect(2), but this is not synchronized with PCB connection
+                * state.
+                *
+                * Also make sure that no threads are currently attempting to
+                * lock the peer socket, to ensure that unp_conn cannot
+                * transition between two valid sockets while locks are dropped.
+                */
+               if (unp->unp_conn != NULL) {
+                       UNP_PCB_UNLOCK(unp);
+                       return (EISCONN);
+               }
+               if ((unp->unp_flags & UNP_CONNECTING) != 0) {
+                       UNP_PCB_UNLOCK(unp);
+                       return (EALREADY);
+               }
+               if (unp->unp_pairbusy > 0) {
+                       unp->unp_flags |= UNP_WAITING;
+                       mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0);
+                       continue;
+               }
+               break;
        }
        unp->unp_flags |= UNP_CONNECTING;
        UNP_PCB_UNLOCK(unp);
@@ -1657,12 +1602,12 @@ unp_connectat(int fd, struct socket *so, struct sockad
 
                UNP_PCB_UNLOCK(unp2);
                unp2 = unp3;
-               unp_pcb_owned_lock2(unp2, unp, freed);
-               if (__predict_false(freed)) {
-                       UNP_PCB_UNLOCK(unp2);
-                       error = ECONNREFUSED;
-                       goto bad2;
-               }
+
+               /*
+                * It is safe to block on the PCB lock here since unp2 is
+                * nascent and cannot be connected to any other sockets.
+                */
+               UNP_PCB_LOCK(unp);
 #ifdef MAC
                mac_socketpeer_set_from_socket(so, so2);
                mac_socketpeer_set_from_socket(so2, so);
@@ -1683,6 +1628,8 @@ bad:
        }
        free(sa, M_SONAME);
        UNP_PCB_LOCK(unp);
+       KASSERT((unp->unp_flags & UNP_CONNECTING) != 0,
+           ("%s: unp %p has UNP_CONNECTING clear", __func__, unp));
        unp->unp_flags &= ~UNP_CONNECTING;
        UNP_PCB_UNLOCK(unp);
        return (error);
@@ -1722,6 +1669,8 @@ unp_connect2(struct socket *so, struct socket *so2, in
 
        UNP_PCB_LOCK_ASSERT(unp);
        UNP_PCB_LOCK_ASSERT(unp2);
+       KASSERT(unp->unp_conn == NULL,
+           ("%s: socket %p is already connected", __func__, unp));
 
        if (so2->so_type != so->so_type)
                return (EPROTOTYPE);
@@ -1738,6 +1687,8 @@ unp_connect2(struct socket *so, struct socket *so2, in
 
        case SOCK_STREAM:
        case SOCK_SEQPACKET:
+               KASSERT(unp2->unp_conn == NULL,
+                   ("%s: socket %p is already connected", __func__, unp2));
                unp2->unp_conn = unp;
                if (req == PRU_CONNECT &&
                    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
@@ -1992,25 +1943,18 @@ unp_drop(struct unpcb *unp)
 {
        struct socket *so = unp->unp_socket;
        struct unpcb *unp2;
-       int freed;
 
        /*
         * Regardless of whether the socket's peer dropped the connection
         * with this socket by aborting or disconnecting, POSIX requires
         * that ECONNRESET is returned.
         */
-       /* acquire a reference so that unp isn't freed from underneath us */
 
        UNP_PCB_LOCK(unp);
        if (so)
                so->so_error = ECONNRESET;
-       unp2 = unp->unp_conn;
-       /* Last reference dropped in unp_disconnect(). */
-       if (unp2 == unp) {
-               unp_pcb_rele_notlast(unp);
-               unp_disconnect(unp, unp2);
-       } else if (unp2 != NULL) {
-               unp_pcb_owned_lock2(unp, unp2, freed);
+       if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
+               /* Last reference dropped in unp_disconnect(). */
                unp_pcb_rele_notlast(unp);
                unp_disconnect(unp, unp2);
        } else if (!unp_pcb_rele(unp)) {

Modified: head/sys/sys/unpcb.h
==============================================================================
--- head/sys/sys/unpcb.h        Tue Sep 15 19:23:01 2020        (r365763)
+++ head/sys/sys/unpcb.h        Tue Sep 15 19:23:22 2020        (r365764)
@@ -85,6 +85,7 @@ struct unpcb {
        struct  sockaddr_un *unp_addr;  /* (p) bound address of socket */
        struct  socket *unp_socket;     /* (c) pointer back to socket */
        /* Cache line 2 */
+       u_int   unp_pairbusy;           /* (p) threads acquiring peer locks */
        struct  vnode *unp_vnode;       /* (p) associated file if applicable */
        struct  xucred unp_peercred;    /* (p) peer credentials if applicable */
        LIST_ENTRY(unpcb) unp_reflink;  /* (l) link in unp_refs list */
@@ -117,6 +118,7 @@ struct unpcb {
  */
 #define        UNP_CONNECTING                  0x010   /* Currently 
connecting. */
 #define        UNP_BINDING                     0x020   /* Currently binding. */
+#define        UNP_WAITING                     0x040   /* Peer state is 
changing. */
 
 /*
  * Flags in unp_gcflag.
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to