The branch main has been updated by glebius:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=4328318445aec29c87f33ebfa11f5253a73a7896

commit 4328318445aec29c87f33ebfa11f5253a73a7896
Author:     Gleb Smirnoff <[email protected]>
AuthorDate: 2022-05-12 20:22:12 +0000
Commit:     Gleb Smirnoff <[email protected]>
CommitDate: 2022-05-12 20:22:12 +0000

    sockets: use socket buffer mutexes in struct socket directly
    
    Since c67f3b8b78e the sockbuf mutexes belong to the containing socket,
    and socket buffers just point to them.  In 74a68313b50 macros that access
    this mutex directly were added.  Go over the core socket code and
    eliminate code that reaches the mutex by dereferencing the sockbuf
    compatibility pointer.
    
    This change requires a KPI change, as some functions were given only the
    sockbuf pointer, without any hint as to whether it is a receive or send
    buffer.
    
    This change doesn't cover the whole kernel, many protocols still use
    compatibility pointers internally.  However, it allows operation of a
    protocol that doesn't use them.
    
    Reviewed by:            markj
    Differential revision:  https://reviews.freebsd.org/D35152
---
 sys/dev/cxgbe/tom/t4_cpl_io.c                  |   4 +-
 sys/dev/cxgbe/tom/t4_ddp.c                     |   2 +-
 sys/dev/hyperv/hvsock/hv_sock.c                |   4 +-
 sys/kern/kern_sendfile.c                       |   2 +-
 sys/kern/sys_socket.c                          |  31 +++---
 sys/kern/uipc_sockbuf.c                        | 142 ++++++++++++++++---------
 sys/kern/uipc_socket.c                         |  48 ++++-----
 sys/kern/uipc_usrreq.c                         |   2 +-
 sys/netinet/sctp_output.c                      |   4 +-
 sys/netinet/sctputil.c                         |   4 +-
 sys/netinet/tcp_input.c                        |   8 +-
 sys/netinet/tcp_output.c                       |   4 +-
 sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c |   4 +-
 sys/rpc/clnt_bck.c                             |   2 +-
 sys/rpc/clnt_vc.c                              |   2 +-
 sys/sys/sockbuf.h                              |  16 +--
 sys/sys/socketvar.h                            |  61 +++++------
 17 files changed, 191 insertions(+), 149 deletions(-)

diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 130c2468b20b..59d1c367f94c 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -803,7 +803,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int 
drop)
                        int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
                            V_tcp_autosndbuf_max);
 
-                       if (!sbreserve_locked(sb, newsize, so, NULL))
+                       if (!sbreserve_locked(so, SO_SND, newsize, NULL))
                                sb->sb_flags &= ~SB_AUTOSIZE;
                        else
                                sowwakeup = 1;  /* room available */
@@ -1770,7 +1770,7 @@ do_rx_data(struct sge_iq *iq, const struct rss_header 
*rss, struct mbuf *m)
                unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
                    V_tcp_autorcvbuf_max);
 
-               if (!sbreserve_locked(sb, newsize, so, NULL))
+               if (!sbreserve_locked(so, SO_RCV, newsize, NULL))
                        sb->sb_flags &= ~SB_AUTOSIZE;
        }
 
diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c
index 05bb903a28aa..11fea91b060e 100644
--- a/sys/dev/cxgbe/tom/t4_ddp.c
+++ b/sys/dev/cxgbe/tom/t4_ddp.c
@@ -555,7 +555,7 @@ handle_ddp_data(struct toepcb *toep, __be32 ddp_report, 
__be32 rcv_nxt, int len)
                unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
                    V_tcp_autorcvbuf_max);
 
-               if (!sbreserve_locked(sb, newsize, so, NULL))
+               if (!sbreserve_locked(so, SO_RCV, newsize, NULL))
                        sb->sb_flags &= ~SB_AUTOSIZE;
        }
        SOCKBUF_UNLOCK(sb);
diff --git a/sys/dev/hyperv/hvsock/hv_sock.c b/sys/dev/hyperv/hvsock/hv_sock.c
index f0238c233181..8c327a22e6fd 100644
--- a/sys/dev/hyperv/hvsock/hv_sock.c
+++ b/sys/dev/hyperv/hvsock/hv_sock.c
@@ -763,7 +763,7 @@ hvs_trans_soreceive(struct socket *so, struct sockaddr 
**paddr,
                 * Wait and block until (more) data comes in.
                 * Note: Drops the sockbuf lock during wait.
                 */
-               error = sbwait(sb);
+               error = sbwait(so, SO_RCV);
 
                if (error)
                        break;
@@ -859,7 +859,7 @@ hvs_trans_sosend(struct socket *so, struct sockaddr *addr, 
struct uio *uio,
                                 * Sleep wait until space avaiable to send
                                 * Note: Drops the sockbuf lock during wait.
                                 */
-                               error = sbwait(sb);
+                               error = sbwait(so, SO_SND);
 
                                if (error)
                                        break;
diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
index 30383490ca41..2de015254ab9 100644
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -814,7 +814,7 @@ retry_space:
                         * state may have changed and we retest
                         * for it.
                         */
-                       error = sbwait(&so->so_snd);
+                       error = sbwait(so, SO_SND);
                        /*
                         * An error from sbwait usually indicates that we've
                         * been interrupted by a signal. If we've sent anything
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index 774b317c6ecb..b1b47d3d3c26 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -179,12 +179,12 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct 
ucred *active_cred,
                                so->sol_sbrcv_flags |= SB_ASYNC;
                                so->sol_sbsnd_flags |= SB_ASYNC;
                        } else {
-                               SOCKBUF_LOCK(&so->so_rcv);
+                               SOCK_RECVBUF_LOCK(so);
                                so->so_rcv.sb_flags |= SB_ASYNC;
-                               SOCKBUF_UNLOCK(&so->so_rcv);
-                               SOCKBUF_LOCK(&so->so_snd);
+                               SOCK_RECVBUF_UNLOCK(so);
+                               SOCK_SENDBUF_LOCK(so);
                                so->so_snd.sb_flags |= SB_ASYNC;
-                               SOCKBUF_UNLOCK(&so->so_snd);
+                               SOCK_SENDBUF_UNLOCK(so);
                        }
                        SOCK_UNLOCK(so);
                } else {
@@ -194,12 +194,12 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct 
ucred *active_cred,
                                so->sol_sbrcv_flags &= ~SB_ASYNC;
                                so->sol_sbsnd_flags &= ~SB_ASYNC;
                        } else {
-                               SOCKBUF_LOCK(&so->so_rcv);
+                               SOCK_RECVBUF_LOCK(so);
                                so->so_rcv.sb_flags &= ~SB_ASYNC;
-                               SOCKBUF_UNLOCK(&so->so_rcv);
-                               SOCKBUF_LOCK(&so->so_snd);
+                               SOCK_RECVBUF_UNLOCK(so);
+                               SOCK_SENDBUF_LOCK(so);
                                so->so_snd.sb_flags &= ~SB_ASYNC;
-                               SOCKBUF_UNLOCK(&so->so_snd);
+                               SOCK_SENDBUF_UNLOCK(so);
                        }
                        SOCK_UNLOCK(so);
                }
@@ -751,10 +751,12 @@ soaio_snd(void *context, int pending)
 }
 
 void
-sowakeup_aio(struct socket *so, struct sockbuf *sb)
+sowakeup_aio(struct socket *so, sb_which which)
 {
+       struct sockbuf *sb = sobuf(so, which);
+
+       SOCK_BUF_LOCK_ASSERT(so, which);
 
-       SOCKBUF_LOCK_ASSERT(sb);
        sb->sb_flags &= ~SB_AIO;
        if (sb->sb_flags & SB_AIO_RUNNING)
                return;
@@ -799,6 +801,7 @@ soo_aio_queue(struct file *fp, struct kaiocb *job)
 {
        struct socket *so;
        struct sockbuf *sb;
+       sb_which which;
        int error;
 
        so = fp->f_data;
@@ -809,12 +812,14 @@ soo_aio_queue(struct file *fp, struct kaiocb *job)
        /* Lock through the socket, since this may be a listening socket. */
        switch (job->uaiocb.aio_lio_opcode & (LIO_WRITE | LIO_READ)) {
        case LIO_READ:
-               sb = &so->so_rcv;
                SOCK_RECVBUF_LOCK(so);
+               sb = &so->so_rcv;
+               which = SO_RCV;
                break;
        case LIO_WRITE:
-               sb = &so->so_snd;
                SOCK_SENDBUF_LOCK(so);
+               sb = &so->so_snd;
+               which = SO_SND;
                break;
        default:
                return (EINVAL);
@@ -833,7 +838,7 @@ soo_aio_queue(struct file *fp, struct kaiocb *job)
        TAILQ_INSERT_TAIL(&sb->sb_aiojobq, job, list);
        if (!(sb->sb_flags & SB_AIO_RUNNING)) {
                if (soaio_ready(so, sb))
-                       sowakeup_aio(so, sb);
+                       sowakeup_aio(so, which);
                else
                        sb->sb_flags |= SB_AIO;
        }
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 5ac6c79a928f..421fa5da37d9 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -396,27 +396,27 @@ void
 socantsendmore_locked(struct socket *so)
 {
 
-       SOCKBUF_LOCK_ASSERT(&so->so_snd);
+       SOCK_SENDBUF_LOCK_ASSERT(so);
 
        so->so_snd.sb_state |= SBS_CANTSENDMORE;
        sowwakeup_locked(so);
-       mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
+       SOCK_SENDBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantsendmore(struct socket *so)
 {
 
-       SOCKBUF_LOCK(&so->so_snd);
+       SOCK_SENDBUF_LOCK(so);
        socantsendmore_locked(so);
-       mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
+       SOCK_SENDBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantrcvmore_locked(struct socket *so)
 {
 
-       SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+       SOCK_RECVBUF_LOCK_ASSERT(so);
 
        so->so_rcv.sb_state |= SBS_CANTRCVMORE;
 #ifdef KERN_TLS
@@ -424,53 +424,55 @@ socantrcvmore_locked(struct socket *so)
                ktls_check_rx(&so->so_rcv);
 #endif
        sorwakeup_locked(so);
-       mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+       SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 socantrcvmore(struct socket *so)
 {
 
-       SOCKBUF_LOCK(&so->so_rcv);
+       SOCK_RECVBUF_LOCK(so);
        socantrcvmore_locked(so);
-       mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+       SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 soroverflow_locked(struct socket *so)
 {
 
-       SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+       SOCK_RECVBUF_LOCK_ASSERT(so);
 
        if (so->so_options & SO_RERROR) {
                so->so_rerror = ENOBUFS;
                sorwakeup_locked(so);
        } else
-               SOCKBUF_UNLOCK(&so->so_rcv);
+               SOCK_RECVBUF_UNLOCK(so);
 
-       mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+       SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 void
 soroverflow(struct socket *so)
 {
 
-       SOCKBUF_LOCK(&so->so_rcv);
+       SOCK_RECVBUF_LOCK(so);
        soroverflow_locked(so);
-       mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
+       SOCK_RECVBUF_UNLOCK_ASSERT(so);
 }
 
 /*
  * Wait for data to arrive at/drain from a socket buffer.
  */
 int
-sbwait(struct sockbuf *sb)
+sbwait(struct socket *so, sb_which which)
 {
+       struct sockbuf *sb;
 
-       SOCKBUF_LOCK_ASSERT(sb);
+       SOCK_BUF_LOCK_ASSERT(so, which);
 
+       sb = sobuf(so, which);
        sb->sb_flags |= SB_WAIT;
-       return (msleep_sbt(&sb->sb_acc, SOCKBUF_MTX(sb),
+       return (msleep_sbt(&sb->sb_acc, soeventmtx(so, which),
            (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
            sb->sb_timeo, 0, 0));
 }
@@ -487,13 +489,15 @@ sbwait(struct sockbuf *sb)
  * then release it to avoid lock order issues.  It's not clear that's
  * correct.
  */
-void
-sowakeup(struct socket *so, struct sockbuf *sb)
+static __always_inline void
+sowakeup(struct socket *so, const sb_which which)
 {
+       struct sockbuf *sb;
        int ret;
 
-       SOCKBUF_LOCK_ASSERT(sb);
+       SOCK_BUF_LOCK_ASSERT(so, which);
 
+       sb = sobuf(so, which);
        selwakeuppri(sb->sb_sel, PSOCK);
        if (!SEL_WAITING(sb->sb_sel))
                sb->sb_flags &= ~SB_SEL;
@@ -512,13 +516,43 @@ sowakeup(struct socket *so, struct sockbuf *sb)
        } else
                ret = SU_OK;
        if (sb->sb_flags & SB_AIO)
-               sowakeup_aio(so, sb);
-       SOCKBUF_UNLOCK(sb);
+               sowakeup_aio(so, which);
+       SOCK_BUF_UNLOCK(so, which);
        if (ret == SU_ISCONNECTED)
                soisconnected(so);
        if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
                pgsigio(&so->so_sigio, SIGIO, 0);
-       mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
+       SOCK_BUF_UNLOCK_ASSERT(so, which);
+}
+
+/*
+ * Do we need to notify the other side when I/O is possible?
+ */
+static __always_inline bool
+sb_notify(const struct sockbuf *sb)
+{
+       return ((sb->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC |
+           SB_UPCALL | SB_AIO | SB_KNOTE)) != 0);
+}
+
+void
+sorwakeup_locked(struct socket *so)
+{
+       SOCK_RECVBUF_LOCK_ASSERT(so);
+       if (sb_notify(&so->so_rcv))
+               sowakeup(so, SO_RCV);
+       else
+               SOCK_RECVBUF_UNLOCK(so);
+}
+
+void
+sowwakeup_locked(struct socket *so)
+{
+       SOCK_SENDBUF_LOCK_ASSERT(so);
+       if (sb_notify(&so->so_snd))
+               sowakeup(so, SO_SND);
+       else
+               SOCK_SENDBUF_UNLOCK(so);
 }
 
 /*
@@ -557,11 +591,11 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 {
        struct thread *td = curthread;
 
-       SOCKBUF_LOCK(&so->so_snd);
-       SOCKBUF_LOCK(&so->so_rcv);
-       if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
+       SOCK_SENDBUF_LOCK(so);
+       SOCK_RECVBUF_LOCK(so);
+       if (sbreserve_locked(so, SO_SND, sndcc, td) == 0)
                goto bad;
-       if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
+       if (sbreserve_locked(so, SO_RCV, rcvcc, td) == 0)
                goto bad2;
        if (so->so_rcv.sb_lowat == 0)
                so->so_rcv.sb_lowat = 1;
@@ -569,14 +603,14 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
                so->so_snd.sb_lowat = MCLBYTES;
        if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
                so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
-       SOCKBUF_UNLOCK(&so->so_rcv);
-       SOCKBUF_UNLOCK(&so->so_snd);
+       SOCK_RECVBUF_UNLOCK(so);
+       SOCK_SENDBUF_UNLOCK(so);
        return (0);
 bad2:
-       sbrelease_locked(&so->so_snd, so);
+       sbrelease_locked(so, SO_SND);
 bad:
-       SOCKBUF_UNLOCK(&so->so_rcv);
-       SOCKBUF_UNLOCK(&so->so_snd);
+       SOCK_RECVBUF_UNLOCK(so);
+       SOCK_SENDBUF_UNLOCK(so);
        return (ENOBUFS);
 }
 
@@ -600,13 +634,14 @@ sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
  * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
  * become limiting if buffering efficiency is near the normal case.
  */
-int
-sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
+bool
+sbreserve_locked(struct socket *so, sb_which which, u_long cc,
     struct thread *td)
 {
+       struct sockbuf *sb = sobuf(so, which);
        rlim_t sbsize_limit;
 
-       SOCKBUF_LOCK_ASSERT(sb);
+       SOCK_BUF_LOCK_ASSERT(so, which);
 
        /*
         * When a thread is passed, we take into account the thread's socket
@@ -616,24 +651,25 @@ sbreserve_locked(struct sockbuf *sb, u_long cc, struct 
socket *so,
         * we don't apply a process limit.
         */
        if (cc > sb_max_adj)
-               return (0);
+               return (false);
        if (td != NULL) {
                sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
        } else
                sbsize_limit = RLIM_INFINITY;
        if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
            sbsize_limit))
-               return (0);
+               return (false);
        sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
        if (sb->sb_lowat > sb->sb_hiwat)
                sb->sb_lowat = sb->sb_hiwat;
-       return (1);
+       return (true);
 }
 
 int
 sbsetopt(struct socket *so, int cmd, u_long cc)
 {
        struct sockbuf *sb;
+       sb_which wh;
        short *flags;
        u_int *hiwat, *lowat;
        int error;
@@ -660,16 +696,18 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
                        case SO_SNDLOWAT:
                        case SO_SNDBUF:
                                sb = &so->so_snd;
+                               wh = SO_SND;
                                break;
                        case SO_RCVLOWAT:
                        case SO_RCVBUF:
                                sb = &so->so_rcv;
+                               wh = SO_RCV;
                                break;
                }
                flags = &sb->sb_flags;
                hiwat = &sb->sb_hiwat;
                lowat = &sb->sb_lowat;
-               SOCKBUF_LOCK(sb);
+               SOCK_BUF_LOCK(so, wh);
        }
 
        error = 0;
@@ -685,7 +723,7 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
                        if (*lowat > *hiwat)
                                *lowat = *hiwat;
                } else {
-                       if (!sbreserve_locked(sb, cc, so, curthread))
+                       if (!sbreserve_locked(so, wh, cc, curthread))
                                error = ENOBUFS;
                }
                if (error == 0)
@@ -702,7 +740,7 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
        }
 
        if (!SOLISTENING(so))
-               SOCKBUF_UNLOCK(sb);
+               SOCK_BUF_UNLOCK(so, wh);
        SOCK_UNLOCK(so);
        return (error);
 }
@@ -711,8 +749,9 @@ sbsetopt(struct socket *so, int cmd, u_long cc)
  * Free mbufs held by a socket, and reserved mbuf space.
  */
 static void
-sbrelease_internal(struct sockbuf *sb, struct socket *so)
+sbrelease_internal(struct socket *so, sb_which which)
 {
+       struct sockbuf *sb = sobuf(so, which);
 
        sbflush_internal(sb);
        (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
@@ -721,33 +760,34 @@ sbrelease_internal(struct sockbuf *sb, struct socket *so)
 }
 
 void
-sbrelease_locked(struct sockbuf *sb, struct socket *so)
+sbrelease_locked(struct socket *so, sb_which which)
 {
 
-       SOCKBUF_LOCK_ASSERT(sb);
+       SOCK_BUF_LOCK_ASSERT(so, which);
 
-       sbrelease_internal(sb, so);
+       sbrelease_internal(so, which);
 }
 
 void
-sbrelease(struct sockbuf *sb, struct socket *so)
+sbrelease(struct socket *so, sb_which which)
 {
 
-       SOCKBUF_LOCK(sb);
-       sbrelease_locked(sb, so);
-       SOCKBUF_UNLOCK(sb);
+       SOCK_BUF_LOCK(so, which);
+       sbrelease_locked(so, which);
+       SOCK_BUF_UNLOCK(so, which);
 }
 
 void
-sbdestroy(struct sockbuf *sb, struct socket *so)
+sbdestroy(struct socket *so, sb_which which)
 {
-
-       sbrelease_internal(sb, so);
 #ifdef KERN_TLS
+       struct sockbuf *sb = sobuf(so, which);
+
        if (sb->sb_tls_info != NULL)
                ktls_free(sb->sb_tls_info);
        sb->sb_tls_info = NULL;
 #endif
+       sbrelease_internal(so, which);
 }
 
 /*
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 628730171715..49a2b5773cc6 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -420,8 +420,8 @@ soalloc(struct vnet *vnet)
        mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
        so->so_snd.sb_mtx = &so->so_snd_mtx;
        so->so_rcv.sb_mtx = &so->so_rcv_mtx;
-       SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
-       SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
+       mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF);
+       mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF);
        so->so_rcv.sb_sel = &so->so_rdsel;
        so->so_snd.sb_sel = &so->so_wrsel;
        sx_init(&so->so_snd_sx, "so_snd_sx");
@@ -491,8 +491,8 @@ sodealloc(struct socket *so)
                            &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
                sx_destroy(&so->so_snd_sx);
                sx_destroy(&so->so_rcv_sx);
-               SOCKBUF_LOCK_DESTROY(&so->so_snd);
-               SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+               mtx_destroy(&so->so_snd_mtx);
+               mtx_destroy(&so->so_rcv_mtx);
        }
        crfree(so->so_cred);
        mtx_destroy(&so->so_lock);
@@ -990,8 +990,8 @@ solisten_proto(struct socket *so, int backlog)
        sbrcv_timeo = so->so_rcv.sb_timeo;
        sbsnd_timeo = so->so_snd.sb_timeo;
 
-       sbdestroy(&so->so_snd, so);
-       sbdestroy(&so->so_rcv, so);
+       sbdestroy(so, SO_SND);
+       sbdestroy(so, SO_RCV);
 
 #ifdef INVARIANTS
        bzero(&so->so_rcv,
@@ -1208,8 +1208,8 @@ sofree(struct socket *so)
         * to be acquired or held.
         */
        if (!SOLISTENING(so)) {
-               sbdestroy(&so->so_snd, so);
-               sbdestroy(&so->so_rcv, so);
+               sbdestroy(so, SO_SND);
+               sbdestroy(so, SO_RCV);
        }
        seldrain(&so->so_rdsel);
        seldrain(&so->so_wrsel);
@@ -1735,7 +1735,7 @@ restart:
                                error = EWOULDBLOCK;
                                goto release;
                        }
-                       error = sbwait(&so->so_snd);
+                       error = sbwait(so, SO_SND);
                        SOCKBUF_UNLOCK(&so->so_snd);
                        if (error)
                                goto release;
@@ -2067,7 +2067,7 @@ restart:
                }
                SBLASTRECORDCHK(&so->so_rcv);
                SBLASTMBUFCHK(&so->so_rcv);
-               error = sbwait(&so->so_rcv);
+               error = sbwait(so, SO_RCV);
                SOCKBUF_UNLOCK(&so->so_rcv);
                if (error)
                        goto release;
@@ -2389,7 +2389,7 @@ dontblock:
                         * the protocol. Skip blocking in this case.
                         */
                        if (so->so_rcv.sb_mb == NULL) {
-                               error = sbwait(&so->so_rcv);
+                               error = sbwait(so, SO_RCV);
                                if (error) {
                                        SOCKBUF_UNLOCK(&so->so_rcv);
                                        goto release;
@@ -2570,7 +2570,7 @@ restart:
         * Wait and block until (more) data comes in.
         * NB: Drops the sockbuf lock during wait.
         */
-       error = sbwait(sb);
+       error = sbwait(so, SO_RCV);
        if (error)
                goto out;
        goto restart;
@@ -2742,7 +2742,7 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, 
struct uio *uio,
                }
                SBLASTRECORDCHK(&so->so_rcv);
                SBLASTMBUFCHK(&so->so_rcv);
-               error = sbwait(&so->so_rcv);
+               error = sbwait(so, SO_RCV);
                if (error) {
                        SOCKBUF_UNLOCK(&so->so_rcv);
                        return (error);
@@ -2960,7 +2960,7 @@ sorflush(struct socket *so)
                MPASS(pr->pr_domain->dom_dispose != NULL);
                (*pr->pr_domain->dom_dispose)(so);
        } else {
-               sbrelease(&so->so_rcv, so);
+               sbrelease(so, SO_RCV);
                SOCK_IO_RECV_UNLOCK(so);
        }
 
@@ -3610,8 +3610,8 @@ sopoll_generic(struct socket *so, int events, struct 
ucred *active_cred,
                }
        } else {
                revents = 0;
-               SOCKBUF_LOCK(&so->so_snd);
-               SOCKBUF_LOCK(&so->so_rcv);
+               SOCK_SENDBUF_LOCK(so);
+               SOCK_RECVBUF_LOCK(so);
                if (events & (POLLIN | POLLRDNORM))
                        if (soreadabledata(so))
                                revents |= events & (POLLIN | POLLRDNORM);
@@ -3642,8 +3642,8 @@ sopoll_generic(struct socket *so, int events, struct 
ucred *active_cred,
                                so->so_snd.sb_flags |= SB_SEL;
                        }
                }
-               SOCKBUF_UNLOCK(&so->so_rcv);
-               SOCKBUF_UNLOCK(&so->so_snd);
+               SOCK_RECVBUF_UNLOCK(so);
+               SOCK_SENDBUF_UNLOCK(so);
        }
        SOCK_UNLOCK(so);
        return (revents);
@@ -4297,12 +4297,12 @@ so_rdknl_assert_lock(void *arg, int what)
                if (SOLISTENING(so))
                        SOCK_LOCK_ASSERT(so);
                else
-                       SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+                       SOCK_RECVBUF_LOCK_ASSERT(so);
        } else {
                if (SOLISTENING(so))
                        SOCK_UNLOCK_ASSERT(so);
                else
-                       SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+                       SOCK_RECVBUF_UNLOCK_ASSERT(so);
        }
 }
 
@@ -4314,7 +4314,7 @@ so_wrknl_lock(void *arg)
        if (SOLISTENING(so))
                SOCK_LOCK(so);
        else
-               SOCKBUF_LOCK(&so->so_snd);
+               SOCK_SENDBUF_LOCK(so);
 }
 
 static void
@@ -4325,7 +4325,7 @@ so_wrknl_unlock(void *arg)
        if (SOLISTENING(so))
                SOCK_UNLOCK(so);
        else
-               SOCKBUF_UNLOCK(&so->so_snd);
+               SOCK_SENDBUF_UNLOCK(so);
 }
 
 static void
@@ -4337,12 +4337,12 @@ so_wrknl_assert_lock(void *arg, int what)
                if (SOLISTENING(so))
                        SOCK_LOCK_ASSERT(so);
                else
-                       SOCKBUF_LOCK_ASSERT(&so->so_snd);
+                       SOCK_SENDBUF_LOCK_ASSERT(so);
        } else {
                if (SOLISTENING(so))
                        SOCK_UNLOCK_ASSERT(so);
                else
-                       SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+                       SOCK_SENDBUF_UNLOCK_ASSERT(so);
        }
 }
 
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index efa586d346c5..b326dbd825a6 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -2771,7 +2771,7 @@ unp_dispose(struct socket *so)
        KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
            ("%s: ccc %u mb %p mbcnt %u", __func__,
            sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
-       sbrelease_locked(sb, so);
+       sbrelease_locked(so, SO_RCV);
        SOCK_RECVBUF_UNLOCK(so);
        if (SOCK_IO_RECV_OWNED(so))
                SOCK_IO_RECV_UNLOCK(so);
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index 9a8927160441..9e2e70313be3 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -12992,7 +12992,7 @@ sctp_lower_sosend(struct socket *so,
                        stcb->block_entry = &be;
                        SCTP_TCB_UNLOCK(stcb);
                        hold_tcblock = false;
-                       error = sbwait(&so->so_snd);
+                       error = sbwait(so, SO_SND);
                        if (error == 0) {
                                if (so->so_error != 0) {
                                        error = so->so_error;
@@ -13352,7 +13352,7 @@ skip_preblock:
                                stcb->block_entry = &be;
                                SCTP_TCB_UNLOCK(stcb);
                                hold_tcblock = false;
-                               error = sbwait(&so->so_snd);
+                               error = sbwait(so, SO_SND);
                                if (error == 0) {
                                        if (so->so_error != 0)
                                                error = so->so_error;
diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c
index 8451ed5e2007..e20a49be1adb 100644
--- a/sys/netinet/sctputil.c
+++ b/sys/netinet/sctputil.c
@@ -5640,7 +5640,7 @@ restart_nosblocks:
                        }
                }
                if (block_allowed) {
-                       error = sbwait(&so->so_rcv);
+                       error = sbwait(so, SO_RCV);
                        if (error) {
                                goto out;
                        }
@@ -6255,7 +6255,7 @@ wait_some_more:
                        goto release;
                }
                if (so->so_rcv.sb_cc <= control->held_length) {
-                       error = sbwait(&so->so_rcv);
+                       error = sbwait(so, SO_RCV);
                        if (error) {
                                goto release;
                        }
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 695cd5a916db..be86ceca445c 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1921,8 +1921,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct 
socket *so,
                                 * Give up when limit is reached.
                                 */
                                if (newsize)
-                                       if (!sbreserve_locked(&so->so_rcv,
-                                           newsize, so, NULL))
+                                       if (!sbreserve_locked(so, SO_RCV,
+                                           newsize, NULL))
                                                so->so_rcv.sb_flags &= 
~SB_AUTOSIZE;
                                m_adj(m, drop_hdrlen);  /* delayed header drop 
*/
                                sbappendstream_locked(&so->so_rcv, m, 0);
@@ -3848,7 +3848,7 @@ tcp_mss(struct tcpcb *tp, int offer)
                if (bufsize > sb_max)
                        bufsize = sb_max;
                if (bufsize > so->so_snd.sb_hiwat)
-                       (void)sbreserve_locked(&so->so_snd, bufsize, so, NULL);
+                       (void)sbreserve_locked(so, SO_SND, bufsize, NULL);
        }
        SOCKBUF_UNLOCK(&so->so_snd);
        /*
@@ -3871,7 +3871,7 @@ tcp_mss(struct tcpcb *tp, int offer)
                if (bufsize > sb_max)
                        bufsize = sb_max;
                if (bufsize > so->so_rcv.sb_hiwat)
-                       (void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL);
+                       (void)sbreserve_locked(so, SO_RCV, bufsize, NULL);
        }
        SOCKBUF_UNLOCK(&so->so_rcv);
 
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 299f1d034717..b0dedb1a24d1 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -2166,9 +2166,9 @@ tcp_sndbuf_autoscale(struct tcpcb *tp, struct socket *so, 
uint32_t sendwin)
                    sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
                    sendwin >= (sbused(&so->so_snd) -
                    (tp->snd_nxt - tp->snd_una))) {
-                       if (!sbreserve_locked(&so->so_snd,
+                       if (!sbreserve_locked(so, SO_SND,
                            min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
-                            V_tcp_autosndbuf_max), so, curthread))
+                            V_tcp_autosndbuf_max), curthread))
                                so->so_snd.sb_flags &= ~SB_AUTOSIZE;
                }
        }
diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c 
b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
index c822276185cb..def5edb98983 100644
--- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
+++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -1142,7 +1142,7 @@ restart:
                                error = EWOULDBLOCK;
                                goto release;
                        }
-                       error = sbwait(&so->so_snd);
+                       error = sbwait(so, SO_SND);
                        SOCKBUF_UNLOCK(&so->so_snd);
                        if (error)
                                goto release;
@@ -1336,7 +1336,7 @@ restart:
         * Wait and block until (more) data comes in.
         * NB: Drops the sockbuf lock during wait.
         */
-       error = sbwait(sb);
+       error = sbwait(so, SO_RCV);
        if (error)
                goto out;
        goto restart;
diff --git a/sys/rpc/clnt_bck.c b/sys/rpc/clnt_bck.c
index 514905bf1cc2..810a957bb97b 100644
--- a/sys/rpc/clnt_bck.c
+++ b/sys/rpc/clnt_bck.c
@@ -326,7 +326,7 @@ if (error != 0) printf("sosend=%d\n", error);
        if (error == EMSGSIZE) {
 printf("emsgsize\n");
                SOCKBUF_LOCK(&xprt->xp_socket->so_snd);
-               sbwait(&xprt->xp_socket->so_snd);
+               sbwait(xprt->xp_socket, SO_SND);
                SOCKBUF_UNLOCK(&xprt->xp_socket->so_snd);
                sx_xunlock(&xprt->xp_lock);
                AUTH_VALIDATE(auth, xid, NULL, NULL);
diff --git a/sys/rpc/clnt_vc.c b/sys/rpc/clnt_vc.c
index dfada2bea388..f565de06f4bd 100644
--- a/sys/rpc/clnt_vc.c
+++ b/sys/rpc/clnt_vc.c
@@ -447,7 +447,7 @@ call_again:
        if (error == EMSGSIZE || (error == ERESTART &&
            (ct->ct_waitflag & PCATCH) == 0 && trycnt-- > 0)) {
                SOCKBUF_LOCK(&ct->ct_socket->so_snd);
-               sbwait(&ct->ct_socket->so_snd);
+               sbwait(ct->ct_socket, SO_SND);
                SOCKBUF_UNLOCK(&ct->ct_socket->so_snd);
                AUTH_VALIDATE(auth, xid, NULL, NULL);
                mtx_lock(&ct->ct_lock);
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index 372f04eba54c..2484407d557c 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -116,6 +116,9 @@ struct sockbuf {
 #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */
 #ifdef _KERNEL
 
+/* 'which' values for KPIs that operate on one buffer of a socket. */
+typedef enum { SO_RCV, SO_SND } sb_which;
+
 /*
  * Per-socket buffer mutex used to protect most fields in the socket buffer.
  * These make use of the mutex pointer embedded in struct sockbuf, which
@@ -124,9 +127,6 @@ struct sockbuf {
  * these locking macros.
  */
 #define        SOCKBUF_MTX(_sb)                ((_sb)->sb_mtx)
-#define        SOCKBUF_LOCK_INIT(_sb, _name) \
-       mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF)
-#define        SOCKBUF_LOCK_DESTROY(_sb)       mtx_destroy(SOCKBUF_MTX(_sb))
 #define        SOCKBUF_LOCK(_sb)               mtx_lock(SOCKBUF_MTX(_sb))
 #define        SOCKBUF_OWNED(_sb)              mtx_owned(SOCKBUF_MTX(_sb))
 #define        SOCKBUF_UNLOCK(_sb)             mtx_unlock(SOCKBUF_MTX(_sb))
@@ -162,7 +162,7 @@ struct mbuf *
 struct mbuf *
        sbcreatecontrol_how(void *p, int size, int type, int level,
            int wait);
-void   sbdestroy(struct sockbuf *sb, struct socket *so);
+void   sbdestroy(struct socket *, sb_which);
 void   sbdrop(struct sockbuf *sb, int len);
 void   sbdrop_locked(struct sockbuf *sb, int len);
 struct mbuf *
@@ -171,17 +171,17 @@ void      sbdroprecord(struct sockbuf *sb);
 void   sbdroprecord_locked(struct sockbuf *sb);
 void   sbflush(struct sockbuf *sb);
 void   sbflush_locked(struct sockbuf *sb);
-void   sbrelease(struct sockbuf *sb, struct socket *so);
-void   sbrelease_locked(struct sockbuf *sb, struct socket *so);
+void   sbrelease(struct socket *, sb_which);
+void   sbrelease_locked(struct socket *, sb_which);
 int    sbsetopt(struct socket *so, int cmd, u_long cc);
-int    sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
+bool   sbreserve_locked(struct socket *so, sb_which which, u_long cc,
            struct thread *td);
 void   sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
 struct mbuf *
        sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
 struct mbuf *
        sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
-int    sbwait(struct sockbuf *sb);
+int    sbwait(struct socket *, sb_which);
 void   sballoc(struct sockbuf *, struct mbuf *);
 void   sbfree(struct sockbuf *, struct mbuf *);
 void   sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index fe6faa842bda..05eefd7e4fd4 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -77,8 +77,8 @@ enum socket_qstate {
  * Locking key to struct socket:
  * (a) constant after allocation, no locking required.
  * (b) locked by SOCK_LOCK(so).
- * (cr) locked by SOCK_RECVBUF_LOCK(so)/SOCKBUF_LOCK(&so->so_rcv).
- * (cs) locked by SOCK_SENDBUF_LOCK(so)/SOCKBUF_LOCK(&so->so_snd).
+ * (cr) locked by SOCK_RECVBUF_LOCK(so)
+ * (cs) locked by SOCK_SENDBUF_LOCK(so)
  * (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
  * (f) not locked since integer reads/writes are atomic.
  * (g) used only as a sleep/wakeup address, no value.
@@ -256,8 +256,8 @@ struct socket {
 } while (0)
 
 /*
- * Socket buffer locks.  These manipulate the same mutexes as SOCKBUF_LOCK()
- * and related macros.
+ * Socket buffer locks.  These are strongly preferred over SOCKBUF_LOCK(sb)
+ * macros, as we are moving towards protocol specific socket buffers.
  */
 #define        SOCK_RECVBUF_MTX(so)                                            \
        (&(so)->so_rcv_mtx)
@@ -281,8 +281,26 @@ struct socket {
 #define        SOCK_SENDBUF_UNLOCK_ASSERT(so)                                  \
        mtx_assert(SOCK_SENDBUF_MTX(so), MA_NOTOWNED)
 
-/* 'which' values for socket buffer events and upcalls. */
-typedef enum { SO_RCV, SO_SND } sb_which;
+#define        SOCK_BUF_LOCK(so, which)                                        \
+       mtx_lock(soeventmtx(so, which))
+#define        SOCK_BUF_UNLOCK(so, which)                                      \
+       mtx_unlock(soeventmtx(so, which))
+#define        SOCK_BUF_LOCK_ASSERT(so, which)                                 \
+       mtx_assert(soeventmtx(so, which), MA_OWNED)
+#define        SOCK_BUF_UNLOCK_ASSERT(so, which)                               \
+       mtx_assert(soeventmtx(so, which), MA_NOTOWNED)
+
+static inline struct sockbuf *
+sobuf(struct socket *so, const sb_which which)
+{
+       return (which == SO_RCV ? &so->so_rcv : &so->so_snd);
+}
+
+static inline struct mtx *
+soeventmtx(struct socket *so, const sb_which which)
+{
+       return (which == SO_RCV ? SOCK_RECVBUF_MTX(so) : SOCK_SENDBUF_MTX(so));
+}
 
 /*
  * Macros for sockets and socket buffering.
*** 57 LINES SKIPPED ***

Reply via email to