Here's the next iteration of my diff introducing a rwlock to serialize
the network input path with socket paths.  Changes are:

  - More timeout_set_proc() conversions, which should fix the problems
    reported by Chris Jackman.

  - I introduced a set of macros to make it easier to audit existing
    splsoftnet() calls.

  - It makes use of splassert_fail() if the lock is not held.


My plan is to commit it, assuming it is stable enough, then fix the
remaining issues in tree.  This includes:

  - Analyze and, if needed, fix the two code paths where we do an
    unlock/lock dance.

  - Remove unneeded/recursive splsoftnet() dances.

Once that's done we should be able to remove the KERNEL_LOCK() from the
input path.

So please test and report back.

diff --git sys/kern/sys_socket.c sys/kern/sys_socket.c
index 7a90f78..a7be8a1 100644
--- sys/kern/sys_socket.c
+++ sys/kern/sys_socket.c
@@ -133,7 +133,7 @@ soo_poll(struct file *fp, int events, struct proc *p)
        int revents = 0;
        int s;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        if (events & (POLLIN | POLLRDNORM)) {
                if (soreadable(so))
                        revents |= events & (POLLIN | POLLRDNORM);
@@ -159,7 +159,7 @@ soo_poll(struct file *fp, int events, struct proc *p)
                        so->so_snd.sb_flagsintr |= SB_SEL;
                }
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
        return (revents);
 }
 
diff --git sys/kern/uipc_socket.c sys/kern/uipc_socket.c
index 9e8d05f..dd067b3 100644
--- sys/kern/uipc_socket.c
+++ sys/kern/uipc_socket.c
@@ -89,6 +89,11 @@ struct pool sosplice_pool;
 struct taskq *sosplice_taskq;
 #endif
 
+/*
+ * Serialize socket operations.
+ */
+struct rwlock socketlock = RWLOCK_INITIALIZER("socketlock");
+
 void
 soinit(void)
 {
@@ -123,7 +128,7 @@ socreate(int dom, struct socket **aso, int type, int proto)
                return (EPROTONOSUPPORT);
        if (prp->pr_type != type)
                return (EPROTOTYPE);
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
        TAILQ_INIT(&so->so_q0);
        TAILQ_INIT(&so->so_q);
@@ -141,10 +146,10 @@ socreate(int dom, struct socket **aso, int type, int 
proto)
        if (error) {
                so->so_state |= SS_NOFDREF;
                sofree(so);
-               splx(s);
+               SOCKET_UNLOCK(s);
                return (error);
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
        *aso = so;
        return (0);
 }
@@ -154,9 +159,9 @@ sobind(struct socket *so, struct mbuf *nam, struct proc *p)
 {
        int s, error;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
-       splx(s);
+       SOCKET_UNLOCK(s);
        return (error);
 }
 
@@ -171,11 +176,11 @@ solisten(struct socket *so, int backlog)
        if (isspliced(so) || issplicedback(so))
                return (EOPNOTSUPP);
 #endif /* SOCKET_SPLICE */
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
            curproc);
        if (error) {
-               splx(s);
+               SOCKET_UNLOCK(s);
                return (error);
        }
        if (TAILQ_FIRST(&so->so_q) == NULL)
@@ -185,14 +190,14 @@ solisten(struct socket *so, int backlog)
        if (backlog < sominconn)
                backlog = sominconn;
        so->so_qlimit = backlog;
-       splx(s);
+       SOCKET_UNLOCK(s);
        return (0);
 }
 
 void
 sofree(struct socket *so)
 {
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
        if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
                return;
@@ -232,7 +237,7 @@ soclose(struct socket *so)
        struct socket *so2;
        int s, error = 0;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        if (so->so_options & SO_ACCEPTCONN) {
                while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
                        (void) soqremque(so2, 0);
@@ -256,7 +261,7 @@ soclose(struct socket *so)
                            (so->so_state & SS_NBIO))
                                goto drop;
                        while (so->so_state & SS_ISCONNECTED) {
-                               error = tsleep(&so->so_timeo,
+                               error = rwsleep(&so->so_timeo, &socketlock,
                                    PSOCK | PCATCH, "netcls",
                                    so->so_linger * hz);
                                if (error)
@@ -276,14 +281,14 @@ discard:
                panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
        so->so_state |= SS_NOFDREF;
        sofree(so);
-       splx(s);
+       SOCKET_UNLOCK(s);
        return (error);
 }
 
 int
 soabort(struct socket *so)
 {
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
        return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
           curproc);
@@ -294,7 +299,7 @@ soaccept(struct socket *so, struct mbuf *nam)
 {
        int error = 0;
 
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
        if ((so->so_state & SS_NOFDREF) == 0)
                panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
@@ -315,7 +320,7 @@ soconnect(struct socket *so, struct mbuf *nam)
 
        if (so->so_options & SO_ACCEPTCONN)
                return (EOPNOTSUPP);
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        /*
         * If protocol is connection-based, can only connect once.
         * Otherwise, if connected, try to disconnect first.
@@ -329,7 +334,7 @@ soconnect(struct socket *so, struct mbuf *nam)
        else
                error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
                    NULL, nam, NULL, curproc);
-       splx(s);
+       SOCKET_UNLOCK(s);
        return (error);
 }
 
@@ -338,10 +343,10 @@ soconnect2(struct socket *so1, struct socket *so2)
 {
        int s, error;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
            (struct mbuf *)so2, NULL, curproc);
-       splx(s);
+       SOCKET_UNLOCK(s);
        return (error);
 }
 
@@ -350,7 +355,7 @@ sodisconnect(struct socket *so)
 {
        int error;
 
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
        if ((so->so_state & SS_ISCONNECTED) == 0)
                return (ENOTCONN);
@@ -418,21 +423,20 @@ sosend(struct socket *so, struct mbuf *addr, struct uio 
*uio, struct mbuf *top,
                            (sizeof(struct file *) / sizeof(int)));
        }
 
-#define        snderr(errno)   { error = errno; splx(s); goto release; }
+#define        snderr(errno)   { error = errno; SOCKET_UNLOCK(s); goto 
release; }
 
 restart:
        if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
                goto out;
        so->so_state |= SS_ISSENDING;
        do {
-               s = splsoftnet();
+               SOCKET_LOCK(s);
                if (so->so_state & SS_CANTSENDMORE)
                        snderr(EPIPE);
                if (so->so_error) {
                        error = so->so_error;
                        so->so_error = 0;
-                       splx(s);
-                       goto release;
+                       snderr(error);
                }
                if ((so->so_state & SS_ISCONNECTED) == 0) {
                        if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
@@ -456,12 +460,12 @@ restart:
                        sbunlock(&so->so_snd);
                        error = sbwait(&so->so_snd);
                        so->so_state &= ~SS_ISSENDING;
-                       splx(s);
+                       SOCKET_UNLOCK(s);
                        if (error)
                                goto out;
                        goto restart;
                }
-               splx(s);
+               SOCKET_UNLOCK(s);
                space -= clen;
                do {
                        if (uio == NULL) {
@@ -481,13 +485,13 @@ restart:
                                if (flags & MSG_EOR)
                                        top->m_flags |= M_EOR;
                        }
-                       s = splsoftnet();               /* XXX */
+                       SOCKET_LOCK(s);
                        if (resid == 0)
                                so->so_state &= ~SS_ISSENDING;
                        error = (*so->so_proto->pr_usrreq)(so,
                            (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
                            top, addr, control, curproc);
-                       splx(s);
+                       SOCKET_UNLOCK(s);
                        clen = 0;
                        control = NULL;
                        top = NULL;
@@ -617,8 +621,8 @@ sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
  * must begin with an address if the protocol so specifies,
  * followed by an optional mbuf or mbufs containing ancillary data,
  * and then zero or more mbufs of data.
- * In order to avoid blocking network interrupts for the entire time here,
- * we splx() while doing the actual copy to user space.
+ * In order to avoid blocking network for the entire time here, we splx()
+ * and release ``socketlock'' while doing the actual copy to user space.
  * Although the sockbuf is locked, new data may still be appended,
  * and thus we must maintain consistency of the sockbuf during that time.
  *
@@ -672,7 +676,7 @@ bad:
 restart:
        if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
                return (error);
-       s = splsoftnet();
+       SOCKET_LOCK(s);
 
        m = so->so_rcv.sb_mb;
 #ifdef SOCKET_SPLICE
@@ -737,7 +741,7 @@ restart:
                SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
                sbunlock(&so->so_rcv);
                error = sbwait(&so->so_rcv);
-               splx(s);
+               SOCKET_UNLOCK(s);
                if (error)
                        return (error);
                goto restart;
@@ -871,9 +875,9 @@ dontblock:
                        SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
                        SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
                        resid = uio->uio_resid;
-                       splx(s);
+                       SOCKET_UNLOCK(s);
                        uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
-                       s = splsoftnet();
+                       SOCKET_LOCK(s);
                        if (uio_error)
                                uio->uio_resid = resid - len;
                } else
@@ -955,7 +959,7 @@ dontblock:
                        error = sbwait(&so->so_rcv);
                        if (error) {
                                sbunlock(&so->so_rcv);
-                               splx(s);
+                               SOCKET_UNLOCK(s);
                                return (0);
                        }
                        if ((m = so->so_rcv.sb_mb) != NULL)
@@ -991,7 +995,7 @@ dontblock:
        if (orig_resid == uio->uio_resid && orig_resid &&
            (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
                sbunlock(&so->so_rcv);
-               splx(s);
+               SOCKET_UNLOCK(s);
                goto restart;
        }
 
@@ -1002,7 +1006,7 @@ dontblock:
                *flagsp |= flags;
 release:
        sbunlock(&so->so_rcv);
-       splx(s);
+       SOCKET_UNLOCK(s);
        return (error);
 }
 
@@ -1012,7 +1016,7 @@ soshutdown(struct socket *so, int how)
        struct protosw *pr = so->so_proto;
        int s, error = 0;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        switch (how) {
        case SHUT_RD:
        case SHUT_RDWR:
@@ -1028,7 +1032,8 @@ soshutdown(struct socket *so, int how)
                error = EINVAL;
                break;
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
+
        return (error);
 }
 
@@ -1042,6 +1047,7 @@ sorflush(struct socket *so)
 
        sb->sb_flags |= SB_NOINTR;
        (void) sblock(sb, M_WAITOK);
+       /* XXXSMP */
        s = splnet();
        socantrcvmore(so);
        sbunlock(sb);
@@ -1095,10 +1101,10 @@ sosplice(struct socket *so, int fd, off_t max, struct 
timeval *tv)
                if ((error = sblock(&so->so_rcv,
                    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0)
                        return (error);
-               s = splsoftnet();
+               SOCKET_LOCK(s);
                if (so->so_sp->ssp_socket)
                        sounsplice(so, so->so_sp->ssp_socket, 1);
-               splx(s);
+               SOCKET_UNLOCK(s);
                sbunlock(&so->so_rcv);
                return (0);
        }
@@ -1127,7 +1133,7 @@ sosplice(struct socket *so, int fd, off_t max, struct 
timeval *tv)
                FRELE(fp, curproc);
                return (error);
        }
-       s = splsoftnet();
+       SOCKET_LOCK(s);
 
        if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
                error = EBUSY;
@@ -1168,7 +1174,7 @@ sosplice(struct socket *so, int fd, off_t max, struct 
timeval *tv)
        }
 
  release:
-       splx(s);
+       SOCKET_UNLOCK(s);
        sbunlock(&sosp->so_snd);
        sbunlock(&so->so_rcv);
        FRELE(fp, curproc);
@@ -1178,7 +1184,7 @@ sosplice(struct socket *so, int fd, off_t max, struct 
timeval *tv)
 void
 sounsplice(struct socket *so, struct socket *sosp, int wakeup)
 {
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
        task_del(sosplice_taskq, &so->so_splicetask);
        timeout_del(&so->so_idleto);
@@ -1195,12 +1201,12 @@ soidle(void *arg)
        struct socket *so = arg;
        int s;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
                so->so_error = ETIMEDOUT;
                sounsplice(so, so->so_sp->ssp_socket, 1);
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
 }
 
 void
@@ -1209,7 +1215,7 @@ sotask(void *arg)
        struct socket *so = arg;
        int s;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
                /*
                 * We may not sleep here as sofree() and unsplice() may be
@@ -1218,7 +1224,7 @@ sotask(void *arg)
                 */
                somove(so, M_DONTWAIT);
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
 
        /* Avoid user land starvation. */
        yield();
@@ -1240,7 +1246,7 @@ somove(struct socket *so, int wait)
        int              error = 0, maxreached = 0;
        short            state;
 
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
  nextpkt:
        if (so->so_error) {
@@ -1502,7 +1508,7 @@ somove(struct socket *so, int wait)
 void
 sorwakeup(struct socket *so)
 {
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
 #ifdef SOCKET_SPLICE
        if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
@@ -1523,14 +1529,18 @@ sorwakeup(struct socket *so)
                return;
 #endif
        sowakeup(so, &so->so_rcv);
-       if (so->so_upcall)
+       if (so->so_upcall) {
+               /* XXXSMP breaks atomicity */
+               rw_exit_write(&socketlock);
                (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
+               rw_enter_write(&socketlock);
+       }
 }
 
 void
 sowwakeup(struct socket *so)
 {
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
 #ifdef SOCKET_SPLICE
        if (so->so_snd.sb_flagsintr & SB_SPLICE)
@@ -1876,7 +1886,8 @@ soo_kqfilter(struct file *fp, struct knote *kn)
 {
        struct socket *so = kn->kn_fp->f_data;
        struct sockbuf *sb;
-       int s;
+
+       KERNEL_ASSERT_LOCKED();
 
        switch (kn->kn_filter) {
        case EVFILT_READ:
@@ -1894,10 +1905,9 @@ soo_kqfilter(struct file *fp, struct knote *kn)
                return (EINVAL);
        }
 
-       s = splnet();
        SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
        sb->sb_flags |= SB_KNOTE;
-       splx(s);
+
        return (0);
 }
 
@@ -1905,12 +1915,12 @@ void
 filt_sordetach(struct knote *kn)
 {
        struct socket *so = kn->kn_fp->f_data;
-       int s = splnet();
+
+       KERNEL_ASSERT_LOCKED();
 
        SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
        if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
                so->so_rcv.sb_flags &= ~SB_KNOTE;
-       splx(s);
 }
 
 int
@@ -1939,12 +1949,12 @@ void
 filt_sowdetach(struct knote *kn)
 {
        struct socket *so = kn->kn_fp->f_data;
-       int s = splnet();
+
+       KERNEL_ASSERT_LOCKED();
 
        SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
        if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
                so->so_snd.sb_flags &= ~SB_KNOTE;
-       splx(s);
 }
 
 int
diff --git sys/kern/uipc_socket2.c sys/kern/uipc_socket2.c
index c3b7c3a..ed9fa6f 100644
--- sys/kern/uipc_socket2.c
+++ sys/kern/uipc_socket2.c
@@ -145,7 +145,7 @@ sonewconn(struct socket *head, int connstatus)
        struct socket *so;
        int soqueue = connstatus ? 1 : 0;
 
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
        if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 95 / 100)
                return (NULL);
@@ -274,10 +274,10 @@ socantrcvmore(struct socket *so)
 int
 sbwait(struct sockbuf *sb)
 {
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
        sb->sb_flagsintr |= SB_WAIT;
-       return (tsleep(&sb->sb_cc,
+       return (rwsleep(&sb->sb_cc, &socketlock,
            (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio",
            sb->sb_timeo));
 }
@@ -315,7 +315,7 @@ sbunlock(struct sockbuf *sb)
 void
 sowakeup(struct socket *so, struct sockbuf *sb)
 {
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
        selwakeup(&sb->sb_sel);
        sb->sb_flagsintr &= ~SB_SEL;
diff --git sys/kern/uipc_syscalls.c sys/kern/uipc_syscalls.c
index e064bc9..9a0e18f 100644
--- sys/kern/uipc_syscalls.c
+++ sys/kern/uipc_syscalls.c
@@ -276,7 +276,7 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, 
socklen_t *anamelen,
        if ((error = getsock(p, sock, &fp)) != 0)
                return (error);
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        headfp = fp;
        head = fp->f_data;
 
@@ -301,7 +301,8 @@ redo:
                        head->so_error = ECONNABORTED;
                        break;
                }
-               error = tsleep(&head->so_timeo, PSOCK | PCATCH, "netcon", 0);
+               error = rwsleep(&head->so_timeo, &socketlock, PSOCK | PCATCH,
+                   "netcon", 0);
                if (error) {
                        goto bad;
                }
@@ -377,7 +378,7 @@ redo:
        }
        m_freem(nam);
 bad:
-       splx(s);
+       SOCKET_UNLOCK(s);
        FRELE(headfp, p);
        return (error);
 }
@@ -434,9 +435,10 @@ sys_connect(struct proc *p, void *v, register_t *retval)
                m_freem(nam);
                return (EINPROGRESS);
        }
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
-               error = tsleep(&so->so_timeo, PSOCK | PCATCH, "netcon2", 0);
+               error = rwsleep(&so->so_timeo, &socketlock, PSOCK | PCATCH,
+                   "netcon2", 0);
                if (error) {
                        if (error == EINTR || error == ERESTART)
                                interrupted = 1;
@@ -447,7 +449,7 @@ sys_connect(struct proc *p, void *v, register_t *retval)
                error = so->so_error;
                so->so_error = 0;
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
 bad:
        if (!interrupted)
                so->so_state &= ~SS_ISCONNECTING;
diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c
index e0f7f27..84bab2f 100644
--- sys/kern/uipc_usrreq.c
+++ sys/kern/uipc_usrreq.c
@@ -131,7 +131,11 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, 
struct mbuf *nam,
                break;
 
        case PRU_BIND:
+               /* XXXSMP breaks atomicity */
+               rw_assert_wrlock(&socketlock);
+               rw_exit_write(&socketlock);
                error = unp_bind(unp, nam, p);
+               rw_enter_write(&socketlock);
                break;
 
        case PRU_LISTEN:
diff --git sys/net/if.c sys/net/if.c
index b7c9e11..797344f 100644
--- sys/net/if.c
+++ sys/net/if.c
@@ -160,7 +160,8 @@ void        if_netisr(void *);
 void   ifa_print_all(void);
 #endif
 
-void   if_start_locked(struct ifnet *ifp);
+void   if_start_locked(struct ifnet *);
+int    if_ioctl_locked(struct socket *, u_long, caddr_t, struct proc *);
 
 /*
  * interface index map
@@ -835,10 +836,15 @@ if_netisr(void *unused)
        int s;
 
        KERNEL_LOCK();
-       s = splsoftnet();
+       SOCKET_LOCK(s);
 
        while ((n = netisr) != 0) {
-               sched_pause();
+               /* Like sched_pause() but with a rwlock dance. */
+               if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
+                       SOCKET_UNLOCK(s);
+                       yield();
+                       SOCKET_LOCK(s);
+               }
 
                atomic_clearbits_int(&netisr, n);
 
@@ -876,7 +882,7 @@ if_netisr(void *unused)
                pfsyncintr();
 #endif
 
-       splx(s);
+       SOCKET_UNLOCK(s);
        KERNEL_UNLOCK();
 }
 
@@ -1429,7 +1435,7 @@ if_downall(void)
        struct ifnet *ifp;
        int s;
 
-       s = splnet();
+       SOCKET_LOCK(s);
        TAILQ_FOREACH(ifp, &ifnet, if_list) {
                if ((ifp->if_flags & IFF_UP) == 0)
                        continue;
@@ -1442,7 +1448,7 @@ if_downall(void)
                            (caddr_t)&ifrq);
                }
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
 }
 
 /*
@@ -1502,9 +1508,9 @@ if_linkstate_task(void *xifidx)
        if (ifp == NULL)
                return;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        if_linkstate(ifp);
-       splx(s);
+       SOCKET_UNLOCK(s);
 
        if_put(ifp);
 }
@@ -1512,7 +1518,7 @@ if_linkstate_task(void *xifidx)
 void
 if_linkstate(struct ifnet *ifp)
 {
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
        rt_ifmsg(ifp);
 #ifndef SMALL_KERNEL
@@ -1703,6 +1709,18 @@ if_setrdomain(struct ifnet *ifp, int rdomain)
 int
 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 {
+       int s, error;
+
+       SOCKET_LOCK(s);
+       error = if_ioctl_locked(so, cmd, data, p);
+       SOCKET_UNLOCK(s);
+
+       return (error);
+}
+
+int
+if_ioctl_locked(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
+{
        struct ifnet *ifp;
        struct ifreq *ifr;
        struct sockaddr_dl *sdl;
@@ -1751,20 +1769,15 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, 
struct proc *p)
                switch (ifar->ifar_af) {
                case AF_INET:
                        /* attach is a noop for AF_INET */
-                       if (cmd == SIOCIFAFDETACH) {
-                               s = splsoftnet();
+                       if (cmd == SIOCIFAFDETACH)
                                in_ifdetach(ifp);
-                               splx(s);
-                       }
                        return (0);
 #ifdef INET6
                case AF_INET6:
-                       s = splsoftnet();
                        if (cmd == SIOCIFAFATTACH)
                                error = in6_ifattach(ifp);
                        else
                                in6_ifdetach(ifp);
-                       splx(s);
                        return (error);
 #endif /* INET6 */
                default:
diff --git sys/net/route.c sys/net/route.c
index a04b095..ab2b924 100644
--- sys/net/route.c
+++ sys/net/route.c
@@ -547,7 +547,7 @@ rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
        int                      flags = RTF_GATEWAY|RTF_HOST;
        uint8_t                  prio = RTP_NONE;
 
-       splsoftassert(IPL_SOFTNET);
+       SOCKET_ASSERT_LOCKED();
 
        /* verify the gateway is directly reachable */
        if ((ifa = ifa_ifwithnet(gateway, rdomain)) == NULL) {
@@ -1498,6 +1498,8 @@ rt_timer_queue_destroy(struct rttimer_queue *rtq)
 {
        struct rttimer  *r;
 
+       SOCKET_ASSERT_LOCKED();
+
        while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
                LIST_REMOVE(r, rtt_link);
                TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
@@ -1590,7 +1592,7 @@ rt_timer_timer(void *arg)
 
        current_time = time_uptime;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        for (rtq = LIST_FIRST(&rttimer_queue_head); rtq != NULL;
             rtq = LIST_NEXT(rtq, rtq_link)) {
                while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
@@ -1605,7 +1607,7 @@ rt_timer_timer(void *arg)
                                printf("rt_timer_timer: rtq_count reached 0\n");
                }
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
 
        timeout_add_sec(to, 1);
 }
diff --git sys/net/rtsock.c sys/net/rtsock.c
index 46150c6..a40c6c8 100644
--- sys/net/rtsock.c
+++ sys/net/rtsock.c
@@ -296,6 +296,7 @@ route_ctloutput(int op, struct socket *so, int level, int 
optname,
        return (error);
 }
 
+/* XXXSMP */
 void
 rt_senddesync(void *data)
 {
diff --git sys/netinet/if_ether.c sys/netinet/if_ether.c
index da076c2..9f43add 100644
--- sys/netinet/if_ether.c
+++ sys/netinet/if_ether.c
@@ -110,10 +110,10 @@ void
 arptimer(void *arg)
 {
        struct timeout *to = (struct timeout *)arg;
-       int s;
        struct llinfo_arp *la, *nla;
+       int s;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        timeout_add_sec(to, arpt_prune);
        LIST_FOREACH_SAFE(la, &arp_list, la_list, nla) {
                struct rtentry *rt = la->la_rt;
@@ -121,7 +121,7 @@ arptimer(void *arg)
                if (rt->rt_expire && rt->rt_expire <= time_uptime)
                        arptfree(rt); /* timer has expired; clear */
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
 }
 
 void
@@ -138,7 +138,7 @@ arp_rtrequest(struct ifnet *ifp, int req, struct rtentry 
*rt)
                pool_init(&arp_pool, sizeof(struct llinfo_arp), 0,
                    IPL_SOFTNET, 0, "arp", NULL);
 
-               timeout_set(&arptimer_to, arptimer, &arptimer_to);
+               timeout_set_proc(&arptimer_to, arptimer, &arptimer_to);
                timeout_add_sec(&arptimer_to, 1);
        }
 
diff --git sys/netinet/ip_carp.c sys/netinet/ip_carp.c
index ff3ae78..1c4aa86 100644
--- sys/netinet/ip_carp.c
+++ sys/netinet/ip_carp.c
@@ -1045,7 +1045,7 @@ carp_send_ad(void *v)
                return;
        }
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
 
        /* bow out if we've gone to backup (the carp interface is going down) */
        if (sc->sc_bow_out) {
@@ -1246,7 +1246,7 @@ carp_send_ad(void *v)
 
 retry_later:
        sc->cur_vhe = NULL;
-       splx(s);
+       SOCKET_UNLOCK(s);
        if (advbase != 255 || advskew != 255)
                timeout_add(&vhe->ad_tmo, tvtohz(&tv));
 }
diff --git sys/netinet/ip_icmp.c sys/netinet/ip_icmp.c
index cdd60aa..57d4553 100644
--- sys/netinet/ip_icmp.c
+++ sys/netinet/ip_icmp.c
@@ -884,7 +884,7 @@ icmp_sysctl(int *name, u_int namelen, void *oldp, size_t 
*oldlenp, void *newp,
        if (namelen != 1)
                return (ENOTDIR);
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        switch (name[0]) {
        case ICMPCTL_REDIRTIMEOUT:
 
@@ -921,7 +921,7 @@ icmp_sysctl(int *name, u_int namelen, void *oldp, size_t 
*oldlenp, void *newp,
                error = ENOPROTOOPT;
                break;
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
 
        return (error);
 }
@@ -1046,7 +1046,8 @@ void
 icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
 {
        struct ifnet *ifp;
-       int s;
+
+       SOCKET_ASSERT_LOCKED();
 
        ifp = if_get(rt->rt_ifidx);
        if (ifp == NULL)
@@ -1058,7 +1059,6 @@ icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer 
*r)
 
                sin = *satosin(rt_key(rt));
 
-               s = splsoftnet();
                rtdeletemsg(rt, ifp, r->rtt_tableid);
 
                /* Notify TCP layer of increased Path MTU estimate */
@@ -1066,7 +1066,6 @@ icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer 
*r)
                if (ctlfunc)
                        (*ctlfunc)(PRC_MTUINC, sintosa(&sin),
                            r->rtt_tableid, NULL);
-               splx(s);
        } else {
                if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
                        rt->rt_rmx.rmx_mtu = 0;
@@ -1097,17 +1096,15 @@ void
 icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r)
 {
        struct ifnet *ifp;
-       int s;
+
+       SOCKET_ASSERT_LOCKED();
 
        ifp = if_get(rt->rt_ifidx);
        if (ifp == NULL)
                return;
 
-       if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
-               s = splsoftnet();
+       if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST))
                rtdeletemsg(rt, ifp, r->rtt_tableid);
-               splx(s);
-       }
 
        if_put(ifp);
 }
diff --git sys/netinet/ip_input.c sys/netinet/ip_input.c
index 7936492..dc10925 100644
--- sys/netinet/ip_input.c
+++ sys/netinet/ip_input.c
@@ -1601,20 +1601,20 @@ ip_sysctl(int *name, u_int namelen, void *oldp, size_t 
*oldlenp, void *newp,
                        ip_mtudisc_timeout_q =
                            rt_timer_queue_create(ip_mtudisc_timeout);
                } else if (ip_mtudisc == 0 && ip_mtudisc_timeout_q != NULL) {
-                       s = splsoftnet();
+                       SOCKET_LOCK(s);
                        rt_timer_queue_destroy(ip_mtudisc_timeout_q);
                        ip_mtudisc_timeout_q = NULL;
-                       splx(s);
+                       SOCKET_UNLOCK(s);
                }
                return error;
        case IPCTL_MTUDISCTIMEOUT:
                error = sysctl_int(oldp, oldlenp, newp, newlen,
                   &ip_mtudisc_timeout);
                if (ip_mtudisc_timeout_q != NULL) {
-                       s = splsoftnet();
+                       SOCKET_LOCK(s);
                        rt_timer_queue_change(ip_mtudisc_timeout_q,
                                              ip_mtudisc_timeout);
-                       splx(s);
+                       SOCKET_UNLOCK(s);
                }
                return (error);
        case IPCTL_IPSEC_ENC_ALGORITHM:
@@ -1755,12 +1755,15 @@ ip_send_dispatch(void *xmq)
        int s;
 
        mq_delist(mq, &ml);
+       if (ml_empty(&ml))
+               return;
+
        KERNEL_LOCK();
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        while ((m = ml_dequeue(&ml)) != NULL) {
                ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
        KERNEL_UNLOCK();
 }
 
diff --git sys/netinet/ip_output.c sys/netinet/ip_output.c
index 2c0f416..58a31cd 100644
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -109,6 +109,8 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route 
*ro, int flags,
        int rv;
 #endif
 
+       SOCKET_ASSERT_LOCKED();
+
 #ifdef IPSEC
        if (inp && (inp->inp_flags & INP_IPV6) != 0)
                panic("ip_output: IPv6 pcb is passed");
diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c
index 2d06f54..8668f15 100644
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -3522,11 +3522,9 @@ syn_cache_timer(void *arg)
        struct syn_cache *sc = arg;
        int s;
 
-       s = splsoftnet();
-       if (sc->sc_flags & SCF_DEAD) {
-               splx(s);
-               return;
-       }
+       SOCKET_LOCK(s);
+       if (sc->sc_flags & SCF_DEAD)
+               goto out;
 
        if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
                /* Drop it -- too many retransmissions. */
@@ -3549,14 +3547,15 @@ syn_cache_timer(void *arg)
        sc->sc_rxtshift++;
        SYN_CACHE_TIMER_ARM(sc);
 
-       splx(s);
+ out:
+       SOCKET_UNLOCK(s);
        return;
 
  dropit:
        tcpstat.tcps_sc_timed_out++;
        syn_cache_rm(sc);
        syn_cache_put(sc);
-       splx(s);
+       SOCKET_UNLOCK(s);
 }
 
 void
diff --git sys/netinet/tcp_timer.c sys/netinet/tcp_timer.c
index 6f4f07e..6ef40fd 100644
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -112,15 +112,13 @@ tcp_delack(void *arg)
         * for whatever reason, it will restart the delayed
         * ACK callout.
         */
-
-       s = splsoftnet();
-       if (tp->t_flags & TF_DEAD) {
-               splx(s);
-               return;
-       }
+       SOCKET_LOCK(s);
+       if (tp->t_flags & TF_DEAD)
+               goto out;
        tp->t_flags |= TF_ACKNOW;
        (void) tcp_output(tp);
-       splx(s);
+ out:
+       SOCKET_UNLOCK(s);
 }
 
 /*
@@ -193,11 +191,9 @@ tcp_timer_rexmt(void *arg)
        uint32_t rto;
        int s;
 
-       s = splsoftnet();
-       if (tp->t_flags & TF_DEAD) {
-               splx(s);
-               return;
-       }
+       SOCKET_LOCK(s);
+       if (tp->t_flags & TF_DEAD)
+               goto out;
 
        if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
            SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
@@ -224,8 +220,7 @@ tcp_timer_rexmt(void *arg)
                sin.sin_addr = tp->t_inpcb->inp_faddr;
                in_pcbnotifyall(&tcbtable, sintosa(&sin),
                    tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc);
-               splx(s);
-               return;
+               goto out;
        }
 
 #ifdef TCP_SACK
@@ -376,7 +371,7 @@ tcp_timer_rexmt(void *arg)
        (void) tcp_output(tp);
 
  out:
-       splx(s);
+       SOCKET_UNLOCK(s);
 }
 
 void
@@ -386,11 +381,10 @@ tcp_timer_persist(void *arg)
        uint32_t rto;
        int s;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        if ((tp->t_flags & TF_DEAD) ||
             TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
-               splx(s);
-               return;
+               goto out;
        }
        tcpstat.tcps_persisttimeo++;
        /*
@@ -415,7 +409,7 @@ tcp_timer_persist(void *arg)
        (void) tcp_output(tp);
        tp->t_force = 0;
  out:
-       splx(s);
+       SOCKET_UNLOCK(s);
 }
 
 void
@@ -424,11 +418,9 @@ tcp_timer_keep(void *arg)
        struct tcpcb *tp = arg;
        int s;
 
-       s = splsoftnet();
-       if (tp->t_flags & TF_DEAD) {
-               splx(s);
-               return;
-       }
+       SOCKET_LOCK(s);
+       if (tp->t_flags & TF_DEAD)
+               goto out;
 
        tcpstat.tcps_keeptimeo++;
        if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
@@ -457,15 +449,14 @@ tcp_timer_keep(void *arg)
                TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
        } else
                TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
-
-       splx(s);
+ out:
+       SOCKET_UNLOCK(s);
        return;
 
  dropit:
        tcpstat.tcps_keepdrops++;
        tp = tcp_drop(tp, ETIMEDOUT);
-
-       splx(s);
+       SOCKET_UNLOCK(s);
 }
 
 void
@@ -474,11 +465,9 @@ tcp_timer_2msl(void *arg)
        struct tcpcb *tp = arg;
        int s;
 
-       s = splsoftnet();
-       if (tp->t_flags & TF_DEAD) {
-               splx(s);
-               return;
-       }
+       SOCKET_LOCK(s);
+       if (tp->t_flags & TF_DEAD)
+               goto out;
 
 #ifdef TCP_SACK
        tcp_timer_freesack(tp);
@@ -490,5 +479,6 @@ tcp_timer_2msl(void *arg)
        else
                tp = tcp_close(tp);
 
-       splx(s);
+ out:
+       SOCKET_UNLOCK(s);
 }
diff --git sys/netinet6/icmp6.c sys/netinet6/icmp6.c
index c918004..2abbc12 100644
--- sys/netinet6/icmp6.c
+++ sys/netinet6/icmp6.c
@@ -1914,17 +1914,14 @@ icmp6_mtudisc_clone(struct sockaddr *dst, u_int rdomain)
        if ((rt->rt_flags & RTF_HOST) == 0) {
                struct rt_addrinfo info;
                struct rtentry *nrt;
-               int s;
 
                bzero(&info, sizeof(info));
                info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC;
                info.rti_info[RTAX_DST] = dst;
                info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 
-               s = splsoftnet();
                error = rtrequest(RTM_ADD, &info, rt->rt_priority, &nrt,
                    rdomain);
-               splx(s);
                if (error) {
                        rtfree(rt);
                        return NULL;
@@ -1947,16 +1944,15 @@ void
 icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
 {
        struct ifnet *ifp;
-       int s;
+
+       SOCKET_ASSERT_LOCKED();
 
        ifp = if_get(rt->rt_ifidx);
        if (ifp == NULL)
                return;
 
        if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
-               s = splsoftnet();
                rtdeletemsg(rt, ifp, r->rtt_tableid);
-               splx(s);
        } else {
                if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
                        rt->rt_rmx.rmx_mtu = 0;
@@ -1969,17 +1965,15 @@ void
 icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r)
 {
        struct ifnet *ifp;
-       int s;
+
+       SOCKET_ASSERT_LOCKED();
 
        ifp = if_get(rt->rt_ifidx);
        if (ifp == NULL)
                return;
 
-       if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
-               s = splsoftnet();
+       if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST))
                rtdeletemsg(rt, ifp, r->rtt_tableid);
-               splx(s);
-       }
 
        if_put(ifp);
 }
diff --git sys/netinet6/ip6_input.c sys/netinet6/ip6_input.c
index 9ac2555..aed3ebd 100644
--- sys/netinet6/ip6_input.c
+++ sys/netinet6/ip6_input.c
@@ -1429,12 +1429,15 @@ ip6_send_dispatch(void *xmq)
        int s;
 
        mq_delist(mq, &ml);
+       if (ml_empty(&ml))
+               return;
+
        KERNEL_LOCK();
-       s = splsoftnet();
+       SOCKET_LOCK(s);
        while ((m = ml_dequeue(&ml)) != NULL) {
                ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
        }
-       splx(s);
+       SOCKET_UNLOCK(s);
        KERNEL_UNLOCK();
 }
 
diff --git sys/netinet6/nd6.c sys/netinet6/nd6.c
index 34c8d9c..66e6068 100644
--- sys/netinet6/nd6.c
+++ sys/netinet6/nd6.c
@@ -308,10 +308,6 @@ skip1:
 void
 nd6_llinfo_settimer(struct llinfo_nd6 *ln, int secs)
 {
-       int s;
-
-       s = splsoftnet();
-
        if (secs < 0) {
                ln->ln_rt->rt_expire = 0;
                timeout_del(&ln->ln_timer_ch);
@@ -319,8 +315,6 @@ nd6_llinfo_settimer(struct llinfo_nd6 *ln, int secs)
                ln->ln_rt->rt_expire = time_uptime + secs;
                timeout_add_sec(&ln->ln_timer_ch, secs);
        }
-
-       splx(s);
 }
 
 void
@@ -333,14 +327,14 @@ nd6_llinfo_timer(void *arg)
        struct ifnet *ifp;
        struct nd_ifinfo *ndi = NULL;
 
-       s = splsoftnet();
+       SOCKET_LOCK(s);
 
        ln = (struct llinfo_nd6 *)arg;
 
        if ((rt = ln->ln_rt) == NULL)
                panic("ln->ln_rt == NULL");
        if ((ifp = if_get(rt->rt_ifidx)) == NULL) {
-               splx(s);
+               SOCKET_UNLOCK(s);
                return;
        }
        ndi = ND_IFINFO(ifp);
@@ -427,7 +421,7 @@ nd6_llinfo_timer(void *arg)
        }
 
        if_put(ifp);
-       splx(s);
+       SOCKET_UNLOCK(s);
 }
 
 /*
@@ -989,7 +983,7 @@ nd6_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
                nd6_inuse++;
                nd6_allocated++;
                ln->ln_rt = rt;
-               timeout_set(&ln->ln_timer_ch, nd6_llinfo_timer, ln);
+               timeout_set_proc(&ln->ln_timer_ch, nd6_llinfo_timer, ln);
                /* this is required for "ndp" command. - shin */
                if (req == RTM_ADD) {
                        /*
diff --git sys/sys/systm.h sys/sys/systm.h
index 5ef388b..56d57d3 100644
--- sys/sys/systm.h
+++ sys/sys/systm.h
@@ -290,6 +290,31 @@ struct uio;
 int    uiomove(void *, size_t, struct uio *);
 
 #if defined(_KERNEL)
+/*
+ * Serialize socket operations to ensure that code paths that were
+ * atomically executed stay atomic until we turn them mpsafe.
+ */
+extern struct rwlock socketlock;
+
+#define        SOCKET_LOCK(s)                                          \
+do {                                                                   \
+       rw_enter_write(&socketlock);                                    \
+       s = splsoftnet();                                               \
+} while (/* CONSTCOND */ 0)
+
+#define        SOCKET_UNLOCK(s)                                        \
+do {                                                                   \
+       splx(s);                                                        \
+       rw_exit_write(&socketlock);                                     \
+} while (/* CONSTCOND */ 0)
+
+#define        SOCKET_ASSERT_LOCKED()                                  \
+do {                                                                   \
+       if (rw_status(&socketlock) != RW_WRITE)                         \
+               splassert_fail(RW_WRITE, rw_status(&socketlock), __func__);\
+       splsoftassert(IPL_SOFTNET);                                     \
+} while (0)
+
 __returns_twice int    setjmp(label_t *);
 __dead void    longjmp(label_t *);
 #endif

Reply via email to