On Tue, Oct 04, 2016 at 04:44:29PM +0200, Martin Pieuchot wrote:
> On 10/03/16 16:43, Martin Pieuchot wrote:
> > Diff below introduces a single write lock that will be used to serialize
> > access to ip_output().
> >
> > This lock will then be split into multiple readers and writers to allow
> > multiple forwarding paths to run in parallel with each other while still
> > being serialized with the socket layer.
> >
> > I'm currently looking for people willing to run this diff and try to
> > break it. In other words, your machine might panic with it, and if it
> > does, please report the panic to me so the diff can be improved.
> >
> > I tested NFS v2 and v3 so I'm quite confident, but I might have missed
> > some obvious stuff.
>
> Updated diff attached, including a fix for syn_cache_timer(), a problem
> reported by Chris Jackman.
>
So far, so good, on i386 and amd64 vmm(4) VMs. Booted, did a pkg_add
upgrade, and ran cvsync.
No issues seen so far.
-ml
> Index: kern/kern_rwlock.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 kern_rwlock.c
> --- kern/kern_rwlock.c 14 Mar 2015 07:33:42 -0000 1.27
> +++ kern/kern_rwlock.c 4 Oct 2016 14:40:29 -0000
> @@ -98,6 +98,12 @@ rw_enter_read(struct rwlock *rwl)
> membar_enter();
> }
>
> +#if 1
> +#include <machine/db_machdep.h>
> +#include <ddb/db_output.h>
> +#include <ddb/db_interface.h>
> +#endif
> +
> void
> rw_enter_write(struct rwlock *rwl)
> {
> @@ -108,6 +114,15 @@ rw_enter_write(struct rwlock *rwl)
> rw_enter(rwl, RW_WRITE);
> else
> membar_enter();
> +
> +#if 1
> + if ((rwl == &netlock) && (splassert_ctl == 3)) {
> + printf("ENTER::%d::", cpu_number());
> + db_stack_trace_print(
> + (db_expr_t)__builtin_frame_address(1),
> + TRUE, 1, "", printf);
> + }
> +#endif
> }
>
> void
> @@ -129,6 +144,15 @@ rw_exit_write(struct rwlock *rwl)
> unsigned long owner = rwl->rwl_owner;
>
> rw_assert_wrlock(rwl);
> +
> +#if 1
> + if ((rwl == &netlock) && (splassert_ctl == 3)) {
> + printf("EXIT::%d::", cpu_number());
> + db_stack_trace_print(
> + (db_expr_t)__builtin_frame_address(1),
> + TRUE, 1, "", printf);
> + }
> +#endif
>
> membar_exit();
> if (__predict_false((owner & RWLOCK_WAIT) ||
> Index: kern/sys_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_socket.c,v
> retrieving revision 1.21
> diff -u -p -r1.21 sys_socket.c
> --- kern/sys_socket.c 5 Dec 2015 10:11:53 -0000 1.21
> +++ kern/sys_socket.c 4 Oct 2016 14:40:29 -0000
> @@ -131,8 +131,10 @@ soo_poll(struct file *fp, int events, st
> {
> struct socket *so = fp->f_data;
> int revents = 0;
> - int s = splsoftnet();
> + int s;
>
> + rw_enter_write(&netlock);
> + s = splsoftnet();
> if (events & (POLLIN | POLLRDNORM)) {
> if (soreadable(so))
> revents |= events & (POLLIN | POLLRDNORM);
> @@ -159,6 +161,7 @@ soo_poll(struct file *fp, int events, st
> }
> }
> splx(s);
> + rw_exit_write(&netlock);
> return (revents);
> }
>
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.161
> diff -u -p -r1.161 uipc_socket.c
> --- kern/uipc_socket.c 20 Sep 2016 14:27:43 -0000 1.161
> +++ kern/uipc_socket.c 4 Oct 2016 14:40:29 -0000
> @@ -123,6 +123,7 @@ socreate(int dom, struct socket **aso, i
> return (EPROTONOSUPPORT);
> if (prp->pr_type != type)
> return (EPROTOTYPE);
> + rw_enter_write(&netlock);
> s = splsoftnet();
> so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
> TAILQ_INIT(&so->so_q0);
> @@ -142,9 +143,11 @@ socreate(int dom, struct socket **aso, i
> so->so_state |= SS_NOFDREF;
> sofree(so);
> splx(s);
> + rw_exit_write(&netlock);
> return (error);
> }
> splx(s);
> + rw_exit_write(&netlock);
> *aso = so;
> return (0);
> }
> @@ -152,11 +155,13 @@ socreate(int dom, struct socket **aso, i
> int
> sobind(struct socket *so, struct mbuf *nam, struct proc *p)
> {
> - int s = splsoftnet();
> - int error;
> + int s, error;
>
> + rw_enter_write(&netlock);
> + s = splsoftnet();
> error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
> splx(s);
> + rw_exit_write(&netlock);
> return (error);
> }
>
> @@ -171,11 +176,13 @@ solisten(struct socket *so, int backlog)
> if (isspliced(so) || issplicedback(so))
> return (EOPNOTSUPP);
> #endif /* SOCKET_SPLICE */
> + rw_enter_write(&netlock);
> s = splsoftnet();
> error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
> curproc);
> if (error) {
> splx(s);
> + rw_exit_write(&netlock);
> return (error);
> }
> if (TAILQ_FIRST(&so->so_q) == NULL)
> @@ -186,6 +193,7 @@ solisten(struct socket *so, int backlog)
> backlog = sominconn;
> so->so_qlimit = backlog;
> splx(s);
> + rw_exit_write(&netlock);
> return (0);
> }
>
> @@ -196,6 +204,7 @@ solisten(struct socket *so, int backlog)
> void
> sofree(struct socket *so)
> {
> + rw_assert_wrlock(&netlock);
> splsoftassert(IPL_SOFTNET);
>
> if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
> @@ -234,9 +243,10 @@ int
> soclose(struct socket *so)
> {
> struct socket *so2;
> - int s = splsoftnet(); /* conservative */
> - int error = 0;
> + int s, error = 0;
>
> + rw_enter_write(&netlock);
> + s = splsoftnet(); /* conservative */
> if (so->so_options & SO_ACCEPTCONN) {
> while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
> (void) soqremque(so2, 0);
> @@ -260,7 +270,7 @@ soclose(struct socket *so)
> (so->so_state & SS_NBIO))
> goto drop;
> while (so->so_state & SS_ISCONNECTED) {
> - error = tsleep(&so->so_timeo,
> + error = rwsleep(&so->so_timeo, &netlock,
> PSOCK | PCATCH, "netcls",
> so->so_linger * hz);
> if (error)
> @@ -281,6 +291,7 @@ discard:
> so->so_state |= SS_NOFDREF;
> sofree(so);
> splx(s);
> + rw_exit_write(&netlock);
> return (error);
> }
>
> @@ -290,6 +301,7 @@ discard:
> int
> soabort(struct socket *so)
> {
> + rw_assert_wrlock(&netlock);
> splsoftassert(IPL_SOFTNET);
>
> return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
> @@ -301,6 +313,7 @@ soaccept(struct socket *so, struct mbuf
> {
> int error = 0;
>
> + rw_assert_wrlock(&netlock);
> splsoftassert(IPL_SOFTNET);
>
> if ((so->so_state & SS_NOFDREF) == 0)
> @@ -318,11 +331,11 @@ soaccept(struct socket *so, struct mbuf
> int
> soconnect(struct socket *so, struct mbuf *nam)
> {
> - int s;
> - int error;
> + int s, error;
>
> if (so->so_options & SO_ACCEPTCONN)
> return (EOPNOTSUPP);
> + rw_enter_write(&netlock);
> s = splsoftnet();
> /*
> * If protocol is connection-based, can only connect once.
> @@ -338,18 +351,21 @@ soconnect(struct socket *so, struct mbuf
> error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
> NULL, nam, NULL, curproc);
> splx(s);
> + rw_exit_write(&netlock);
> return (error);
> }
>
> int
> soconnect2(struct socket *so1, struct socket *so2)
> {
> - int s = splsoftnet();
> - int error;
> + int s, error;
>
> + rw_enter_write(&netlock);
> + s = splsoftnet();
> error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
> (struct mbuf *)so2, NULL, curproc);
> splx(s);
> + rw_exit_write(&netlock);
> return (error);
> }
>
> @@ -358,14 +374,20 @@ sodisconnect(struct socket *so)
> {
> int error;
>
> + rw_assert_wrlock(&netlock);
> splsoftassert(IPL_SOFTNET);
>
> - if ((so->so_state & SS_ISCONNECTED) == 0)
> - return (ENOTCONN);
> - if (so->so_state & SS_ISDISCONNECTING)
> - return (EALREADY);
> + if ((so->so_state & SS_ISCONNECTED) == 0) {
> + error = ENOTCONN;
> + goto bad;
> + }
> + if (so->so_state & SS_ISDISCONNECTING) {
> + error = EALREADY;
> + goto bad;
> + }
> error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
> NULL, curproc);
> +bad:
> return (error);
> }
>
> @@ -426,21 +448,21 @@ sosend(struct socket *so, struct mbuf *a
> (sizeof(struct file *) / sizeof(int)));
> }
>
> -#define snderr(errno) { error = errno; splx(s); goto release; }
> +#define snderr(e) { error = e; splx(s); rw_exit_write(&netlock); goto release; }
>
> restart:
> if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
> goto out;
> so->so_state |= SS_ISSENDING;
> do {
> + rw_enter_write(&netlock);
> s = splsoftnet();
> if (so->so_state & SS_CANTSENDMORE)
> snderr(EPIPE);
> if (so->so_error) {
> error = so->so_error;
> so->so_error = 0;
> - splx(s);
> - goto release;
> + snderr(error);
> }
> if ((so->so_state & SS_ISCONNECTED) == 0) {
> if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
> @@ -465,11 +487,13 @@ restart:
> error = sbwait(&so->so_snd);
> so->so_state &= ~SS_ISSENDING;
> splx(s);
> + rw_exit_write(&netlock);
> if (error)
> goto out;
> goto restart;
> }
> splx(s);
> + rw_exit_write(&netlock);
> space -= clen;
> do {
> if (uio == NULL) {
> @@ -489,6 +513,7 @@ restart:
> if (flags & MSG_EOR)
> top->m_flags |= M_EOR;
> }
> + rw_enter_write(&netlock);
> s = splsoftnet(); /* XXX */
> if (resid == 0)
> so->so_state &= ~SS_ISSENDING;
> @@ -496,6 +521,7 @@ restart:
> (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
> top, addr, control, curproc);
> splx(s);
> + rw_exit_write(&netlock);
> clen = 0;
> control = NULL;
> top = NULL;
> @@ -625,8 +651,8 @@ sbsync(struct sockbuf *sb, struct mbuf *
> * must begin with an address if the protocol so specifies,
> * followed by an optional mbuf or mbufs containing ancillary data,
> * and then zero or more mbufs of data.
> - * In order to avoid blocking network interrupts for the entire time here,
> - * we splx() while doing the actual copy to user space.
> + * In order to avoid blocking network for the entire time here, we splx()
> + * and release ``netlock'' while doing the actual copy to user space.
> * Although the sockbuf is locked, new data may still be appended,
> * and thus we must maintain consistency of the sockbuf during that time.
> *
> @@ -680,6 +706,8 @@ bad:
> restart:
> if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
> return (error);
> +
> + rw_enter_write(&netlock);
> s = splsoftnet();
>
> m = so->so_rcv.sb_mb;
> @@ -746,6 +774,7 @@ restart:
> sbunlock(&so->so_rcv);
> error = sbwait(&so->so_rcv);
> splx(s);
> + rw_exit_write(&netlock);
> if (error)
> return (error);
> goto restart;
> @@ -880,7 +909,9 @@ dontblock:
> SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
> resid = uio->uio_resid;
> splx(s);
> + rw_exit_write(&netlock);
> uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
> + rw_enter_write(&netlock);
> s = splsoftnet();
> if (uio_error)
> uio->uio_resid = resid - len;
> @@ -964,6 +995,7 @@ dontblock:
> if (error) {
> sbunlock(&so->so_rcv);
> splx(s);
> + rw_exit_write(&netlock);
> return (0);
> }
> if ((m = so->so_rcv.sb_mb) != NULL)
> @@ -1000,6 +1032,7 @@ dontblock:
> (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
> sbunlock(&so->so_rcv);
> splx(s);
> + rw_exit_write(&netlock);
> goto restart;
> }
>
> @@ -1011,6 +1044,7 @@ dontblock:
> release:
> sbunlock(&so->so_rcv);
> splx(s);
> + rw_exit_write(&netlock);
> return (error);
> }
>
> @@ -1020,6 +1054,7 @@ soshutdown(struct socket *so, int how)
> struct protosw *pr = so->so_proto;
> int s, error = 0;
>
> + rw_enter_write(&netlock);
> s = splsoftnet();
> switch (how) {
> case SHUT_RD:
> @@ -1037,6 +1072,8 @@ soshutdown(struct socket *so, int how)
> break;
> }
> splx(s);
> + rw_exit_write(&netlock);
> +
> return (error);
> }
>
> @@ -1050,6 +1087,7 @@ sorflush(struct socket *so)
>
> sb->sb_flags |= SB_NOINTR;
> (void) sblock(sb, M_WAITOK);
> + /* XXXSMP */
> s = splnet();
> socantrcvmore(so);
> sbunlock(sb);
> @@ -1103,10 +1141,12 @@ sosplice(struct socket *so, int fd, off_
> if ((error = sblock(&so->so_rcv,
> (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0)
> return (error);
> + rw_enter_write(&netlock);
> s = splsoftnet();
> if (so->so_sp->ssp_socket)
> sounsplice(so, so->so_sp->ssp_socket, 1);
> splx(s);
> + rw_exit_write(&netlock);
> sbunlock(&so->so_rcv);
> return (0);
> }
> @@ -1135,6 +1175,7 @@ sosplice(struct socket *so, int fd, off_
> FRELE(fp, curproc);
> return (error);
> }
> + rw_enter_write(&netlock);
> s = splsoftnet();
>
> if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
> @@ -1177,6 +1218,7 @@ sosplice(struct socket *so, int fd, off_
>
> release:
> splx(s);
> + rw_exit_write(&netlock);
> sbunlock(&sosp->so_snd);
> sbunlock(&so->so_rcv);
> FRELE(fp, curproc);
> @@ -1186,6 +1228,7 @@ sosplice(struct socket *so, int fd, off_
> void
> sounsplice(struct socket *so, struct socket *sosp, int wakeup)
> {
> + rw_assert_wrlock(&netlock);
> splsoftassert(IPL_SOFTNET);
>
> task_del(sosplice_taskq, &so->so_splicetask);
> @@ -1203,12 +1246,14 @@ soidle(void *arg)
> struct socket *so = arg;
> int s;
>
> + rw_enter_write(&netlock);
> s = splsoftnet();
> if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
> so->so_error = ETIMEDOUT;
> sounsplice(so, so->so_sp->ssp_socket, 1);
> }
> splx(s);
> + rw_exit_write(&netlock);
> }
>
> void
> @@ -1217,6 +1262,7 @@ sotask(void *arg)
> struct socket *so = arg;
> int s;
>
> + rw_enter_write(&netlock);
> s = splsoftnet();
> if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
> /*
> @@ -1227,6 +1273,7 @@ sotask(void *arg)
> somove(so, M_DONTWAIT);
> }
> splx(s);
> + rw_exit_write(&netlock);
>
> /* Avoid user land starvation. */
> yield();
> @@ -1248,6 +1295,7 @@ somove(struct socket *so, int wait)
> int error = 0, maxreached = 0;
> short state;
>
> + rw_assert_wrlock(&netlock);
> splsoftassert(IPL_SOFTNET);
>
> nextpkt:
> @@ -1510,6 +1558,7 @@ somove(struct socket *so, int wait)
> void
> sorwakeup(struct socket *so)
> {
> + rw_assert_wrlock(&netlock);
> splsoftassert(IPL_SOFTNET);
>
> #ifdef SOCKET_SPLICE
> @@ -1531,13 +1580,17 @@ sorwakeup(struct socket *so)
> return;
> #endif
> sowakeup(so, &so->so_rcv);
> - if (so->so_upcall)
> + if (so->so_upcall) {
> + rw_exit_write(&netlock);
> (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
> + rw_enter_write(&netlock);
> + }
> }
>
> void
> sowwakeup(struct socket *so)
> {
> + rw_assert_wrlock(&netlock);
> splsoftassert(IPL_SOFTNET);
>
> #ifdef SOCKET_SPLICE
> @@ -1884,7 +1937,8 @@ soo_kqfilter(struct file *fp, struct kno
> {
> struct socket *so = kn->kn_fp->f_data;
> struct sockbuf *sb;
> - int s;
> +
> + KERNEL_ASSERT_LOCKED();
>
> switch (kn->kn_filter) {
> case EVFILT_READ:
> @@ -1902,10 +1956,9 @@ soo_kqfilter(struct file *fp, struct kno
> return (EINVAL);
> }
>
> - s = splnet();
> SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
> sb->sb_flags |= SB_KNOTE;
> - splx(s);
> +
> return (0);
> }
>
> @@ -1913,12 +1966,12 @@ void
> filt_sordetach(struct knote *kn)
> {
> struct socket *so = kn->kn_fp->f_data;
> - int s = splnet();
> +
> + KERNEL_ASSERT_LOCKED();
>
> SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
> if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
> so->so_rcv.sb_flags &= ~SB_KNOTE;
> - splx(s);
> }
>
> int
> @@ -1947,12 +2000,12 @@ void
> filt_sowdetach(struct knote *kn)
> {
> struct socket *so = kn->kn_fp->f_data;
> - int s = splnet();
> +
> + KERNEL_ASSERT_LOCKED();
>
> SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
> if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
> so->so_snd.sb_flags &= ~SB_KNOTE;
> - splx(s);
> }
>
> int
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.65
> diff -u -p -r1.65 uipc_socket2.c
> --- kern/uipc_socket2.c 2 Sep 2016 13:28:21 -0000 1.65
> +++ kern/uipc_socket2.c 4 Oct 2016 14:40:29 -0000
> @@ -138,8 +138,6 @@ soisdisconnected(struct socket *so)
> * then we allocate a new structure, properly linked into the
> * data structure of the original socket, and return this.
> * Connstatus may be 0 or SS_ISCONNECTED.
> - *
> - * Must be called at splsoftnet()
> */
> struct socket *
> sonewconn(struct socket *head, int connstatus)
> @@ -147,6 +145,7 @@ sonewconn(struct socket *head, int conns
> struct socket *so;
> int soqueue = connstatus ? 1 : 0;
>
> + rw_assert_wrlock(&netlock);
> splsoftassert(IPL_SOFTNET);
>
> if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 95 / 100)
> @@ -276,10 +275,11 @@ socantrcvmore(struct socket *so)
> int
> sbwait(struct sockbuf *sb)
> {
> + rw_assert_wrlock(&netlock);
> splsoftassert(IPL_SOFTNET);
>
> sb->sb_flagsintr |= SB_WAIT;
> - return (tsleep(&sb->sb_cc,
> + return (rwsleep(&sb->sb_cc, &netlock,
> (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio",
> sb->sb_timeo));
> }
> @@ -317,7 +317,8 @@ sbunlock(struct sockbuf *sb)
> void
> sowakeup(struct socket *so, struct sockbuf *sb)
> {
> - int s = splsoftnet();
> + rw_assert_wrlock(&netlock);
> + splassert(IPL_SOFTNET);
>
> selwakeup(&sb->sb_sel);
> sb->sb_flagsintr &= ~SB_SEL;
> @@ -325,7 +326,7 @@ sowakeup(struct socket *so, struct sockb
> sb->sb_flagsintr &= ~SB_WAIT;
> wakeup(&sb->sb_cc);
> }
> - splx(s);
> +
> if (so->so_state & SS_ASYNC)
> csignal(so->so_pgid, SIGIO, so->so_siguid, so->so_sigeuid);
> }
> Index: kern/uipc_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
> retrieving revision 1.133
> diff -u -p -r1.133 uipc_syscalls.c
> --- kern/uipc_syscalls.c 9 Aug 2016 02:25:35 -0000 1.133
> +++ kern/uipc_syscalls.c 4 Oct 2016 14:40:29 -0000
> @@ -250,6 +250,7 @@ doaccept(struct proc *p, int sock, struc
> if ((error = getsock(p, sock, &fp)) != 0)
> return (error);
>
> + rw_enter_write(&netlock);
> s = splsoftnet();
> headfp = fp;
> head = fp->f_data;
> @@ -275,7 +276,8 @@ redo:
> head->so_error = ECONNABORTED;
> break;
> }
> - error = tsleep(&head->so_timeo, PSOCK | PCATCH, "netcon", 0);
> + error = rwsleep(&head->so_timeo, &netlock, PSOCK | PCATCH,
> + "netcon", 0);
> if (error) {
> goto bad;
> }
> @@ -352,6 +354,7 @@ redo:
> m_freem(nam);
> bad:
> splx(s);
> + rw_exit_write(&netlock);
> FRELE(headfp, p);
> return (error);
> }
> @@ -406,9 +409,11 @@ sys_connect(struct proc *p, void *v, reg
> m_freem(nam);
> return (EINPROGRESS);
> }
> + rw_enter_write(&netlock);
> s = splsoftnet();
> while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
> - error = tsleep(&so->so_timeo, PSOCK | PCATCH, "netcon2", 0);
> + error = rwsleep(&so->so_timeo, &netlock, PSOCK | PCATCH,
> + "netcon2", 0);
> if (error) {
> if (error == EINTR || error == ERESTART)
> interrupted = 1;
> @@ -420,6 +425,7 @@ sys_connect(struct proc *p, void *v, reg
> so->so_error = 0;
> }
> splx(s);
> + rw_exit_write(&netlock);
> bad:
> if (!interrupted)
> so->so_state &= ~SS_ISCONNECTING;
> Index: kern/uipc_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
> retrieving revision 1.102
> diff -u -p -r1.102 uipc_usrreq.c
> --- kern/uipc_usrreq.c 26 Aug 2016 07:12:30 -0000 1.102
> +++ kern/uipc_usrreq.c 4 Oct 2016 14:40:29 -0000
> @@ -131,7 +131,10 @@ uipc_usrreq(struct socket *so, int req,
> break;
>
> case PRU_BIND:
> + rw_assert_wrlock(&netlock);
> + rw_exit_write(&netlock);
> error = unp_bind(unp, nam, p);
> + rw_enter_write(&netlock);
> break;
>
> case PRU_LISTEN:
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.452
> diff -u -p -r1.452 if.c
> --- net/if.c 3 Oct 2016 12:26:13 -0000 1.452
> +++ net/if.c 4 Oct 2016 14:40:29 -0000
> @@ -163,7 +163,13 @@ void if_netisr(void *);
> void ifa_print_all(void);
> #endif
>
> -void if_start_locked(struct ifnet *ifp);
> +void if_start_locked(struct ifnet *);
> +int if_ioctl_locked(struct socket *, u_long, caddr_t, struct proc *);
> +
> +/*
> + * Network lock: serialize socket operations.
> + */
> +struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
>
> /*
> * interface index map
> @@ -836,10 +842,16 @@ if_netisr(void *unused)
> int s;
>
> KERNEL_LOCK();
> + rw_enter_write(&netlock);
> s = splsoftnet();
>
> while ((n = netisr) != 0) {
> - sched_pause();
> + /* Like sched_pause() but with a rwlock dance. */
> + if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
> + rw_exit_write(&netlock);
> + yield();
> + rw_enter_write(&netlock);
> + }
>
> atomic_clearbits_int(&netisr, n);
>
> @@ -878,6 +890,7 @@ if_netisr(void *unused)
> #endif
>
> splx(s);
> + rw_exit_write(&netlock);
> KERNEL_UNLOCK();
> }
>
> @@ -1435,6 +1448,7 @@ if_downall(void)
> struct ifnet *ifp;
> int s;
>
> + rw_enter_write(&netlock);
> s = splnet();
> TAILQ_FOREACH(ifp, &ifnet, if_list) {
> if ((ifp->if_flags & IFF_UP) == 0)
> @@ -1449,6 +1463,7 @@ if_downall(void)
> }
> }
> splx(s);
> + rw_exit_write(&netlock);
> }
>
> /*
> @@ -1508,9 +1523,11 @@ if_linkstate_task(void *xifidx)
> if (ifp == NULL)
> return;
>
> + rw_enter_write(&netlock);
> s = splsoftnet();
> if_linkstate(ifp);
> splx(s);
> + rw_exit_write(&netlock);
>
> if_put(ifp);
> }
> @@ -1518,6 +1535,7 @@ if_linkstate_task(void *xifidx)
> void
> if_linkstate(struct ifnet *ifp)
> {
> + rw_assert_wrlock(&netlock);
> splsoftassert(IPL_SOFTNET);
>
> rt_ifmsg(ifp);
> @@ -1708,6 +1726,18 @@ if_setrdomain(struct ifnet *ifp, int rdo
> */
> int
> ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
> +{
> + int error;
> +
> + rw_enter_write(&netlock);
> + error = if_ioctl_locked(so, cmd, data, p);
> + rw_exit_write(&netlock);
> +
> + return (error);
> +}
> +
> +int
> +if_ioctl_locked(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
> {
> struct ifnet *ifp;
> struct ifreq *ifr;
> Index: net/rtsock.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.207
> diff -u -p -r1.207 rtsock.c
> --- net/rtsock.c 27 Sep 2016 18:41:11 -0000 1.207
> +++ net/rtsock.c 4 Oct 2016 14:40:29 -0000
> @@ -296,6 +296,7 @@ route_ctloutput(int op, struct socket *s
> return (error);
> }
>
> +/* XXXSMP */
> void
> rt_senddesync(void *data)
> {
> Index: netinet/ip_carp.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_carp.c,v
> retrieving revision 1.294
> diff -u -p -r1.294 ip_carp.c
> --- netinet/ip_carp.c 4 Oct 2016 13:54:32 -0000 1.294
> +++ netinet/ip_carp.c 4 Oct 2016 14:40:29 -0000
> @@ -1045,6 +1045,7 @@ carp_send_ad(void *v)
> return;
> }
>
> + rw_enter_write(&netlock);
> s = splsoftnet();
>
> /* bow out if we've gone to backup (the carp interface is going down) */
> @@ -1247,6 +1248,7 @@ carp_send_ad(void *v)
> retry_later:
> sc->cur_vhe = NULL;
> splx(s);
> + rw_exit_write(&netlock);
> if (advbase != 255 || advskew != 255)
> timeout_add(&vhe->ad_tmo, tvtohz(&tv));
> }
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.282
> diff -u -p -r1.282 ip_input.c
> --- netinet/ip_input.c 22 Sep 2016 10:12:25 -0000 1.282
> +++ netinet/ip_input.c 4 Oct 2016 14:40:29 -0000
> @@ -1755,12 +1755,17 @@ ip_send_dispatch(void *xmq)
> int s;
>
> mq_delist(mq, &ml);
> + if (ml_empty(&ml))
> + return;
> +
> KERNEL_LOCK();
> + rw_enter_write(&netlock);
> s = splsoftnet();
> while ((m = ml_dequeue(&ml)) != NULL) {
> ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
> }
> splx(s);
> + rw_exit_write(&netlock);
> KERNEL_UNLOCK();
> }
>
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_output.c,v
> retrieving revision 1.327
> diff -u -p -r1.327 ip_output.c
> --- netinet/ip_output.c 4 Sep 2016 17:18:56 -0000 1.327
> +++ netinet/ip_output.c 4 Oct 2016 14:40:29 -0000
> @@ -109,6 +109,9 @@ ip_output(struct mbuf *m0, struct mbuf *
> int rv;
> #endif
>
> + /* Make sure this thread hold the correct lock. */
> + KASSERT(rw_status(&netlock) == RW_WRITE);
> +
> #ifdef IPSEC
> if (inp && (inp->inp_flags & INP_IPV6) != 0)
> panic("ip_output: IPv6 pcb is passed");
> Index: netinet/tcp_timer.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_timer.c,v
> retrieving revision 1.50
> diff -u -p -r1.50 tcp_timer.c
> --- netinet/tcp_timer.c 24 Sep 2016 14:51:37 -0000 1.50
> +++ netinet/tcp_timer.c 4 Oct 2016 14:40:29 -0000
> @@ -112,15 +112,15 @@ tcp_delack(void *arg)
> * for whatever reason, it will restart the delayed
> * ACK callout.
> */
> -
> + rw_enter_write(&netlock);
> s = splsoftnet();
> - if (tp->t_flags & TF_DEAD) {
> - splx(s);
> - return;
> - }
> + if (tp->t_flags & TF_DEAD)
> + goto out;
> tp->t_flags |= TF_ACKNOW;
> (void) tcp_output(tp);
> + out:
> splx(s);
> + rw_exit_write(&netlock);
> }
>
> /*
> @@ -193,11 +193,10 @@ tcp_timer_rexmt(void *arg)
> uint32_t rto;
> int s;
>
> + rw_enter_write(&netlock);
> s = splsoftnet();
> - if (tp->t_flags & TF_DEAD) {
> - splx(s);
> - return;
> - }
> + if (tp->t_flags & TF_DEAD)
> + goto out;
>
> if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
> SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
> @@ -224,8 +223,7 @@ tcp_timer_rexmt(void *arg)
> sin.sin_addr = tp->t_inpcb->inp_faddr;
> in_pcbnotifyall(&tcbtable, sintosa(&sin),
> tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc);
> - splx(s);
> - return;
> + goto out;
> }
>
> #ifdef TCP_SACK
> @@ -377,6 +375,7 @@ tcp_timer_rexmt(void *arg)
>
> out:
> splx(s);
> + rw_exit_write(&netlock);
> }
>
> void
> @@ -386,11 +385,11 @@ tcp_timer_persist(void *arg)
> uint32_t rto;
> int s;
>
> + rw_enter_write(&netlock);
> s = splsoftnet();
> if ((tp->t_flags & TF_DEAD) ||
> TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
> - splx(s);
> - return;
> + goto out;
> }
> tcpstat.tcps_persisttimeo++;
> /*
> @@ -416,6 +415,7 @@ tcp_timer_persist(void *arg)
> tp->t_force = 0;
> out:
> splx(s);
> + rw_exit_write(&netlock);
> }
>
> void
> @@ -424,11 +424,10 @@ tcp_timer_keep(void *arg)
> struct tcpcb *tp = arg;
> int s;
>
> + rw_enter_write(&netlock);
> s = splsoftnet();
> - if (tp->t_flags & TF_DEAD) {
> - splx(s);
> - return;
> - }
> + if (tp->t_flags & TF_DEAD)
> + goto out;
>
> tcpstat.tcps_keeptimeo++;
> if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
> @@ -457,8 +456,9 @@ tcp_timer_keep(void *arg)
> TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
> } else
> TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> -
> + out:
> splx(s);
> + rw_exit_write(&netlock);
> return;
>
> dropit:
> @@ -466,6 +466,7 @@ tcp_timer_keep(void *arg)
> tp = tcp_drop(tp, ETIMEDOUT);
>
> splx(s);
> + rw_exit_write(&netlock);
> }
>
> void
> @@ -474,11 +475,10 @@ tcp_timer_2msl(void *arg)
> struct tcpcb *tp = arg;
> int s;
>
> + rw_enter_write(&netlock);
> s = splsoftnet();
> - if (tp->t_flags & TF_DEAD) {
> - splx(s);
> - return;
> - }
> + if (tp->t_flags & TF_DEAD)
> + goto out;
>
> #ifdef TCP_SACK
> tcp_timer_freesack(tp);
> @@ -490,5 +490,7 @@ tcp_timer_2msl(void *arg)
> else
> tp = tcp_close(tp);
>
> + out:
> splx(s);
> + rw_exit_write(&netlock);
> }
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_input.c,v
> retrieving revision 1.329
> diff -u -p -r1.329 tcp_input.c
> --- netinet/tcp_input.c 4 Oct 2016 13:56:50 -0000 1.329
> +++ netinet/tcp_input.c 4 Oct 2016 14:40:29 -0000
> @@ -3522,11 +3522,10 @@ syn_cache_timer(void *arg)
> struct syn_cache *sc = arg;
> int s;
>
> + rw_enter_write(&netlock);
> s = splsoftnet();
> - if (sc->sc_flags & SCF_DEAD) {
> - splx(s);
> - return;
> - }
> + if (sc->sc_flags & SCF_DEAD)
> + goto out;
>
> if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
> /* Drop it -- too many retransmissions. */
> @@ -3549,7 +3548,9 @@ syn_cache_timer(void *arg)
> sc->sc_rxtshift++;
> SYN_CACHE_TIMER_ARM(sc);
>
> + out:
> splx(s);
> + rw_exit_write(&netlock);
> return;
>
> dropit:
> @@ -3557,6 +3558,7 @@ syn_cache_timer(void *arg)
> syn_cache_rm(sc);
> syn_cache_put(sc);
> splx(s);
> + rw_exit_write(&netlock);
> }
>
> void
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.168
> diff -u -p -r1.168 ip6_input.c
> --- netinet6/ip6_input.c 24 Aug 2016 09:41:12 -0000 1.168
> +++ netinet6/ip6_input.c 4 Oct 2016 14:40:29 -0000
> @@ -1429,12 +1429,17 @@ ip6_send_dispatch(void *xmq)
> int s;
>
> mq_delist(mq, &ml);
> + if (ml_empty(&ml))
> + return;
> +
> KERNEL_LOCK();
> + rw_enter_write(&netlock);
> s = splsoftnet();
> while ((m = ml_dequeue(&ml)) != NULL) {
> ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
> }
> splx(s);
> + rw_exit_write(&netlock);
> KERNEL_UNLOCK();
> }
>
> Index: sys/systm.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/systm.h,v
> retrieving revision 1.119
> diff -u -p -r1.119 systm.h
> --- sys/systm.h 24 Sep 2016 18:35:52 -0000 1.119
> +++ sys/systm.h 4 Oct 2016 14:40:29 -0000
> @@ -290,6 +290,11 @@ struct uio;
> int uiomove(void *, size_t, struct uio *);
>
> #if defined(_KERNEL)
> +/*
> + * Network lock: serialize socket operations.
> + */
> +extern struct rwlock netlock;
> +
> __returns_twice int setjmp(label_t *);
> __dead void longjmp(label_t *);
> #endif