On Tue, Oct 04, 2016 at 04:44:29PM +0200, Martin Pieuchot wrote: > On 10/03/16 16:43, Martin Pieuchot wrote: > > Diff below introduces a single write lock that will be used to serialize > > access to ip_output(). > > > > This lock will then be split into multiple readers and writers to allow > > multiple forwarding paths to run in parallel with each other but still > > serialized with the socket layer. > > > > I'm currently looking for people wanting to run this diff and try to > > break it. In other words, your machine might panic with it and if it > > does, report the panic to me so the diff can be improved. > > > > I tested NFS v2 and v3 so I'm quite confident, but I might have missed > > some obvious stuff. > > Updated diff attached including a fix for syn_cache_timer(), problem > reported by Chris Jackman. >
So far, so good, on i386 and amd64 vmm(4) VMs. booted, did a pkg_add upgrade, and cvsync. No issues seen so far. -ml > Index: kern/kern_rwlock.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_rwlock.c,v > retrieving revision 1.27 > diff -u -p -r1.27 kern_rwlock.c > --- kern/kern_rwlock.c 14 Mar 2015 07:33:42 -0000 1.27 > +++ kern/kern_rwlock.c 4 Oct 2016 14:40:29 -0000 > @@ -98,6 +98,12 @@ rw_enter_read(struct rwlock *rwl) > membar_enter(); > } > > +#if 1 > +#include <machine/db_machdep.h> > +#include <ddb/db_output.h> > +#include <ddb/db_interface.h> > +#endif > + > void > rw_enter_write(struct rwlock *rwl) > { > @@ -108,6 +114,15 @@ rw_enter_write(struct rwlock *rwl) > rw_enter(rwl, RW_WRITE); > else > membar_enter(); > + > +#if 1 > + if ((rwl == &netlock) && (splassert_ctl == 3)) { > + printf("ENTER::%d::", cpu_number()); > + db_stack_trace_print( > + (db_expr_t)__builtin_frame_address(1), > + TRUE, 1, "", printf); > + } > +#endif > } > > void > @@ -129,6 +144,15 @@ rw_exit_write(struct rwlock *rwl) > unsigned long owner = rwl->rwl_owner; > > rw_assert_wrlock(rwl); > + > +#if 1 > + if ((rwl == &netlock) && (splassert_ctl == 3)) { > + printf("EXIT::%d::", cpu_number()); > + db_stack_trace_print( > + (db_expr_t)__builtin_frame_address(1), > + TRUE, 1, "", printf); > + } > +#endif > > membar_exit(); > if (__predict_false((owner & RWLOCK_WAIT) || > Index: kern/sys_socket.c > =================================================================== > RCS file: /cvs/src/sys/kern/sys_socket.c,v > retrieving revision 1.21 > diff -u -p -r1.21 sys_socket.c > --- kern/sys_socket.c 5 Dec 2015 10:11:53 -0000 1.21 > +++ kern/sys_socket.c 4 Oct 2016 14:40:29 -0000 > @@ -131,8 +131,10 @@ soo_poll(struct file *fp, int events, st > { > struct socket *so = fp->f_data; > int revents = 0; > - int s = splsoftnet(); > + int s; > > + rw_enter_write(&netlock); > + s = splsoftnet(); > if (events & (POLLIN | POLLRDNORM)) { > if 
(soreadable(so)) > revents |= events & (POLLIN | POLLRDNORM); > @@ -159,6 +161,7 @@ soo_poll(struct file *fp, int events, st > } > } > splx(s); > + rw_exit_write(&netlock); > return (revents); > } > > Index: kern/uipc_socket.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.161 > diff -u -p -r1.161 uipc_socket.c > --- kern/uipc_socket.c 20 Sep 2016 14:27:43 -0000 1.161 > +++ kern/uipc_socket.c 4 Oct 2016 14:40:29 -0000 > @@ -123,6 +123,7 @@ socreate(int dom, struct socket **aso, i > return (EPROTONOSUPPORT); > if (prp->pr_type != type) > return (EPROTOTYPE); > + rw_enter_write(&netlock); > s = splsoftnet(); > so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO); > TAILQ_INIT(&so->so_q0); > @@ -142,9 +143,11 @@ socreate(int dom, struct socket **aso, i > so->so_state |= SS_NOFDREF; > sofree(so); > splx(s); > + rw_exit_write(&netlock); > return (error); > } > splx(s); > + rw_exit_write(&netlock); > *aso = so; > return (0); > } > @@ -152,11 +155,13 @@ socreate(int dom, struct socket **aso, i > int > sobind(struct socket *so, struct mbuf *nam, struct proc *p) > { > - int s = splsoftnet(); > - int error; > + int s, error; > > + rw_enter_write(&netlock); > + s = splsoftnet(); > error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p); > splx(s); > + rw_exit_write(&netlock); > return (error); > } > > @@ -171,11 +176,13 @@ solisten(struct socket *so, int backlog) > if (isspliced(so) || issplicedback(so)) > return (EOPNOTSUPP); > #endif /* SOCKET_SPLICE */ > + rw_enter_write(&netlock); > s = splsoftnet(); > error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL, > curproc); > if (error) { > splx(s); > + rw_exit_write(&netlock); > return (error); > } > if (TAILQ_FIRST(&so->so_q) == NULL) > @@ -186,6 +193,7 @@ solisten(struct socket *so, int backlog) > backlog = sominconn; > so->so_qlimit = backlog; > splx(s); > + rw_exit_write(&netlock); > return (0); > } > > @@ 
-196,6 +204,7 @@ solisten(struct socket *so, int backlog) > void > sofree(struct socket *so) > { > + rw_assert_wrlock(&netlock); > splsoftassert(IPL_SOFTNET); > > if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) > @@ -234,9 +243,10 @@ int > soclose(struct socket *so) > { > struct socket *so2; > - int s = splsoftnet(); /* conservative */ > - int error = 0; > + int s, error = 0; > > + rw_enter_write(&netlock); > + s = splsoftnet(); /* conservative */ > if (so->so_options & SO_ACCEPTCONN) { > while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { > (void) soqremque(so2, 0); > @@ -260,7 +270,7 @@ soclose(struct socket *so) > (so->so_state & SS_NBIO)) > goto drop; > while (so->so_state & SS_ISCONNECTED) { > - error = tsleep(&so->so_timeo, > + error = rwsleep(&so->so_timeo, &netlock, > PSOCK | PCATCH, "netcls", > so->so_linger * hz); > if (error) > @@ -281,6 +291,7 @@ discard: > so->so_state |= SS_NOFDREF; > sofree(so); > splx(s); > + rw_exit_write(&netlock); > return (error); > } > > @@ -290,6 +301,7 @@ discard: > int > soabort(struct socket *so) > { > + rw_assert_wrlock(&netlock); > splsoftassert(IPL_SOFTNET); > > return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL, > @@ -301,6 +313,7 @@ soaccept(struct socket *so, struct mbuf > { > int error = 0; > > + rw_assert_wrlock(&netlock); > splsoftassert(IPL_SOFTNET); > > if ((so->so_state & SS_NOFDREF) == 0) > @@ -318,11 +331,11 @@ soaccept(struct socket *so, struct mbuf > int > soconnect(struct socket *so, struct mbuf *nam) > { > - int s; > - int error; > + int s, error; > > if (so->so_options & SO_ACCEPTCONN) > return (EOPNOTSUPP); > + rw_enter_write(&netlock); > s = splsoftnet(); > /* > * If protocol is connection-based, can only connect once. 
> @@ -338,18 +351,21 @@ soconnect(struct socket *so, struct mbuf > error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, > NULL, nam, NULL, curproc); > splx(s); > + rw_exit_write(&netlock); > return (error); > } > > int > soconnect2(struct socket *so1, struct socket *so2) > { > - int s = splsoftnet(); > - int error; > + int s, error; > > + rw_enter_write(&netlock); > + s = splsoftnet(); > error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL, > (struct mbuf *)so2, NULL, curproc); > splx(s); > + rw_exit_write(&netlock); > return (error); > } > > @@ -358,14 +374,20 @@ sodisconnect(struct socket *so) > { > int error; > > + rw_assert_wrlock(&netlock); > splsoftassert(IPL_SOFTNET); > > - if ((so->so_state & SS_ISCONNECTED) == 0) > - return (ENOTCONN); > - if (so->so_state & SS_ISDISCONNECTING) > - return (EALREADY); > + if ((so->so_state & SS_ISCONNECTED) == 0) { > + error = ENOTCONN; > + goto bad; > + } > + if (so->so_state & SS_ISDISCONNECTING) { > + error = EALREADY; > + goto bad; > + } > error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL, > NULL, curproc); > +bad: > return (error); > } > > @@ -426,21 +448,21 @@ sosend(struct socket *so, struct mbuf *a > (sizeof(struct file *) / sizeof(int))); > } > > -#define snderr(errno) { error = errno; splx(s); goto release; } > +#define snderr(e) { error = e; splx(s); rw_exit_write(&netlock); goto > release; } > > restart: > if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) > goto out; > so->so_state |= SS_ISSENDING; > do { > + rw_enter_write(&netlock); > s = splsoftnet(); > if (so->so_state & SS_CANTSENDMORE) > snderr(EPIPE); > if (so->so_error) { > error = so->so_error; > so->so_error = 0; > - splx(s); > - goto release; > + snderr(error); > } > if ((so->so_state & SS_ISCONNECTED) == 0) { > if (so->so_proto->pr_flags & PR_CONNREQUIRED) { > @@ -465,11 +487,13 @@ restart: > error = sbwait(&so->so_snd); > so->so_state &= ~SS_ISSENDING; > splx(s); > + rw_exit_write(&netlock); > if (error) > goto out; > 
goto restart; > } > splx(s); > + rw_exit_write(&netlock); > space -= clen; > do { > if (uio == NULL) { > @@ -489,6 +513,7 @@ restart: > if (flags & MSG_EOR) > top->m_flags |= M_EOR; > } > + rw_enter_write(&netlock); > s = splsoftnet(); /* XXX */ > if (resid == 0) > so->so_state &= ~SS_ISSENDING; > @@ -496,6 +521,7 @@ restart: > (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, > top, addr, control, curproc); > splx(s); > + rw_exit_write(&netlock); > clen = 0; > control = NULL; > top = NULL; > @@ -625,8 +651,8 @@ sbsync(struct sockbuf *sb, struct mbuf * > * must begin with an address if the protocol so specifies, > * followed by an optional mbuf or mbufs containing ancillary data, > * and then zero or more mbufs of data. > - * In order to avoid blocking network interrupts for the entire time here, > - * we splx() while doing the actual copy to user space. > + * In order to avoid blocking network for the entire time here, we splx() > + * and release ``netlock'' while doing the actual copy to user space. > * Although the sockbuf is locked, new data may still be appended, > * and thus we must maintain consistency of the sockbuf during that time. 
> * > @@ -680,6 +706,8 @@ bad: > restart: > if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) > return (error); > + > + rw_enter_write(&netlock); > s = splsoftnet(); > > m = so->so_rcv.sb_mb; > @@ -746,6 +774,7 @@ restart: > sbunlock(&so->so_rcv); > error = sbwait(&so->so_rcv); > splx(s); > + rw_exit_write(&netlock); > if (error) > return (error); > goto restart; > @@ -880,7 +909,9 @@ dontblock: > SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); > resid = uio->uio_resid; > splx(s); > + rw_exit_write(&netlock); > uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio); > + rw_enter_write(&netlock); > s = splsoftnet(); > if (uio_error) > uio->uio_resid = resid - len; > @@ -964,6 +995,7 @@ dontblock: > if (error) { > sbunlock(&so->so_rcv); > splx(s); > + rw_exit_write(&netlock); > return (0); > } > if ((m = so->so_rcv.sb_mb) != NULL) > @@ -1000,6 +1032,7 @@ dontblock: > (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { > sbunlock(&so->so_rcv); > splx(s); > + rw_exit_write(&netlock); > goto restart; > } > > @@ -1011,6 +1044,7 @@ dontblock: > release: > sbunlock(&so->so_rcv); > splx(s); > + rw_exit_write(&netlock); > return (error); > } > > @@ -1020,6 +1054,7 @@ soshutdown(struct socket *so, int how) > struct protosw *pr = so->so_proto; > int s, error = 0; > > + rw_enter_write(&netlock); > s = splsoftnet(); > switch (how) { > case SHUT_RD: > @@ -1037,6 +1072,8 @@ soshutdown(struct socket *so, int how) > break; > } > splx(s); > + rw_exit_write(&netlock); > + > return (error); > } > > @@ -1050,6 +1087,7 @@ sorflush(struct socket *so) > > sb->sb_flags |= SB_NOINTR; > (void) sblock(sb, M_WAITOK); > + /* XXXSMP */ > s = splnet(); > socantrcvmore(so); > sbunlock(sb); > @@ -1103,10 +1141,12 @@ sosplice(struct socket *so, int fd, off_ > if ((error = sblock(&so->so_rcv, > (so->so_state & SS_NBIO) ? 
M_NOWAIT : M_WAITOK)) != 0) > return (error); > + rw_enter_write(&netlock); > s = splsoftnet(); > if (so->so_sp->ssp_socket) > sounsplice(so, so->so_sp->ssp_socket, 1); > splx(s); > + rw_exit_write(&netlock); > sbunlock(&so->so_rcv); > return (0); > } > @@ -1135,6 +1175,7 @@ sosplice(struct socket *so, int fd, off_ > FRELE(fp, curproc); > return (error); > } > + rw_enter_write(&netlock); > s = splsoftnet(); > > if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) { > @@ -1177,6 +1218,7 @@ sosplice(struct socket *so, int fd, off_ > > release: > splx(s); > + rw_exit_write(&netlock); > sbunlock(&sosp->so_snd); > sbunlock(&so->so_rcv); > FRELE(fp, curproc); > @@ -1186,6 +1228,7 @@ sosplice(struct socket *so, int fd, off_ > void > sounsplice(struct socket *so, struct socket *sosp, int wakeup) > { > + rw_assert_wrlock(&netlock); > splsoftassert(IPL_SOFTNET); > > task_del(sosplice_taskq, &so->so_splicetask); > @@ -1203,12 +1246,14 @@ soidle(void *arg) > struct socket *so = arg; > int s; > > + rw_enter_write(&netlock); > s = splsoftnet(); > if (so->so_rcv.sb_flagsintr & SB_SPLICE) { > so->so_error = ETIMEDOUT; > sounsplice(so, so->so_sp->ssp_socket, 1); > } > splx(s); > + rw_exit_write(&netlock); > } > > void > @@ -1217,6 +1262,7 @@ sotask(void *arg) > struct socket *so = arg; > int s; > > + rw_enter_write(&netlock); > s = splsoftnet(); > if (so->so_rcv.sb_flagsintr & SB_SPLICE) { > /* > @@ -1227,6 +1273,7 @@ sotask(void *arg) > somove(so, M_DONTWAIT); > } > splx(s); > + rw_exit_write(&netlock); > > /* Avoid user land starvation. 
*/ > yield(); > @@ -1248,6 +1295,7 @@ somove(struct socket *so, int wait) > int error = 0, maxreached = 0; > short state; > > + rw_assert_wrlock(&netlock); > splsoftassert(IPL_SOFTNET); > > nextpkt: > @@ -1510,6 +1558,7 @@ somove(struct socket *so, int wait) > void > sorwakeup(struct socket *so) > { > + rw_assert_wrlock(&netlock); > splsoftassert(IPL_SOFTNET); > > #ifdef SOCKET_SPLICE > @@ -1531,13 +1580,17 @@ sorwakeup(struct socket *so) > return; > #endif > sowakeup(so, &so->so_rcv); > - if (so->so_upcall) > + if (so->so_upcall) { > + rw_exit_write(&netlock); > (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT); > + rw_enter_write(&netlock); > + } > } > > void > sowwakeup(struct socket *so) > { > + rw_assert_wrlock(&netlock); > splsoftassert(IPL_SOFTNET); > > #ifdef SOCKET_SPLICE > @@ -1884,7 +1937,8 @@ soo_kqfilter(struct file *fp, struct kno > { > struct socket *so = kn->kn_fp->f_data; > struct sockbuf *sb; > - int s; > + > + KERNEL_ASSERT_LOCKED(); > > switch (kn->kn_filter) { > case EVFILT_READ: > @@ -1902,10 +1956,9 @@ soo_kqfilter(struct file *fp, struct kno > return (EINVAL); > } > > - s = splnet(); > SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext); > sb->sb_flags |= SB_KNOTE; > - splx(s); > + > return (0); > } > > @@ -1913,12 +1966,12 @@ void > filt_sordetach(struct knote *kn) > { > struct socket *so = kn->kn_fp->f_data; > - int s = splnet(); > + > + KERNEL_ASSERT_LOCKED(); > > SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext); > if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note)) > so->so_rcv.sb_flags &= ~SB_KNOTE; > - splx(s); > } > > int > @@ -1947,12 +2000,12 @@ void > filt_sowdetach(struct knote *kn) > { > struct socket *so = kn->kn_fp->f_data; > - int s = splnet(); > + > + KERNEL_ASSERT_LOCKED(); > > SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext); > if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note)) > so->so_snd.sb_flags &= ~SB_KNOTE; > - splx(s); > } > > int > Index: kern/uipc_socket2.c > 
=================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket2.c,v > retrieving revision 1.65 > diff -u -p -r1.65 uipc_socket2.c > --- kern/uipc_socket2.c 2 Sep 2016 13:28:21 -0000 1.65 > +++ kern/uipc_socket2.c 4 Oct 2016 14:40:29 -0000 > @@ -138,8 +138,6 @@ soisdisconnected(struct socket *so) > * then we allocate a new structure, properly linked into the > * data structure of the original socket, and return this. > * Connstatus may be 0 or SS_ISCONNECTED. > - * > - * Must be called at splsoftnet() > */ > struct socket * > sonewconn(struct socket *head, int connstatus) > @@ -147,6 +145,7 @@ sonewconn(struct socket *head, int conns > struct socket *so; > int soqueue = connstatus ? 1 : 0; > > + rw_assert_wrlock(&netlock); > splsoftassert(IPL_SOFTNET); > > if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 95 / 100) > @@ -276,10 +275,11 @@ socantrcvmore(struct socket *so) > int > sbwait(struct sockbuf *sb) > { > + rw_assert_wrlock(&netlock); > splsoftassert(IPL_SOFTNET); > > sb->sb_flagsintr |= SB_WAIT; > - return (tsleep(&sb->sb_cc, > + return (rwsleep(&sb->sb_cc, &netlock, > (sb->sb_flags & SB_NOINTR) ? 
PSOCK : PSOCK | PCATCH, "netio", > sb->sb_timeo)); > } > @@ -317,7 +317,8 @@ sbunlock(struct sockbuf *sb) > void > sowakeup(struct socket *so, struct sockbuf *sb) > { > - int s = splsoftnet(); > + rw_assert_wrlock(&netlock); > + splassert(IPL_SOFTNET); > > selwakeup(&sb->sb_sel); > sb->sb_flagsintr &= ~SB_SEL; > @@ -325,7 +326,7 @@ sowakeup(struct socket *so, struct sockb > sb->sb_flagsintr &= ~SB_WAIT; > wakeup(&sb->sb_cc); > } > - splx(s); > + > if (so->so_state & SS_ASYNC) > csignal(so->so_pgid, SIGIO, so->so_siguid, so->so_sigeuid); > } > Index: kern/uipc_syscalls.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v > retrieving revision 1.133 > diff -u -p -r1.133 uipc_syscalls.c > --- kern/uipc_syscalls.c 9 Aug 2016 02:25:35 -0000 1.133 > +++ kern/uipc_syscalls.c 4 Oct 2016 14:40:29 -0000 > @@ -250,6 +250,7 @@ doaccept(struct proc *p, int sock, struc > if ((error = getsock(p, sock, &fp)) != 0) > return (error); > > + rw_enter_write(&netlock); > s = splsoftnet(); > headfp = fp; > head = fp->f_data; > @@ -275,7 +276,8 @@ redo: > head->so_error = ECONNABORTED; > break; > } > - error = tsleep(&head->so_timeo, PSOCK | PCATCH, "netcon", 0); > + error = rwsleep(&head->so_timeo, &netlock, PSOCK | PCATCH, > + "netcon", 0); > if (error) { > goto bad; > } > @@ -352,6 +354,7 @@ redo: > m_freem(nam); > bad: > splx(s); > + rw_exit_write(&netlock); > FRELE(headfp, p); > return (error); > } > @@ -406,9 +409,11 @@ sys_connect(struct proc *p, void *v, reg > m_freem(nam); > return (EINPROGRESS); > } > + rw_enter_write(&netlock); > s = splsoftnet(); > while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { > - error = tsleep(&so->so_timeo, PSOCK | PCATCH, "netcon2", 0); > + error = rwsleep(&so->so_timeo, &netlock, PSOCK | PCATCH, > + "netcon2", 0); > if (error) { > if (error == EINTR || error == ERESTART) > interrupted = 1; > @@ -420,6 +425,7 @@ sys_connect(struct proc *p, void *v, reg > so->so_error = 0; 
> } > splx(s); > + rw_exit_write(&netlock); > bad: > if (!interrupted) > so->so_state &= ~SS_ISCONNECTING; > Index: kern/uipc_usrreq.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v > retrieving revision 1.102 > diff -u -p -r1.102 uipc_usrreq.c > --- kern/uipc_usrreq.c 26 Aug 2016 07:12:30 -0000 1.102 > +++ kern/uipc_usrreq.c 4 Oct 2016 14:40:29 -0000 > @@ -131,7 +131,10 @@ uipc_usrreq(struct socket *so, int req, > break; > > case PRU_BIND: > + rw_assert_wrlock(&netlock); > + rw_exit_write(&netlock); > error = unp_bind(unp, nam, p); > + rw_enter_write(&netlock); > break; > > case PRU_LISTEN: > Index: net/if.c > =================================================================== > RCS file: /cvs/src/sys/net/if.c,v > retrieving revision 1.452 > diff -u -p -r1.452 if.c > --- net/if.c 3 Oct 2016 12:26:13 -0000 1.452 > +++ net/if.c 4 Oct 2016 14:40:29 -0000 > @@ -163,7 +163,13 @@ void if_netisr(void *); > void ifa_print_all(void); > #endif > > -void if_start_locked(struct ifnet *ifp); > +void if_start_locked(struct ifnet *); > +int if_ioctl_locked(struct socket *, u_long, caddr_t, struct proc *); > + > +/* > + * Network lock: serialize socket operations. > + */ > +struct rwlock netlock = RWLOCK_INITIALIZER("netlock"); > > /* > * interface index map > @@ -836,10 +842,16 @@ if_netisr(void *unused) > int s; > > KERNEL_LOCK(); > + rw_enter_write(&netlock); > s = splsoftnet(); > > while ((n = netisr) != 0) { > - sched_pause(); > + /* Like sched_pause() but with a rwlock dance. 
*/ > + if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) { > + rw_exit_write(&netlock); > + yield(); > + rw_enter_write(&netlock); > + } > > atomic_clearbits_int(&netisr, n); > > @@ -878,6 +890,7 @@ if_netisr(void *unused) > #endif > > splx(s); > + rw_exit_write(&netlock); > KERNEL_UNLOCK(); > } > > @@ -1435,6 +1448,7 @@ if_downall(void) > struct ifnet *ifp; > int s; > > + rw_enter_write(&netlock); > s = splnet(); > TAILQ_FOREACH(ifp, &ifnet, if_list) { > if ((ifp->if_flags & IFF_UP) == 0) > @@ -1449,6 +1463,7 @@ if_downall(void) > } > } > splx(s); > + rw_exit_write(&netlock); > } > > /* > @@ -1508,9 +1523,11 @@ if_linkstate_task(void *xifidx) > if (ifp == NULL) > return; > > + rw_enter_write(&netlock); > s = splsoftnet(); > if_linkstate(ifp); > splx(s); > + rw_exit_write(&netlock); > > if_put(ifp); > } > @@ -1518,6 +1535,7 @@ if_linkstate_task(void *xifidx) > void > if_linkstate(struct ifnet *ifp) > { > + rw_assert_wrlock(&netlock); > splsoftassert(IPL_SOFTNET); > > rt_ifmsg(ifp); > @@ -1708,6 +1726,18 @@ if_setrdomain(struct ifnet *ifp, int rdo > */ > int > ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) > +{ > + int error; > + > + rw_enter_write(&netlock); > + error = if_ioctl_locked(so, cmd, data, p); > + rw_exit_write(&netlock); > + > + return (error); > +} > + > +int > +if_ioctl_locked(struct socket *so, u_long cmd, caddr_t data, struct proc *p) > { > struct ifnet *ifp; > struct ifreq *ifr; > Index: net/rtsock.c > =================================================================== > RCS file: /cvs/src/sys/net/rtsock.c,v > retrieving revision 1.207 > diff -u -p -r1.207 rtsock.c > --- net/rtsock.c 27 Sep 2016 18:41:11 -0000 1.207 > +++ net/rtsock.c 4 Oct 2016 14:40:29 -0000 > @@ -296,6 +296,7 @@ route_ctloutput(int op, struct socket *s > return (error); > } > > +/* XXXSMP */ > void > rt_senddesync(void *data) > { > Index: netinet/ip_carp.c > =================================================================== > RCS file: 
/cvs/src/sys/netinet/ip_carp.c,v > retrieving revision 1.294 > diff -u -p -r1.294 ip_carp.c > --- netinet/ip_carp.c 4 Oct 2016 13:54:32 -0000 1.294 > +++ netinet/ip_carp.c 4 Oct 2016 14:40:29 -0000 > @@ -1045,6 +1045,7 @@ carp_send_ad(void *v) > return; > } > > + rw_enter_write(&netlock); > s = splsoftnet(); > > /* bow out if we've gone to backup (the carp interface is going down) */ > @@ -1247,6 +1248,7 @@ carp_send_ad(void *v) > retry_later: > sc->cur_vhe = NULL; > splx(s); > + rw_exit_write(&netlock); > if (advbase != 255 || advskew != 255) > timeout_add(&vhe->ad_tmo, tvtohz(&tv)); > } > Index: netinet/ip_input.c > =================================================================== > RCS file: /cvs/src/sys/netinet/ip_input.c,v > retrieving revision 1.282 > diff -u -p -r1.282 ip_input.c > --- netinet/ip_input.c 22 Sep 2016 10:12:25 -0000 1.282 > +++ netinet/ip_input.c 4 Oct 2016 14:40:29 -0000 > @@ -1755,12 +1755,17 @@ ip_send_dispatch(void *xmq) > int s; > > mq_delist(mq, &ml); > + if (ml_empty(&ml)) > + return; > + > KERNEL_LOCK(); > + rw_enter_write(&netlock); > s = splsoftnet(); > while ((m = ml_dequeue(&ml)) != NULL) { > ip_output(m, NULL, NULL, 0, NULL, NULL, 0); > } > splx(s); > + rw_exit_write(&netlock); > KERNEL_UNLOCK(); > } > > Index: netinet/ip_output.c > =================================================================== > RCS file: /cvs/src/sys/netinet/ip_output.c,v > retrieving revision 1.327 > diff -u -p -r1.327 ip_output.c > --- netinet/ip_output.c 4 Sep 2016 17:18:56 -0000 1.327 > +++ netinet/ip_output.c 4 Oct 2016 14:40:29 -0000 > @@ -109,6 +109,9 @@ ip_output(struct mbuf *m0, struct mbuf * > int rv; > #endif > > + /* Make sure this thread hold the correct lock. 
*/ > + KASSERT(rw_status(&netlock) == RW_WRITE); > + > #ifdef IPSEC > if (inp && (inp->inp_flags & INP_IPV6) != 0) > panic("ip_output: IPv6 pcb is passed"); > Index: netinet/tcp_timer.c > =================================================================== > RCS file: /cvs/src/sys/netinet/tcp_timer.c,v > retrieving revision 1.50 > diff -u -p -r1.50 tcp_timer.c > --- netinet/tcp_timer.c 24 Sep 2016 14:51:37 -0000 1.50 > +++ netinet/tcp_timer.c 4 Oct 2016 14:40:29 -0000 > @@ -112,15 +112,15 @@ tcp_delack(void *arg) > * for whatever reason, it will restart the delayed > * ACK callout. > */ > - > + rw_enter_write(&netlock); > s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + if (tp->t_flags & TF_DEAD) > + goto out; > tp->t_flags |= TF_ACKNOW; > (void) tcp_output(tp); > + out: > splx(s); > + rw_exit_write(&netlock); > } > > /* > @@ -193,11 +193,10 @@ tcp_timer_rexmt(void *arg) > uint32_t rto; > int s; > > + rw_enter_write(&netlock); > s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + if (tp->t_flags & TF_DEAD) > + goto out; > > if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb && > SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) && > @@ -224,8 +223,7 @@ tcp_timer_rexmt(void *arg) > sin.sin_addr = tp->t_inpcb->inp_faddr; > in_pcbnotifyall(&tcbtable, sintosa(&sin), > tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc); > - splx(s); > - return; > + goto out; > } > > #ifdef TCP_SACK > @@ -377,6 +375,7 @@ tcp_timer_rexmt(void *arg) > > out: > splx(s); > + rw_exit_write(&netlock); > } > > void > @@ -386,11 +385,11 @@ tcp_timer_persist(void *arg) > uint32_t rto; > int s; > > + rw_enter_write(&netlock); > s = splsoftnet(); > if ((tp->t_flags & TF_DEAD) || > TCP_TIMER_ISARMED(tp, TCPT_REXMT)) { > - splx(s); > - return; > + goto out; > } > tcpstat.tcps_persisttimeo++; > /* > @@ -416,6 +415,7 @@ tcp_timer_persist(void *arg) > tp->t_force = 0; > out: > splx(s); > + rw_exit_write(&netlock); > } > > void > @@ -424,11 
+424,10 @@ tcp_timer_keep(void *arg) > struct tcpcb *tp = arg; > int s; > > + rw_enter_write(&netlock); > s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + if (tp->t_flags & TF_DEAD) > + goto out; > > tcpstat.tcps_keeptimeo++; > if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) > @@ -457,8 +456,9 @@ tcp_timer_keep(void *arg) > TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); > } else > TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); > - > + out: > splx(s); > + rw_exit_write(&netlock); > return; > > dropit: > @@ -466,6 +466,7 @@ tcp_timer_keep(void *arg) > tp = tcp_drop(tp, ETIMEDOUT); > > splx(s); > + rw_exit_write(&netlock); > } > > void > @@ -474,11 +475,10 @@ tcp_timer_2msl(void *arg) > struct tcpcb *tp = arg; > int s; > > + rw_enter_write(&netlock); > s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + if (tp->t_flags & TF_DEAD) > + goto out; > > #ifdef TCP_SACK > tcp_timer_freesack(tp); > @@ -490,5 +490,7 @@ tcp_timer_2msl(void *arg) > else > tp = tcp_close(tp); > > + out: > splx(s); > + rw_exit_write(&netlock); > } > Index: netinet/tcp_input.c > =================================================================== > RCS file: /cvs/src/sys/netinet/tcp_input.c,v > retrieving revision 1.329 > diff -u -p -r1.329 tcp_input.c > --- netinet/tcp_input.c 4 Oct 2016 13:56:50 -0000 1.329 > +++ netinet/tcp_input.c 4 Oct 2016 14:40:29 -0000 > @@ -3522,11 +3522,10 @@ syn_cache_timer(void *arg) > struct syn_cache *sc = arg; > int s; > > + rw_enter_write(&netlock); > s = splsoftnet(); > - if (sc->sc_flags & SCF_DEAD) { > - splx(s); > - return; > - } > + if (sc->sc_flags & SCF_DEAD) > + goto out; > > if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) { > /* Drop it -- too many retransmissions. 
*/ > @@ -3549,7 +3548,9 @@ syn_cache_timer(void *arg) > sc->sc_rxtshift++; > SYN_CACHE_TIMER_ARM(sc); > > + out: > splx(s); > + rw_exit_write(&netlock); > return; > > dropit: > @@ -3557,6 +3558,7 @@ syn_cache_timer(void *arg) > syn_cache_rm(sc); > syn_cache_put(sc); > splx(s); > + rw_exit_write(&netlock); > } > > void > Index: netinet6/ip6_input.c > =================================================================== > RCS file: /cvs/src/sys/netinet6/ip6_input.c,v > retrieving revision 1.168 > diff -u -p -r1.168 ip6_input.c > --- netinet6/ip6_input.c 24 Aug 2016 09:41:12 -0000 1.168 > +++ netinet6/ip6_input.c 4 Oct 2016 14:40:29 -0000 > @@ -1429,12 +1429,17 @@ ip6_send_dispatch(void *xmq) > int s; > > mq_delist(mq, &ml); > + if (ml_empty(&ml)) > + return; > + > KERNEL_LOCK(); > + rw_enter_write(&netlock); > s = splsoftnet(); > while ((m = ml_dequeue(&ml)) != NULL) { > ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL); > } > splx(s); > + rw_exit_write(&netlock); > KERNEL_UNLOCK(); > } > > Index: sys/systm.h > =================================================================== > RCS file: /cvs/src/sys/sys/systm.h,v > retrieving revision 1.119 > diff -u -p -r1.119 systm.h > --- sys/systm.h 24 Sep 2016 18:35:52 -0000 1.119 > +++ sys/systm.h 4 Oct 2016 14:40:29 -0000 > @@ -290,6 +290,11 @@ struct uio; > int uiomove(void *, size_t, struct uio *); > > #if defined(_KERNEL) > +/* > + * Network lock: serialize socket operations. > + */ > +extern struct rwlock netlock; > + > __returns_twice int setjmp(label_t *); > __dead void longjmp(label_t *); > #endif