Diff below introduces a single write lock that will be used to serialize
access to ip_output().
This lock will then be split into multiple readers and writers to allow
multiple forwarding paths to run in parallel with each other while still
being serialized with the socket layer.
I'm currently looking for people willing to run this diff and try to
break it. In other words, your machine might panic with it; if it
does, report the panic to me so the diff can be improved.
I tested NFS v2 and v3, so I'm quite confident, but I might have missed
some obvious stuff.
PS: This diff includes the timeout_set_proc() diff I just sent.
Index: kern/kern_rwlock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.27
diff -u -p -r1.27 kern_rwlock.c
--- kern/kern_rwlock.c 14 Mar 2015 07:33:42 -0000 1.27
+++ kern/kern_rwlock.c 3 Oct 2016 12:59:16 -0000
@@ -98,6 +98,12 @@ rw_enter_read(struct rwlock *rwl)
membar_enter();
}
+#if 1
+#include <machine/db_machdep.h>
+#include <ddb/db_output.h>
+#include <ddb/db_interface.h>
+#endif
+
void
rw_enter_write(struct rwlock *rwl)
{
@@ -108,6 +114,15 @@ rw_enter_write(struct rwlock *rwl)
rw_enter(rwl, RW_WRITE);
else
membar_enter();
+
+#if 1
+ if ((rwl == &netlock) && (splassert_ctl == 3)) {
+ printf("ENTER::%d::", cpu_number());
+ db_stack_trace_print(
+ (db_expr_t)__builtin_frame_address(1),
+ TRUE, 1, "", printf);
+ }
+#endif
}
void
@@ -129,6 +144,15 @@ rw_exit_write(struct rwlock *rwl)
unsigned long owner = rwl->rwl_owner;
rw_assert_wrlock(rwl);
+
+#if 1
+ if ((rwl == &netlock) && (splassert_ctl == 3)) {
+ printf("EXIT::%d::", cpu_number());
+ db_stack_trace_print(
+ (db_expr_t)__builtin_frame_address(1),
+ TRUE, 1, "", printf);
+ }
+#endif
membar_exit();
if (__predict_false((owner & RWLOCK_WAIT) ||
Index: kern/sys_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_socket.c,v
retrieving revision 1.21
diff -u -p -r1.21 sys_socket.c
--- kern/sys_socket.c 5 Dec 2015 10:11:53 -0000 1.21
+++ kern/sys_socket.c 3 Oct 2016 12:59:16 -0000
@@ -131,8 +131,10 @@ soo_poll(struct file *fp, int events, st
{
struct socket *so = fp->f_data;
int revents = 0;
- int s = splsoftnet();
+ int s;
+ rw_enter_write(&netlock);
+ s = splsoftnet();
if (events & (POLLIN | POLLRDNORM)) {
if (soreadable(so))
revents |= events & (POLLIN | POLLRDNORM);
@@ -159,6 +161,7 @@ soo_poll(struct file *fp, int events, st
}
}
splx(s);
+ rw_exit_write(&netlock);
return (revents);
}
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.161
diff -u -p -r1.161 uipc_socket.c
--- kern/uipc_socket.c 20 Sep 2016 14:27:43 -0000 1.161
+++ kern/uipc_socket.c 3 Oct 2016 12:59:16 -0000
@@ -123,6 +123,7 @@ socreate(int dom, struct socket **aso, i
return (EPROTONOSUPPORT);
if (prp->pr_type != type)
return (EPROTOTYPE);
+ rw_enter_write(&netlock);
s = splsoftnet();
so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
TAILQ_INIT(&so->so_q0);
@@ -142,9 +143,11 @@ socreate(int dom, struct socket **aso, i
so->so_state |= SS_NOFDREF;
sofree(so);
splx(s);
+ rw_exit_write(&netlock);
return (error);
}
splx(s);
+ rw_exit_write(&netlock);
*aso = so;
return (0);
}
@@ -152,11 +155,13 @@ socreate(int dom, struct socket **aso, i
int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
- int s = splsoftnet();
- int error;
+ int s, error;
+ rw_enter_write(&netlock);
+ s = splsoftnet();
error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
splx(s);
+ rw_exit_write(&netlock);
return (error);
}
@@ -171,11 +176,13 @@ solisten(struct socket *so, int backlog)
if (isspliced(so) || issplicedback(so))
return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
+ rw_enter_write(&netlock);
s = splsoftnet();
error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
curproc);
if (error) {
splx(s);
+ rw_exit_write(&netlock);
return (error);
}
if (TAILQ_FIRST(&so->so_q) == NULL)
@@ -186,6 +193,7 @@ solisten(struct socket *so, int backlog)
backlog = sominconn;
so->so_qlimit = backlog;
splx(s);
+ rw_exit_write(&netlock);
return (0);
}
@@ -196,6 +204,7 @@ solisten(struct socket *so, int backlog)
void
sofree(struct socket *so)
{
+ rw_assert_wrlock(&netlock);
splsoftassert(IPL_SOFTNET);
if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
@@ -234,9 +243,10 @@ int
soclose(struct socket *so)
{
struct socket *so2;
- int s = splsoftnet(); /* conservative */
- int error = 0;
+ int s, error = 0;
+ rw_enter_write(&netlock);
+ s = splsoftnet(); /* conservative */
if (so->so_options & SO_ACCEPTCONN) {
while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
(void) soqremque(so2, 0);
@@ -260,7 +270,7 @@ soclose(struct socket *so)
(so->so_state & SS_NBIO))
goto drop;
while (so->so_state & SS_ISCONNECTED) {
- error = tsleep(&so->so_timeo,
+ error = rwsleep(&so->so_timeo, &netlock,
PSOCK | PCATCH, "netcls",
so->so_linger * hz);
if (error)
@@ -281,6 +291,7 @@ discard:
so->so_state |= SS_NOFDREF;
sofree(so);
splx(s);
+ rw_exit_write(&netlock);
return (error);
}
@@ -290,6 +301,7 @@ discard:
int
soabort(struct socket *so)
{
+ rw_assert_wrlock(&netlock);
splsoftassert(IPL_SOFTNET);
return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
@@ -301,6 +313,7 @@ soaccept(struct socket *so, struct mbuf
{
int error = 0;
+ rw_assert_wrlock(&netlock);
splsoftassert(IPL_SOFTNET);
if ((so->so_state & SS_NOFDREF) == 0)
@@ -318,11 +331,11 @@ soaccept(struct socket *so, struct mbuf
int
soconnect(struct socket *so, struct mbuf *nam)
{
- int s;
- int error;
+ int s, error;
if (so->so_options & SO_ACCEPTCONN)
return (EOPNOTSUPP);
+ rw_enter_write(&netlock);
s = splsoftnet();
/*
* If protocol is connection-based, can only connect once.
@@ -338,18 +351,21 @@ soconnect(struct socket *so, struct mbuf
error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
NULL, nam, NULL, curproc);
splx(s);
+ rw_exit_write(&netlock);
return (error);
}
int
soconnect2(struct socket *so1, struct socket *so2)
{
- int s = splsoftnet();
- int error;
+ int s, error;
+ rw_enter_write(&netlock);
+ s = splsoftnet();
error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
(struct mbuf *)so2, NULL, curproc);
splx(s);
+ rw_exit_write(&netlock);
return (error);
}
@@ -358,14 +374,20 @@ sodisconnect(struct socket *so)
{
int error;
+ rw_assert_wrlock(&netlock);
splsoftassert(IPL_SOFTNET);
- if ((so->so_state & SS_ISCONNECTED) == 0)
- return (ENOTCONN);
- if (so->so_state & SS_ISDISCONNECTING)
- return (EALREADY);
+ if ((so->so_state & SS_ISCONNECTED) == 0) {
+ error = ENOTCONN;
+ goto bad;
+ }
+ if (so->so_state & SS_ISDISCONNECTING) {
+ error = EALREADY;
+ goto bad;
+ }
error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
NULL, curproc);
+bad:
return (error);
}
@@ -426,21 +448,21 @@ sosend(struct socket *so, struct mbuf *a
(sizeof(struct file *) / sizeof(int)));
}
-#define snderr(errno) { error = errno; splx(s); goto release; }
+#define snderr(e) { error = e; splx(s); rw_exit_write(&netlock); goto
release; }
restart:
if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
goto out;
so->so_state |= SS_ISSENDING;
do {
+ rw_enter_write(&netlock);
s = splsoftnet();
if (so->so_state & SS_CANTSENDMORE)
snderr(EPIPE);
if (so->so_error) {
error = so->so_error;
so->so_error = 0;
- splx(s);
- goto release;
+ snderr(error);
}
if ((so->so_state & SS_ISCONNECTED) == 0) {
if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
@@ -465,11 +487,13 @@ restart:
error = sbwait(&so->so_snd);
so->so_state &= ~SS_ISSENDING;
splx(s);
+ rw_exit_write(&netlock);
if (error)
goto out;
goto restart;
}
splx(s);
+ rw_exit_write(&netlock);
space -= clen;
do {
if (uio == NULL) {
@@ -489,6 +513,7 @@ restart:
if (flags & MSG_EOR)
top->m_flags |= M_EOR;
}
+ rw_enter_write(&netlock);
s = splsoftnet(); /* XXX */
if (resid == 0)
so->so_state &= ~SS_ISSENDING;
@@ -496,6 +521,7 @@ restart:
(flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
top, addr, control, curproc);
splx(s);
+ rw_exit_write(&netlock);
clen = 0;
control = NULL;
top = NULL;
@@ -625,8 +651,8 @@ sbsync(struct sockbuf *sb, struct mbuf *
* must begin with an address if the protocol so specifies,
* followed by an optional mbuf or mbufs containing ancillary data,
* and then zero or more mbufs of data.
- * In order to avoid blocking network interrupts for the entire time here,
- * we splx() while doing the actual copy to user space.
+ * In order to avoid blocking network for the entire time here, we splx()
+ * and release ``netlock'' while doing the actual copy to user space.
* Although the sockbuf is locked, new data may still be appended,
* and thus we must maintain consistency of the sockbuf during that time.
*
@@ -680,6 +706,8 @@ bad:
restart:
if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
return (error);
+
+ rw_enter_write(&netlock);
s = splsoftnet();
m = so->so_rcv.sb_mb;
@@ -746,6 +774,7 @@ restart:
sbunlock(&so->so_rcv);
error = sbwait(&so->so_rcv);
splx(s);
+ rw_exit_write(&netlock);
if (error)
return (error);
goto restart;
@@ -880,7 +909,9 @@ dontblock:
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
splx(s);
+ rw_exit_write(&netlock);
uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
+ rw_enter_write(&netlock);
s = splsoftnet();
if (uio_error)
uio->uio_resid = resid - len;
@@ -964,6 +995,7 @@ dontblock:
if (error) {
sbunlock(&so->so_rcv);
splx(s);
+ rw_exit_write(&netlock);
return (0);
}
if ((m = so->so_rcv.sb_mb) != NULL)
@@ -1000,6 +1032,7 @@ dontblock:
(flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
sbunlock(&so->so_rcv);
splx(s);
+ rw_exit_write(&netlock);
goto restart;
}
@@ -1011,6 +1044,7 @@ dontblock:
release:
sbunlock(&so->so_rcv);
splx(s);
+ rw_exit_write(&netlock);
return (error);
}
@@ -1020,6 +1054,7 @@ soshutdown(struct socket *so, int how)
struct protosw *pr = so->so_proto;
int s, error = 0;
+ rw_enter_write(&netlock);
s = splsoftnet();
switch (how) {
case SHUT_RD:
@@ -1037,6 +1072,8 @@ soshutdown(struct socket *so, int how)
break;
}
splx(s);
+ rw_exit_write(&netlock);
+
return (error);
}
@@ -1050,6 +1087,7 @@ sorflush(struct socket *so)
sb->sb_flags |= SB_NOINTR;
(void) sblock(sb, M_WAITOK);
+ /* XXXSMP */
s = splnet();
socantrcvmore(so);
sbunlock(sb);
@@ -1103,10 +1141,12 @@ sosplice(struct socket *so, int fd, off_
if ((error = sblock(&so->so_rcv,
(so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0)
return (error);
+ rw_enter_write(&netlock);
s = splsoftnet();
if (so->so_sp->ssp_socket)
sounsplice(so, so->so_sp->ssp_socket, 1);
splx(s);
+ rw_exit_write(&netlock);
sbunlock(&so->so_rcv);
return (0);
}
@@ -1135,6 +1175,7 @@ sosplice(struct socket *so, int fd, off_
FRELE(fp, curproc);
return (error);
}
+ rw_enter_write(&netlock);
s = splsoftnet();
if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
@@ -1177,6 +1218,7 @@ sosplice(struct socket *so, int fd, off_
release:
splx(s);
+ rw_exit_write(&netlock);
sbunlock(&sosp->so_snd);
sbunlock(&so->so_rcv);
FRELE(fp, curproc);
@@ -1186,6 +1228,7 @@ sosplice(struct socket *so, int fd, off_
void
sounsplice(struct socket *so, struct socket *sosp, int wakeup)
{
+ rw_assert_wrlock(&netlock);
splsoftassert(IPL_SOFTNET);
task_del(sosplice_taskq, &so->so_splicetask);
@@ -1203,12 +1246,14 @@ soidle(void *arg)
struct socket *so = arg;
int s;
+ rw_enter_write(&netlock);
s = splsoftnet();
if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
so->so_error = ETIMEDOUT;
sounsplice(so, so->so_sp->ssp_socket, 1);
}
splx(s);
+ rw_exit_write(&netlock);
}
void
@@ -1217,6 +1262,7 @@ sotask(void *arg)
struct socket *so = arg;
int s;
+ rw_enter_write(&netlock);
s = splsoftnet();
if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
/*
@@ -1227,6 +1273,7 @@ sotask(void *arg)
somove(so, M_DONTWAIT);
}
splx(s);
+ rw_exit_write(&netlock);
/* Avoid user land starvation. */
yield();
@@ -1248,6 +1295,7 @@ somove(struct socket *so, int wait)
int error = 0, maxreached = 0;
short state;
+ rw_assert_wrlock(&netlock);
splsoftassert(IPL_SOFTNET);
nextpkt:
@@ -1510,6 +1558,7 @@ somove(struct socket *so, int wait)
void
sorwakeup(struct socket *so)
{
+ rw_assert_wrlock(&netlock);
splsoftassert(IPL_SOFTNET);
#ifdef SOCKET_SPLICE
@@ -1531,13 +1580,17 @@ sorwakeup(struct socket *so)
return;
#endif
sowakeup(so, &so->so_rcv);
- if (so->so_upcall)
+ if (so->so_upcall) {
+ rw_exit_write(&netlock);
(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
+ rw_enter_write(&netlock);
+ }
}
void
sowwakeup(struct socket *so)
{
+ rw_assert_wrlock(&netlock);
splsoftassert(IPL_SOFTNET);
#ifdef SOCKET_SPLICE
@@ -1884,7 +1937,8 @@ soo_kqfilter(struct file *fp, struct kno
{
struct socket *so = kn->kn_fp->f_data;
struct sockbuf *sb;
- int s;
+
+ KERNEL_ASSERT_LOCKED();
switch (kn->kn_filter) {
case EVFILT_READ:
@@ -1902,10 +1956,9 @@ soo_kqfilter(struct file *fp, struct kno
return (EINVAL);
}
- s = splnet();
SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
sb->sb_flags |= SB_KNOTE;
- splx(s);
+
return (0);
}
@@ -1913,12 +1966,12 @@ void
filt_sordetach(struct knote *kn)
{
struct socket *so = kn->kn_fp->f_data;
- int s = splnet();
+
+ KERNEL_ASSERT_LOCKED();
SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
so->so_rcv.sb_flags &= ~SB_KNOTE;
- splx(s);
}
int
@@ -1947,12 +2000,12 @@ void
filt_sowdetach(struct knote *kn)
{
struct socket *so = kn->kn_fp->f_data;
- int s = splnet();
+
+ KERNEL_ASSERT_LOCKED();
SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
so->so_snd.sb_flags &= ~SB_KNOTE;
- splx(s);
}
int
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.65
diff -u -p -r1.65 uipc_socket2.c
--- kern/uipc_socket2.c 2 Sep 2016 13:28:21 -0000 1.65
+++ kern/uipc_socket2.c 3 Oct 2016 12:59:16 -0000
@@ -138,8 +138,6 @@ soisdisconnected(struct socket *so)
* then we allocate a new structure, properly linked into the
* data structure of the original socket, and return this.
* Connstatus may be 0 or SS_ISCONNECTED.
- *
- * Must be called at splsoftnet()
*/
struct socket *
sonewconn(struct socket *head, int connstatus)
@@ -147,6 +145,7 @@ sonewconn(struct socket *head, int conns
struct socket *so;
int soqueue = connstatus ? 1 : 0;
+ rw_assert_wrlock(&netlock);
splsoftassert(IPL_SOFTNET);
if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 95 / 100)
@@ -276,10 +275,11 @@ socantrcvmore(struct socket *so)
int
sbwait(struct sockbuf *sb)
{
+ rw_assert_wrlock(&netlock);
splsoftassert(IPL_SOFTNET);
sb->sb_flagsintr |= SB_WAIT;
- return (tsleep(&sb->sb_cc,
+ return (rwsleep(&sb->sb_cc, &netlock,
(sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio",
sb->sb_timeo));
}
@@ -317,7 +317,8 @@ sbunlock(struct sockbuf *sb)
void
sowakeup(struct socket *so, struct sockbuf *sb)
{
- int s = splsoftnet();
+ rw_assert_wrlock(&netlock);
+ splassert(IPL_SOFTNET);
selwakeup(&sb->sb_sel);
sb->sb_flagsintr &= ~SB_SEL;
@@ -325,7 +326,7 @@ sowakeup(struct socket *so, struct sockb
sb->sb_flagsintr &= ~SB_WAIT;
wakeup(&sb->sb_cc);
}
- splx(s);
+
if (so->so_state & SS_ASYNC)
csignal(so->so_pgid, SIGIO, so->so_siguid, so->so_sigeuid);
}
Index: kern/uipc_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.133
diff -u -p -r1.133 uipc_syscalls.c
--- kern/uipc_syscalls.c 9 Aug 2016 02:25:35 -0000 1.133
+++ kern/uipc_syscalls.c 3 Oct 2016 12:59:16 -0000
@@ -250,6 +250,7 @@ doaccept(struct proc *p, int sock, struc
if ((error = getsock(p, sock, &fp)) != 0)
return (error);
+ rw_enter_write(&netlock);
s = splsoftnet();
headfp = fp;
head = fp->f_data;
@@ -275,7 +276,8 @@ redo:
head->so_error = ECONNABORTED;
break;
}
- error = tsleep(&head->so_timeo, PSOCK | PCATCH, "netcon", 0);
+ error = rwsleep(&head->so_timeo, &netlock, PSOCK | PCATCH,
+ "netcon", 0);
if (error) {
goto bad;
}
@@ -352,6 +354,7 @@ redo:
m_freem(nam);
bad:
splx(s);
+ rw_exit_write(&netlock);
FRELE(headfp, p);
return (error);
}
@@ -406,9 +409,11 @@ sys_connect(struct proc *p, void *v, reg
m_freem(nam);
return (EINPROGRESS);
}
+ rw_enter_write(&netlock);
s = splsoftnet();
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
- error = tsleep(&so->so_timeo, PSOCK | PCATCH, "netcon2", 0);
+ error = rwsleep(&so->so_timeo, &netlock, PSOCK | PCATCH,
+ "netcon2", 0);
if (error) {
if (error == EINTR || error == ERESTART)
interrupted = 1;
@@ -420,6 +425,7 @@ sys_connect(struct proc *p, void *v, reg
so->so_error = 0;
}
splx(s);
+ rw_exit_write(&netlock);
bad:
if (!interrupted)
so->so_state &= ~SS_ISCONNECTING;
Index: kern/uipc_usrreq.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.102
diff -u -p -r1.102 uipc_usrreq.c
--- kern/uipc_usrreq.c 26 Aug 2016 07:12:30 -0000 1.102
+++ kern/uipc_usrreq.c 3 Oct 2016 12:59:16 -0000
@@ -131,7 +131,10 @@ uipc_usrreq(struct socket *so, int req,
break;
case PRU_BIND:
+ rw_assert_wrlock(&netlock);
+ rw_exit_write(&netlock);
error = unp_bind(unp, nam, p);
+ rw_enter_write(&netlock);
break;
case PRU_LISTEN:
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.451
diff -u -p -r1.451 if.c
--- net/if.c 28 Sep 2016 08:31:42 -0000 1.451
+++ net/if.c 3 Oct 2016 12:59:16 -0000
@@ -163,7 +163,13 @@ void if_netisr(void *);
void ifa_print_all(void);
#endif
-void if_start_locked(struct ifnet *ifp);
+void if_start_locked(struct ifnet *);
+int if_ioctl_locked(struct socket *, u_long, caddr_t, struct proc *);
+
+/*
+ * Network lock: serialize socket operations.
+ */
+struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
/*
* interface index map
@@ -836,10 +842,16 @@ if_netisr(void *unused)
int s;
KERNEL_LOCK();
+ rw_enter_write(&netlock);
s = splsoftnet();
while ((n = netisr) != 0) {
- sched_pause();
+ /* Like sched_pause() but with a rwlock dance. */
+ if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
+ rw_exit_write(&netlock);
+ yield();
+ rw_enter_write(&netlock);
+ }
atomic_clearbits_int(&netisr, n);
@@ -878,6 +890,7 @@ if_netisr(void *unused)
#endif
splx(s);
+ rw_exit_write(&netlock);
KERNEL_UNLOCK();
}
@@ -1441,6 +1454,7 @@ if_downall(void)
struct ifnet *ifp;
int s;
+ rw_enter_write(&netlock);
s = splnet();
TAILQ_FOREACH(ifp, &ifnet, if_list) {
if ((ifp->if_flags & IFF_UP) == 0)
@@ -1455,6 +1469,7 @@ if_downall(void)
}
}
splx(s);
+ rw_exit_write(&netlock);
}
/*
@@ -1514,9 +1529,11 @@ if_linkstate_task(void *xifidx)
if (ifp == NULL)
return;
+ rw_enter_write(&netlock);
s = splsoftnet();
if_linkstate(ifp);
splx(s);
+ rw_exit_write(&netlock);
if_put(ifp);
}
@@ -1524,6 +1541,7 @@ if_linkstate_task(void *xifidx)
void
if_linkstate(struct ifnet *ifp)
{
+ rw_assert_wrlock(&netlock);
splsoftassert(IPL_SOFTNET);
rt_ifmsg(ifp);
@@ -1714,6 +1732,18 @@ if_setrdomain(struct ifnet *ifp, int rdo
*/
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
+{
+ int error;
+
+ rw_enter_write(&netlock);
+ error = if_ioctl_locked(so, cmd, data, p);
+ rw_exit_write(&netlock);
+
+ return (error);
+}
+
+int
+if_ioctl_locked(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
struct ifnet *ifp;
struct ifreq *ifr;
Index: net/if_pflow.c
===================================================================
RCS file: /cvs/src/sys/net/if_pflow.c,v
retrieving revision 1.61
diff -u -p -r1.61 if_pflow.c
--- net/if_pflow.c 29 Apr 2016 08:55:03 -0000 1.61
+++ net/if_pflow.c 3 Oct 2016 12:59:16 -0000
@@ -548,15 +548,16 @@ pflow_init_timeouts(struct pflow_softc *
if (timeout_initialized(&sc->sc_tmo_tmpl))
timeout_del(&sc->sc_tmo_tmpl);
if (!timeout_initialized(&sc->sc_tmo))
- timeout_set(&sc->sc_tmo, pflow_timeout, sc);
+ timeout_set_proc(&sc->sc_tmo, pflow_timeout, sc);
break;
case PFLOW_PROTO_10:
if (!timeout_initialized(&sc->sc_tmo_tmpl))
- timeout_set(&sc->sc_tmo_tmpl, pflow_timeout_tmpl, sc);
+ timeout_set_proc(&sc->sc_tmo_tmpl, pflow_timeout_tmpl,
+ sc);
if (!timeout_initialized(&sc->sc_tmo))
- timeout_set(&sc->sc_tmo, pflow_timeout, sc);
+ timeout_set_proc(&sc->sc_tmo, pflow_timeout, sc);
if (!timeout_initialized(&sc->sc_tmo6))
- timeout_set(&sc->sc_tmo6, pflow_timeout6, sc);
+ timeout_set_proc(&sc->sc_tmo6, pflow_timeout6, sc);
timeout_add_sec(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT);
break;
Index: net/if_pfsync.c
===================================================================
RCS file: /cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.234
diff -u -p -r1.234 if_pfsync.c
--- net/if_pfsync.c 27 Sep 2016 04:57:17 -0000 1.234
+++ net/if_pfsync.c 3 Oct 2016 12:59:16 -0000
@@ -328,9 +328,9 @@ pfsync_clone_create(struct if_clone *ifc
IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
ifp->if_hdrlen = sizeof(struct pfsync_header);
ifp->if_mtu = ETHERMTU;
- timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
- timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
- timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
+ timeout_set_proc(&sc->sc_tmo, pfsync_timeout, sc);
+ timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
+ timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
if_attach(ifp);
if_alloc_sadl(ifp);
@@ -1720,7 +1720,7 @@ pfsync_defer(struct pf_state *st, struct
sc->sc_deferred++;
TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
- timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
+ timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
timeout_add_msec(&pd->pd_tmo, 20);
schednetisr(NETISR_PFSYNC);
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.207
diff -u -p -r1.207 rtsock.c
--- net/rtsock.c 27 Sep 2016 18:41:11 -0000 1.207
+++ net/rtsock.c 3 Oct 2016 12:59:16 -0000
@@ -296,6 +296,7 @@ route_ctloutput(int op, struct socket *s
return (error);
}
+/* XXXSMP */
void
rt_senddesync(void *data)
{
Index: netinet/ip_carp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.293
diff -u -p -r1.293 ip_carp.c
--- netinet/ip_carp.c 25 Jul 2016 16:44:04 -0000 1.293
+++ netinet/ip_carp.c 3 Oct 2016 12:59:16 -0000
@@ -831,9 +831,9 @@ carp_new_vhost(struct carp_softc *sc, in
vhe->vhid = vhid;
vhe->advskew = advskew;
vhe->state = INIT;
- timeout_set(&vhe->ad_tmo, carp_send_ad, vhe);
- timeout_set(&vhe->md_tmo, carp_master_down, vhe);
- timeout_set(&vhe->md6_tmo, carp_master_down, vhe);
+ timeout_set_proc(&vhe->ad_tmo, carp_send_ad, vhe);
+ timeout_set_proc(&vhe->md_tmo, carp_master_down, vhe);
+ timeout_set_proc(&vhe->md6_tmo, carp_master_down, vhe);
KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
@@ -1045,6 +1045,7 @@ carp_send_ad(void *v)
return;
}
+ rw_enter_write(&netlock);
s = splsoftnet();
/* bow out if we've gone to backup (the carp interface is going down) */
@@ -1247,6 +1248,7 @@ carp_send_ad(void *v)
retry_later:
sc->cur_vhe = NULL;
splx(s);
+ rw_exit_write(&netlock);
if (advbase != 255 || advskew != 255)
timeout_add(&vhe->ad_tmo, tvtohz(&tv));
}
Index: netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.282
diff -u -p -r1.282 ip_input.c
--- netinet/ip_input.c 22 Sep 2016 10:12:25 -0000 1.282
+++ netinet/ip_input.c 3 Oct 2016 12:59:16 -0000
@@ -1755,12 +1755,17 @@ ip_send_dispatch(void *xmq)
int s;
mq_delist(mq, &ml);
+ if (ml_empty(&ml))
+ return;
+
KERNEL_LOCK();
+ rw_enter_write(&netlock);
s = splsoftnet();
while ((m = ml_dequeue(&ml)) != NULL) {
ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
}
splx(s);
+ rw_exit_write(&netlock);
KERNEL_UNLOCK();
}
Index: netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.327
diff -u -p -r1.327 ip_output.c
--- netinet/ip_output.c 4 Sep 2016 17:18:56 -0000 1.327
+++ netinet/ip_output.c 3 Oct 2016 12:59:16 -0000
@@ -109,6 +109,9 @@ ip_output(struct mbuf *m0, struct mbuf *
int rv;
#endif
+ /* Make sure this thread hold the correct lock. */
+ KASSERT(rw_status(&netlock) == RW_WRITE);
+
#ifdef IPSEC
if (inp && (inp->inp_flags & INP_IPV6) != 0)
panic("ip_output: IPv6 pcb is passed");
Index: netinet/tcp_timer.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.50
diff -u -p -r1.50 tcp_timer.c
--- netinet/tcp_timer.c 24 Sep 2016 14:51:37 -0000 1.50
+++ netinet/tcp_timer.c 3 Oct 2016 12:59:16 -0000
@@ -112,15 +112,15 @@ tcp_delack(void *arg)
* for whatever reason, it will restart the delayed
* ACK callout.
*/
-
+ rw_enter_write(&netlock);
s = splsoftnet();
- if (tp->t_flags & TF_DEAD) {
- splx(s);
- return;
- }
+ if (tp->t_flags & TF_DEAD)
+ goto out;
tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
+ out:
splx(s);
+ rw_exit_write(&netlock);
}
/*
@@ -193,11 +193,10 @@ tcp_timer_rexmt(void *arg)
uint32_t rto;
int s;
+ rw_enter_write(&netlock);
s = splsoftnet();
- if (tp->t_flags & TF_DEAD) {
- splx(s);
- return;
- }
+ if (tp->t_flags & TF_DEAD)
+ goto out;
if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
@@ -224,8 +223,7 @@ tcp_timer_rexmt(void *arg)
sin.sin_addr = tp->t_inpcb->inp_faddr;
in_pcbnotifyall(&tcbtable, sintosa(&sin),
tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc);
- splx(s);
- return;
+ goto out;
}
#ifdef TCP_SACK
@@ -377,6 +375,7 @@ tcp_timer_rexmt(void *arg)
out:
splx(s);
+ rw_exit_write(&netlock);
}
void
@@ -386,11 +385,11 @@ tcp_timer_persist(void *arg)
uint32_t rto;
int s;
+ rw_enter_write(&netlock);
s = splsoftnet();
if ((tp->t_flags & TF_DEAD) ||
TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
- splx(s);
- return;
+ goto out;
}
tcpstat.tcps_persisttimeo++;
/*
@@ -416,6 +415,7 @@ tcp_timer_persist(void *arg)
tp->t_force = 0;
out:
splx(s);
+ rw_exit_write(&netlock);
}
void
@@ -424,11 +424,10 @@ tcp_timer_keep(void *arg)
struct tcpcb *tp = arg;
int s;
+ rw_enter_write(&netlock);
s = splsoftnet();
- if (tp->t_flags & TF_DEAD) {
- splx(s);
- return;
- }
+ if (tp->t_flags & TF_DEAD)
+ goto out;
tcpstat.tcps_keeptimeo++;
if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
@@ -457,8 +456,9 @@ tcp_timer_keep(void *arg)
TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
} else
TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
-
+ out:
splx(s);
+ rw_exit_write(&netlock);
return;
dropit:
@@ -466,6 +466,7 @@ tcp_timer_keep(void *arg)
tp = tcp_drop(tp, ETIMEDOUT);
splx(s);
+ rw_exit_write(&netlock);
}
void
@@ -474,11 +475,10 @@ tcp_timer_2msl(void *arg)
struct tcpcb *tp = arg;
int s;
+ rw_enter_write(&netlock);
s = splsoftnet();
- if (tp->t_flags & TF_DEAD) {
- splx(s);
- return;
- }
+ if (tp->t_flags & TF_DEAD)
+ goto out;
#ifdef TCP_SACK
tcp_timer_freesack(tp);
@@ -490,5 +490,7 @@ tcp_timer_2msl(void *arg)
else
tp = tcp_close(tp);
+ out:
splx(s);
+ rw_exit_write(&netlock);
}
Index: netinet/tcp_timer.h
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_timer.h,v
retrieving revision 1.13
diff -u -p -r1.13 tcp_timer.h
--- netinet/tcp_timer.h 6 Jul 2011 23:44:20 -0000 1.13
+++ netinet/tcp_timer.h 3 Oct 2016 12:59:16 -0000
@@ -116,7 +116,7 @@ const char *tcptimers[] =
* Init, arm, disarm, and test TCP timers.
*/
#define TCP_TIMER_INIT(tp, timer)
\
- timeout_set(&(tp)->t_timer[(timer)], tcp_timer_funcs[(timer)], tp)
+ timeout_set_proc(&(tp)->t_timer[(timer)], tcp_timer_funcs[(timer)], tp)
#define TCP_TIMER_ARM(tp, timer, nticks)
\
timeout_add(&(tp)->t_timer[(timer)], (nticks) * (hz / PR_SLOWHZ))
Index: netinet/tcp_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.115
diff -u -p -r1.115 tcp_var.h
--- netinet/tcp_var.h 20 Jul 2016 19:57:53 -0000 1.115
+++ netinet/tcp_var.h 3 Oct 2016 12:59:16 -0000
@@ -217,7 +217,7 @@ extern int tcp_delack_ticks;
void tcp_delack(void *);
#define TCP_INIT_DELACK(tp) \
- timeout_set(&(tp)->t_delack_to, tcp_delack, tp)
+ timeout_set_proc(&(tp)->t_delack_to, tcp_delack, tp)
#define TCP_RESTART_DELACK(tp) \
timeout_add(&(tp)->t_delack_to, tcp_delack_ticks)
Index: netinet6/ip6_input.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.168
diff -u -p -r1.168 ip6_input.c
--- netinet6/ip6_input.c 24 Aug 2016 09:41:12 -0000 1.168
+++ netinet6/ip6_input.c 3 Oct 2016 12:59:16 -0000
@@ -1429,12 +1429,17 @@ ip6_send_dispatch(void *xmq)
int s;
mq_delist(mq, &ml);
+ if (ml_empty(&ml))
+ return;
+
KERNEL_LOCK();
+ rw_enter_write(&netlock);
s = splsoftnet();
while ((m = ml_dequeue(&ml)) != NULL) {
ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
}
splx(s);
+ rw_exit_write(&netlock);
KERNEL_UNLOCK();
}
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.119
diff -u -p -r1.119 systm.h
--- sys/systm.h 24 Sep 2016 18:35:52 -0000 1.119
+++ sys/systm.h 3 Oct 2016 12:59:16 -0000
@@ -290,6 +290,11 @@ struct uio;
int uiomove(void *, size_t, struct uio *);
#if defined(_KERNEL)
+/*
+ * Network lock: serialize socket operations.
+ */
+extern struct rwlock netlock;
+
__returns_twice int setjmp(label_t *);
__dead void longjmp(label_t *);
#endif