On 03/11/16(Thu) 11:21, Martin Pieuchot wrote:
> Here's the next iteration of my diff introducing a rwlock to serialize
> the network input path with socket paths. Changes are:
>
> - more timeout_set_proc() that should fix problems reported by
> Chris Jackman.
>
> - I introduced a set of macros to make it easier to audit existing
> splsoftnet().
>
> - It makes use of splassert_fail() if the lock is not held.
>
>
> My plan is to commit it, assuming it is stable enough, then fix the
> remaining issues in tree. This includes:
>
> - Analyze and if needed fix the two code paths where we do an unlock/lock
> dance
>
> - Remove unneeded/recursive splsoftnet() dances.
>
> Once that's done we should be able to remove the KERNEL_LOCK() from the
> input path.
>
> So please test and report back.
Updated version that prevents a recursion in doaccept(), reported by Nils
Frohberg.
diff --git sys/kern/sys_socket.c sys/kern/sys_socket.c
index 7a90f78..a7be8a1 100644
--- sys/kern/sys_socket.c
+++ sys/kern/sys_socket.c
@@ -133,7 +133,7 @@ soo_poll(struct file *fp, int events, struct proc *p)
int revents = 0;
int s;
- s = splsoftnet();
+ SOCKET_LOCK(s);
if (events & (POLLIN | POLLRDNORM)) {
if (soreadable(so))
revents |= events & (POLLIN | POLLRDNORM);
@@ -159,7 +159,7 @@ soo_poll(struct file *fp, int events, struct proc *p)
so->so_snd.sb_flagsintr |= SB_SEL;
}
}
- splx(s);
+ SOCKET_UNLOCK(s);
return (revents);
}
diff --git sys/kern/uipc_socket.c sys/kern/uipc_socket.c
index 9e8d05f..dd067b3 100644
--- sys/kern/uipc_socket.c
+++ sys/kern/uipc_socket.c
@@ -89,6 +89,11 @@ struct pool sosplice_pool;
struct taskq *sosplice_taskq;
#endif
+/*
+ * Serialize socket operations.
+ */
+struct rwlock socketlock = RWLOCK_INITIALIZER("socketlock");
+
void
soinit(void)
{
@@ -123,7 +128,7 @@ socreate(int dom, struct socket **aso, int type, int proto)
return (EPROTONOSUPPORT);
if (prp->pr_type != type)
return (EPROTOTYPE);
- s = splsoftnet();
+ SOCKET_LOCK(s);
so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
TAILQ_INIT(&so->so_q0);
TAILQ_INIT(&so->so_q);
@@ -141,10 +146,10 @@ socreate(int dom, struct socket **aso, int type, int
proto)
if (error) {
so->so_state |= SS_NOFDREF;
sofree(so);
- splx(s);
+ SOCKET_UNLOCK(s);
return (error);
}
- splx(s);
+ SOCKET_UNLOCK(s);
*aso = so;
return (0);
}
@@ -154,9 +159,9 @@ sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
int s, error;
- s = splsoftnet();
+ SOCKET_LOCK(s);
error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
- splx(s);
+ SOCKET_UNLOCK(s);
return (error);
}
@@ -171,11 +176,11 @@ solisten(struct socket *so, int backlog)
if (isspliced(so) || issplicedback(so))
return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
- s = splsoftnet();
+ SOCKET_LOCK(s);
error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
curproc);
if (error) {
- splx(s);
+ SOCKET_UNLOCK(s);
return (error);
}
if (TAILQ_FIRST(&so->so_q) == NULL)
@@ -185,14 +190,14 @@ solisten(struct socket *so, int backlog)
if (backlog < sominconn)
backlog = sominconn;
so->so_qlimit = backlog;
- splx(s);
+ SOCKET_UNLOCK(s);
return (0);
}
void
sofree(struct socket *so)
{
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
return;
@@ -232,7 +237,7 @@ soclose(struct socket *so)
struct socket *so2;
int s, error = 0;
- s = splsoftnet();
+ SOCKET_LOCK(s);
if (so->so_options & SO_ACCEPTCONN) {
while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
(void) soqremque(so2, 0);
@@ -256,7 +261,7 @@ soclose(struct socket *so)
(so->so_state & SS_NBIO))
goto drop;
while (so->so_state & SS_ISCONNECTED) {
- error = tsleep(&so->so_timeo,
+ error = rwsleep(&so->so_timeo, &socketlock,
PSOCK | PCATCH, "netcls",
so->so_linger * hz);
if (error)
@@ -276,14 +281,14 @@ discard:
panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
so->so_state |= SS_NOFDREF;
sofree(so);
- splx(s);
+ SOCKET_UNLOCK(s);
return (error);
}
int
soabort(struct socket *so)
{
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
curproc);
@@ -294,7 +299,7 @@ soaccept(struct socket *so, struct mbuf *nam)
{
int error = 0;
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
if ((so->so_state & SS_NOFDREF) == 0)
panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
@@ -315,7 +320,7 @@ soconnect(struct socket *so, struct mbuf *nam)
if (so->so_options & SO_ACCEPTCONN)
return (EOPNOTSUPP);
- s = splsoftnet();
+ SOCKET_LOCK(s);
/*
* If protocol is connection-based, can only connect once.
* Otherwise, if connected, try to disconnect first.
@@ -329,7 +334,7 @@ soconnect(struct socket *so, struct mbuf *nam)
else
error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
NULL, nam, NULL, curproc);
- splx(s);
+ SOCKET_UNLOCK(s);
return (error);
}
@@ -338,10 +343,10 @@ soconnect2(struct socket *so1, struct socket *so2)
{
int s, error;
- s = splsoftnet();
+ SOCKET_LOCK(s);
error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
(struct mbuf *)so2, NULL, curproc);
- splx(s);
+ SOCKET_UNLOCK(s);
return (error);
}
@@ -350,7 +355,7 @@ sodisconnect(struct socket *so)
{
int error;
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
if ((so->so_state & SS_ISCONNECTED) == 0)
return (ENOTCONN);
@@ -418,21 +423,20 @@ sosend(struct socket *so, struct mbuf *addr, struct uio
*uio, struct mbuf *top,
(sizeof(struct file *) / sizeof(int)));
}
-#define snderr(errno) { error = errno; splx(s); goto release; }
+#define snderr(errno) { error = errno; SOCKET_UNLOCK(s); goto
release; }
restart:
if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
goto out;
so->so_state |= SS_ISSENDING;
do {
- s = splsoftnet();
+ SOCKET_LOCK(s);
if (so->so_state & SS_CANTSENDMORE)
snderr(EPIPE);
if (so->so_error) {
error = so->so_error;
so->so_error = 0;
- splx(s);
- goto release;
+ snderr(error);
}
if ((so->so_state & SS_ISCONNECTED) == 0) {
if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
@@ -456,12 +460,12 @@ restart:
sbunlock(&so->so_snd);
error = sbwait(&so->so_snd);
so->so_state &= ~SS_ISSENDING;
- splx(s);
+ SOCKET_UNLOCK(s);
if (error)
goto out;
goto restart;
}
- splx(s);
+ SOCKET_UNLOCK(s);
space -= clen;
do {
if (uio == NULL) {
@@ -481,13 +485,13 @@ restart:
if (flags & MSG_EOR)
top->m_flags |= M_EOR;
}
- s = splsoftnet(); /* XXX */
+ SOCKET_LOCK(s);
if (resid == 0)
so->so_state &= ~SS_ISSENDING;
error = (*so->so_proto->pr_usrreq)(so,
(flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
top, addr, control, curproc);
- splx(s);
+ SOCKET_UNLOCK(s);
clen = 0;
control = NULL;
top = NULL;
@@ -617,8 +621,8 @@ sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
* must begin with an address if the protocol so specifies,
* followed by an optional mbuf or mbufs containing ancillary data,
* and then zero or more mbufs of data.
- * In order to avoid blocking network interrupts for the entire time here,
- * we splx() while doing the actual copy to user space.
+ * In order to avoid blocking network for the entire time here, we splx()
+ * and release ``socketlock'' while doing the actual copy to user space.
* Although the sockbuf is locked, new data may still be appended,
* and thus we must maintain consistency of the sockbuf during that time.
*
@@ -672,7 +676,7 @@ bad:
restart:
if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
return (error);
- s = splsoftnet();
+ SOCKET_LOCK(s);
m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
@@ -737,7 +741,7 @@ restart:
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
sbunlock(&so->so_rcv);
error = sbwait(&so->so_rcv);
- splx(s);
+ SOCKET_UNLOCK(s);
if (error)
return (error);
goto restart;
@@ -871,9 +875,9 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
- splx(s);
+ SOCKET_UNLOCK(s);
uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
- s = splsoftnet();
+ SOCKET_LOCK(s);
if (uio_error)
uio->uio_resid = resid - len;
} else
@@ -955,7 +959,7 @@ dontblock:
error = sbwait(&so->so_rcv);
if (error) {
sbunlock(&so->so_rcv);
- splx(s);
+ SOCKET_UNLOCK(s);
return (0);
}
if ((m = so->so_rcv.sb_mb) != NULL)
@@ -991,7 +995,7 @@ dontblock:
if (orig_resid == uio->uio_resid && orig_resid &&
(flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
sbunlock(&so->so_rcv);
- splx(s);
+ SOCKET_UNLOCK(s);
goto restart;
}
@@ -1002,7 +1006,7 @@ dontblock:
*flagsp |= flags;
release:
sbunlock(&so->so_rcv);
- splx(s);
+ SOCKET_UNLOCK(s);
return (error);
}
@@ -1012,7 +1016,7 @@ soshutdown(struct socket *so, int how)
struct protosw *pr = so->so_proto;
int s, error = 0;
- s = splsoftnet();
+ SOCKET_LOCK(s);
switch (how) {
case SHUT_RD:
case SHUT_RDWR:
@@ -1028,7 +1032,8 @@ soshutdown(struct socket *so, int how)
error = EINVAL;
break;
}
- splx(s);
+ SOCKET_UNLOCK(s);
+
return (error);
}
@@ -1042,6 +1047,7 @@ sorflush(struct socket *so)
sb->sb_flags |= SB_NOINTR;
(void) sblock(sb, M_WAITOK);
+ /* XXXSMP */
s = splnet();
socantrcvmore(so);
sbunlock(sb);
@@ -1095,10 +1101,10 @@ sosplice(struct socket *so, int fd, off_t max, struct
timeval *tv)
if ((error = sblock(&so->so_rcv,
(so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0)
return (error);
- s = splsoftnet();
+ SOCKET_LOCK(s);
if (so->so_sp->ssp_socket)
sounsplice(so, so->so_sp->ssp_socket, 1);
- splx(s);
+ SOCKET_UNLOCK(s);
sbunlock(&so->so_rcv);
return (0);
}
@@ -1127,7 +1133,7 @@ sosplice(struct socket *so, int fd, off_t max, struct
timeval *tv)
FRELE(fp, curproc);
return (error);
}
- s = splsoftnet();
+ SOCKET_LOCK(s);
if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
error = EBUSY;
@@ -1168,7 +1174,7 @@ sosplice(struct socket *so, int fd, off_t max, struct
timeval *tv)
}
release:
- splx(s);
+ SOCKET_UNLOCK(s);
sbunlock(&sosp->so_snd);
sbunlock(&so->so_rcv);
FRELE(fp, curproc);
@@ -1178,7 +1184,7 @@ sosplice(struct socket *so, int fd, off_t max, struct
timeval *tv)
void
sounsplice(struct socket *so, struct socket *sosp, int wakeup)
{
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
task_del(sosplice_taskq, &so->so_splicetask);
timeout_del(&so->so_idleto);
@@ -1195,12 +1201,12 @@ soidle(void *arg)
struct socket *so = arg;
int s;
- s = splsoftnet();
+ SOCKET_LOCK(s);
if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
so->so_error = ETIMEDOUT;
sounsplice(so, so->so_sp->ssp_socket, 1);
}
- splx(s);
+ SOCKET_UNLOCK(s);
}
void
@@ -1209,7 +1215,7 @@ sotask(void *arg)
struct socket *so = arg;
int s;
- s = splsoftnet();
+ SOCKET_LOCK(s);
if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
/*
* We may not sleep here as sofree() and unsplice() may be
@@ -1218,7 +1224,7 @@ sotask(void *arg)
*/
somove(so, M_DONTWAIT);
}
- splx(s);
+ SOCKET_UNLOCK(s);
/* Avoid user land starvation. */
yield();
@@ -1240,7 +1246,7 @@ somove(struct socket *so, int wait)
int error = 0, maxreached = 0;
short state;
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
nextpkt:
if (so->so_error) {
@@ -1502,7 +1508,7 @@ somove(struct socket *so, int wait)
void
sorwakeup(struct socket *so)
{
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
#ifdef SOCKET_SPLICE
if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
@@ -1523,14 +1529,18 @@ sorwakeup(struct socket *so)
return;
#endif
sowakeup(so, &so->so_rcv);
- if (so->so_upcall)
+ if (so->so_upcall) {
+ /* XXXSMP breaks atomicity */
+ rw_exit_write(&socketlock);
(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
+ rw_enter_write(&socketlock);
+ }
}
void
sowwakeup(struct socket *so)
{
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
#ifdef SOCKET_SPLICE
if (so->so_snd.sb_flagsintr & SB_SPLICE)
@@ -1876,7 +1886,8 @@ soo_kqfilter(struct file *fp, struct knote *kn)
{
struct socket *so = kn->kn_fp->f_data;
struct sockbuf *sb;
- int s;
+
+ KERNEL_ASSERT_LOCKED();
switch (kn->kn_filter) {
case EVFILT_READ:
@@ -1894,10 +1905,9 @@ soo_kqfilter(struct file *fp, struct knote *kn)
return (EINVAL);
}
- s = splnet();
SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
sb->sb_flags |= SB_KNOTE;
- splx(s);
+
return (0);
}
@@ -1905,12 +1915,12 @@ void
filt_sordetach(struct knote *kn)
{
struct socket *so = kn->kn_fp->f_data;
- int s = splnet();
+
+ KERNEL_ASSERT_LOCKED();
SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
so->so_rcv.sb_flags &= ~SB_KNOTE;
- splx(s);
}
int
@@ -1939,12 +1949,12 @@ void
filt_sowdetach(struct knote *kn)
{
struct socket *so = kn->kn_fp->f_data;
- int s = splnet();
+
+ KERNEL_ASSERT_LOCKED();
SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
so->so_snd.sb_flags &= ~SB_KNOTE;
- splx(s);
}
int
diff --git sys/kern/uipc_socket2.c sys/kern/uipc_socket2.c
index c3b7c3a..ed9fa6f 100644
--- sys/kern/uipc_socket2.c
+++ sys/kern/uipc_socket2.c
@@ -145,7 +145,7 @@ sonewconn(struct socket *head, int connstatus)
struct socket *so;
int soqueue = connstatus ? 1 : 0;
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 95 / 100)
return (NULL);
@@ -274,10 +274,10 @@ socantrcvmore(struct socket *so)
int
sbwait(struct sockbuf *sb)
{
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
sb->sb_flagsintr |= SB_WAIT;
- return (tsleep(&sb->sb_cc,
+ return (rwsleep(&sb->sb_cc, &socketlock,
(sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio",
sb->sb_timeo));
}
@@ -315,7 +315,7 @@ sbunlock(struct sockbuf *sb)
void
sowakeup(struct socket *so, struct sockbuf *sb)
{
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
selwakeup(&sb->sb_sel);
sb->sb_flagsintr &= ~SB_SEL;
diff --git sys/kern/uipc_syscalls.c sys/kern/uipc_syscalls.c
index e064bc9..fd54e70 100644
--- sys/kern/uipc_syscalls.c
+++ sys/kern/uipc_syscalls.c
@@ -276,16 +276,11 @@ doaccept(struct proc *p, int sock, struct sockaddr *name,
socklen_t *anamelen,
if ((error = getsock(p, sock, &fp)) != 0)
return (error);
- s = splsoftnet();
headfp = fp;
- head = fp->f_data;
-
- if (isdnssocket((struct socket *)fp->f_data)) {
- error = EINVAL;
- goto bad;
- }
redo:
- if ((head->so_options & SO_ACCEPTCONN) == 0) {
+ SOCKET_LOCK(s);
+ head = headfp->f_data;
+ if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
error = EINVAL;
goto bad;
}
@@ -301,7 +296,8 @@ redo:
head->so_error = ECONNABORTED;
break;
}
- error = tsleep(&head->so_timeo, PSOCK | PCATCH, "netcon", 0);
+ error = rwsleep(&head->so_timeo, &socketlock, PSOCK | PCATCH,
+ "netcon", 0);
if (error) {
goto bad;
}
@@ -311,7 +307,7 @@ redo:
head->so_error = 0;
goto bad;
}
-
+
/* Figure out whether the new socket should be non-blocking. */
nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
: (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
@@ -338,6 +334,7 @@ redo:
* or another thread or process to accept it. If so, start over.
*/
if (head->so_qlen == 0) {
+ SOCKET_UNLOCK(s);
m_freem(nam);
fdplock(fdp);
fdremove(fdp, tmpfd);
@@ -366,18 +363,23 @@ redo:
if (error) {
/* if an error occurred, free the file descriptor */
+ SOCKET_UNLOCK(s);
+ m_freem(nam);
fdplock(fdp);
fdremove(fdp, tmpfd);
closef(fp, p);
fdpunlock(fdp);
+ goto out;
} else {
(*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&nflag, p);
FILE_SET_MATURE(fp, p);
*retval = tmpfd;
+ m_freem(nam);
}
- m_freem(nam);
+
bad:
- splx(s);
+ SOCKET_UNLOCK(s);
+out:
FRELE(headfp, p);
return (error);
}
@@ -434,9 +436,10 @@ sys_connect(struct proc *p, void *v, register_t *retval)
m_freem(nam);
return (EINPROGRESS);
}
- s = splsoftnet();
+ SOCKET_LOCK(s);
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
- error = tsleep(&so->so_timeo, PSOCK | PCATCH, "netcon2", 0);
+ error = rwsleep(&so->so_timeo, &socketlock, PSOCK | PCATCH,
+ "netcon2", 0);
if (error) {
if (error == EINTR || error == ERESTART)
interrupted = 1;
@@ -447,7 +450,7 @@ sys_connect(struct proc *p, void *v, register_t *retval)
error = so->so_error;
so->so_error = 0;
}
- splx(s);
+ SOCKET_UNLOCK(s);
bad:
if (!interrupted)
so->so_state &= ~SS_ISCONNECTING;
diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c
index e0f7f27..84bab2f 100644
--- sys/kern/uipc_usrreq.c
+++ sys/kern/uipc_usrreq.c
@@ -131,7 +131,11 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m,
struct mbuf *nam,
break;
case PRU_BIND:
+ /* XXXSMP breaks atomicity */
+ rw_assert_wrlock(&socketlock);
+ rw_exit_write(&socketlock);
error = unp_bind(unp, nam, p);
+ rw_enter_write(&socketlock);
break;
case PRU_LISTEN:
diff --git sys/net/if.c sys/net/if.c
index b7c9e11..797344f 100644
--- sys/net/if.c
+++ sys/net/if.c
@@ -160,7 +160,8 @@ void if_netisr(void *);
void ifa_print_all(void);
#endif
-void if_start_locked(struct ifnet *ifp);
+void if_start_locked(struct ifnet *);
+int if_ioctl_locked(struct socket *, u_long, caddr_t, struct proc *);
/*
* interface index map
@@ -835,10 +836,15 @@ if_netisr(void *unused)
int s;
KERNEL_LOCK();
- s = splsoftnet();
+ SOCKET_LOCK(s);
while ((n = netisr) != 0) {
- sched_pause();
+ /* Like sched_pause() but with a rwlock dance. */
+ if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
+ SOCKET_UNLOCK(s);
+ yield();
+ SOCKET_LOCK(s);
+ }
atomic_clearbits_int(&netisr, n);
@@ -876,7 +882,7 @@ if_netisr(void *unused)
pfsyncintr();
#endif
- splx(s);
+ SOCKET_UNLOCK(s);
KERNEL_UNLOCK();
}
@@ -1429,7 +1435,7 @@ if_downall(void)
struct ifnet *ifp;
int s;
- s = splnet();
+ SOCKET_LOCK(s);
TAILQ_FOREACH(ifp, &ifnet, if_list) {
if ((ifp->if_flags & IFF_UP) == 0)
continue;
@@ -1442,7 +1448,7 @@ if_downall(void)
(caddr_t)&ifrq);
}
}
- splx(s);
+ SOCKET_UNLOCK(s);
}
/*
@@ -1502,9 +1508,9 @@ if_linkstate_task(void *xifidx)
if (ifp == NULL)
return;
- s = splsoftnet();
+ SOCKET_LOCK(s);
if_linkstate(ifp);
- splx(s);
+ SOCKET_UNLOCK(s);
if_put(ifp);
}
@@ -1512,7 +1518,7 @@ if_linkstate_task(void *xifidx)
void
if_linkstate(struct ifnet *ifp)
{
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
rt_ifmsg(ifp);
#ifndef SMALL_KERNEL
@@ -1703,6 +1709,18 @@ if_setrdomain(struct ifnet *ifp, int rdomain)
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
+ int s, error;
+
+ SOCKET_LOCK(s);
+ error = if_ioctl_locked(so, cmd, data, p);
+ SOCKET_UNLOCK(s);
+
+ return (error);
+}
+
+int
+if_ioctl_locked(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
+{
struct ifnet *ifp;
struct ifreq *ifr;
struct sockaddr_dl *sdl;
@@ -1751,20 +1769,15 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data,
struct proc *p)
switch (ifar->ifar_af) {
case AF_INET:
/* attach is a noop for AF_INET */
- if (cmd == SIOCIFAFDETACH) {
- s = splsoftnet();
+ if (cmd == SIOCIFAFDETACH)
in_ifdetach(ifp);
- splx(s);
- }
return (0);
#ifdef INET6
case AF_INET6:
- s = splsoftnet();
if (cmd == SIOCIFAFATTACH)
error = in6_ifattach(ifp);
else
in6_ifdetach(ifp);
- splx(s);
return (error);
#endif /* INET6 */
default:
diff --git sys/net/route.c sys/net/route.c
index a04b095..ab2b924 100644
--- sys/net/route.c
+++ sys/net/route.c
@@ -547,7 +547,7 @@ rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
int flags = RTF_GATEWAY|RTF_HOST;
uint8_t prio = RTP_NONE;
- splsoftassert(IPL_SOFTNET);
+ SOCKET_ASSERT_LOCKED();
/* verify the gateway is directly reachable */
if ((ifa = ifa_ifwithnet(gateway, rdomain)) == NULL) {
@@ -1498,6 +1498,8 @@ rt_timer_queue_destroy(struct rttimer_queue *rtq)
{
struct rttimer *r;
+ SOCKET_ASSERT_LOCKED();
+
while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
LIST_REMOVE(r, rtt_link);
TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
@@ -1590,7 +1592,7 @@ rt_timer_timer(void *arg)
current_time = time_uptime;
- s = splsoftnet();
+ SOCKET_LOCK(s);
for (rtq = LIST_FIRST(&rttimer_queue_head); rtq != NULL;
rtq = LIST_NEXT(rtq, rtq_link)) {
while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
@@ -1605,7 +1607,7 @@ rt_timer_timer(void *arg)
printf("rt_timer_timer: rtq_count reached 0\n");
}
}
- splx(s);
+ SOCKET_UNLOCK(s);
timeout_add_sec(to, 1);
}
diff --git sys/net/rtsock.c sys/net/rtsock.c
index 46150c6..a40c6c8 100644
--- sys/net/rtsock.c
+++ sys/net/rtsock.c
@@ -296,6 +296,7 @@ route_ctloutput(int op, struct socket *so, int level, int
optname,
return (error);
}
+/* XXXSMP */
void
rt_senddesync(void *data)
{
diff --git sys/netinet/if_ether.c sys/netinet/if_ether.c
index da076c2..9f43add 100644
--- sys/netinet/if_ether.c
+++ sys/netinet/if_ether.c
@@ -110,10 +110,10 @@ void
arptimer(void *arg)
{
struct timeout *to = (struct timeout *)arg;
- int s;
struct llinfo_arp *la, *nla;
+ int s;
- s = splsoftnet();
+ SOCKET_LOCK(s);
timeout_add_sec(to, arpt_prune);
LIST_FOREACH_SAFE(la, &arp_list, la_list, nla) {
struct rtentry *rt = la->la_rt;
@@ -121,7 +121,7 @@ arptimer(void *arg)
if (rt->rt_expire && rt->rt_expire <= time_uptime)
arptfree(rt); /* timer has expired; clear */
}
- splx(s);
+ SOCKET_UNLOCK(s);
}
void
@@ -138,7 +138,7 @@ arp_rtrequest(struct ifnet *ifp, int req, struct rtentry
*rt)
pool_init(&arp_pool, sizeof(struct llinfo_arp), 0,
IPL_SOFTNET, 0, "arp", NULL);
- timeout_set(&arptimer_to, arptimer, &arptimer_to);
+ timeout_set_proc(&arptimer_to, arptimer, &arptimer_to);
timeout_add_sec(&arptimer_to, 1);
}
diff --git sys/netinet/ip_carp.c sys/netinet/ip_carp.c
index ff3ae78..1c4aa86 100644
--- sys/netinet/ip_carp.c
+++ sys/netinet/ip_carp.c
@@ -1045,7 +1045,7 @@ carp_send_ad(void *v)
return;
}
- s = splsoftnet();
+ SOCKET_LOCK(s);
/* bow out if we've gone to backup (the carp interface is going down) */
if (sc->sc_bow_out) {
@@ -1246,7 +1246,7 @@ carp_send_ad(void *v)
retry_later:
sc->cur_vhe = NULL;
- splx(s);
+ SOCKET_UNLOCK(s);
if (advbase != 255 || advskew != 255)
timeout_add(&vhe->ad_tmo, tvtohz(&tv));
}
diff --git sys/netinet/ip_icmp.c sys/netinet/ip_icmp.c
index cdd60aa..57d4553 100644
--- sys/netinet/ip_icmp.c
+++ sys/netinet/ip_icmp.c
@@ -884,7 +884,7 @@ icmp_sysctl(int *name, u_int namelen, void *oldp, size_t
*oldlenp, void *newp,
if (namelen != 1)
return (ENOTDIR);
- s = splsoftnet();
+ SOCKET_LOCK(s);
switch (name[0]) {
case ICMPCTL_REDIRTIMEOUT:
@@ -921,7 +921,7 @@ icmp_sysctl(int *name, u_int namelen, void *oldp, size_t
*oldlenp, void *newp,
error = ENOPROTOOPT;
break;
}
- splx(s);
+ SOCKET_UNLOCK(s);
return (error);
}
@@ -1046,7 +1046,8 @@ void
icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
{
struct ifnet *ifp;
- int s;
+
+ SOCKET_ASSERT_LOCKED();
ifp = if_get(rt->rt_ifidx);
if (ifp == NULL)
@@ -1058,7 +1059,6 @@ icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer
*r)
sin = *satosin(rt_key(rt));
- s = splsoftnet();
rtdeletemsg(rt, ifp, r->rtt_tableid);
/* Notify TCP layer of increased Path MTU estimate */
@@ -1066,7 +1066,6 @@ icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer
*r)
if (ctlfunc)
(*ctlfunc)(PRC_MTUINC, sintosa(&sin),
r->rtt_tableid, NULL);
- splx(s);
} else {
if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
rt->rt_rmx.rmx_mtu = 0;
@@ -1097,17 +1096,15 @@ void
icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r)
{
struct ifnet *ifp;
- int s;
+
+ SOCKET_ASSERT_LOCKED();
ifp = if_get(rt->rt_ifidx);
if (ifp == NULL)
return;
- if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
- s = splsoftnet();
+ if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST))
rtdeletemsg(rt, ifp, r->rtt_tableid);
- splx(s);
- }
if_put(ifp);
}
diff --git sys/netinet/ip_input.c sys/netinet/ip_input.c
index 7936492..dc10925 100644
--- sys/netinet/ip_input.c
+++ sys/netinet/ip_input.c
@@ -1601,20 +1601,20 @@ ip_sysctl(int *name, u_int namelen, void *oldp, size_t
*oldlenp, void *newp,
ip_mtudisc_timeout_q =
rt_timer_queue_create(ip_mtudisc_timeout);
} else if (ip_mtudisc == 0 && ip_mtudisc_timeout_q != NULL) {
- s = splsoftnet();
+ SOCKET_LOCK(s);
rt_timer_queue_destroy(ip_mtudisc_timeout_q);
ip_mtudisc_timeout_q = NULL;
- splx(s);
+ SOCKET_UNLOCK(s);
}
return error;
case IPCTL_MTUDISCTIMEOUT:
error = sysctl_int(oldp, oldlenp, newp, newlen,
&ip_mtudisc_timeout);
if (ip_mtudisc_timeout_q != NULL) {
- s = splsoftnet();
+ SOCKET_LOCK(s);
rt_timer_queue_change(ip_mtudisc_timeout_q,
ip_mtudisc_timeout);
- splx(s);
+ SOCKET_UNLOCK(s);
}
return (error);
case IPCTL_IPSEC_ENC_ALGORITHM:
@@ -1755,12 +1755,15 @@ ip_send_dispatch(void *xmq)
int s;
mq_delist(mq, &ml);
+ if (ml_empty(&ml))
+ return;
+
KERNEL_LOCK();
- s = splsoftnet();
+ SOCKET_LOCK(s);
while ((m = ml_dequeue(&ml)) != NULL) {
ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
}
- splx(s);
+ SOCKET_UNLOCK(s);
KERNEL_UNLOCK();
}
diff --git sys/netinet/ip_output.c sys/netinet/ip_output.c
index 2c0f416..58a31cd 100644
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -109,6 +109,8 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route
*ro, int flags,
int rv;
#endif
+ SOCKET_ASSERT_LOCKED();
+
#ifdef IPSEC
if (inp && (inp->inp_flags & INP_IPV6) != 0)
panic("ip_output: IPv6 pcb is passed");
diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c
index 2d06f54..8668f15 100644
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -3522,11 +3522,9 @@ syn_cache_timer(void *arg)
struct syn_cache *sc = arg;
int s;
- s = splsoftnet();
- if (sc->sc_flags & SCF_DEAD) {
- splx(s);
- return;
- }
+ SOCKET_LOCK(s);
+ if (sc->sc_flags & SCF_DEAD)
+ goto out;
if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
/* Drop it -- too many retransmissions. */
@@ -3549,14 +3547,15 @@ syn_cache_timer(void *arg)
sc->sc_rxtshift++;
SYN_CACHE_TIMER_ARM(sc);
- splx(s);
+ out:
+ SOCKET_UNLOCK(s);
return;
dropit:
tcpstat.tcps_sc_timed_out++;
syn_cache_rm(sc);
syn_cache_put(sc);
- splx(s);
+ SOCKET_UNLOCK(s);
}
void
diff --git sys/netinet/tcp_timer.c sys/netinet/tcp_timer.c
index 6f4f07e..6ef40fd 100644
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -112,15 +112,13 @@ tcp_delack(void *arg)
* for whatever reason, it will restart the delayed
* ACK callout.
*/
-
- s = splsoftnet();
- if (tp->t_flags & TF_DEAD) {
- splx(s);
- return;
- }
+ SOCKET_LOCK(s);
+ if (tp->t_flags & TF_DEAD)
+ goto out;
tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
- splx(s);
+ out:
+ SOCKET_UNLOCK(s);
}
/*
@@ -193,11 +191,9 @@ tcp_timer_rexmt(void *arg)
uint32_t rto;
int s;
- s = splsoftnet();
- if (tp->t_flags & TF_DEAD) {
- splx(s);
- return;
- }
+ SOCKET_LOCK(s);
+ if (tp->t_flags & TF_DEAD)
+ goto out;
if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
@@ -224,8 +220,7 @@ tcp_timer_rexmt(void *arg)
sin.sin_addr = tp->t_inpcb->inp_faddr;
in_pcbnotifyall(&tcbtable, sintosa(&sin),
tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc);
- splx(s);
- return;
+ goto out;
}
#ifdef TCP_SACK
@@ -376,7 +371,7 @@ tcp_timer_rexmt(void *arg)
(void) tcp_output(tp);
out:
- splx(s);
+ SOCKET_UNLOCK(s);
}
void
@@ -386,11 +381,10 @@ tcp_timer_persist(void *arg)
uint32_t rto;
int s;
- s = splsoftnet();
+ SOCKET_LOCK(s);
if ((tp->t_flags & TF_DEAD) ||
TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
- splx(s);
- return;
+ goto out;
}
tcpstat.tcps_persisttimeo++;
/*
@@ -415,7 +409,7 @@ tcp_timer_persist(void *arg)
(void) tcp_output(tp);
tp->t_force = 0;
out:
- splx(s);
+ SOCKET_UNLOCK(s);
}
void
@@ -424,11 +418,9 @@ tcp_timer_keep(void *arg)
struct tcpcb *tp = arg;
int s;
- s = splsoftnet();
- if (tp->t_flags & TF_DEAD) {
- splx(s);
- return;
- }
+ SOCKET_LOCK(s);
+ if (tp->t_flags & TF_DEAD)
+ goto out;
tcpstat.tcps_keeptimeo++;
if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
@@ -457,15 +449,14 @@ tcp_timer_keep(void *arg)
TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
} else
TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
-
- splx(s);
+ out:
+ SOCKET_UNLOCK(s);
return;
dropit:
tcpstat.tcps_keepdrops++;
tp = tcp_drop(tp, ETIMEDOUT);
-
- splx(s);
+ SOCKET_UNLOCK(s);
}
void
@@ -474,11 +465,9 @@ tcp_timer_2msl(void *arg)
struct tcpcb *tp = arg;
int s;
- s = splsoftnet();
- if (tp->t_flags & TF_DEAD) {
- splx(s);
- return;
- }
+ SOCKET_LOCK(s);
+ if (tp->t_flags & TF_DEAD)
+ goto out;
#ifdef TCP_SACK
tcp_timer_freesack(tp);
@@ -490,5 +479,6 @@ tcp_timer_2msl(void *arg)
else
tp = tcp_close(tp);
- splx(s);
+ out:
+ SOCKET_UNLOCK(s);
}
diff --git sys/netinet6/icmp6.c sys/netinet6/icmp6.c
index c918004..2abbc12 100644
--- sys/netinet6/icmp6.c
+++ sys/netinet6/icmp6.c
@@ -1914,17 +1914,14 @@ icmp6_mtudisc_clone(struct sockaddr *dst, u_int rdomain)
if ((rt->rt_flags & RTF_HOST) == 0) {
struct rt_addrinfo info;
struct rtentry *nrt;
- int s;
bzero(&info, sizeof(info));
info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC;
info.rti_info[RTAX_DST] = dst;
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
- s = splsoftnet();
error = rtrequest(RTM_ADD, &info, rt->rt_priority, &nrt,
rdomain);
- splx(s);
if (error) {
rtfree(rt);
return NULL;
@@ -1947,16 +1944,15 @@ void
icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
{
struct ifnet *ifp;
- int s;
+
+ SOCKET_ASSERT_LOCKED();
ifp = if_get(rt->rt_ifidx);
if (ifp == NULL)
return;
if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
- s = splsoftnet();
rtdeletemsg(rt, ifp, r->rtt_tableid);
- splx(s);
} else {
if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
rt->rt_rmx.rmx_mtu = 0;
@@ -1969,17 +1965,15 @@ void
icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r)
{
struct ifnet *ifp;
- int s;
+
+ SOCKET_ASSERT_LOCKED();
ifp = if_get(rt->rt_ifidx);
if (ifp == NULL)
return;
- if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
- s = splsoftnet();
+ if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST))
rtdeletemsg(rt, ifp, r->rtt_tableid);
- splx(s);
- }
if_put(ifp);
}
diff --git sys/netinet6/ip6_input.c sys/netinet6/ip6_input.c
index 9ac2555..aed3ebd 100644
--- sys/netinet6/ip6_input.c
+++ sys/netinet6/ip6_input.c
@@ -1429,12 +1429,15 @@ ip6_send_dispatch(void *xmq)
int s;
mq_delist(mq, &ml);
+ if (ml_empty(&ml))
+ return;
+
KERNEL_LOCK();
- s = splsoftnet();
+ SOCKET_LOCK(s);
while ((m = ml_dequeue(&ml)) != NULL) {
ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
}
- splx(s);
+ SOCKET_UNLOCK(s);
KERNEL_UNLOCK();
}
diff --git sys/netinet6/nd6.c sys/netinet6/nd6.c
index 34c8d9c..66e6068 100644
--- sys/netinet6/nd6.c
+++ sys/netinet6/nd6.c
@@ -308,10 +308,6 @@ skip1:
void
nd6_llinfo_settimer(struct llinfo_nd6 *ln, int secs)
{
- int s;
-
- s = splsoftnet();
-
if (secs < 0) {
ln->ln_rt->rt_expire = 0;
timeout_del(&ln->ln_timer_ch);
@@ -319,8 +315,6 @@ nd6_llinfo_settimer(struct llinfo_nd6 *ln, int secs)
ln->ln_rt->rt_expire = time_uptime + secs;
timeout_add_sec(&ln->ln_timer_ch, secs);
}
-
- splx(s);
}
void
@@ -333,14 +327,14 @@ nd6_llinfo_timer(void *arg)
struct ifnet *ifp;
struct nd_ifinfo *ndi = NULL;
- s = splsoftnet();
+ SOCKET_LOCK(s);
ln = (struct llinfo_nd6 *)arg;
if ((rt = ln->ln_rt) == NULL)
panic("ln->ln_rt == NULL");
if ((ifp = if_get(rt->rt_ifidx)) == NULL) {
- splx(s);
+ SOCKET_UNLOCK(s);
return;
}
ndi = ND_IFINFO(ifp);
@@ -427,7 +421,7 @@ nd6_llinfo_timer(void *arg)
}
if_put(ifp);
- splx(s);
+ SOCKET_UNLOCK(s);
}
/*
@@ -989,7 +983,7 @@ nd6_rtrequest(struct ifnet *ifp, int req, struct rtentry
*rt)
nd6_inuse++;
nd6_allocated++;
ln->ln_rt = rt;
- timeout_set(&ln->ln_timer_ch, nd6_llinfo_timer, ln);
+ timeout_set_proc(&ln->ln_timer_ch, nd6_llinfo_timer, ln);
/* this is required for "ndp" command. - shin */
if (req == RTM_ADD) {
/*
diff --git sys/sys/systm.h sys/sys/systm.h
index 5ef388b..56d57d3 100644
--- sys/sys/systm.h
+++ sys/sys/systm.h
@@ -290,6 +290,31 @@ struct uio;
int uiomove(void *, size_t, struct uio *);
#if defined(_KERNEL)
+/*
+ * Serialize socket operations to ensure that code paths that were
+ * atomically executed stay atomic until we turn them mpsafe.
+ */
+extern struct rwlock socketlock;
+
+#define SOCKET_LOCK(s)
\
+do { \
+ rw_enter_write(&socketlock); \
+ s = splsoftnet(); \
+} while (/* CONSTCOND */ 0)
+
+#define SOCKET_UNLOCK(s)
\
+do { \
+ splx(s); \
+ rw_exit_write(&socketlock); \
+} while (/* CONSTCOND */ 0)
+
+#define SOCKET_ASSERT_LOCKED()
\
+do { \
+ if (rw_status(&socketlock) != RW_WRITE) \
+ splassert_fail(RW_WRITE, rw_status(&socketlock), __func__);\
+ splsoftassert(IPL_SOFTNET); \
+} while (0)
+
__returns_twice int setjmp(label_t *);
__dead void longjmp(label_t *);
#endif