The diff below enables the NET_LOCK() again.
What's new?
- The lock ordering problem with fdplock() should now be fixed. It is
  also documented: fdplock() must be taken first if both locks are
  needed (see the first sketch after this list).
- Deadlocks caused by page faults taken while holding the NET_LOCK(),
  via NFS or any other code path, have been fixed. The recursion is
  similar to the existing vnode problem, so I simply added a hack there
  (see the second sketch after this list).
- I added some NET_ASSERT_UNLOCKED() calls to help find other possible
  deadlocks.
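To make the ordering rule concrete, here is a minimal sketch; the
function name and body are made up for illustration and are not part
of the diff:

	/* Hypothetical example of a code path that needs both locks. */
	int
	example_needs_both(struct proc *p)
	{
		struct filedesc *fdp = p->p_fd;
		int s, error = 0;

		fdplock(fdp);		/* fd table lock always comes first */
		NET_LOCK(s);		/* then the network lock */

		/* ... work touching the fd table and the stack ... */

		NET_UNLOCK(s);		/* release in reverse order */
		fdpunlock(fdp);

		return (error);
	}

Taking them in the other order now trips the NET_ASSERT_UNLOCKED()
performed by fdplock() instead of deadlocking silently.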
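Here is also a sketch of the temporary-release pattern that the XXXSMP
chunks below use around calls that may sleep or fault; the called
function is hypothetical:

	int s, error;

	NET_LOCK(s);
	/* ... */
	/* XXXSMP breaks atomicity */
	rw_exit_write(&netlock);
	error = function_that_may_sleep();	/* hypothetical */
	rw_enter_write(&netlock);
	/* ... */
	NET_UNLOCK(s);

Since tsleep() and msleep() now begin with NET_ASSERT_UNLOCKED(), a
missing release shows up as an splassert failure instead of a hung
machine.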
Please test it as before and report any problems back to me.
Martin
diff --git sys/kern/kern_rwlock.c sys/kern/kern_rwlock.c
index bdaee0c1ae7..9182b42706a 100644
--- sys/kern/kern_rwlock.c
+++ sys/kern/kern_rwlock.c
@@ -195,6 +195,9 @@ retry:
unsigned long set = o | op->wait_set;
int do_sleep;
+ if (panicstr)
+ return (0);
+
rw_enter_diag(rwl, flags);
if (flags & RW_NOSLEEP)
@@ -205,7 +208,7 @@ retry:
sleep_setup_signal(&sls, op->wait_prio | PCATCH);
do_sleep = !rw_cas(&rwl->rwl_owner, o, set);
-
+ NET_ASSERT_UNLOCKED();
sleep_finish(&sls, do_sleep);
if ((flags & RW_INTR) &&
(error = sleep_finish_signal(&sls)) != 0)
diff --git sys/kern/kern_synch.c sys/kern/kern_synch.c
index beeefe06458..93468c276b3 100644
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -111,6 +111,7 @@ tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
int hold_count;
#endif
+ NET_ASSERT_UNLOCKED();
KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);
#ifdef MULTIPROCESSOR
@@ -170,6 +171,7 @@ msleep(const volatile void *ident, struct mutex *mtx, int priority,
int hold_count;
#endif
+ NET_ASSERT_UNLOCKED();
KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
KASSERT(mtx != NULL);
diff --git sys/kern/uipc_socket.c sys/kern/uipc_socket.c
index 245210e594a..f7c199bf747 100644
--- sys/kern/uipc_socket.c
+++ sys/kern/uipc_socket.c
@@ -256,7 +256,7 @@ soclose(struct socket *so)
(so->so_state & SS_NBIO))
goto drop;
while (so->so_state & SS_ISCONNECTED) {
- error = tsleep(&so->so_timeo,
+ error = rwsleep(&so->so_timeo, &netlock,
PSOCK | PCATCH, "netcls",
so->so_linger * hz);
if (error)
@@ -615,7 +615,7 @@ sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
* followed by an optional mbuf or mbufs containing ancillary data,
* and then zero or more mbufs of data.
* In order to avoid blocking network for the entire time here, we splx()
- * and release NET_LOCK() while doing the actual copy to user space.
+ * and release ``netlock'' while doing the actual copy to user space.
* Although the sockbuf is locked, new data may still be appended,
* and thus we must maintain consistency of the sockbuf during that time.
*
@@ -800,9 +800,12 @@ dontblock:
if (controlp) {
if (pr->pr_domain->dom_externalize &&
mtod(cm, struct cmsghdr *)->cmsg_type ==
- SCM_RIGHTS)
- error = (*pr->pr_domain->dom_externalize)(cm,
- controllen, flags);
+ SCM_RIGHTS) {
+ NET_UNLOCK(s);
+ error = (*pr->pr_domain->dom_externalize)(
+ cm, controllen, flags);
+ NET_LOCK(s);
+ }
*controlp = cm;
} else {
/*
@@ -1039,7 +1042,7 @@ sorflush(struct socket *so)
struct sockbuf asb;
sb->sb_flags |= SB_NOINTR;
- (void) sblock(sb, M_WAITOK, NULL);
+ (void) sblock(sb, M_WAITOK, &netlock);
socantrcvmore(so);
sbunlock(sb);
asb = *sb;
@@ -1528,7 +1531,10 @@ sorwakeup(struct socket *so)
#endif
sowakeup(so, &so->so_rcv);
if (so->so_upcall) {
+ /* XXXSMP breaks atomicity */
+ rw_exit_write(&netlock);
(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
+ rw_enter_write(&netlock);
}
}
diff --git sys/kern/uipc_socket2.c sys/kern/uipc_socket2.c
index 1ac5f515bc5..6dd5abbd473 100644
--- sys/kern/uipc_socket2.c
+++ sys/kern/uipc_socket2.c
@@ -276,7 +276,7 @@ sbwait(struct sockbuf *sb)
NET_ASSERT_LOCKED();
sb->sb_flagsintr |= SB_WAIT;
- return (tsleep(&sb->sb_cc,
+ return (rwsleep(&sb->sb_cc, &netlock,
(sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio",
sb->sb_timeo));
}
diff --git sys/kern/uipc_syscalls.c sys/kern/uipc_syscalls.c
index ed24be8ab85..7ad629c4d1b 100644
--- sys/kern/uipc_syscalls.c
+++ sys/kern/uipc_syscalls.c
@@ -278,6 +278,7 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
headfp = fp;
redo:
+ fdplock(fdp);
NET_LOCK(s);
head = headfp->f_data;
if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
@@ -296,7 +297,7 @@ redo:
head->so_error = ECONNABORTED;
break;
}
- error = tsleep(&head->so_timeo, PSOCK | PCATCH,
+ error = rwsleep(&head->so_timeo, &netlock, PSOCK | PCATCH,
"netcon", 0);
if (error) {
goto bad;
@@ -312,11 +313,9 @@ redo:
nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
: (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
- fdplock(fdp);
error = falloc(p, &fp, &tmpfd);
if (error == 0 && (flags & SOCK_CLOEXEC))
fdp->fd_ofileflags[tmpfd] |= UF_EXCLOSE;
- fdpunlock(fdp);
if (error != 0) {
/*
* Probably ran out of file descriptors. Wakeup
@@ -336,7 +335,6 @@ redo:
if (head->so_qlen == 0) {
NET_UNLOCK(s);
m_freem(nam);
- fdplock(fdp);
fdremove(fdp, tmpfd);
closef(fp, p);
fdpunlock(fdp);
@@ -365,7 +363,6 @@ redo:
/* if an error occurred, free the file descriptor */
NET_UNLOCK(s);
m_freem(nam);
- fdplock(fdp);
fdremove(fdp, tmpfd);
closef(fp, p);
fdpunlock(fdp);
@@ -377,6 +374,7 @@ redo:
m_freem(nam);
bad:
NET_UNLOCK(s);
+ fdpunlock(fdp);
out:
FRELE(headfp, p);
return (error);
@@ -436,7 +434,7 @@ sys_connect(struct proc *p, void *v, register_t *retval)
}
NET_LOCK(s);
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
- error = tsleep(&so->so_timeo, PSOCK | PCATCH,
+ error = rwsleep(&so->so_timeo, &netlock, PSOCK | PCATCH,
"netcon2", 0);
if (error) {
if (error == EINTR || error == ERESTART)
diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c
index 94204dc527b..556d1d08069 100644
--- sys/kern/uipc_usrreq.c
+++ sys/kern/uipc_usrreq.c
@@ -131,7 +131,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
break;
case PRU_BIND:
+ /* XXXSMP breaks atomicity */
+ rw_exit_write(&netlock);
error = unp_bind(unp, nam, p);
+ rw_enter_write(&netlock);
break;
case PRU_LISTEN:
@@ -140,7 +143,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
break;
case PRU_CONNECT:
+ /* XXXSMP breaks atomicity */
+ rw_exit_write(&netlock);
error = unp_connect(so, nam, p);
+ rw_enter_write(&netlock);
break;
case PRU_CONNECT2:
@@ -208,7 +214,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
error = EISCONN;
break;
}
+ /* XXXSMP breaks atomicity */
+ rw_exit_write(&netlock);
error = unp_connect(so, nam, p);
+ rw_enter_write(&netlock);
if (error)
break;
} else {
@@ -485,6 +494,8 @@ unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
struct nameidata nd;
int error, s;
+ NET_ASSERT_UNLOCKED();
+
if (soun->sun_family != AF_UNIX)
return (EAFNOSUPPORT);
diff --git sys/net/if.c sys/net/if.c
index 20e9dcbc9de..dd5bfde95b0 100644
--- sys/net/if.c
+++ sys/net/if.c
@@ -230,6 +230,21 @@ struct taskq *softnettq;
struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
/*
+ * Serialize socket operations to ensure no new sleeping points
+ * are introduced in IP output paths.
+ *
+ * If a code path needs both the NET_LOCK() and fdplock(), they
+ * have to be taken in the following order:
+ *
+ * fdplock()
+ * NET_LOCK()
+ * ...
+ * NET_UNLOCK()
+ * fdpunlock()
+ */
+struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
+
+/*
* Network interface utility routines.
*/
void
@@ -1076,7 +1091,10 @@ if_clone_create(const char *name, int rdomain)
if (ifunit(name) != NULL)
return (EEXIST);
+ /* XXXSMP breaks atomicity */
+ rw_exit_write(&netlock);
ret = (*ifc->ifc_create)(ifc, unit);
+ rw_enter_write(&netlock);
if (ret != 0 || (ifp = ifunit(name)) == NULL)
return (ret);
@@ -1118,7 +1136,10 @@ if_clone_destroy(const char *name)
splx(s);
}
+ /* XXXSMP breaks atomicity */
+ rw_exit_write(&netlock);
ret = (*ifc->ifc_destroy)(ifp);
+ rw_enter_write(&netlock);
return (ret);
}
diff --git sys/net/if_pflow.c sys/net/if_pflow.c
index 793eaee3d4c..ecbddd6bb7d 100644
--- sys/net/if_pflow.c
+++ sys/net/if_pflow.c
@@ -505,6 +505,8 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
sizeof(pflowr))))
return (error);
+ /* XXXSMP breaks atomicity */
+ rw_exit_write(&netlock);
s = splnet();
error = pflow_set(sc, &pflowr);
splx(s);
@@ -522,6 +524,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
} else
ifp->if_flags &= ~IFF_RUNNING;
+ rw_enter_write(&netlock);
break;
default:
diff --git sys/sys/filedesc.h sys/sys/filedesc.h
index 148c8bc0182..730e5794eb1 100644
--- sys/sys/filedesc.h
+++ sys/sys/filedesc.h
@@ -138,7 +138,7 @@ struct file *fd_getfile_mode(struct filedesc *, int, int);
int closef(struct file *, struct proc *);
int getsock(struct proc *, int, struct file **);
-#define fdplock(fdp) rw_enter_write(&(fdp)->fd_lock)
+#define fdplock(fdp) do { NET_ASSERT_UNLOCKED(); rw_enter_write(&(fdp)->fd_lock); } while (0)
#define fdpunlock(fdp) rw_exit_write(&(fdp)->fd_lock)
#define fdpassertlocked(fdp) rw_assert_wrlock(&(fdp)->fd_lock)
#endif
diff --git sys/sys/systm.h sys/sys/systm.h
index f9d34f34a17..834de358be5 100644
--- sys/sys/systm.h
+++ sys/sys/systm.h
@@ -291,21 +291,35 @@ int uiomove(void *, size_t, struct uio *);
#if defined(_KERNEL)
+#include <sys/rwlock.h>
+
+extern struct rwlock netlock;
+
#define NET_LOCK(s) \
do { \
+ rw_enter_write(&netlock); \
s = splsoftnet(); \
} while (0)
-#define NET_UNLOCK(s) \
+#define NET_UNLOCK(s) \
do { \
splx(s); \
+ rw_exit_write(&netlock); \
} while (0)
#define NET_ASSERT_LOCKED() \
do { \
+ if (rw_status(&netlock) != RW_WRITE) \
+ splassert_fail(RW_WRITE, rw_status(&netlock), __func__);\
splsoftassert(IPL_SOFTNET); \
} while (0)
+#define NET_ASSERT_UNLOCKED() \
+do { \
+ if (rw_status(&netlock) == RW_WRITE) \
+ splassert_fail(0, rw_status(&netlock), __func__); \
+} while (0)
+
__returns_twice int setjmp(label_t *);
__dead void longjmp(label_t *);
#endif
diff --git sys/uvm/uvm_vnode.c sys/uvm/uvm_vnode.c
index 2abfac2f14d..6590534358b 100644
--- sys/uvm/uvm_vnode.c
+++ sys/uvm/uvm_vnode.c
@@ -1176,6 +1176,15 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL, curproc);
if (result == 0) {
+ int netlocked = (rw_status(&netlock) == RW_WRITE);
+
+ /*
+ * This process may already have the NET_LOCK(), if we
+ * faulted in copyin() or copyout() in the network stack.
+ */
+ if (netlocked)
+ rw_exit_write(&netlock);
+
/* NOTE: vnode now locked! */
if (rw == UIO_READ)
result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
@@ -1184,6 +1193,9 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
(flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
curproc->p_ucred);
+ if (netlocked)
+ rw_enter_write(&netlock);
+
if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
VOP_UNLOCK(vn, curproc);
}