Diff below enables the NET_LOCK(), again.

What's new?

 - The lock ordering problem with fdplock() should now be fixed.  It is
   also documented, fdplock() should be taken first if both locks are 
   needed.

 - Page faults involving NFS, or any other code path already holding the
   NET_LOCK(), have been fixed.  The recursion is similar to the existing
   vnode problem, so I simply added a hack there.

 - I added some NET_ASSERT_UNLOCKED() to help find other possible
   deadlocks.


Please test it as before and report any problem back to me.

Martin

diff --git sys/kern/kern_rwlock.c sys/kern/kern_rwlock.c
index bdaee0c1ae7..9182b42706a 100644
--- sys/kern/kern_rwlock.c
+++ sys/kern/kern_rwlock.c
@@ -195,6 +195,9 @@ retry:
                unsigned long set = o | op->wait_set;
                int do_sleep;
 
+               if (panicstr)
+                       return (0);
+
                rw_enter_diag(rwl, flags);
 
                if (flags & RW_NOSLEEP)
@@ -205,7 +208,7 @@ retry:
                        sleep_setup_signal(&sls, op->wait_prio | PCATCH);
 
                do_sleep = !rw_cas(&rwl->rwl_owner, o, set);
-
+               NET_ASSERT_UNLOCKED();
                sleep_finish(&sls, do_sleep);
                if ((flags & RW_INTR) &&
                    (error = sleep_finish_signal(&sls)) != 0)
diff --git sys/kern/kern_synch.c sys/kern/kern_synch.c
index beeefe06458..93468c276b3 100644
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -111,6 +111,7 @@ tsleep(const volatile void *ident, int priority, const char 
*wmesg, int timo)
        int hold_count;
 #endif
 
+       NET_ASSERT_UNLOCKED();
        KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);
 
 #ifdef MULTIPROCESSOR
@@ -170,6 +171,7 @@ msleep(const volatile void *ident, struct mutex *mtx, int 
priority,
        int hold_count;
 #endif
 
+       NET_ASSERT_UNLOCKED();
        KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
        KASSERT(mtx != NULL);
 
diff --git sys/kern/uipc_socket.c sys/kern/uipc_socket.c
index 245210e594a..f7c199bf747 100644
--- sys/kern/uipc_socket.c
+++ sys/kern/uipc_socket.c
@@ -256,7 +256,7 @@ soclose(struct socket *so)
                            (so->so_state & SS_NBIO))
                                goto drop;
                        while (so->so_state & SS_ISCONNECTED) {
-                               error = tsleep(&so->so_timeo,
+                               error = rwsleep(&so->so_timeo, &netlock,
                                    PSOCK | PCATCH, "netcls",
                                    so->so_linger * hz);
                                if (error)
@@ -615,7 +615,7 @@ sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
  * followed by an optional mbuf or mbufs containing ancillary data,
  * and then zero or more mbufs of data.
  * In order to avoid blocking network for the entire time here, we splx()
- * and release NET_LOCK() while doing the actual copy to user space.
+ * and release ``netlock'' while doing the actual copy to user space.
  * Although the sockbuf is locked, new data may still be appended,
  * and thus we must maintain consistency of the sockbuf during that time.
  *
@@ -800,9 +800,12 @@ dontblock:
                        if (controlp) {
                                if (pr->pr_domain->dom_externalize &&
                                    mtod(cm, struct cmsghdr *)->cmsg_type ==
-                                   SCM_RIGHTS)
-                                  error = (*pr->pr_domain->dom_externalize)(cm,
-                                      controllen, flags);
+                                   SCM_RIGHTS) {
+                                   NET_UNLOCK(s);
+                                   error = (*pr->pr_domain->dom_externalize)(
+                                       cm, controllen, flags);
+                                  NET_LOCK(s);
+                               }
                                *controlp = cm;
                        } else {
                                /*
@@ -1039,7 +1042,7 @@ sorflush(struct socket *so)
        struct sockbuf asb;
 
        sb->sb_flags |= SB_NOINTR;
-       (void) sblock(sb, M_WAITOK, NULL);
+       (void) sblock(sb, M_WAITOK, &netlock);
        socantrcvmore(so);
        sbunlock(sb);
        asb = *sb;
@@ -1528,7 +1531,10 @@ sorwakeup(struct socket *so)
 #endif
        sowakeup(so, &so->so_rcv);
        if (so->so_upcall) {
+               /* XXXSMP breaks atomicity */
+               rw_exit_write(&netlock);
                (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
+               rw_enter_write(&netlock);
        }
 }
 
diff --git sys/kern/uipc_socket2.c sys/kern/uipc_socket2.c
index 1ac5f515bc5..6dd5abbd473 100644
--- sys/kern/uipc_socket2.c
+++ sys/kern/uipc_socket2.c
@@ -276,7 +276,7 @@ sbwait(struct sockbuf *sb)
        NET_ASSERT_LOCKED();
 
        sb->sb_flagsintr |= SB_WAIT;
-       return (tsleep(&sb->sb_cc,
+       return (rwsleep(&sb->sb_cc, &netlock,
            (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio",
            sb->sb_timeo));
 }
diff --git sys/kern/uipc_syscalls.c sys/kern/uipc_syscalls.c
index ed24be8ab85..7ad629c4d1b 100644
--- sys/kern/uipc_syscalls.c
+++ sys/kern/uipc_syscalls.c
@@ -278,6 +278,7 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, 
socklen_t *anamelen,
 
        headfp = fp;
 redo:
+       fdplock(fdp);
        NET_LOCK(s);
        head = headfp->f_data;
        if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
@@ -296,7 +297,7 @@ redo:
                        head->so_error = ECONNABORTED;
                        break;
                }
-               error = tsleep(&head->so_timeo, PSOCK | PCATCH,
+               error = rwsleep(&head->so_timeo, &netlock, PSOCK | PCATCH,
                    "netcon", 0);
                if (error) {
                        goto bad;
@@ -312,11 +313,9 @@ redo:
        nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
            : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
 
-       fdplock(fdp);
        error = falloc(p, &fp, &tmpfd);
        if (error == 0 && (flags & SOCK_CLOEXEC))
                fdp->fd_ofileflags[tmpfd] |= UF_EXCLOSE;
-       fdpunlock(fdp);
        if (error != 0) {
                /*
                 * Probably ran out of file descriptors.  Wakeup
@@ -336,7 +335,6 @@ redo:
        if (head->so_qlen == 0) {
                NET_UNLOCK(s);
                m_freem(nam);
-               fdplock(fdp);
                fdremove(fdp, tmpfd);
                closef(fp, p);
                fdpunlock(fdp);
@@ -365,7 +363,6 @@ redo:
                /* if an error occurred, free the file descriptor */
                NET_UNLOCK(s);
                m_freem(nam);
-               fdplock(fdp);
                fdremove(fdp, tmpfd);
                closef(fp, p);
                fdpunlock(fdp);
@@ -377,6 +374,7 @@ redo:
        m_freem(nam);
 bad:
        NET_UNLOCK(s);
+       fdpunlock(fdp);
 out:
        FRELE(headfp, p);
        return (error);
@@ -436,7 +434,7 @@ sys_connect(struct proc *p, void *v, register_t *retval)
        }
        NET_LOCK(s);
        while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
-               error = tsleep(&so->so_timeo, PSOCK | PCATCH,
+               error = rwsleep(&so->so_timeo, &netlock, PSOCK | PCATCH,
                    "netcon2", 0);
                if (error) {
                        if (error == EINTR || error == ERESTART)
diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c
index 94204dc527b..556d1d08069 100644
--- sys/kern/uipc_usrreq.c
+++ sys/kern/uipc_usrreq.c
@@ -131,7 +131,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, 
struct mbuf *nam,
                break;
 
        case PRU_BIND:
+               /* XXXSMP breaks atomicity */
+               rw_exit_write(&netlock);
                error = unp_bind(unp, nam, p);
+               rw_enter_write(&netlock);
                break;
 
        case PRU_LISTEN:
@@ -140,7 +143,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, 
struct mbuf *nam,
                break;
 
        case PRU_CONNECT:
+               /* XXXSMP breaks atomicity */
+               rw_exit_write(&netlock);
                error = unp_connect(so, nam, p);
+               rw_enter_write(&netlock);
                break;
 
        case PRU_CONNECT2:
@@ -208,7 +214,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, 
struct mbuf *nam,
                                        error = EISCONN;
                                        break;
                                }
+                               /* XXXSMP breaks atomicity */
+                               rw_exit_write(&netlock);
                                error = unp_connect(so, nam, p);
+                               rw_enter_write(&netlock);
                                if (error)
                                        break;
                        } else {
@@ -485,6 +494,8 @@ unp_connect(struct socket *so, struct mbuf *nam, struct 
proc *p)
        struct nameidata nd;
        int error, s;
 
+       NET_ASSERT_UNLOCKED();
+
        if (soun->sun_family != AF_UNIX)
                return (EAFNOSUPPORT);
 
diff --git sys/net/if.c sys/net/if.c
index 20e9dcbc9de..dd5bfde95b0 100644
--- sys/net/if.c
+++ sys/net/if.c
@@ -230,6 +230,21 @@ struct taskq       *softnettq;
 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
 
 /*
+ * Serialize socket operations to ensure no new sleeping points
+ * are introduced in IP output paths.
+ *
+ * If a code path needs both the NET_LOCK() and fdplock(), they
+ * have to be taken in the following order:
+ *
+ *  fdplock()
+ *  NET_LOCK()
+ *  ...
+ *  NET_UNLOCK()
+ *  fdpunlock()
+ */
+struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
+
+/*
  * Network interface utility routines.
  */
 void
@@ -1076,7 +1091,10 @@ if_clone_create(const char *name, int rdomain)
        if (ifunit(name) != NULL)
                return (EEXIST);
 
+       /* XXXSMP breaks atomicity */
+       rw_exit_write(&netlock);
        ret = (*ifc->ifc_create)(ifc, unit);
+       rw_enter_write(&netlock);
 
        if (ret != 0 || (ifp = ifunit(name)) == NULL)
                return (ret);
@@ -1118,7 +1136,10 @@ if_clone_destroy(const char *name)
                splx(s);
        }
 
+       /* XXXSMP breaks atomicity */
+       rw_exit_write(&netlock);
        ret = (*ifc->ifc_destroy)(ifp);
+       rw_enter_write(&netlock);
 
        return (ret);
 }
diff --git sys/net/if_pflow.c sys/net/if_pflow.c
index 793eaee3d4c..ecbddd6bb7d 100644
--- sys/net/if_pflow.c
+++ sys/net/if_pflow.c
@@ -505,6 +505,8 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                    sizeof(pflowr))))
                        return (error);
 
+               /* XXXSMP breaks atomicity */
+               rw_exit_write(&netlock);
                s = splnet();
                error = pflow_set(sc, &pflowr);
                splx(s);
@@ -522,6 +524,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
                } else
                        ifp->if_flags &= ~IFF_RUNNING;
 
+               rw_enter_write(&netlock);
                break;
 
        default:
diff --git sys/sys/filedesc.h sys/sys/filedesc.h
index 148c8bc0182..730e5794eb1 100644
--- sys/sys/filedesc.h
+++ sys/sys/filedesc.h
@@ -138,7 +138,7 @@ struct file *fd_getfile_mode(struct filedesc *, int, int);
 int    closef(struct file *, struct proc *);
 int    getsock(struct proc *, int, struct file **);
 
-#define        fdplock(fdp)    rw_enter_write(&(fdp)->fd_lock)
+#define        fdplock(fdp)    do { NET_ASSERT_UNLOCKED(); 
rw_enter_write(&(fdp)->fd_lock); } while (0)
 #define        fdpunlock(fdp)  rw_exit_write(&(fdp)->fd_lock)
 #define        fdpassertlocked(fdp)    rw_assert_wrlock(&(fdp)->fd_lock)
 #endif
diff --git sys/sys/systm.h sys/sys/systm.h
index f9d34f34a17..834de358be5 100644
--- sys/sys/systm.h
+++ sys/sys/systm.h
@@ -291,21 +291,35 @@ int       uiomove(void *, size_t, struct uio *);
 
 #if defined(_KERNEL)
 
+#include <sys/rwlock.h>
+
+extern struct rwlock netlock;
+
 #define        NET_LOCK(s)                                                     
\
 do {                                                                   \
+       rw_enter_write(&netlock);                                       \
        s = splsoftnet();                                               \
 } while (0)
 
-#define        NET_UNLOCK(s)                                                   
\
+#define        NET_UNLOCK(s)                                           \
 do {                                                                   \
        splx(s);                                                        \
+       rw_exit_write(&netlock);                                        \
 } while (0)
 
 #define        NET_ASSERT_LOCKED()                                             
\
 do {                                                                   \
+       if (rw_status(&netlock) != RW_WRITE)                            \
+               splassert_fail(RW_WRITE, rw_status(&netlock), __func__);\
        splsoftassert(IPL_SOFTNET);                                     \
 } while (0)
 
+#define        NET_ASSERT_UNLOCKED()                                           
\
+do {                                                                   \
+       if (rw_status(&netlock) == RW_WRITE)                            \
+               splassert_fail(0, rw_status(&netlock), __func__);       \
+} while (0)
+
 __returns_twice int    setjmp(label_t *);
 __dead void    longjmp(label_t *);
 #endif
diff --git sys/uvm/uvm_vnode.c sys/uvm/uvm_vnode.c
index 2abfac2f14d..6590534358b 100644
--- sys/uvm/uvm_vnode.c
+++ sys/uvm/uvm_vnode.c
@@ -1176,6 +1176,15 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int 
npages, int flags, int rw)
                result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL, curproc);
 
        if (result == 0) {
+               int netlocked = (rw_status(&netlock) == RW_WRITE);
+
+               /*
+                * This process may already have the NET_LOCK(), if we
+                * faulted in copyin() or copyout() in the network stack.
+                */
+               if (netlocked)
+                       rw_exit_write(&netlock);
+
                /* NOTE: vnode now locked! */
                if (rw == UIO_READ)
                        result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
@@ -1184,6 +1193,10 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int 
npages, int flags, int rw)
                            (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
                            curproc->p_ucred);
 
+               if (netlocked)
+                       rw_enter_write(&netlock);
+
+
                if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
                        VOP_UNLOCK(vn, curproc);
        }

Reply via email to