Diff below brings back the fix for the deadlock between uvn_io() and
uvn_flush() (uvm/uvm_vnode.c r1.110) that doesn't introduce a lock
ordering issue with the inode lock.

This solution makes a thread return VM_PAGER_AGAIN and restart the fault
if there's some contention on the underlying vnode. This approach has
been previously reverted because tb@ and robert@ reported that chrome &
firefox were starting very slowly with it.

This delay at starting a highly contended multi-threaded process is due
to the 1sec delay in uvm_fault_lower() if VM_PAGER_AGAIN is returned.
So the diff below works around this by using a very small value.  It is
not clear to me which correct value should be used there, using 5msec
basically reduces the 'sleep' time to the overhead of inserting itself on
the global sleep queue.

This has been tested as part of the bigger UVM unlocking diff but I'd
appreciate more specific tests.

I won't put this in before snapshots are built again ;)

Comments?  Oks?

Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
retrieving revision 1.120
diff -u -p -r1.120 uvm_fault.c
--- uvm/uvm_fault.c     26 Mar 2021 13:40:05 -0000      1.120
+++ uvm/uvm_fault.c     2 Oct 2021 06:31:48 -0000
@@ -1260,7 +1260,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 
                        if (result == VM_PAGER_AGAIN) {
                                tsleep_nsec(&nowake, PVM, "fltagain2",
-                                   SEC_TO_NSEC(1));
+                                   MSEC_TO_NSEC(5));
                                return ERESTART;
                        }
 
Index: uvm/uvm_pager.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pager.h,v
retrieving revision 1.32
diff -u -p -r1.32 uvm_pager.h
--- uvm/uvm_pager.h     12 Mar 2021 14:15:49 -0000      1.32
+++ uvm/uvm_pager.h     2 Oct 2021 06:31:48 -0000
@@ -111,6 +111,7 @@ struct uvm_pagerops {
 #define PGO_LOCKED     0x040   /* fault data structures are locked [get] */
 #define PGO_PDFREECLUST        0x080   /* daemon's free cluster flag 
[uvm_pager_put] */
 #define PGO_REALLOCSWAP        0x100   /* reallocate swap area 
[pager_dropcluster] */
+#define PGO_NOWAIT     0x200   /* do not wait for inode lock */
 
 /* page we are not interested in getting */
 #define PGO_DONTCARE ((struct vm_page *) -1L)  /* [get only] */
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
retrieving revision 1.114
diff -u -p -r1.114 uvm_vnode.c
--- uvm/uvm_vnode.c     16 Jun 2021 09:02:21 -0000      1.114
+++ uvm/uvm_vnode.c     2 Oct 2021 06:31:48 -0000
@@ -90,9 +90,6 @@ int            uvn_io(struct uvm_vnode *, vm_page
 int             uvn_put(struct uvm_object *, vm_page_t *, int, boolean_t);
 void            uvn_reference(struct uvm_object *);
 
-int             uvm_vnode_lock(struct uvm_vnode *);
-void            uvm_vnode_unlock(struct uvm_vnode *);
-
 /*
  * master pager structure
  */
@@ -878,16 +875,11 @@ uvn_cluster(struct uvm_object *uobj, vof
 int
 uvn_put(struct uvm_object *uobj, struct vm_page **pps, int npages, int flags)
 {
-       struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
        int retval;
 
        KERNEL_ASSERT_LOCKED();
 
-       retval = uvm_vnode_lock(uvn);
-       if (retval)
-               return retval;
-       retval = uvn_io(uvn, pps, npages, flags, UIO_WRITE);
-       uvm_vnode_unlock(uvn);
+       retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE);
 
        return retval;
 }
@@ -905,10 +897,9 @@ int
 uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
 {
-       struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
        voff_t current_offset;
        struct vm_page *ptmp;
-       int lcv, result, gotpages, retval;
+       int lcv, result, gotpages;
        boolean_t done;
 
        KERNEL_ASSERT_LOCKED();
@@ -983,18 +974,6 @@ uvn_get(struct uvm_object *uobj, voff_t 
        }
 
        /*
-        * Before getting non-resident pages which must be populate with data
-        * using I/O on the backing vnode, lock the same vnode. Such pages are
-        * about to be allocated and busied (i.e. PG_BUSY) by the current
-        * thread. Allocating and busying the page(s) before acquiring the
-        * vnode lock could cause a deadlock with uvn_flush() which acquires the
-        * vnode lock before waiting on pages to become unbusy and then flushed.
-        */
-       retval = uvm_vnode_lock(uvn);
-       if (retval)
-               return retval;
-
-       /*
         * step 2: get non-resident or busy pages.
         * data structures are unlocked.
         *
@@ -1080,15 +1059,14 @@ uvn_get(struct uvm_object *uobj, voff_t 
                 * we have a "fake/busy/clean" page that we just allocated.  do
                 * I/O to fill it with valid data.
                 */
-               result = uvn_io(uvn, &ptmp, 1, PGO_SYNCIO, UIO_READ);
+               result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1,
+                   PGO_SYNCIO|PGO_NOWAIT, UIO_READ);
 
                /*
                 * I/O done.  because we used syncio the result can not be
                 * PEND or AGAIN.
                 */
                if (result != VM_PAGER_OK) {
-                       uvm_vnode_unlock(uvn);
-
                        if (ptmp->pg_flags & PG_WANTED)
                                wakeup(ptmp);
 
@@ -1119,15 +1097,12 @@ uvn_get(struct uvm_object *uobj, voff_t 
 
        }
 
-       uvm_vnode_unlock(uvn);
-
        return (VM_PAGER_OK);
 }
 
 /*
  * uvn_io: do I/O to a vnode
  *
- * => uvn: the backing vnode must be locked
  * => prefer map unlocked (not required)
  * => flags: PGO_SYNCIO -- use sync. I/O
  * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
@@ -1145,6 +1120,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t 
        int waitf, result, mapinflags;
        size_t got, wanted;
        int netunlocked = 0;
+       int lkflags = (flags & PGO_NOWAIT) ? LK_NOWAIT : 0;
 
        /* init values */
        waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
@@ -1213,17 +1189,42 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t 
        }
 
        /* do the I/O!  (XXX: curproc?) */
-       if (rw == UIO_READ)
-               result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
-       else
-               result = VOP_WRITE(vn, &uio,
-                   (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
-                   curproc->p_ucred);
+       /*
+        * This process may already have this vnode locked, if we faulted in
+        * copyin() or copyout() on a region backed by this vnode
+        * while doing I/O to the vnode.  If this is the case, don't
+        * panic.. instead, return the error to the user.
+        *
+        * XXX this is a stopgap to prevent a panic.
+        * Ideally, this kind of operation *should* work.
+        */
+       result = 0;
+       if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+               result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL | lkflags);
+       if (result == 0) {
+               /* NOTE: vnode now locked! */
+               if (rw == UIO_READ)
+                       result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
+               else
+                       result = VOP_WRITE(vn, &uio,
+                           (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
+                           curproc->p_ucred);
+
+               if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+                       VOP_UNLOCK(vn);
+
+       }
 
        if (netunlocked)
                NET_LOCK();
 
-       /* zero out rest of buffer (if needed) */
+
+       /* NOTE: vnode now unlocked (unless vnislocked) */
+       /*
+        * result == unix style errno (0 == OK!)
+        *
+        * zero out rest of buffer (if needed)
+        */
        if (result == 0) {
                got = wanted - uio.uio_resid;
 
@@ -1244,14 +1245,16 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t 
                wakeup(&uvn->u_nio);
        }
 
-       if (result == 0)
+       if (result == 0) {
                return VM_PAGER_OK;
-
-       if (result == EIO) {
-               /* Signal back to uvm_vnode_unlock(). */
-               uvn->u_flags |= UVM_VNODE_IOERROR;
+       } else if (result == EBUSY) {
+               KASSERT(flags & PGO_NOWAIT);
+               return VM_PAGER_AGAIN;
+       } else {
+               while (rebooting)
+                       tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP);
+               return VM_PAGER_ERROR;
        }
-       return VM_PAGER_ERROR;
 }
 
 /*
@@ -1467,53 +1470,4 @@ uvm_vnp_sync(struct mount *mp)
        }
 
        rw_exit_write(&uvn_sync_lock);
-}
-
-int
-uvm_vnode_lock(struct uvm_vnode *uvn)
-{
-       int error;
-       int netunlocked = 0;
-
-       if (uvn->u_flags & UVM_VNODE_VNISLOCKED)
-               return VM_PAGER_OK;
-
-       /*
-        * This thread may already have the net lock, if we faulted in copyin()
-        * or copyout() in the network stack.
-        */
-       if (rw_status(&netlock) == RW_WRITE) {
-               NET_UNLOCK();
-               netunlocked = 1;
-       }
-
-       /*
-        * This thread may already have this vnode locked, if we faulted in
-        * copyin() or copyout() on a region backed by this vnode
-        * while doing I/O to the vnode. If this is the case, don't panic but
-        * instead return an error; as dictated by the LK_RECURSEFAIL flag.
-        *
-        * XXX this is a stopgap to prevent a panic.
-        * Ideally, this kind of operation *should* work.
-        */
-       error = vn_lock(uvn->u_vnode, LK_EXCLUSIVE | LK_RECURSEFAIL);
-       if (netunlocked)
-               NET_LOCK();
-       return error ? VM_PAGER_ERROR : VM_PAGER_OK;
-}
-
-void
-uvm_vnode_unlock(struct uvm_vnode *uvn)
-{
-       int error;
-
-       if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
-               VOP_UNLOCK(uvn->u_vnode);
-
-       error = uvn->u_flags & UVM_VNODE_IOERROR;
-       uvn->u_flags &= ~UVM_VNODE_IOERROR;
-       if (error) {
-               while (rebooting)
-                       tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP);
-       }
 }
Index: uvm/uvm_vnode.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.h,v
retrieving revision 1.17
diff -u -p -r1.17 uvm_vnode.h
--- uvm/uvm_vnode.h     4 Mar 2021 08:38:48 -0000       1.17
+++ uvm/uvm_vnode.h     2 Oct 2021 06:31:48 -0000
@@ -84,7 +84,6 @@ struct uvm_vnode {
                                           i/o sync to clear so it can do
                                           i/o */
 #define UVM_VNODE_WRITEABLE    0x200   /* uvn has pages that are writeable */
-#define UVM_VNODE_IOERROR      0x400   /* i/o error occurred in uvn_io() */
 
 /*
  * UVM_VNODE_BLOCKED: any condition that should new processes from

Reply via email to