The diff below brings back the fix for the deadlock between uvn_io() and uvn_flush() (uvm/uvm_vnode.c r1.110), this time without introducing a lock ordering issue with the inode lock.
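For reference, the problematic ordering: the fault path allocates and busies pages (PG_BUSY) and then needs the vnode lock for I/O, while uvn_flush() takes the vnode lock and then waits for busy pages. Below is a minimal userland sketch of the trylock-and-restart scheme the diff adopts; pthreads stand in for the kernel primitives, every identifier is invented for the example, and it is an illustration, not the kernel code:

/*
 * Userland sketch of the trylock-and-restart scheme (all names are
 * invented): "vnode_lock" plays the vnode lock, "page_busy" plays
 * PG_BUSY.  flusher() takes the locks in uvn_flush()'s order,
 * faulter() in the fault path's order.
 */
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t vnode_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t page_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t page_cv = PTHREAD_COND_INITIALIZER;
static int page_busy;

static void
set_busy(int busy)
{
	pthread_mutex_lock(&page_mtx);
	page_busy = busy;
	pthread_cond_broadcast(&page_cv);
	pthread_mutex_unlock(&page_mtx);
}

/* Take the "vnode lock", then wait for the busy page: uvn_flush()'s order. */
static void *
flusher(void *arg)
{
	pthread_mutex_lock(&vnode_lock);
	pthread_mutex_lock(&page_mtx);
	while (page_busy)
		pthread_cond_wait(&page_cv, &page_mtx);
	pthread_mutex_unlock(&page_mtx);
	pthread_mutex_unlock(&vnode_lock);
	return NULL;
}

/*
 * Busy the page, then try the "vnode lock": the fault path's order.
 * Blocking here could deadlock with flusher(); instead un-busy the
 * page and restart after a short sleep, the equivalent of returning
 * VM_PAGER_AGAIN and ERESTART.
 */
static void *
faulter(void *arg)
{
	struct timespec ts = { 0, 5000000 };	/* 5msec */

	for (;;) {
		set_busy(1);
		if (pthread_mutex_trylock(&vnode_lock) == 0) {
			/* the "I/O" would happen here */
			pthread_mutex_unlock(&vnode_lock);
			set_busy(0);
			return NULL;
		}
		set_busy(0);		/* back out... */
		nanosleep(&ts, NULL);	/* ...and retry */
	}
}

int
main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, flusher, NULL);
	pthread_create(&t2, NULL, faulter, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	printf("no deadlock\n");
	return 0;
}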
This solution makes a thread return VM_PAGER_AGAIN and restart the fault if there is contention on the underlying vnode.

This approach was previously reverted because tb@ and robert@ reported that chrome & firefox were starting very slowly with it. The slow start of a highly contended multi-threaded process comes from the 1sec delay in uvm_fault_lower() when VM_PAGER_AGAIN is returned: a process that takes a few hundred contended faults while starting up spends that many seconds doing nothing but sleeping. So the diff below works around this by using a very small value. It is not clear to me what the correct value should be; using 5msec basically reduces the 'sleep' time to not much more than the overhead of putting the thread on the global sleep queue and waking it up.

This has been tested as part of the bigger UVM unlocking diff but I'd appreciate more specific tests (a sketch of the kind of workload that exercises this path is appended after the diff). I won't put this in before snapshots are built again ;)

Comments?  Oks?

Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
retrieving revision 1.120
diff -u -p -r1.120 uvm_fault.c
--- uvm/uvm_fault.c	26 Mar 2021 13:40:05 -0000	1.120
+++ uvm/uvm_fault.c	2 Oct 2021 06:31:48 -0000
@@ -1260,7 +1260,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 
 	if (result == VM_PAGER_AGAIN) {
 		tsleep_nsec(&nowake, PVM, "fltagain2",
-		    SEC_TO_NSEC(1));
+		    MSEC_TO_NSEC(5));
 		return ERESTART;
 	}
 
Index: uvm/uvm_pager.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pager.h,v
retrieving revision 1.32
diff -u -p -r1.32 uvm_pager.h
--- uvm/uvm_pager.h	12 Mar 2021 14:15:49 -0000	1.32
+++ uvm/uvm_pager.h	2 Oct 2021 06:31:48 -0000
@@ -111,6 +111,7 @@ struct uvm_pagerops {
 #define PGO_LOCKED	0x040	/* fault data structures are locked [get] */
 #define PGO_PDFREECLUST	0x080	/* daemon's free cluster flag [uvm_pager_put] */
 #define PGO_REALLOCSWAP	0x100	/* reallocate swap area [pager_dropcluster] */
+#define PGO_NOWAIT	0x200	/* do not wait for inode lock */
 
 /* page we are not interested in getting */
 #define PGO_DONTCARE ((struct vm_page *) -1L)	/* [get only] */
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
retrieving revision 1.114
diff -u -p -r1.114 uvm_vnode.c
--- uvm/uvm_vnode.c	16 Jun 2021 09:02:21 -0000	1.114
+++ uvm/uvm_vnode.c	2 Oct 2021 06:31:48 -0000
@@ -90,9 +90,6 @@ int	 uvn_io(struct uvm_vnode *, vm_page
 int	 uvn_put(struct uvm_object *, vm_page_t *, int, boolean_t);
 void	 uvn_reference(struct uvm_object *);
 
-int	uvm_vnode_lock(struct uvm_vnode *);
-void	uvm_vnode_unlock(struct uvm_vnode *);
-
 /*
  * master pager structure
  */
@@ -878,16 +875,11 @@ uvn_cluster(struct uvm_object *uobj, vof
 int
 uvn_put(struct uvm_object *uobj, struct vm_page **pps, int npages, int flags)
 {
-	struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
 	int retval;
 
 	KERNEL_ASSERT_LOCKED();
 
-	retval = uvm_vnode_lock(uvn);
-	if (retval)
-		return retval;
-	retval = uvn_io(uvn, pps, npages, flags, UIO_WRITE);
-	uvm_vnode_unlock(uvn);
+	retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE);
 
 	return retval;
 }
@@ -905,10 +897,9 @@ int
 uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
 {
-	struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
 	voff_t current_offset;
 	struct vm_page *ptmp;
-	int lcv, result, gotpages, retval;
+	int lcv, result, gotpages;
 	boolean_t done;
 
 	KERNEL_ASSERT_LOCKED();
@@ -983,18 +974,6 @@ uvn_get(struct uvm_object *uobj, voff_t
 	}
 
 	/*
-	 * Before getting non-resident pages which must be populate with data
-	 * using I/O on the backing vnode, lock the same vnode. Such pages are
-	 * about to be allocated and busied (i.e. PG_BUSY) by the current
-	 * thread. Allocating and busying the page(s) before acquiring the
-	 * vnode lock could cause a deadlock with uvn_flush() which acquires the
-	 * vnode lock before waiting on pages to become unbusy and then flushed.
-	 */
-	retval = uvm_vnode_lock(uvn);
-	if (retval)
-		return retval;
-
-	/*
 	 * step 2: get non-resident or busy pages.
 	 * data structures are unlocked.
 	 *
@@ -1080,15 +1059,14 @@ uvn_get(struct uvm_object *uobj, voff_t
 		 * we have a "fake/busy/clean" page that we just allocated.  do
 		 * I/O to fill it with valid data.
 		 */
-		result = uvn_io(uvn, &ptmp, 1, PGO_SYNCIO, UIO_READ);
+		result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1,
+		    PGO_SYNCIO|PGO_NOWAIT, UIO_READ);
 
 		/*
 		 * I/O done.  because we used syncio the result can not be
 		 * PEND or AGAIN.
 		 */
 		if (result != VM_PAGER_OK) {
-			uvm_vnode_unlock(uvn);
-
 			if (ptmp->pg_flags & PG_WANTED)
 				wakeup(ptmp);
 
@@ -1119,15 +1097,12 @@ uvn_get(struct uvm_object *uobj, voff_t
 
 	}
 
-	uvm_vnode_unlock(uvn);
-
 	return (VM_PAGER_OK);
 }
 
 /*
  * uvn_io: do I/O to a vnode
  *
- * => uvn: the backing vnode must be locked
  * => prefer map unlocked (not required)
  * => flags: PGO_SYNCIO -- use sync. I/O
  * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
@@ -1145,6 +1120,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
 	int waitf, result, mapinflags;
 	size_t got, wanted;
 	int netunlocked = 0;
+	int lkflags = (flags & PGO_NOWAIT) ? LK_NOWAIT : 0;
 
 	/* init values */
 	waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
@@ -1213,17 +1189,42 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
 	}
 
 	/* do the I/O!  (XXX: curproc?) */
-	if (rw == UIO_READ)
-		result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
-	else
-		result = VOP_WRITE(vn, &uio,
-		    (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
-		    curproc->p_ucred);
+	/*
+	 * This process may already have this vnode locked, if we faulted in
+	 * copyin() or copyout() on a region backed by this vnode
+	 * while doing I/O to the vnode.  If this is the case, don't
+	 * panic.. instead, return the error to the user.
+	 *
+	 * XXX this is a stopgap to prevent a panic.
+	 * Ideally, this kind of operation *should* work.
+	 */
+	result = 0;
+	if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+		result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL | lkflags);
+	if (result == 0) {
+		/* NOTE: vnode now locked! */
+		if (rw == UIO_READ)
+			result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
+		else
+			result = VOP_WRITE(vn, &uio,
+			    (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
+			    curproc->p_ucred);
+
+		if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+			VOP_UNLOCK(vn);
+
+	}
 
 	if (netunlocked)
 		NET_LOCK();
 
-	/* zero out rest of buffer (if needed) */
+
+	/* NOTE: vnode now unlocked (unless vnislocked) */
+	/*
+	 * result == unix style errno (0 == OK!)
+	 *
+	 * zero out rest of buffer (if needed)
+	 */
 	if (result == 0) {
 		got = wanted - uio.uio_resid;
 
@@ -1244,14 +1245,16 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
 		wakeup(&uvn->u_nio);
 	}
 
-	if (result == 0)
+	if (result == 0) {
 		return VM_PAGER_OK;
-
-	if (result == EIO) {
-		/* Signal back to uvm_vnode_unlock(). */
-		uvn->u_flags |= UVM_VNODE_IOERROR;
+	} else if (result == EBUSY) {
+		KASSERT(flags & PGO_NOWAIT);
+		return VM_PAGER_AGAIN;
+	} else {
+		while (rebooting)
+			tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP);
+		return VM_PAGER_ERROR;
 	}
-	return VM_PAGER_ERROR;
 }
 
 /*
@@ -1467,53 +1470,4 @@ uvm_vnp_sync(struct mount *mp)
 	}
 
 	rw_exit_write(&uvn_sync_lock);
-}
-
-int
-uvm_vnode_lock(struct uvm_vnode *uvn)
-{
-	int error;
-	int netunlocked = 0;
-
-	if (uvn->u_flags & UVM_VNODE_VNISLOCKED)
-		return VM_PAGER_OK;
-
-	/*
-	 * This thread may already have the net lock, if we faulted in copyin()
-	 * or copyout() in the network stack.
-	 */
-	if (rw_status(&netlock) == RW_WRITE) {
-		NET_UNLOCK();
-		netunlocked = 1;
-	}
-
-	/*
-	 * This thread may already have this vnode locked, if we faulted in
-	 * copyin() or copyout() on a region backed by this vnode
-	 * while doing I/O to the vnode.  If this is the case, don't panic but
-	 * instead return an error; as dictated by the LK_RECURSEFAIL flag.
-	 *
-	 * XXX this is a stopgap to prevent a panic.
-	 * Ideally, this kind of operation *should* work.
-	 */
-	error = vn_lock(uvn->u_vnode, LK_EXCLUSIVE | LK_RECURSEFAIL);
-	if (netunlocked)
-		NET_LOCK();
-	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
-}
-
-void
-uvm_vnode_unlock(struct uvm_vnode *uvn)
-{
-	int error;
-
-	if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
-		VOP_UNLOCK(uvn->u_vnode);
-
-	error = uvn->u_flags & UVM_VNODE_IOERROR;
-	uvn->u_flags &= ~UVM_VNODE_IOERROR;
-	if (error) {
-		while (rebooting)
-			tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP);
-	}
 }
Index: uvm/uvm_vnode.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.h,v
retrieving revision 1.17
diff -u -p -r1.17 uvm_vnode.h
--- uvm/uvm_vnode.h	4 Mar 2021 08:38:48 -0000	1.17
+++ uvm/uvm_vnode.h	2 Oct 2021 06:31:48 -0000
@@ -84,7 +84,6 @@ struct uvm_vnode {
 					   i/o sync to clear so it can do i/o */
 #define UVM_VNODE_WRITEABLE	0x200	/* uvn has pages that are writeable */
-#define UVM_VNODE_IOERROR	0x400	/* i/o error occurred in uvn_io() */
 
 /*
  * UVM_VNODE_BLOCKED: any condition that should new processes from
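
Regarding the "more specific tests" mentioned above, here is a sketch of the kind of workload that should exercise the contended path: several threads faulting pages of a shared file-backed mapping while another thread msync()s the same range, which pushes uvn_flush() against concurrent uvn_get()/uvn_io(). The file name, sizes and loop counts below are arbitrary, and error handling is trimmed:

/*
 * Stress sketch for the contended vnode path.  fault_loop() threads
 * fault pages of a MAP_SHARED file mapping in and dirty them while
 * flush_loop() msync()s the whole range.
 */
#include <sys/mman.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

#define FILESZ	(64 * 1024 * 1024)
#define PGSZ	4096

static char *map;

static void *
fault_loop(void *arg)
{
	volatile char sink;
	size_t off;
	int i;

	for (i = 0; i < 100000; i++) {
		off = (size_t)arc4random_uniform(FILESZ / PGSZ) * PGSZ;
		sink = map[off];	/* fault the page in (uvn_get()) */
		map[off] = sink + 1;	/* dirty it so there is work to flush */
	}
	return NULL;
}

static void *
flush_loop(void *arg)
{
	int i;

	for (i = 0; i < 1000; i++)
		msync(map, FILESZ, MS_SYNC);	/* uvn_flush() vs. busy pages */
	return NULL;
}

int
main(void)
{
	pthread_t t[4];
	int fd, i;

	fd = open("testfile", O_RDWR | O_CREAT, 0600);
	ftruncate(fd, FILESZ);
	map = mmap(NULL, FILESZ, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	for (i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, fault_loop, NULL);
	pthread_create(&t[3], NULL, flush_loop, NULL);
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);

	munmap(map, FILESZ);
	close(fd);
	return 0;
}

With the old 1sec sleep the fault_loop threads should spend most of their time in "fltagain2"; with 5msec they should make steady progress.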