A previous fix from gnezdo@ pointed out that `u_flags' accesses should be
serialized by `vmobjlock'. The diff below documents this and fixes the
remaining places where the lock isn't yet taken. One exception still
remains, the first loop of uvm_vnp_sync(). This cannot be fixed right
now due to possible deadlocks, but that's not a reason for not documenting
& fixing the rest of this file.
This has been tested on amd64 and arm64.
Comments? Oks?
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
retrieving revision 1.128
diff -u -p -r1.128 uvm_vnode.c
--- uvm/uvm_vnode.c 10 Sep 2022 16:14:36 -0000 1.128
+++ uvm/uvm_vnode.c 10 Sep 2022 18:23:57 -0000
@@ -68,11 +68,8 @@
* we keep a simpleq of vnodes that are currently being sync'd.
*/
-LIST_HEAD(uvn_list_struct, uvm_vnode);
-struct uvn_list_struct uvn_wlist; /* writeable uvns */
-
-SIMPLEQ_HEAD(uvn_sq_struct, uvm_vnode);
-struct uvn_sq_struct uvn_sync_q; /* sync'ing uvns */
+LIST_HEAD(, uvm_vnode) uvn_wlist; /* [K] writeable uvns */
+SIMPLEQ_HEAD(, uvm_vnode) uvn_sync_q; /* [S] sync'ing uvns */
struct rwlock uvn_sync_lock; /* locks sync operation */
extern int rebooting;
@@ -144,41 +141,40 @@ uvn_attach(struct vnode *vp, vm_prot_t a
struct partinfo pi;
u_quad_t used_vnode_size = 0;
- /* first get a lock on the uvn. */
- while (uvn->u_flags & UVM_VNODE_BLOCKED) {
- uvn->u_flags |= UVM_VNODE_WANTED;
- tsleep_nsec(uvn, PVM, "uvn_attach", INFSLP);
- }
-
/* if we're mapping a BLK device, make sure it is a disk. */
if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) {
return NULL;
}
+ /* first get a lock on the uvn. */
+ rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
+ while (uvn->u_flags & UVM_VNODE_BLOCKED) {
+ uvn->u_flags |= UVM_VNODE_WANTED;
+ rwsleep_nsec(uvn, uvn->u_obj.vmobjlock, PVM, "uvn_attach",
+ INFSLP);
+ }
+
/*
* now uvn must not be in a blocked state.
* first check to see if it is already active, in which case
* we can bump the reference count, check to see if we need to
* add it to the writeable list, and then return.
*/
- rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
if (uvn->u_flags & UVM_VNODE_VALID) { /* already active? */
KASSERT(uvn->u_obj.uo_refs > 0);
uvn->u_obj.uo_refs++; /* bump uvn ref! */
- rw_exit(uvn->u_obj.vmobjlock);
/* check for new writeable uvn */
if ((accessprot & PROT_WRITE) != 0 &&
(uvn->u_flags & UVM_VNODE_WRITEABLE) == 0) {
- LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
- /* we are now on wlist! */
uvn->u_flags |= UVM_VNODE_WRITEABLE;
+ LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
}
+ rw_exit(uvn->u_obj.vmobjlock);
return (&uvn->u_obj);
}
- rw_exit(uvn->u_obj.vmobjlock);
/*
* need to call VOP_GETATTR() to get the attributes, but that could
@@ -189,6 +185,7 @@ uvn_attach(struct vnode *vp, vm_prot_t a
* it.
*/
uvn->u_flags = UVM_VNODE_ALOCK;
+ rw_exit(uvn->u_obj.vmobjlock);
if (vp->v_type == VBLK) {
/*
@@ -213,9 +210,11 @@ uvn_attach(struct vnode *vp, vm_prot_t a
}
if (result != 0) {
+ rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
if (uvn->u_flags & UVM_VNODE_WANTED)
wakeup(uvn);
uvn->u_flags = 0;
+ rw_exit(uvn->u_obj.vmobjlock);
return NULL;
}
@@ -236,18 +235,19 @@ uvn_attach(struct vnode *vp, vm_prot_t a
uvn->u_nio = 0;
uvn->u_size = used_vnode_size;
- /* if write access, we need to add it to the wlist */
- if (accessprot & PROT_WRITE) {
- LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
- uvn->u_flags |= UVM_VNODE_WRITEABLE; /* we are on wlist! */
- }
-
/*
* add a reference to the vnode. this reference will stay as long
* as there is a valid mapping of the vnode. dropped when the
* reference count goes to zero.
*/
vref(vp);
+
+ /* if write access, we need to add it to the wlist */
+ if (accessprot & PROT_WRITE) {
+ uvn->u_flags |= UVM_VNODE_WRITEABLE;
+ LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
+ }
+
if (oldflags & UVM_VNODE_WANTED)
wakeup(uvn);
@@ -273,6 +273,7 @@ uvn_reference(struct uvm_object *uobj)
struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
#endif
+ rw_enter(uobj->vmobjlock, RW_WRITE);
#ifdef DEBUG
if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
printf("uvn_reference: ref=%d, flags=0x%x\n",
@@ -280,7 +281,6 @@ uvn_reference(struct uvm_object *uobj)
panic("uvn_reference: invalid state");
}
#endif
- rw_enter(uobj->vmobjlock, RW_WRITE);
uobj->uo_refs++;
rw_exit(uobj->vmobjlock);
}
@@ -858,6 +858,8 @@ uvn_cluster(struct uvm_object *uobj, vof
struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
*loffset = offset;
+ KASSERT(rw_write_held(uobj->vmobjlock));
+
if (*loffset >= uvn->u_size)
panic("uvn_cluster: offset out of range");
@@ -867,8 +869,6 @@ uvn_cluster(struct uvm_object *uobj, vof
*hoffset = *loffset + MAXBSIZE;
if (*hoffset > round_page(uvn->u_size)) /* past end? */
*hoffset = round_page(uvn->u_size);
-
- return;
}
/*
@@ -1132,8 +1132,9 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
off_t file_offset;
int waitf, result, mapinflags;
size_t got, wanted;
- int netunlocked = 0;
+ int vnlocked, netunlocked = 0;
int lkflags = (flags & PGO_NOWAIT) ? LK_NOWAIT : 0;
+ voff_t uvnsize;
KASSERT(rw_write_held(uobj->vmobjlock));
@@ -1172,6 +1173,8 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
* (this time with sleep ok).
*/
uvn->u_nio++; /* we have an I/O in progress! */
+ vnlocked = (uvn->u_flags & UVM_VNODE_VNISLOCKED);
+ uvnsize = uvn->u_size;
rw_exit(uobj->vmobjlock);
if (kva == 0)
kva = uvm_pagermapin(pps, npages,
@@ -1185,8 +1188,8 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
/* fill out uio/iov */
iov.iov_base = (caddr_t) kva;
wanted = (size_t)npages << PAGE_SHIFT;
- if (file_offset + wanted > uvn->u_size)
- wanted = uvn->u_size - file_offset; /* XXX: needed? */
+ if (file_offset + wanted > uvnsize)
+ wanted = uvnsize - file_offset; /* XXX: needed? */
iov.iov_len = wanted;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
@@ -1217,7 +1220,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
*/
result = 0;
KERNEL_LOCK();
- if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+ if (!vnlocked)
result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL | lkflags);
if (result == 0) {
/* NOTE: vnode now locked! */
@@ -1228,7 +1231,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t
(flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
curproc->p_ucred);
- if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+ if (!vnlocked)
VOP_UNLOCK(vn);
}
Index: uvm/uvm_vnode.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.h,v
retrieving revision 1.19
diff -u -p -r1.19 uvm_vnode.h
--- uvm/uvm_vnode.h 10 Sep 2022 16:14:36 -0000 1.19
+++ uvm/uvm_vnode.h 10 Sep 2022 17:55:33 -0000
@@ -44,24 +44,27 @@
* vnode handle into the VM system.
*/
+struct vnode;
+
/*
* the uvm_vnode structure.
+ *
+ * Locks used to protect struct members in this file:
+ * I immutable after creation
+ * K kernel lock
+ * S uvn_sync_lock
+ * v u_obj's vmobjlock
*/
-
-struct vnode;
-
struct uvm_vnode {
struct uvm_object u_obj; /* the actual VM object */
- struct vnode *u_vnode; /* pointer back to vnode */
- int u_flags; /* flags */
- int u_nio; /* number of running I/O requests */
- voff_t u_size; /* size of object */
+ struct vnode *u_vnode; /* [I] pointer back to vnode */
+ int u_flags; /* [v] flags */
+ int u_nio; /* [v] number of running I/O requests */
+ voff_t u_size; /* [v] size of object */
- /* the following entry is locked by uvn_wl_lock */
- LIST_ENTRY(uvm_vnode) u_wlist; /* list of writeable vnode objects */
+ LIST_ENTRY(uvm_vnode) u_wlist; /* [K] list of writeable vnode objs */
- /* the following entry is locked by uvn_sync_lock */
- SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* vnode objects due for a "sync" */
+ SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* [S] vnode objs due for a "sync" */
};
/*