The diff below unlocks the bottom part of the UVM fault handler. I'm
interested in squashing the remaining bugs, so please test with your
usual setup & report back.
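
For reference, the core pattern the diff introduces is to hold the new
per-object rwlock, uobj->vmobjlock, write-locked around pager operations
and page insert/removal instead of relying on the KERNEL_LOCK(). A rough
sketch of the calling convention (illustrative only; example_deactivate()
below is not part of the diff):

	/*
	 * Illustrative sketch, not in the diff: callers bracket pager
	 * operations with the object's rwlock.
	 */
	void
	example_deactivate(struct uvm_object *uobj, voff_t start, voff_t end)
	{
		rw_enter(uobj->vmobjlock, RW_WRITE);
		/* pgo_flush() now runs with vmobjlock write-held. */
		(void)uobj->pgops->pgo_flush(uobj, start, end, PGO_DEACTIVATE);
		rw_exit(uobj->vmobjlock);
	}

Synchronous pager I/O (e.g. uvm_swap_get() in uao_get()) drops the lock
around the actual I/O and re-takes it when done, and waits on busy pages
now use rwsleep_nsec() on vmobjlock instead of tsleep_nsec().
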
Thanks,
Martin
diff --git sys/arch/amd64/conf/GENERIC.MP sys/arch/amd64/conf/GENERIC.MP
index bb842f6d96e..e5334c19eac 100644
--- sys/arch/amd64/conf/GENERIC.MP
+++ sys/arch/amd64/conf/GENERIC.MP
@@ -4,6 +4,6 @@ include "arch/amd64/conf/GENERIC"
option MULTIPROCESSOR
#option MP_LOCKDEBUG
-#option WITNESS
+option WITNESS
cpu* at mainbus?
diff --git sys/arch/i386/conf/GENERIC.MP sys/arch/i386/conf/GENERIC.MP
index 980a572b8fd..ef7ded61501 100644
--- sys/arch/i386/conf/GENERIC.MP
+++ sys/arch/i386/conf/GENERIC.MP
@@ -7,6 +7,6 @@ include "arch/i386/conf/GENERIC"
option MULTIPROCESSOR # Multiple processor support
#option MP_LOCKDEBUG
-#option WITNESS
+option WITNESS
cpu* at mainbus?
diff --git sys/dev/pci/drm/i915/gem/i915_gem_shmem.c sys/dev/pci/drm/i915/gem/i915_gem_shmem.c
index ce8e2eca141..47b567087e7 100644
--- sys/dev/pci/drm/i915/gem/i915_gem_shmem.c
+++ sys/dev/pci/drm/i915/gem/i915_gem_shmem.c
@@ -268,8 +268,10 @@ shmem_truncate(struct drm_i915_gem_object *obj)
#ifdef __linux__
shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
#else
+ rw_enter(obj->base.uao->vmobjlock, RW_WRITE);
obj->base.uao->pgops->pgo_flush(obj->base.uao, 0, obj->base.size,
PGO_ALLPAGES | PGO_FREE);
+ rw_exit(obj->base.uao->vmobjlock);
#endif
obj->mm.madv = __I915_MADV_PURGED;
obj->mm.pages = ERR_PTR(-EFAULT);
diff --git sys/dev/pci/drm/radeon/radeon_ttm.c sys/dev/pci/drm/radeon/radeon_ttm.c
index eb879b5c72c..837a9f94298 100644
--- sys/dev/pci/drm/radeon/radeon_ttm.c
+++ sys/dev/pci/drm/radeon/radeon_ttm.c
@@ -1006,6 +1006,8 @@ radeon_ttm_fault(struct uvm_faultinfo *ufi, vaddr_t
vaddr, vm_page_t *pps,
struct radeon_device *rdev;
int r;
+ KASSERT(rw_write_held(ufi->entry->object.uvm_obj->vmobjlock));
+
bo = (struct drm_gem_object *)ufi->entry->object.uvm_obj;
rdev = bo->dev->dev_private;
down_read(&rdev->pm.mclk_lock);
diff --git sys/uvm/uvm_aobj.c sys/uvm/uvm_aobj.c
index 20051d95dc1..127218c4c40 100644
--- sys/uvm/uvm_aobj.c
+++ sys/uvm/uvm_aobj.c
@@ -31,7 +31,7 @@
/*
* uvm_aobj.c: anonymous memory uvm_object pager
*
- * author: Chuck Silvers <[email protected]>
+ * author: Chuck Silvers <[email protected]>
* started: Jan-1998
*
* - design mostly from Chuck Cranor
@@ -184,7 +184,7 @@ const struct uvm_pagerops aobj_pager = {
* deadlock.
*/
static LIST_HEAD(aobjlist, uvm_aobj) uao_list =
LIST_HEAD_INITIALIZER(uao_list);
-static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_NONE);
+static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
/*
@@ -277,6 +277,7 @@ uao_find_swslot(struct uvm_object *uobj, int pageidx)
* uao_set_swslot: set the swap slot for a page in an aobj.
*
* => setting a slot to zero frees the slot
+ * => object must be locked by caller
* => we return the old slot number, or -1 if we failed to allocate
* memory to record the new slot number
*/
@@ -286,7 +287,7 @@ uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
int oldslot;
- KERNEL_ASSERT_LOCKED();
+ KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
/*
@@ -358,7 +359,9 @@ uao_free(struct uvm_aobj *aobj)
struct uvm_object *uobj = &aobj->u_obj;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
+ KASSERT(rw_write_held(uobj->vmobjlock));
uao_dropswap_range(uobj, 0, 0);
+ rw_exit(uobj->vmobjlock);
if (UAO_USES_SWHASH(aobj)) {
/*
@@ -671,6 +674,7 @@ struct uvm_object *
uao_create(vsize_t size, int flags)
{
static struct uvm_aobj kernel_object_store;
+ static struct rwlock bootstrap_kernel_object_lock;
static int kobj_alloced = 0;
int pages = round_page(size) >> PAGE_SHIFT;
struct uvm_aobj *aobj;
@@ -742,6 +746,11 @@ uao_create(vsize_t size, int flags)
* Initialise UVM object.
*/
uvm_obj_init(&aobj->u_obj, &aobj_pager, refs);
+ if (flags & UAO_FLAG_KERNOBJ) {
+ /* Use a temporary static lock for kernel_object. */
+ rw_init(&bootstrap_kernel_object_lock, "kobjlk");
+ uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock);
+ }
/*
* now that aobj is ready, add it to the global list
@@ -822,20 +831,20 @@ uao_detach(struct uvm_object *uobj)
* involved in is complete), release any swap resources and free
* the page itself.
*/
- uvm_lock_pageq();
- while((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) {
+ rw_enter(uobj->vmobjlock, RW_WRITE);
+ while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) {
+ pmap_page_protect(pg, PROT_NONE);
if (pg->pg_flags & PG_BUSY) {
atomic_setbits_int(&pg->pg_flags, PG_WANTED);
- uvm_unlock_pageq();
- tsleep_nsec(pg, PVM, "uao_det", INFSLP);
- uvm_lock_pageq();
+ rwsleep_nsec(pg, uobj->vmobjlock, PVM, "uao_det",
+ INFSLP);
continue;
}
- pmap_page_protect(pg, PROT_NONE);
uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
+ uvm_lock_pageq();
uvm_pagefree(pg);
+ uvm_unlock_pageq();
}
- uvm_unlock_pageq();
/*
* Finally, free the anonymous UVM object itself.
@@ -864,7 +873,7 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
voff_t curoff;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
- KERNEL_ASSERT_LOCKED();
+ KASSERT(rw_write_held(uobj->vmobjlock));
if (flags & PGO_ALLPAGES) {
start = 0;
@@ -901,7 +910,8 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
/* Make sure page is unbusy, else wait for it. */
if (pp->pg_flags & PG_BUSY) {
atomic_setbits_int(&pp->pg_flags, PG_WANTED);
- tsleep_nsec(pp, PVM, "uaoflsh", INFSLP);
+ rwsleep_nsec(pp, uobj->vmobjlock, PVM, "uaoflsh",
+ INFSLP);
curoff -= PAGE_SIZE;
continue;
}
@@ -972,7 +982,7 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
* 2: page is zero-fill -> allocate a new page and zero it.
* 3: page is swapped out -> fetch the page from swap.
*
- * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
+ * case 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
* so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
* then we will need to return VM_PAGER_UNLOCK.
*
@@ -992,7 +1002,7 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
boolean_t done;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
- KERNEL_ASSERT_LOCKED();
+ KASSERT(rw_write_held(uobj->vmobjlock));
/*
* get number of pages
@@ -1115,7 +1125,10 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
/* out of RAM? */
if (ptmp == NULL) {
+ rw_exit(uobj->vmobjlock);
uvm_wait("uao_getpage");
+ rw_enter(uobj->vmobjlock, RW_WRITE);
+ /* goto top of pps while loop */
continue;
}
@@ -1135,7 +1148,8 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
/* page is there, see if we need to wait on it */
if ((ptmp->pg_flags & PG_BUSY) != 0) {
atomic_setbits_int(&ptmp->pg_flags, PG_WANTED);
- tsleep_nsec(ptmp, PVM, "uao_get", INFSLP);
+ rwsleep_nsec(ptmp, uobj->vmobjlock, PVM,
+ "uao_get", INFSLP);
continue; /* goto top of pps while loop */
}
@@ -1169,8 +1183,12 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
} else {
/*
* page in the swapped-out page.
+ * unlock object for i/o, relock when done.
*/
+
+ rw_exit(uobj->vmobjlock);
rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
+ rw_enter(uobj->vmobjlock, RW_WRITE);
/*
* I/O done. check for errors.
@@ -1194,6 +1212,7 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
uvm_lock_pageq();
uvm_pagefree(ptmp);
uvm_unlock_pageq();
+ rw_exit(uobj->vmobjlock);
return rv;
}
@@ -1215,11 +1234,14 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
} /* lcv loop */
+ rw_exit(uobj->vmobjlock);
return VM_PAGER_OK;
}
/*
* uao_dropswap: release any swap resources from this aobj page.
+ *
+ * => aobj must be locked or have a reference count of 0.
*/
int
uao_dropswap(struct uvm_object *uobj, int pageidx)
@@ -1238,6 +1260,7 @@ uao_dropswap(struct uvm_object *uobj, int pageidx)
/*
* page in every page in every aobj that is paged-out to a range of swslots.
*
+ * => aobj must be locked and is returned locked.
* => returns TRUE if pagein was aborted due to lack of memory.
*/
boolean_t
@@ -1272,7 +1295,9 @@ uao_swap_off(int startslot, int endslot)
/*
* Page in all pages in the swap slot range.
*/
+ rw_enter(aobj->u_obj.vmobjlock, RW_WRITE);
rv = uao_pagein(aobj, startslot, endslot);
+ rw_exit(aobj->u_obj.vmobjlock);
/* Drop the reference of the current object. */
uao_detach(&aobj->u_obj);
@@ -1375,14 +1400,21 @@ restart:
static boolean_t
uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
{
+ struct uvm_object *uobj = &aobj->u_obj;
struct vm_page *pg;
int rv, slot, npages;
pg = NULL;
npages = 1;
+
+ KASSERT(rw_write_held(uobj->vmobjlock));
rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
&pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);
+ /*
+ * relock and finish up.
+ */
+ rw_enter(uobj->vmobjlock, RW_WRITE);
switch (rv) {
case VM_PAGER_OK:
break;
@@ -1430,7 +1462,7 @@ uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
int swpgonlydelta = 0;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
- /* KASSERT(mutex_owned(uobj->vmobjlock)); */
+ KASSERT(rw_write_held(uobj->vmobjlock));
if (end == 0) {
end = INT64_MAX;
diff --git sys/uvm/uvm_device.c sys/uvm/uvm_device.c
index e5d035f2947..994ab537a82 100644
--- sys/uvm/uvm_device.c
+++ sys/uvm/uvm_device.c
@@ -166,7 +166,9 @@ udv_attach(dev_t device, vm_prot_t accessprot, voff_t off, vsize_t size)
/*
* bump reference count, unhold, return.
*/
+ rw_enter(lcv->u_obj.vmobjlock, RW_WRITE);
lcv->u_obj.uo_refs++;
+ rw_exit(lcv->u_obj.vmobjlock);
mtx_enter(&udv_lock);
if (lcv->u_flags & UVM_DEVICE_WANTED)
@@ -228,8 +230,9 @@ udv_attach(dev_t device, vm_prot_t accessprot, voff_t off, vsize_t size)
static void
udv_reference(struct uvm_object *uobj)
{
- KERNEL_ASSERT_LOCKED();
+ rw_enter(uobj->vmobjlock, RW_WRITE);
uobj->uo_refs++;
+ rw_exit(uobj->vmobjlock);
}
/*
@@ -248,8 +251,10 @@ udv_detach(struct uvm_object *uobj)
* loop until done
*/
again:
+ rw_enter(uobj->vmobjlock, RW_WRITE);
if (uobj->uo_refs > 1) {
uobj->uo_refs--;
+ rw_exit(uobj->vmobjlock);
return;
}
KASSERT(uobj->uo_npages == 0 && RBT_EMPTY(uvm_objtree, &uobj->memt));
@@ -260,10 +265,7 @@ again:
mtx_enter(&udv_lock);
if (udv->u_flags & UVM_DEVICE_HOLD) {
udv->u_flags |= UVM_DEVICE_WANTED;
- /*
- * lock interleaving. -- this is ok in this case since the
- * locks are both IPL_NONE
- */
+ rw_exit(uobj->vmobjlock);
msleep_nsec(udv, &udv_lock, PVM | PNORELOCK, "udv_detach",
INFSLP);
goto again;
@@ -276,6 +278,7 @@ again:
if (udv->u_flags & UVM_DEVICE_WANTED)
wakeup(udv);
mtx_leave(&udv_lock);
+ rw_exit(uobj->vmobjlock);
uvm_obj_destroy(uobj);
free(udv, M_TEMP, sizeof(*udv));
diff --git sys/uvm/uvm_fault.c sys/uvm/uvm_fault.c
index c90d9b3fa81..ed72f1bbf92 100644
--- sys/uvm/uvm_fault.c
+++ sys/uvm/uvm_fault.c
@@ -326,7 +326,8 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
if (pg->uobject) {
/* Owner of page is UVM object. */
uvmfault_unlockall(ufi, amap, NULL);
- tsleep_nsec(pg, PVM, "anonget1", INFSLP);
+ rwsleep_nsec(pg, pg->uobject->vmobjlock,
+ PVM | PNORELOCK, "anonget1", INFSLP);
} else {
/* Owner of page is anon. */
uvmfault_unlockall(ufi, NULL, NULL);
@@ -620,6 +621,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vaddr, vm_fault_t fault_type,
*/
if (uobj != NULL && uobj->pgops->pgo_fault != NULL) {
KERNEL_LOCK();
+ rw_enter(uobj->vmobjlock, RW_WRITE);
error = uobj->pgops->pgo_fault(&ufi,
flt.startva, pages, flt.npages,
flt.centeridx, fault_type, flt.access_type,
@@ -634,10 +636,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vaddr, vm_fault_t fault_type,
error = EACCES;
} else {
/* case 2: fault on backing obj or zero fill */
- KERNEL_LOCK();
error = uvm_fault_lower(&ufi, &flt, pages,
fault_type);
- KERNEL_UNLOCK();
}
}
}
@@ -793,10 +793,10 @@ uvm_fault_check(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
voff_t uoff;
uoff = (flt->startva - ufi->entry->start) +
ufi->entry->offset;
- KERNEL_LOCK();
+ rw_enter(uobj->vmobjlock, RW_WRITE);
(void) uobj->pgops->pgo_flush(uobj, uoff, uoff +
((vsize_t)nback << PAGE_SHIFT), PGO_DEACTIVATE);
- KERNEL_UNLOCK();
+ rw_exit(uobj->vmobjlock);
}
/* now forget about the backpages */
@@ -1098,6 +1098,8 @@ uvm_fault_lower_lookup(
int lcv, gotpages;
vaddr_t currva;
+ rw_enter(uobj->vmobjlock, RW_WRITE);
+
counters_inc(uvmexp_counters, flt_lget);
gotpages = flt->npages;
(void) uobj->pgops->pgo_get(uobj,
@@ -1211,6 +1213,14 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
* made it BUSY.
*/
+ /*
+ * locked:
+ */
+ KASSERT(amap == NULL ||
+ rw_write_held(amap->am_lock));
+ KASSERT(uobj == NULL ||
+ rw_write_held(uobj->vmobjlock));
+
/*
* note that uobjpage can not be PGO_DONTCARE at this point. we now
* set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we
@@ -1268,6 +1278,7 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
return (EIO);
uobjpage = PGO_DONTCARE;
+ uobj = NULL;
promote = TRUE;
}
@@ -1276,6 +1287,12 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
if (locked && amap != NULL)
amap_lock(amap);
+ /* might be changed */
+ if (uobjpage != PGO_DONTCARE) {
+ uobj = uobjpage->uobject;
+ rw_enter(uobj->vmobjlock, RW_WRITE);
+ }
+
/*
* Re-verify that amap slot is still free. if there is
* a problem, we clean up.
@@ -1300,10 +1317,12 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
atomic_clearbits_int(&uobjpage->pg_flags,
PG_BUSY|PG_WANTED);
UVM_PAGE_OWN(uobjpage, NULL);
- return ERESTART;
}
- if (locked == FALSE)
+
+ if (locked == FALSE) {
+ rw_exit(uobj->vmobjlock);
return ERESTART;
+ }
/*
* we have the data in uobjpage which is PG_BUSY
@@ -1423,6 +1442,7 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
uvm_lock_pageq();
uvm_pageactivate(uobjpage);
uvm_unlock_pageq();
+ rw_exit(uobj->vmobjlock);
uobj = NULL;
} else {
counters_inc(uvmexp_counters, flt_przero);
@@ -1434,7 +1454,7 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
if (amap_add(&ufi->entry->aref,
ufi->orig_rvaddr - ufi->entry->start, anon, 0)) {
- uvmfault_unlockall(ufi, amap, NULL);
+ uvmfault_unlockall(ufi, amap, uobj);
uvm_anfree(anon);
counters_inc(uvmexp_counters, flt_noamap);
@@ -1483,25 +1503,32 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
return ERESTART;
}
- uvm_lock_pageq();
-
if (fault_type == VM_FAULT_WIRE) {
+ uvm_lock_pageq();
uvm_pagewire(pg);
+ uvm_unlock_pageq();
if (pg->pg_flags & PQ_AOBJ) {
/*
* since the now-wired page cannot be paged out,
* release its swap resources for others to use.
- * since an aobj page with no swap cannot be PG_CLEAN,
- * clear its clean flag now.
+ * since an aobj page with no swap cannot be clean,
+ * mark it dirty now.
+ *
+ * use pg->uobject here. if the page is from a
+ * tmpfs vnode, the pages are backed by its UAO and
+ * not the vnode.
*/
+ KASSERT(uobj != NULL);
+ KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
}
} else {
/* activate it */
+ uvm_lock_pageq();
uvm_pageactivate(pg);
+ uvm_unlock_pageq();
}
- uvm_unlock_pageq();
if (pg->pg_flags & PG_WANTED)
wakeup(pg);
@@ -1567,7 +1594,7 @@ uvm_fault_unwire(vm_map_t map, vaddr_t start, vaddr_t end)
void
uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
{
- vm_map_entry_t entry, next;
+ vm_map_entry_t entry, oentry = NULL, next;
pmap_t pmap = vm_map_pmap(map);
vaddr_t va;
paddr_t pa;
@@ -1578,12 +1605,9 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
/*
* we assume that the area we are unwiring has actually been wired
* in the first place. this means that we should be able to extract
- * the PAs from the pmap. we also lock out the page daemon so that
- * we can call uvm_pageunwire.
+ * the PAs from the pmap.
*/
- uvm_lock_pageq();
-
/*
* find the beginning map entry for the region.
*/
@@ -1605,6 +1629,17 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
entry = next;
}
+ /*
+ * lock it.
+ */
+ if (entry != oentry) {
+ if (oentry != NULL) {
+ uvm_map_unlock_entry(oentry);
+ }
+ uvm_map_lock_entry(entry);
+ oentry = entry;
+ }
+
/*
* if the entry is no longer wired, tell the pmap.
*/
@@ -1612,11 +1647,16 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
pmap_unwire(pmap, va);
pg = PHYS_TO_VM_PAGE(pa);
- if (pg)
+ if (pg) {
+ uvm_lock_pageq();
uvm_pageunwire(pg);
+ uvm_unlock_pageq();
+ }
}
- uvm_unlock_pageq();
+ if (oentry != NULL) {
+ uvm_map_unlock_entry(entry);
+ }
}
/*
@@ -1650,6 +1690,8 @@ void
uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap,
struct uvm_object *uobj)
{
+ if (uobj)
+ rw_exit(uobj->vmobjlock);
if (amap != NULL)
amap_unlock(amap);
uvmfault_unlockmaps(ufi, FALSE);
diff --git sys/uvm/uvm_km.c sys/uvm/uvm_km.c
index fc31ae99dff..5f36935c09d 100644
--- sys/uvm/uvm_km.c
+++ sys/uvm/uvm_km.c
@@ -249,13 +249,15 @@ uvm_km_pgremove(struct uvm_object *uobj, vaddr_t startva, vaddr_t endva)
int swpgonlydelta = 0;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
+ KASSERT(rw_write_held(uobj->vmobjlock));
pmap_remove(pmap_kernel(), startva, endva);
for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) {
pp = uvm_pagelookup(uobj, curoff);
if (pp && pp->pg_flags & PG_BUSY) {
atomic_setbits_int(&pp->pg_flags, PG_WANTED);
- tsleep_nsec(pp, PVM, "km_pgrm", INFSLP);
+ rwsleep_nsec(pp, uobj->vmobjlock, PVM, "km_pgrm",
+ INFSLP);
curoff -= PAGE_SIZE; /* loop back to us */
continue;
}
@@ -383,6 +385,9 @@ uvm_km_kmemalloc_pla(struct vm_map *map, struct uvm_object *obj, vsize_t size,
return (0);
}
+ if (obj != NULL)
+ rw_enter(obj->vmobjlock, RW_WRITE);
+
loopva = kva;
while (loopva != kva + size) {
pg = TAILQ_FIRST(&pgl);
@@ -409,6 +414,9 @@ uvm_km_kmemalloc_pla(struct vm_map *map, struct uvm_object *obj, vsize_t size,
KASSERT(TAILQ_EMPTY(&pgl));
pmap_update(pmap_kernel());
+ if (obj != NULL)
+ rw_exit(obj->vmobjlock);
+
return kva;
}
@@ -474,12 +482,14 @@ uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit)
/* now allocate the memory. we must be careful about released pages. */
loopva = kva;
while (size) {
+ rw_enter(uvm.kernel_object->vmobjlock, RW_WRITE);
/* allocate ram */
pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0);
if (pg) {
atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
UVM_PAGE_OWN(pg, NULL);
}
+ rw_exit(uvm.kernel_object->vmobjlock);
if (__predict_false(pg == NULL)) {
if (curproc == uvm.pagedaemon_proc) {
/*
diff --git sys/uvm/uvm_map.c sys/uvm/uvm_map.c
index d153bbfd20b..06553a814c6 100644
--- sys/uvm/uvm_map.c
+++ sys/uvm/uvm_map.c
@@ -124,6 +124,8 @@ struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int);
void uvm_mapent_free(struct vm_map_entry*);
void uvm_unmap_kill_entry(struct vm_map*,
struct vm_map_entry*);
+void uvm_unmap_kill_entry_withlock(struct vm_map *,
+ struct vm_map_entry *, int);
void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
void uvm_mapent_mkfree(struct vm_map*,
struct vm_map_entry*, struct vm_map_entry**,
@@ -499,6 +501,28 @@ uvm_map_reference(struct vm_map *map)
atomic_inc_int(&map->ref_count);
}
+void
+uvm_map_lock_entry(struct vm_map_entry *entry)
+{
+ if (entry->aref.ar_amap != NULL) {
+ amap_lock(entry->aref.ar_amap);
+ }
+ if (UVM_ET_ISOBJ(entry)) {
+ rw_enter(entry->object.uvm_obj->vmobjlock, RW_WRITE);
+ }
+}
+
+void
+uvm_map_unlock_entry(struct vm_map_entry *entry)
+{
+ if (UVM_ET_ISOBJ(entry)) {
+ rw_exit(entry->object.uvm_obj->vmobjlock);
+ }
+ if (entry->aref.ar_amap != NULL) {
+ amap_unlock(entry->aref.ar_amap);
+ }
+}
+
/*
* Calculate the dused delta.
*/
@@ -2101,7 +2125,8 @@ uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
* Unwire and release referenced amap and object from map entry.
*/
void
-uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
+uvm_unmap_kill_entry_withlock(struct vm_map *map, struct vm_map_entry *entry,
+ int needlock)
{
/* Unwire removed map entry. */
if (VM_MAPENT_ISWIRED(entry)) {
@@ -2111,6 +2136,9 @@ uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
KERNEL_UNLOCK();
}
+ if (needlock)
+ uvm_map_lock_entry(entry);
+
/* Entry-type specific code. */
if (UVM_ET_ISHOLE(entry)) {
/* Nothing to be done for holes. */
@@ -2157,17 +2185,19 @@ uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
*/
uvm_km_pgremove(entry->object.uvm_obj, entry->start,
entry->end);
-
- /*
- * null out kernel_object reference, we've just
- * dropped it
- */
- entry->etype &= ~UVM_ET_OBJ;
- entry->object.uvm_obj = NULL; /* to be safe */
} else {
/* remove mappings the standard way. */
pmap_remove(map->pmap, entry->start, entry->end);
}
+
+ if (needlock)
+ uvm_map_unlock_entry(entry);
+}
+
+void
+uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
+{
+ uvm_unmap_kill_entry_withlock(map, entry, 0);
}
/*
@@ -2227,7 +2257,7 @@ uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
map->sserial++;
/* Kill entry. */
- uvm_unmap_kill_entry(map, entry);
+ uvm_unmap_kill_entry_withlock(map, entry, 1);
/* Update space usage. */
if ((map->flags & VM_MAP_ISVMSPACE) &&
@@ -3420,8 +3450,10 @@ uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
*/
iter->wired_count = 0;
}
+ uvm_map_lock_entry(iter);
pmap_protect(map->pmap, iter->start, iter->end,
iter->protection & mask);
+ uvm_map_unlock_entry(iter);
}
/*
@@ -3967,11 +3999,13 @@ uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
*/
if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
if (old_entry->max_protection & PROT_WRITE) {
+ uvm_map_lock_entry(old_entry);
pmap_protect(old_map->pmap,
old_entry->start,
old_entry->end,
old_entry->protection &
~PROT_WRITE);
+ uvm_map_unlock_entry(old_entry);
pmap_update(old_map->pmap);
}
old_entry->etype |= UVM_ET_NEEDSCOPY;
@@ -4751,9 +4785,11 @@ flush_object:
((flags & PGO_FREE) == 0 ||
((entry->max_protection & PROT_WRITE) != 0 &&
(entry->etype & UVM_ET_COPYONWRITE) == 0))) {
+ rw_enter(uobj->vmobjlock, RW_WRITE);
rv = uobj->pgops->pgo_flush(uobj,
cp_start - entry->start + entry->offset,
cp_end - entry->start + entry->offset, flags);
+ rw_exit(uobj->vmobjlock);
if (rv == FALSE)
error = EFAULT;
diff --git sys/uvm/uvm_map.h sys/uvm/uvm_map.h
index 12092ebfcd2..6c02bc93137 100644
--- sys/uvm/uvm_map.h
+++ sys/uvm/uvm_map.h
@@ -442,6 +442,9 @@ void vm_map_unbusy_ln(struct vm_map*, char*, int);
#define vm_map_unbusy(map) vm_map_unbusy_ln(map, NULL, 0)
#endif
+void uvm_map_lock_entry(struct vm_map_entry *);
+void uvm_map_unlock_entry(struct vm_map_entry *);
+
#endif /* _KERNEL */
/*
diff --git sys/uvm/uvm_object.c sys/uvm/uvm_object.c
index 675cd9de2da..8b52a14459f 100644
--- sys/uvm/uvm_object.c
+++ sys/uvm/uvm_object.c
@@ -1,7 +1,7 @@
/* $OpenBSD: uvm_object.c,v 1.22 2021/10/23 14:42:08 mpi Exp $ */
/*
- * Copyright (c) 2006 The NetBSD Foundation, Inc.
+ * Copyright (c) 2006, 2010, 2019 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -38,6 +38,7 @@
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/atomic.h>
+#include <sys/rwlock.h>
#include <uvm/uvm.h>
@@ -51,15 +52,27 @@ const struct uvm_pagerops bufcache_pager = {
/* nothing */
};
-/* We will fetch this page count per step */
+/* Page count to fetch per single step. */
#define FETCH_PAGECOUNT 16
/*
- * uvm_obj_init: initialise a uvm object.
+ * uvm_obj_init: initialize UVM memory object.
*/
void
uvm_obj_init(struct uvm_object *uobj, const struct uvm_pagerops *pgops, int refs)
{
+ int alock;
+
+ alock = ((pgops != NULL) && (pgops != &pmap_pager) &&
+ (pgops != &bufcache_pager) && (refs != UVM_OBJ_KERN));
+
+ if (alock) {
+ /* Allocate and assign a lock. */
+ rw_obj_alloc(&uobj->vmobjlock, "uobjlk");
+ } else {
+ /* The lock will need to be set via uvm_obj_setlock(). */
+ uobj->vmobjlock = NULL;
+ }
uobj->pgops = pgops;
RBT_INIT(uvm_objtree, &uobj->memt);
uobj->uo_npages = 0;
@@ -73,12 +86,38 @@ void
uvm_obj_destroy(struct uvm_object *uo)
{
KASSERT(RBT_EMPTY(uvm_objtree, &uo->memt));
+
+ rw_obj_free(uo->vmobjlock);
+}
+
+/*
+ * uvm_obj_setlock: assign a vmobjlock to the UVM object.
+ *
+ * => Caller is responsible to ensure that the UVM object is not in use.
+ * => Only dynamic lock may be previously set. We drop the reference then.
+ */
+void
+uvm_obj_setlock(struct uvm_object *uo, struct rwlock *lockptr)
+{
+ struct rwlock *olockptr = uo->vmobjlock;
+
+ if (olockptr) {
+ /* Drop the reference on the old lock. */
+ rw_obj_free(olockptr);
+ }
+ if (lockptr == NULL) {
+ /* If new lock is not passed - allocate default one. */
+ rw_obj_alloc(&lockptr, "uobjlk");
+ }
+ uo->vmobjlock = lockptr;
}
#ifndef SMALL_KERNEL
/*
- * uvm_obj_wire: wire the pages of entire uobj
+ * uvm_obj_wire: wire the pages of entire UVM object.
*
+ * => NOTE: this function should only be used for types of objects
+ * where PG_RELEASED flag is never set (aobj objects)
* => caller must pass page-aligned start and end values
* => if the caller passes in a pageq pointer, we'll return a list of
* wired pages.
@@ -94,6 +133,7 @@ uvm_obj_wire(struct uvm_object *uobj, voff_t start, voff_t end,
left = (end - start) >> PAGE_SHIFT;
+ rw_enter(uobj->vmobjlock, RW_WRITE);
while (left) {
npages = MIN(FETCH_PAGECOUNT, left);
@@ -107,6 +147,7 @@ uvm_obj_wire(struct uvm_object *uobj, voff_t start, voff_t end,
if (error)
goto error;
+ rw_enter(uobj->vmobjlock, RW_WRITE);
for (i = 0; i < npages; i++) {
KASSERT(pgs[i] != NULL);
@@ -134,6 +175,7 @@ uvm_obj_wire(struct uvm_object *uobj, voff_t start, voff_t end,
left -= npages;
offset += (voff_t)npages << PAGE_SHIFT;
}
+ rw_exit(uobj->vmobjlock);
return 0;
@@ -145,17 +187,17 @@ error:
}
/*
- * uobj_unwirepages: unwire the pages of entire uobj
+ * uvm_obj_unwire: unwire the pages of entire UVM object.
*
* => caller must pass page-aligned start and end values
*/
-
void
uvm_obj_unwire(struct uvm_object *uobj, voff_t start, voff_t end)
{
struct vm_page *pg;
off_t offset;
+ rw_enter(uobj->vmobjlock, RW_WRITE);
uvm_lock_pageq();
for (offset = start; offset < end; offset += PAGE_SIZE) {
pg = uvm_pagelookup(uobj, offset);
@@ -166,6 +208,7 @@ uvm_obj_unwire(struct uvm_object *uobj, voff_t start, voff_t end)
uvm_pageunwire(pg);
}
uvm_unlock_pageq();
+ rw_exit(uobj->vmobjlock);
}
#endif /* !SMALL_KERNEL */
diff --git sys/uvm/uvm_object.h sys/uvm/uvm_object.h
index 9a74600c9df..5fc32ca3eb8 100644
--- sys/uvm/uvm_object.h
+++ sys/uvm/uvm_object.h
@@ -32,14 +32,25 @@
#define _UVM_UVM_OBJECT_H_
/*
- * uvm_object.h
- */
-
-/*
- * uvm_object: all that is left of mach objects.
+ * The UVM memory object interface. Notes:
+ *
+ * A UVM memory object represents a list of pages, which are managed by
+ * the object's pager operations (uvm_object::pgops). All pages belonging
+ * to an object are owned by it and thus protected by the object lock.
+ *
+ * The lock (uvm_object::vmobjlock) may be shared amongst the UVM objects.
+ * By default, the lock is allocated dynamically using rw_obj_init() cache.
+ * Lock sharing is normally used when there is an underlying object. For
+ * example, a vnode representing a file may have an underlying node, which
+ * is the case for tmpfs and layered file systems. In such a case, the
+ * vnode's UVM object and the underlying UVM object share the lock.
+ *
+ * The reference count is managed atomically for the anonymous UVM objects.
+ * For other objects, it is arbitrary (may use the lock or atomics).
*/
struct uvm_object {
+ struct rwlock *vmobjlock; /* lock on object */
const struct uvm_pagerops *pgops; /* pager ops */
RBT_HEAD(uvm_objtree, vm_page) memt; /* pages in object */
int uo_npages; /* # of pages in memt */
@@ -52,10 +63,10 @@ struct uvm_object {
* memory objects don't have reference counts -- they never die).
*
* this value is used to detected kernel object mappings at uvm_unmap()
- * time. normally when an object is unmapped its pages eventually become
- * deactivated and then paged out and/or freed. this is not useful
+ * time. normally when an object is unmapped its pages eventually become
+ * deactivated and then paged out and/or freed. this is not useful
* for kernel objects... when a kernel object is unmapped we always want
- * to free the resources associated with the mapping. UVM_OBJ_KERN
+ * to free the resources associated with the mapping. UVM_OBJ_KERN
* allows us to decide which type of unmapping we want to do.
*
* in addition, we have kernel objects which may be used in an
@@ -100,8 +111,12 @@ RBT_PROTOTYPE(uvm_objtree, vm_page, objt, uvm_pagecmp)
#define UVM_OBJ_IS_BUFCACHE(uobj) \
((uobj)->pgops == &bufcache_pager)
+#define UVM_OBJ_IS_DUMMY(uobj) \
+ (UVM_OBJ_IS_PMAP(uobj) || UVM_OBJ_IS_BUFCACHE(uobj))
+
void uvm_obj_init(struct uvm_object *, const struct uvm_pagerops *, int);
void uvm_obj_destroy(struct uvm_object *);
+void uvm_obj_setlock(struct uvm_object *, struct rwlock *);
int uvm_obj_wire(struct uvm_object *, voff_t, voff_t, struct pglist *);
void uvm_obj_unwire(struct uvm_object *, voff_t, voff_t);
void uvm_obj_free(struct uvm_object *);
diff --git sys/uvm/uvm_page.c sys/uvm/uvm_page.c
index a90b23af6df..b0d705994d1 100644
--- sys/uvm/uvm_page.c
+++ sys/uvm/uvm_page.c
@@ -118,6 +118,7 @@ static vaddr_t virtual_space_end;
*/
static void uvm_pageinsert(struct vm_page *);
static void uvm_pageremove(struct vm_page *);
+int uvm_page_owner_locked_p(struct vm_page *);
/*
* inline functions
@@ -125,7 +126,7 @@ static void uvm_pageremove(struct vm_page *);
/*
* uvm_pageinsert: insert a page in the object
*
- * => caller must lock page queues XXX questionable
+ * => caller must lock object
* => call should have already set pg's object and offset pointers
* and bumped the version counter
*/
@@ -134,7 +135,10 @@ uvm_pageinsert(struct vm_page *pg)
{
struct vm_page *dupe;
+ KASSERT(UVM_OBJ_IS_DUMMY(pg->uobject) ||
+ rw_write_held(pg->uobject->vmobjlock));
KASSERT((pg->pg_flags & PG_TABLED) == 0);
+
dupe = RBT_INSERT(uvm_objtree, &pg->uobject->memt, pg);
/* not allowed to insert over another page */
KASSERT(dupe == NULL);
@@ -145,12 +149,15 @@ uvm_pageinsert(struct vm_page *pg)
/*
* uvm_page_remove: remove page from object
*
- * => caller must lock page queues
+ * => caller must lock object
*/
static inline void
uvm_pageremove(struct vm_page *pg)
{
+ KASSERT(UVM_OBJ_IS_DUMMY(pg->uobject) ||
+ rw_write_held(pg->uobject->vmobjlock));
KASSERT(pg->pg_flags & PG_TABLED);
+
RBT_REMOVE(uvm_objtree, &pg->uobject->memt, pg);
atomic_clearbits_int(&pg->pg_flags, PG_TABLED);
@@ -683,11 +690,19 @@ uvm_pagealloc_pg(struct vm_page *pg, struct uvm_object *obj, voff_t off,
{
int flags;
+ KASSERT(obj == NULL || anon == NULL);
+ KASSERT(anon == NULL || off == 0);
+ KASSERT(off == trunc_page(off));
+ KASSERT(obj == NULL || UVM_OBJ_IS_DUMMY(obj) ||
+ rw_write_held(obj->vmobjlock));
+ KASSERT(anon == NULL || anon->an_lock == NULL ||
+ rw_write_held(anon->an_lock));
+
flags = PG_BUSY | PG_FAKE;
pg->offset = off;
pg->uobject = obj;
pg->uanon = anon;
-
+ KASSERT(uvm_page_owner_locked_p(pg));
if (anon) {
anon->an_page = pg;
flags |= PQ_ANON;
@@ -846,7 +861,9 @@ uvm_pagerealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size,
uvm_pagecopy(tpg, pg);
KASSERT(tpg->wire_count == 1);
tpg->wire_count = 0;
+ uvm_lock_pageq();
uvm_pagefree(tpg);
+ uvm_unlock_pageq();
uvm_pagealloc_pg(pg, obj, offset, NULL);
}
}
@@ -873,6 +890,10 @@ uvm_pagealloc(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
KASSERT(obj == NULL || anon == NULL);
KASSERT(anon == NULL || off == 0);
KASSERT(off == trunc_page(off));
+ KASSERT(obj == NULL || UVM_OBJ_IS_DUMMY(obj) ||
+ rw_write_held(obj->vmobjlock));
+ KASSERT(anon == NULL || anon->an_lock == NULL ||
+ rw_write_held(anon->an_lock));
pmr_flags = UVM_PLA_NOWAIT;
@@ -940,10 +961,9 @@ uvm_pageclean(struct vm_page *pg)
{
u_int flags_to_clear = 0;
-#if all_pmap_are_fixed
- if (pg->pg_flags & (PG_TABLED|PQ_ACTIVE|PQ_INACTIVE))
+ if ((pg->pg_flags & (PG_TABLED|PQ_ACTIVE|PQ_INACTIVE)) &&
+ (pg->uobject == NULL || !UVM_OBJ_IS_PMAP(pg->uobject)))
MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
-#endif
#ifdef DEBUG
if (pg->uobject == (void *)0xdeadbeef &&
@@ -953,6 +973,10 @@ uvm_pageclean(struct vm_page *pg)
#endif
KASSERT((pg->pg_flags & PG_DEV) == 0);
+ KASSERT(pg->uobject == NULL || UVM_OBJ_IS_DUMMY(pg->uobject) ||
+ rw_write_held(pg->uobject->vmobjlock));
+ KASSERT(pg->uobject != NULL || pg->uanon == NULL ||
+ rw_write_held(pg->uanon->an_lock));
/*
* if the page was an object page (and thus "TABLED"), remove it
@@ -1009,10 +1033,9 @@ uvm_pageclean(struct vm_page *pg)
void
uvm_pagefree(struct vm_page *pg)
{
-#if all_pmap_are_fixed
- if (pg->pg_flags & (PG_TABLED|PQ_ACTIVE|PQ_INACTIVE))
+ if ((pg->pg_flags & (PG_TABLED|PQ_ACTIVE|PQ_INACTIVE)) &&
+ (pg->uobject == NULL || !UVM_OBJ_IS_PMAP(pg->uobject)))
MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
-#endif
uvm_pageclean(pg);
uvm_pmr_freepages(pg, 1);
@@ -1037,6 +1060,10 @@ uvm_page_unbusy(struct vm_page **pgs, int npgs)
if (pg == NULL || pg == PGO_DONTCARE) {
continue;
}
+
+ KASSERT(uvm_page_owner_locked_p(pg));
+ KASSERT(pg->pg_flags & PG_BUSY);
+
if (pg->pg_flags & PG_WANTED) {
wakeup(pg);
}
@@ -1207,6 +1234,7 @@ uvm_pagelookup(struct uvm_object *obj, voff_t off)
void
uvm_pagewire(struct vm_page *pg)
{
+ KASSERT(uvm_page_owner_locked_p(pg));
MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
if (pg->wire_count == 0) {
@@ -1237,6 +1265,7 @@ uvm_pagewire(struct vm_page *pg)
void
uvm_pageunwire(struct vm_page *pg)
{
+ KASSERT(uvm_page_owner_locked_p(pg));
MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
pg->wire_count--;
@@ -1258,6 +1287,7 @@ uvm_pageunwire(struct vm_page *pg)
void
uvm_pagedeactivate(struct vm_page *pg)
{
+ KASSERT(uvm_page_owner_locked_p(pg));
MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
if (pg->pg_flags & PQ_ACTIVE) {
@@ -1294,6 +1324,7 @@ uvm_pagedeactivate(struct vm_page *pg)
void
uvm_pageactivate(struct vm_page *pg)
{
+ KASSERT(uvm_page_owner_locked_p(pg));
MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
if (pg->pg_flags & PQ_INACTIVE) {
@@ -1341,6 +1372,24 @@ uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
pmap_copy_page(src, dst);
}
+/*
+ * uvm_page_owner_locked_p: return true if object associated with page is
+ * locked. this is a weak check for runtime assertions only.
+ */
+int
+uvm_page_owner_locked_p(struct vm_page *pg)
+{
+ if (pg->uobject != NULL) {
+ if (UVM_OBJ_IS_DUMMY(pg->uobject))
+ return 1;
+ return rw_write_held(pg->uobject->vmobjlock);
+ }
+ if (pg->uanon != NULL) {
+ return rw_write_held(pg->uanon->an_lock);
+ }
+ return 1;
+}
+
/*
* uvm_pagecount: count the number of physical pages in the address range.
*/
diff --git sys/uvm/uvm_pager.c sys/uvm/uvm_pager.c
index 286e7c2a025..46ba9cfab84 100644
--- sys/uvm/uvm_pager.c
+++ sys/uvm/uvm_pager.c
@@ -543,11 +543,15 @@ ReTry:
/* XXX daddr_t -> int */
int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0;
if (pg->pg_flags & PQ_ANON) {
+ rw_enter(pg->uanon->an_lock, RW_WRITE);
pg->uanon->an_swslot = nswblk;
+ rw_exit(pg->uanon->an_lock);
} else {
+ rw_enter(pg->uobject->vmobjlock, RW_WRITE);
uao_set_swslot(pg->uobject,
pg->offset >> PAGE_SHIFT,
nswblk);
+ rw_exit(pg->uobject->vmobjlock);
}
}
if (result == VM_PAGER_AGAIN) {
@@ -612,6 +616,8 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg,
{
int lcv;
+ KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
+
/* drop all pages but "pg" */
for (lcv = 0 ; lcv < *npages ; lcv++) {
/* skip "pg" or empty slot */
@@ -625,10 +631,13 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg,
*/
if (!uobj) {
if (ppsp[lcv]->pg_flags & PQ_ANON) {
+ rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE);
if (flags & PGO_REALLOCSWAP)
/* zap swap block */
ppsp[lcv]->uanon->an_swslot = 0;
} else {
+ rw_enter(ppsp[lcv]->uobject->vmobjlock,
+ RW_WRITE);
if (flags & PGO_REALLOCSWAP)
uao_set_swslot(ppsp[lcv]->uobject,
ppsp[lcv]->offset >> PAGE_SHIFT, 0);
@@ -649,7 +658,6 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg,
UVM_PAGE_OWN(ppsp[lcv], NULL);
/* kills anon and frees pg */
- rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE);
uvm_anon_release(ppsp[lcv]->uanon);
continue;
@@ -672,6 +680,14 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg,
pmap_clear_modify(ppsp[lcv]);
atomic_setbits_int(&ppsp[lcv]->pg_flags, PG_CLEAN);
}
+
+ /* if anonymous cluster, unlock object and move on */
+ if (!uobj) {
+ if (ppsp[lcv]->pg_flags & PQ_ANON)
+ rw_exit(ppsp[lcv]->uanon->an_lock);
+ else
+ rw_exit(ppsp[lcv]->uobject->vmobjlock);
+ }
}
}
@@ -736,6 +752,7 @@ uvm_aio_aiodone(struct buf *bp)
swap = (pg->pg_flags & PQ_SWAPBACKED) != 0;
if (!swap) {
uobj = pg->uobject;
+ rw_enter(uobj->vmobjlock, RW_WRITE);
}
}
KASSERT(swap || pg->uobject == uobj);
@@ -763,6 +780,9 @@ uvm_aio_aiodone(struct buf *bp)
}
}
uvm_page_unbusy(pgs, npages);
+ if (!swap) {
+ rw_exit(uobj->vmobjlock);
+ }
#ifdef UVM_SWAP_ENCRYPT
freed:
diff --git sys/uvm/uvm_pdaemon.c sys/uvm/uvm_pdaemon.c
index e0ab150cddc..1ac4b29d256 100644
--- sys/uvm/uvm_pdaemon.c
+++ sys/uvm/uvm_pdaemon.c
@@ -440,19 +440,6 @@ uvmpd_scan_inactive(struct pglist *pglst)
uvmexp.pdscans++;
nextpg = TAILQ_NEXT(p, pageq);
- /*
- * move referenced pages back to active queue and
- * skip to next page (unlikely to happen since
- * inactive pages shouldn't have any valid mappings
- * and we cleared reference before deactivating).
- */
-
- if (pmap_is_referenced(p)) {
- uvm_pageactivate(p);
- uvmexp.pdreact++;
- continue;
- }
-
if (p->pg_flags & PQ_ANON) {
anon = p->uanon;
KASSERT(anon != NULL);
@@ -461,6 +448,16 @@ uvmpd_scan_inactive(struct pglist *pglst)
/* lock failed, skip this page */
continue;
}
+ /*
+ * move referenced pages back to active queue
+ * and skip to next page.
+ */
+ if (pmap_is_referenced(p)) {
+ uvm_pageactivate(p);
+ rw_exit(anon->an_lock);
+ uvmexp.pdreact++;
+ continue;
+ }
if (p->pg_flags & PG_BUSY) {
rw_exit(anon->an_lock);
uvmexp.pdbusy++;
@@ -471,7 +468,23 @@ uvmpd_scan_inactive(struct pglist *pglst)
} else {
uobj = p->uobject;
KASSERT(uobj != NULL);
+ if (rw_enter(uobj->vmobjlock,
+ RW_WRITE|RW_NOSLEEP)) {
+ /* lock failed, skip this page */
+ continue;
+ }
+ /*
+ * move referenced pages back to active queue
+ * and skip to next page.
+ */
+ if (pmap_is_referenced(p)) {
+ uvm_pageactivate(p);
+ rw_exit(uobj->vmobjlock);
+ uvmexp.pdreact++;
+ continue;
+ }
if (p->pg_flags & PG_BUSY) {
+ rw_exit(uobj->vmobjlock);
uvmexp.pdbusy++;
/* someone else owns page, skip it */
continue;
@@ -507,6 +520,8 @@ uvmpd_scan_inactive(struct pglist *pglst)
/* remove from object */
anon->an_page = NULL;
rw_exit(anon->an_lock);
+ } else {
+ rw_exit(uobj->vmobjlock);
}
continue;
}
@@ -518,6 +533,8 @@ uvmpd_scan_inactive(struct pglist *pglst)
if (free + uvmexp.paging > uvmexp.freetarg << 2) {
if (anon) {
rw_exit(anon->an_lock);
+ } else {
+ rw_exit(uobj->vmobjlock);
}
continue;
}
@@ -533,6 +550,8 @@ uvmpd_scan_inactive(struct pglist *pglst)
uvm_pageactivate(p);
if (anon) {
rw_exit(anon->an_lock);
+ } else {
+ rw_exit(uobj->vmobjlock);
}
continue;
}
@@ -602,6 +621,9 @@ uvmpd_scan_inactive(struct pglist *pglst)
UVM_PAGE_OWN(p, NULL);
if (anon)
rw_exit(anon->an_lock);
+ else
+ rw_exit(
+ uobj->vmobjlock);
continue;
}
swcpages = 0; /* cluster is empty */
@@ -635,6 +657,8 @@ uvmpd_scan_inactive(struct pglist *pglst)
if (p) { /* if we just added a page to cluster */
if (anon)
rw_exit(anon->an_lock);
+ else
+ rw_exit(uobj->vmobjlock);
/* cluster not full yet? */
if (swcpages < swnpages)
@@ -748,6 +772,8 @@ uvmpd_scan_inactive(struct pglist *pglst)
if (swap_backed) {
if (anon)
rw_enter(anon->an_lock, RW_WRITE);
+ else
+ rw_enter(uobj->vmobjlock, RW_WRITE);
}
#ifdef DIAGNOSTIC
@@ -810,6 +836,8 @@ uvmpd_scan_inactive(struct pglist *pglst)
*/
if (anon)
rw_exit(anon->an_lock);
+ else if (uobj)
+ rw_exit(uobj->vmobjlock);
if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
nextpg = TAILQ_FIRST(pglst); /* reload! */
@@ -920,8 +948,12 @@ uvmpd_scan(void)
KASSERT(p->uanon != NULL);
if (rw_enter(p->uanon->an_lock, RW_WRITE|RW_NOSLEEP))
continue;
- } else
+ } else {
KASSERT(p->uobject != NULL);
+ if (rw_enter(p->uobject->vmobjlock,
+ RW_WRITE|RW_NOSLEEP))
+ continue;
+ }
/*
* if there's a shortage of swap, free any swap allocated
@@ -959,6 +991,8 @@ uvmpd_scan(void)
}
if (p->pg_flags & PQ_ANON)
rw_exit(p->uanon->an_lock);
+ else
+ rw_exit(p->uobject->vmobjlock);
}
}
@@ -982,6 +1016,10 @@ uvmpd_drop(struct pglist *pglst)
continue;
if (p->pg_flags & PG_CLEAN) {
+ struct uvm_object * uobj = p->uobject;
+
+ rw_enter(uobj->vmobjlock, RW_WRITE);
+ uvm_lock_pageq();
/*
* we now have the page queues locked.
* the page is not busy. if the page is clean we
@@ -997,6 +1035,8 @@ uvmpd_drop(struct pglist *pglst)
pmap_page_protect(p, PROT_NONE);
uvm_pagefree(p);
}
+ uvm_unlock_pageq();
+ rw_exit(uobj->vmobjlock);
}
}
}
@@ -1004,13 +1044,9 @@ uvmpd_drop(struct pglist *pglst)
void
uvmpd_hibernate(void)
{
- uvm_lock_pageq();
-
uvmpd_drop(&uvm.page_inactive_swp);
uvmpd_drop(&uvm.page_inactive_obj);
uvmpd_drop(&uvm.page_active);
-
- uvm_unlock_pageq();
}
#endif
diff --git sys/uvm/uvm_vnode.c sys/uvm/uvm_vnode.c
index 3cbdd5222b6..af69e8352ed 100644
--- sys/uvm/uvm_vnode.c
+++ sys/uvm/uvm_vnode.c
@@ -280,8 +280,9 @@ uvn_reference(struct uvm_object *uobj)
panic("uvn_reference: invalid state");
}
#endif
- KERNEL_ASSERT_LOCKED();
+ rw_enter(uobj->vmobjlock, RW_WRITE);
uobj->uo_refs++;
+ rw_exit(uobj->vmobjlock);
}
/*
@@ -300,9 +301,10 @@ uvn_detach(struct uvm_object *uobj)
struct vnode *vp;
int oldflags;
- KERNEL_ASSERT_LOCKED();
+ rw_enter(uobj->vmobjlock, RW_WRITE);
uobj->uo_refs--; /* drop ref! */
if (uobj->uo_refs) { /* still more refs */
+ rw_exit(uobj->vmobjlock);
return;
}
@@ -323,8 +325,7 @@ uvn_detach(struct uvm_object *uobj)
if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
/* won't block */
uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES);
- vrele(vp); /* drop vnode reference */
- return;
+ goto out;
}
/* its a goner! */
@@ -353,7 +354,8 @@ uvn_detach(struct uvm_object *uobj)
/* wait on any outstanding io */
while (uobj->uo_npages && uvn->u_flags & UVM_VNODE_RELKILL) {
uvn->u_flags |= UVM_VNODE_IOSYNC;
- tsleep_nsec(&uvn->u_nio, PVM, "uvn_term", INFSLP);
+ rwsleep_nsec(&uvn->u_nio, uobj->vmobjlock, PVM, "uvn_term",
+ INFSLP);
}
if ((uvn->u_flags & UVM_VNODE_RELKILL) == 0)
@@ -373,6 +375,8 @@ uvn_detach(struct uvm_object *uobj)
/* wake up any sleepers */
if (oldflags & UVM_VNODE_WANTED)
wakeup(uvn);
+out:
+ rw_exit(uobj->vmobjlock);
/* drop our reference to the vnode. */
vrele(vp);
@@ -409,10 +413,13 @@ void
uvm_vnp_terminate(struct vnode *vp)
{
struct uvm_vnode *uvn = vp->v_uvm;
+ struct uvm_object *uobj = &uvn->u_obj;
int oldflags;
/* check if it is valid */
+ rw_enter(uobj->vmobjlock, RW_WRITE);
if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
+ rw_exit(uobj->vmobjlock);
return;
}
@@ -479,7 +486,8 @@ uvm_vnp_terminate(struct vnode *vp)
*/
#endif
uvn->u_flags |= UVM_VNODE_IOSYNC;
- tsleep_nsec(&uvn->u_nio, PVM, "uvn_term", INFSLP);
+ rwsleep_nsec(&uvn->u_nio, uobj->vmobjlock, PVM, "uvn_term",
+ INFSLP);
}
/*
@@ -512,6 +520,8 @@ uvm_vnp_terminate(struct vnode *vp)
if (oldflags & UVM_VNODE_WANTED)
wakeup(uvn);
+
+ rw_exit(uobj->vmobjlock);
}
/*
@@ -589,7 +599,7 @@ uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
boolean_t retval, need_iosync, needs_clean;
voff_t curoff;
- KERNEL_ASSERT_LOCKED();
+ KASSERT(rw_write_held(uobj->vmobjlock));
TAILQ_INIT(&dead);
/* get init vals and determine how we are going to traverse object */
@@ -673,8 +683,8 @@ uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
atomic_setbits_int(&pp->pg_flags,
PG_WANTED);
uvm_unlock_pageq();
- tsleep_nsec(pp, PVM, "uvn_flsh",
- INFSLP);
+ rwsleep_nsec(pp, uobj->vmobjlock, PVM,
+ "uvn_flsh", INFSLP);
uvm_lock_pageq();
curoff -= PAGE_SIZE;
continue;
@@ -824,7 +834,8 @@ ReTry:
if (need_iosync) {
while (uvn->u_nio != 0) {
uvn->u_flags |= UVM_VNODE_IOSYNC;
- tsleep_nsec(&uvn->u_nio, PVM, "uvn_flush", INFSLP);
+ rwsleep_nsec(&uvn->u_nio, uobj->vmobjlock, PVM,
+ "uvn_flush", INFSLP);
}
if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
wakeup(&uvn->u_flags);
@@ -878,7 +889,7 @@ uvn_put(struct uvm_object *uobj, struct vm_page **pps, int npages, int flags)
{
int retval;
- KERNEL_ASSERT_LOCKED();
+ KASSERT(rw_write_held(uobj->vmobjlock));
retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE);
@@ -903,7 +914,8 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
int lcv, result, gotpages;
boolean_t done;
- KERNEL_ASSERT_LOCKED();
+ KASSERT(((flags & PGO_LOCKED) != 0 && rw_lock_held(uobj->vmobjlock)) ||
+ (flags & PGO_LOCKED) == 0);
/* step 1: handled the case where fault data structures are locked. */
if (flags & PGO_LOCKED) {
@@ -1033,7 +1045,8 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
/* page is there, see if we need to wait on it */
if ((ptmp->pg_flags & PG_BUSY) != 0) {
atomic_setbits_int(&ptmp->pg_flags, PG_WANTED);
- tsleep_nsec(ptmp, PVM, "uvn_get", INFSLP);
+ rwsleep_nsec(ptmp, uobj->vmobjlock, PVM,
+ "uvn_get", INFSLP);
continue; /* goto top of pps while loop */
}
@@ -1077,6 +1090,7 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
uvm_lock_pageq();
uvm_pagefree(ptmp);
uvm_unlock_pageq();
+ rw_exit(uobj->vmobjlock);
return result;
}
@@ -1098,6 +1112,8 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
}
+
+ rw_exit(uobj->vmobjlock);
return (VM_PAGER_OK);
}
@@ -1113,6 +1129,7 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
int
uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
{
+ struct uvm_object *uobj = &uvn->u_obj;
struct vnode *vn;
struct uio uio;
struct iovec iov;
@@ -1123,6 +1140,8 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
int netunlocked = 0;
int lkflags = (flags & PGO_NOWAIT) ? LK_NOWAIT : 0;
+ KASSERT(rw_write_held(uobj->vmobjlock));
+
/* init values */
waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
vn = uvn->u_vnode;
@@ -1134,7 +1153,8 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
return VM_PAGER_AGAIN;
}
uvn->u_flags |= UVM_VNODE_IOSYNCWANTED;
- tsleep_nsec(&uvn->u_flags, PVM, "uvn_iosync", INFSLP);
+ rwsleep_nsec(&uvn->u_flags, uobj->vmobjlock, PVM, "uvn_iosync",
+ INFSLP);
}
/* check size */
@@ -1157,6 +1177,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
* (this time with sleep ok).
*/
uvn->u_nio++; /* we have an I/O in progress! */
+ rw_exit(uobj->vmobjlock);
if (kva == 0)
kva = uvm_pagermapin(pps, npages,
mapinflags | UVMPAGER_MAPIN_WAITOK);
@@ -1200,6 +1221,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
* Ideally, this kind of operation *should* work.
*/
result = 0;
+ KERNEL_LOCK();
if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL | lkflags);
if (result == 0) {
@@ -1215,6 +1237,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
VOP_UNLOCK(vn);
}
+ KERNEL_UNLOCK();
if (netunlocked)
NET_LOCK();
@@ -1241,6 +1264,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
uvm_pagermapout(kva, npages);
/* now clean up the object (i.e. drop I/O count) */
+ rw_enter(uobj->vmobjlock, RW_WRITE);
uvn->u_nio--; /* I/O DONE! */
if ((uvn->u_flags & UVM_VNODE_IOSYNC) != 0 && uvn->u_nio == 0) {
wakeup(&uvn->u_nio);
@@ -1252,8 +1276,12 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
KASSERT(flags & PGO_NOWAIT);
return VM_PAGER_AGAIN;
} else {
- while (rebooting)
- tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP);
+ if (rebooting) {
+ KERNEL_LOCK();
+ while (rebooting)
+ tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP);
+ KERNEL_UNLOCK();
+ }
return VM_PAGER_ERROR;
}
}
@@ -1300,11 +1328,14 @@ int
uvm_vnp_uncache(struct vnode *vp)
{
struct uvm_vnode *uvn = vp->v_uvm;
+ struct uvm_object *uobj = &uvn->u_obj;
/* lock uvn part of the vnode and check if we need to do anything */
+ rw_enter(uobj->vmobjlock, RW_WRITE);
if ((uvn->u_flags & UVM_VNODE_VALID) == 0 ||
(uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
+ rw_exit(uobj->vmobjlock);
return TRUE;
}
@@ -1314,6 +1345,7 @@ uvm_vnp_uncache(struct vnode *vp)
*/
uvn->u_flags &= ~UVM_VNODE_CANPERSIST;
if (uvn->u_obj.uo_refs) {
+ rw_exit(uobj->vmobjlock);
return FALSE;
}
@@ -1323,6 +1355,7 @@ uvm_vnp_uncache(struct vnode *vp)
*/
vref(vp); /* seems ok, even with VOP_LOCK */
uvn->u_obj.uo_refs++; /* value is now 1 */
+ rw_exit(uobj->vmobjlock);
#ifdef VFSLCKDEBUG
/*
@@ -1374,6 +1407,11 @@ void
uvm_vnp_setsize(struct vnode *vp, off_t newsize)
{
struct uvm_vnode *uvn = vp->v_uvm;
+ struct uvm_object *uobj = &uvn->u_obj;
+
+ KERNEL_ASSERT_LOCKED();
+
+ rw_enter(uobj->vmobjlock, RW_WRITE);
/* lock uvn and check for valid object, and if valid: do it! */
if (uvn->u_flags & UVM_VNODE_VALID) {
@@ -1389,6 +1427,7 @@ uvm_vnp_setsize(struct vnode *vp, off_t newsize)
}
uvn->u_size = newsize;
}
+ rw_exit(uobj->vmobjlock);
}
/*
@@ -1447,6 +1486,7 @@ uvm_vnp_sync(struct mount *mp)
/* step 3: we now have a list of uvn's that may need cleaning. */
SIMPLEQ_FOREACH(uvn, &uvn_sync_q, u_syncq) {
+ rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
#ifdef DEBUG
if (uvn->u_flags & UVM_VNODE_DYING) {
printf("uvm_vnp_sync: dying vnode on sync list\n");
@@ -1465,6 +1505,7 @@ uvm_vnp_sync(struct mount *mp)
LIST_REMOVE(uvn, u_wlist);
uvn->u_flags &= ~UVM_VNODE_WRITEABLE;
}
+ rw_exit(uvn->u_obj.vmobjlock);
/* now drop our reference to the uvn */
uvn_detach(&uvn->u_obj);