The diff below unlocks the bottom part of the UVM fault handler. I'm interested in squashing the remaining bugs. Please test with your usual setup & report back.
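For context, "unlocking" here means the lower-fault path no longer hides behind the kernel lock: each uvm_object is now protected by its own `vmobjlock` rwlock, which is taken around pager operations and page manipulation and released across sleeps and I/O. The snippet below is only an illustrative sketch of that pattern (the names are the tree's, the code itself is not part of the diff):

	/*
	 * Sketch of the locking pattern the diff applies (illustrative,
	 * not taken from the patch): pages hanging off a uvm_object are
	 * serialized by the per-object rwlock instead of the kernel lock.
	 */
	rw_enter(uobj->vmobjlock, RW_WRITE);
	while ((pg = uvm_pagelookup(uobj, off)) != NULL &&
	    (pg->pg_flags & PG_BUSY)) {
		/* rwsleep_nsec() drops and retakes vmobjlock, replacing tsleep_nsec() */
		atomic_setbits_int(&pg->pg_flags, PG_WANTED);
		rwsleep_nsec(pg, uobj->vmobjlock, PVM, "objbusy", INFSLP);
	}
	/* ... operate on the page while holding the object lock ... */
	rw_exit(uobj->vmobjlock);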
Thanks, Martin diff --git sys/arch/amd64/conf/GENERIC.MP sys/arch/amd64/conf/GENERIC.MP index bb842f6d96e..e5334c19eac 100644 --- sys/arch/amd64/conf/GENERIC.MP +++ sys/arch/amd64/conf/GENERIC.MP @@ -4,6 +4,6 @@ include "arch/amd64/conf/GENERIC" option MULTIPROCESSOR #option MP_LOCKDEBUG -#option WITNESS +option WITNESS cpu* at mainbus? diff --git sys/arch/i386/conf/GENERIC.MP sys/arch/i386/conf/GENERIC.MP index 980a572b8fd..ef7ded61501 100644 --- sys/arch/i386/conf/GENERIC.MP +++ sys/arch/i386/conf/GENERIC.MP @@ -7,6 +7,6 @@ include "arch/i386/conf/GENERIC" option MULTIPROCESSOR # Multiple processor support #option MP_LOCKDEBUG -#option WITNESS +option WITNESS cpu* at mainbus? diff --git sys/dev/pci/drm/i915/gem/i915_gem_shmem.c sys/dev/pci/drm/i915/gem/i915_gem_shmem.c index ce8e2eca141..47b567087e7 100644 --- sys/dev/pci/drm/i915/gem/i915_gem_shmem.c +++ sys/dev/pci/drm/i915/gem/i915_gem_shmem.c @@ -268,8 +268,10 @@ shmem_truncate(struct drm_i915_gem_object *obj) #ifdef __linux__ shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); #else + rw_enter(obj->base.uao->vmobjlock, RW_WRITE); obj->base.uao->pgops->pgo_flush(obj->base.uao, 0, obj->base.size, PGO_ALLPAGES | PGO_FREE); + rw_exit(obj->base.uao->vmobjlock); #endif obj->mm.madv = __I915_MADV_PURGED; obj->mm.pages = ERR_PTR(-EFAULT); diff --git sys/dev/pci/drm/radeon/radeon_ttm.c sys/dev/pci/drm/radeon/radeon_ttm.c index eb879b5c72c..837a9f94298 100644 --- sys/dev/pci/drm/radeon/radeon_ttm.c +++ sys/dev/pci/drm/radeon/radeon_ttm.c @@ -1006,6 +1006,8 @@ radeon_ttm_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, vm_page_t *pps, struct radeon_device *rdev; int r; + KASSERT(rw_write_held(ufi->entry->object.uvm_obj->vmobjlock)); + bo = (struct drm_gem_object *)ufi->entry->object.uvm_obj; rdev = bo->dev->dev_private; down_read(&rdev->pm.mclk_lock); diff --git sys/uvm/uvm_aobj.c sys/uvm/uvm_aobj.c index 20051d95dc1..127218c4c40 100644 --- sys/uvm/uvm_aobj.c +++ sys/uvm/uvm_aobj.c @@ -31,7 +31,7 @@ /* * uvm_aobj.c: anonymous memory uvm_object pager * - * author: Chuck Silvers <c...@chuq.com> +* author: Chuck Silvers <c...@chuq.com> * started: Jan-1998 * * - design mostly from Chuck Cranor @@ -184,7 +184,7 @@ const struct uvm_pagerops aobj_pager = { * deadlock. */ static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list); -static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_NONE); +static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR); /* @@ -277,6 +277,7 @@ uao_find_swslot(struct uvm_object *uobj, int pageidx) * uao_set_swslot: set the swap slot for a page in an aobj. 
* * => setting a slot to zero frees the slot + * => object must be locked by caller * => we return the old slot number, or -1 if we failed to allocate * memory to record the new slot number */ @@ -286,7 +287,7 @@ uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot) struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; int oldslot; - KERNEL_ASSERT_LOCKED(); + KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0); KASSERT(UVM_OBJ_IS_AOBJ(uobj)); /* @@ -358,7 +359,9 @@ uao_free(struct uvm_aobj *aobj) struct uvm_object *uobj = &aobj->u_obj; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); + KASSERT(rw_write_held(uobj->vmobjlock)); uao_dropswap_range(uobj, 0, 0); + rw_exit(uobj->vmobjlock); if (UAO_USES_SWHASH(aobj)) { /* @@ -671,6 +674,7 @@ struct uvm_object * uao_create(vsize_t size, int flags) { static struct uvm_aobj kernel_object_store; + static struct rwlock bootstrap_kernel_object_lock; static int kobj_alloced = 0; int pages = round_page(size) >> PAGE_SHIFT; struct uvm_aobj *aobj; @@ -742,6 +746,11 @@ uao_create(vsize_t size, int flags) * Initialise UVM object. */ uvm_obj_init(&aobj->u_obj, &aobj_pager, refs); + if (flags & UAO_FLAG_KERNOBJ) { + /* Use a temporary static lock for kernel_object. */ + rw_init(&bootstrap_kernel_object_lock, "kobjlk"); + uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock); + } /* * now that aobj is ready, add it to the global list @@ -822,20 +831,20 @@ uao_detach(struct uvm_object *uobj) * involved in is complete), release any swap resources and free * the page itself. */ - uvm_lock_pageq(); - while((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) { + rw_enter(uobj->vmobjlock, RW_WRITE); + while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) { + pmap_page_protect(pg, PROT_NONE); if (pg->pg_flags & PG_BUSY) { atomic_setbits_int(&pg->pg_flags, PG_WANTED); - uvm_unlock_pageq(); - tsleep_nsec(pg, PVM, "uao_det", INFSLP); - uvm_lock_pageq(); + rwsleep_nsec(pg, uobj->vmobjlock, PVM, "uao_det", + INFSLP); continue; } - pmap_page_protect(pg, PROT_NONE); uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT); + uvm_lock_pageq(); uvm_pagefree(pg); + uvm_unlock_pageq(); } - uvm_unlock_pageq(); /* * Finally, free the anonymous UVM object itself. @@ -864,7 +873,7 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) voff_t curoff; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); - KERNEL_ASSERT_LOCKED(); + KASSERT(rw_write_held(uobj->vmobjlock)); if (flags & PGO_ALLPAGES) { start = 0; @@ -901,7 +910,8 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) /* Make sure page is unbusy, else wait for it. */ if (pp->pg_flags & PG_BUSY) { atomic_setbits_int(&pp->pg_flags, PG_WANTED); - tsleep_nsec(pp, PVM, "uaoflsh", INFSLP); + rwsleep_nsec(pp, uobj->vmobjlock, PVM, "uaoflsh", + INFSLP); curoff -= PAGE_SIZE; continue; } @@ -972,7 +982,7 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) * 2: page is zero-fill -> allocate a new page and zero it. * 3: page is swapped out -> fetch the page from swap. * - * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot. + * cases 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot. * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES), * then we will need to return VM_PAGER_UNLOCK. 
* @@ -992,7 +1002,7 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, boolean_t done; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); - KERNEL_ASSERT_LOCKED(); + KASSERT(rw_write_held(uobj->vmobjlock)); /* * get number of pages @@ -1115,7 +1125,10 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, /* out of RAM? */ if (ptmp == NULL) { + rw_exit(uobj->vmobjlock); uvm_wait("uao_getpage"); + rw_enter(uobj->vmobjlock, RW_WRITE); + /* goto top of pps while loop */ continue; } @@ -1135,7 +1148,8 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, /* page is there, see if we need to wait on it */ if ((ptmp->pg_flags & PG_BUSY) != 0) { atomic_setbits_int(&ptmp->pg_flags, PG_WANTED); - tsleep_nsec(ptmp, PVM, "uao_get", INFSLP); + rwsleep_nsec(ptmp, uobj->vmobjlock, PVM, + "uao_get", INFSLP); continue; /* goto top of pps while loop */ } @@ -1169,8 +1183,12 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, } else { /* * page in the swapped-out page. + * unlock object for i/o, relock when done. */ + + rw_exit(uobj->vmobjlock); rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO); + rw_enter(uobj->vmobjlock, RW_WRITE); /* * I/O done. check for errors. @@ -1194,6 +1212,7 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, uvm_lock_pageq(); uvm_pagefree(ptmp); uvm_unlock_pageq(); + rw_exit(uobj->vmobjlock); return rv; } @@ -1215,11 +1234,14 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, } /* lcv loop */ + rw_exit(uobj->vmobjlock); return VM_PAGER_OK; } /* * uao_dropswap: release any swap resources from this aobj page. + * + * => aobj must be locked or have a reference count of 0. */ int uao_dropswap(struct uvm_object *uobj, int pageidx) @@ -1238,6 +1260,7 @@ uao_dropswap(struct uvm_object *uobj, int pageidx) /* * page in every page in every aobj that is paged-out to a range of swslots. * + * => aobj must be locked and is returned locked. * => returns TRUE if pagein was aborted due to lack of memory. */ boolean_t @@ -1272,7 +1295,9 @@ uao_swap_off(int startslot, int endslot) /* * Page in all pages in the swap slot range. */ + rw_enter(aobj->u_obj.vmobjlock, RW_WRITE); rv = uao_pagein(aobj, startslot, endslot); + rw_exit(aobj->u_obj.vmobjlock); /* Drop the reference of the current object. */ uao_detach(&aobj->u_obj); @@ -1375,14 +1400,21 @@ restart: static boolean_t uao_pagein_page(struct uvm_aobj *aobj, int pageidx) { + struct uvm_object *uobj = &aobj->u_obj; struct vm_page *pg; int rv, slot, npages; pg = NULL; npages = 1; + + KASSERT(rw_write_held(uobj->vmobjlock)); rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT, &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0); + /* + * relock and finish up. + */ + rw_enter(uobj->vmobjlock, RW_WRITE); switch (rv) { case VM_PAGER_OK: break; @@ -1430,7 +1462,7 @@ uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end) int swpgonlydelta = 0; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); - /* KASSERT(mutex_owned(uobj->vmobjlock)); */ + KASSERT(rw_write_held(uobj->vmobjlock)); if (end == 0) { end = INT64_MAX; diff --git sys/uvm/uvm_device.c sys/uvm/uvm_device.c index e5d035f2947..994ab537a82 100644 --- sys/uvm/uvm_device.c +++ sys/uvm/uvm_device.c @@ -166,7 +166,9 @@ udv_attach(dev_t device, vm_prot_t accessprot, voff_t off, vsize_t size) /* * bump reference count, unhold, return. 
*/ + rw_enter(lcv->u_obj.vmobjlock, RW_WRITE); lcv->u_obj.uo_refs++; + rw_exit(lcv->u_obj.vmobjlock); mtx_enter(&udv_lock); if (lcv->u_flags & UVM_DEVICE_WANTED) @@ -228,8 +230,9 @@ udv_attach(dev_t device, vm_prot_t accessprot, voff_t off, vsize_t size) static void udv_reference(struct uvm_object *uobj) { - KERNEL_ASSERT_LOCKED(); + rw_enter(uobj->vmobjlock, RW_WRITE); uobj->uo_refs++; + rw_exit(uobj->vmobjlock); } /* @@ -248,8 +251,10 @@ udv_detach(struct uvm_object *uobj) * loop until done */ again: + rw_enter(uobj->vmobjlock, RW_WRITE); if (uobj->uo_refs > 1) { uobj->uo_refs--; + rw_exit(uobj->vmobjlock); return; } KASSERT(uobj->uo_npages == 0 && RBT_EMPTY(uvm_objtree, &uobj->memt)); @@ -260,10 +265,7 @@ again: mtx_enter(&udv_lock); if (udv->u_flags & UVM_DEVICE_HOLD) { udv->u_flags |= UVM_DEVICE_WANTED; - /* - * lock interleaving. -- this is ok in this case since the - * locks are both IPL_NONE - */ + rw_exit(uobj->vmobjlock); msleep_nsec(udv, &udv_lock, PVM | PNORELOCK, "udv_detach", INFSLP); goto again; @@ -276,6 +278,7 @@ again: if (udv->u_flags & UVM_DEVICE_WANTED) wakeup(udv); mtx_leave(&udv_lock); + rw_exit(uobj->vmobjlock); uvm_obj_destroy(uobj); free(udv, M_TEMP, sizeof(*udv)); diff --git sys/uvm/uvm_fault.c sys/uvm/uvm_fault.c index c90d9b3fa81..ed72f1bbf92 100644 --- sys/uvm/uvm_fault.c +++ sys/uvm/uvm_fault.c @@ -326,7 +326,8 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, if (pg->uobject) { /* Owner of page is UVM object. */ uvmfault_unlockall(ufi, amap, NULL); - tsleep_nsec(pg, PVM, "anonget1", INFSLP); + rwsleep_nsec(pg, pg->uobject->vmobjlock, + PVM | PNORELOCK, "anonget1", INFSLP); } else { /* Owner of page is anon. */ uvmfault_unlockall(ufi, NULL, NULL); @@ -620,6 +621,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vaddr, vm_fault_t fault_type, */ if (uobj != NULL && uobj->pgops->pgo_fault != NULL) { KERNEL_LOCK(); + rw_enter(uobj->vmobjlock, RW_WRITE); error = uobj->pgops->pgo_fault(&ufi, flt.startva, pages, flt.npages, flt.centeridx, fault_type, flt.access_type, @@ -634,10 +636,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vaddr, vm_fault_t fault_type, error = EACCES; } else { /* case 2: fault on backing obj or zero fill */ - KERNEL_LOCK(); error = uvm_fault_lower(&ufi, &flt, pages, fault_type); - KERNEL_UNLOCK(); } } } @@ -793,10 +793,10 @@ uvm_fault_check(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, voff_t uoff; uoff = (flt->startva - ufi->entry->start) + ufi->entry->offset; - KERNEL_LOCK(); + rw_enter(uobj->vmobjlock, RW_WRITE); (void) uobj->pgops->pgo_flush(uobj, uoff, uoff + ((vsize_t)nback << PAGE_SHIFT), PGO_DEACTIVATE); - KERNEL_UNLOCK(); + rw_exit(uobj->vmobjlock); } /* now forget about the backpages */ @@ -1098,6 +1098,8 @@ uvm_fault_lower_lookup( int lcv, gotpages; vaddr_t currva; + rw_enter(uobj->vmobjlock, RW_WRITE); + counters_inc(uvmexp_counters, flt_lget); gotpages = flt->npages; (void) uobj->pgops->pgo_get(uobj, @@ -1211,6 +1213,14 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, * made it BUSY. */ + /* + * locked: + */ + KASSERT(amap == NULL || + rw_write_held(amap->am_lock)); + KASSERT(uobj == NULL || + rw_write_held(uobj->vmobjlock)); + /* * note that uobjpage can not be PGO_DONTCARE at this point. we now * set uobjpage to PGO_DONTCARE if we are doing a zero fill. 
if we @@ -1268,6 +1278,7 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, return (EIO); uobjpage = PGO_DONTCARE; + uobj = NULL; promote = TRUE; } @@ -1276,6 +1287,12 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, if (locked && amap != NULL) amap_lock(amap); + /* might be changed */ + if (uobjpage != PGO_DONTCARE) { + uobj = uobjpage->uobject; + rw_enter(uobj->vmobjlock, RW_WRITE); + } + /* * Re-verify that amap slot is still free. if there is * a problem, we clean up. @@ -1300,10 +1317,12 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, atomic_clearbits_int(&uobjpage->pg_flags, PG_BUSY|PG_WANTED); UVM_PAGE_OWN(uobjpage, NULL); - return ERESTART; } - if (locked == FALSE) + + if (locked == FALSE) { + rw_exit(uobj->vmobjlock); return ERESTART; + } /* * we have the data in uobjpage which is PG_BUSY @@ -1423,6 +1442,7 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, uvm_lock_pageq(); uvm_pageactivate(uobjpage); uvm_unlock_pageq(); + rw_exit(uobj->vmobjlock); uobj = NULL; } else { counters_inc(uvmexp_counters, flt_przero); @@ -1434,7 +1454,7 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, if (amap_add(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start, anon, 0)) { - uvmfault_unlockall(ufi, amap, NULL); + uvmfault_unlockall(ufi, amap, uobj); uvm_anfree(anon); counters_inc(uvmexp_counters, flt_noamap); @@ -1483,25 +1503,32 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, return ERESTART; } - uvm_lock_pageq(); - if (fault_type == VM_FAULT_WIRE) { + uvm_lock_pageq(); uvm_pagewire(pg); + uvm_unlock_pageq(); if (pg->pg_flags & PQ_AOBJ) { /* * since the now-wired page cannot be paged out, * release its swap resources for others to use. - * since an aobj page with no swap cannot be PG_CLEAN, - * clear its clean flag now. + * since an aobj page with no swap cannot be clean, + * mark it dirty now. + * + * use pg->uobject here. if the page is from a + * tmpfs vnode, the pages are backed by its UAO and + * not the vnode. */ + KASSERT(uobj != NULL); + KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock); atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); uao_dropswap(uobj, pg->offset >> PAGE_SHIFT); } } else { /* activate it */ + uvm_lock_pageq(); uvm_pageactivate(pg); + uvm_unlock_pageq(); } - uvm_unlock_pageq(); if (pg->pg_flags & PG_WANTED) wakeup(pg); @@ -1567,7 +1594,7 @@ uvm_fault_unwire(vm_map_t map, vaddr_t start, vaddr_t end) void uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end) { - vm_map_entry_t entry, next; + vm_map_entry_t entry, oentry = NULL, next; pmap_t pmap = vm_map_pmap(map); vaddr_t va; paddr_t pa; @@ -1578,12 +1605,9 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end) /* * we assume that the area we are unwiring has actually been wired * in the first place. this means that we should be able to extract - * the PAs from the pmap. we also lock out the page daemon so that - * we can call uvm_pageunwire. + * the PAs from the pmap. */ - uvm_lock_pageq(); - /* * find the beginning map entry for the region. */ @@ -1605,6 +1629,17 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end) entry = next; } + /* + * lock it. + */ + if (entry != oentry) { + if (oentry != NULL) { + uvm_map_unlock_entry(oentry); + } + uvm_map_lock_entry(entry); + oentry = entry; + } + /* * if the entry is no longer wired, tell the pmap. 
*/ @@ -1612,11 +1647,16 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end) pmap_unwire(pmap, va); pg = PHYS_TO_VM_PAGE(pa); - if (pg) + if (pg) { + uvm_lock_pageq(); uvm_pageunwire(pg); + uvm_unlock_pageq(); + } } - uvm_unlock_pageq(); + if (oentry != NULL) { + uvm_map_unlock_entry(entry); + } } /* @@ -1650,6 +1690,8 @@ void uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap, struct uvm_object *uobj) { + if (uobj) + rw_exit(uobj->vmobjlock); if (amap != NULL) amap_unlock(amap); uvmfault_unlockmaps(ufi, FALSE); diff --git sys/uvm/uvm_km.c sys/uvm/uvm_km.c index fc31ae99dff..5f36935c09d 100644 --- sys/uvm/uvm_km.c +++ sys/uvm/uvm_km.c @@ -249,13 +249,15 @@ uvm_km_pgremove(struct uvm_object *uobj, vaddr_t startva, vaddr_t endva) int swpgonlydelta = 0; KASSERT(UVM_OBJ_IS_AOBJ(uobj)); + KASSERT(rw_write_held(uobj->vmobjlock)); pmap_remove(pmap_kernel(), startva, endva); for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) { pp = uvm_pagelookup(uobj, curoff); if (pp && pp->pg_flags & PG_BUSY) { atomic_setbits_int(&pp->pg_flags, PG_WANTED); - tsleep_nsec(pp, PVM, "km_pgrm", INFSLP); + rwsleep_nsec(pp, uobj->vmobjlock, PVM, "km_pgrm", + INFSLP); curoff -= PAGE_SIZE; /* loop back to us */ continue; } @@ -383,6 +385,9 @@ uvm_km_kmemalloc_pla(struct vm_map *map, struct uvm_object *obj, vsize_t size, return (0); } + if (obj != NULL) + rw_enter(obj->vmobjlock, RW_WRITE); + loopva = kva; while (loopva != kva + size) { pg = TAILQ_FIRST(&pgl); @@ -409,6 +414,9 @@ uvm_km_kmemalloc_pla(struct vm_map *map, struct uvm_object *obj, vsize_t size, KASSERT(TAILQ_EMPTY(&pgl)); pmap_update(pmap_kernel()); + if (obj != NULL) + rw_exit(obj->vmobjlock); + return kva; } @@ -474,12 +482,14 @@ uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit) /* now allocate the memory. we must be careful about released pages. */ loopva = kva; while (size) { + rw_enter(uvm.kernel_object->vmobjlock, RW_WRITE); /* allocate ram */ pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0); if (pg) { atomic_clearbits_int(&pg->pg_flags, PG_BUSY); UVM_PAGE_OWN(pg, NULL); } + rw_exit(uvm.kernel_object->vmobjlock); if (__predict_false(pg == NULL)) { if (curproc == uvm.pagedaemon_proc) { /* diff --git sys/uvm/uvm_map.c sys/uvm/uvm_map.c index d153bbfd20b..06553a814c6 100644 --- sys/uvm/uvm_map.c +++ sys/uvm/uvm_map.c @@ -124,6 +124,8 @@ struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); void uvm_mapent_free(struct vm_map_entry*); void uvm_unmap_kill_entry(struct vm_map*, struct vm_map_entry*); +void uvm_unmap_kill_entry_withlock(struct vm_map *, + struct vm_map_entry *, int); void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); void uvm_mapent_mkfree(struct vm_map*, struct vm_map_entry*, struct vm_map_entry**, @@ -499,6 +501,28 @@ uvm_map_reference(struct vm_map *map) atomic_inc_int(&map->ref_count); } +void +uvm_map_lock_entry(struct vm_map_entry *entry) +{ + if (entry->aref.ar_amap != NULL) { + amap_lock(entry->aref.ar_amap); + } + if (UVM_ET_ISOBJ(entry)) { + rw_enter(entry->object.uvm_obj->vmobjlock, RW_WRITE); + } +} + +void +uvm_map_unlock_entry(struct vm_map_entry *entry) +{ + if (UVM_ET_ISOBJ(entry)) { + rw_exit(entry->object.uvm_obj->vmobjlock); + } + if (entry->aref.ar_amap != NULL) { + amap_unlock(entry->aref.ar_amap); + } +} + /* * Calculate the dused delta. */ @@ -2101,7 +2125,8 @@ uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, * Unwire and release referenced amap and object from map entry. 
*/ void -uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) +uvm_unmap_kill_entry_withlock(struct vm_map *map, struct vm_map_entry *entry, + int needlock) { /* Unwire removed map entry. */ if (VM_MAPENT_ISWIRED(entry)) { @@ -2111,6 +2136,9 @@ uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) KERNEL_UNLOCK(); } + if (needlock) + uvm_map_lock_entry(entry); + /* Entry-type specific code. */ if (UVM_ET_ISHOLE(entry)) { /* Nothing to be done for holes. */ @@ -2157,17 +2185,19 @@ uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) */ uvm_km_pgremove(entry->object.uvm_obj, entry->start, entry->end); - - /* - * null out kernel_object reference, we've just - * dropped it - */ - entry->etype &= ~UVM_ET_OBJ; - entry->object.uvm_obj = NULL; /* to be safe */ } else { /* remove mappings the standard way. */ pmap_remove(map->pmap, entry->start, entry->end); } + + if (needlock) + uvm_map_unlock_entry(entry); +} + +void +uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) +{ + uvm_unmap_kill_entry_withlock(map, entry, 0); } /* @@ -2227,7 +2257,7 @@ uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, map->sserial++; /* Kill entry. */ - uvm_unmap_kill_entry(map, entry); + uvm_unmap_kill_entry_withlock(map, entry, 1); /* Update space usage. */ if ((map->flags & VM_MAP_ISVMSPACE) && @@ -3420,8 +3450,10 @@ uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, */ iter->wired_count = 0; } + uvm_map_lock_entry(iter); pmap_protect(map->pmap, iter->start, iter->end, iter->protection & mask); + uvm_map_unlock_entry(iter); } /* @@ -3967,11 +3999,13 @@ uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, */ if (!UVM_ET_ISNEEDSCOPY(old_entry)) { if (old_entry->max_protection & PROT_WRITE) { + uvm_map_lock_entry(old_entry); pmap_protect(old_map->pmap, old_entry->start, old_entry->end, old_entry->protection & ~PROT_WRITE); + uvm_map_unlock_entry(old_entry); pmap_update(old_map->pmap); } old_entry->etype |= UVM_ET_NEEDSCOPY; @@ -4751,9 +4785,11 @@ flush_object: ((flags & PGO_FREE) == 0 || ((entry->max_protection & PROT_WRITE) != 0 && (entry->etype & UVM_ET_COPYONWRITE) == 0))) { + rw_enter(uobj->vmobjlock, RW_WRITE); rv = uobj->pgops->pgo_flush(uobj, cp_start - entry->start + entry->offset, cp_end - entry->start + entry->offset, flags); + rw_exit(uobj->vmobjlock); if (rv == FALSE) error = EFAULT; diff --git sys/uvm/uvm_map.h sys/uvm/uvm_map.h index 12092ebfcd2..6c02bc93137 100644 --- sys/uvm/uvm_map.h +++ sys/uvm/uvm_map.h @@ -442,6 +442,9 @@ void vm_map_unbusy_ln(struct vm_map*, char*, int); #define vm_map_unbusy(map) vm_map_unbusy_ln(map, NULL, 0) #endif +void uvm_map_lock_entry(struct vm_map_entry *); +void uvm_map_unlock_entry(struct vm_map_entry *); + #endif /* _KERNEL */ /* diff --git sys/uvm/uvm_object.c sys/uvm/uvm_object.c index 675cd9de2da..8b52a14459f 100644 --- sys/uvm/uvm_object.c +++ sys/uvm/uvm_object.c @@ -1,7 +1,7 @@ /* $OpenBSD: uvm_object.c,v 1.22 2021/10/23 14:42:08 mpi Exp $ */ /* - * Copyright (c) 2006 The NetBSD Foundation, Inc. + * Copyright (c) 2006, 2010, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -38,6 +38,7 @@ #include <sys/systm.h> #include <sys/mman.h> #include <sys/atomic.h> +#include <sys/rwlock.h> #include <uvm/uvm.h> @@ -51,15 +52,27 @@ const struct uvm_pagerops bufcache_pager = { /* nothing */ }; -/* We will fetch this page count per step */ +/* Page count to fetch per single step. 
*/ #define FETCH_PAGECOUNT 16 /* - * uvm_obj_init: initialise a uvm object. + * uvm_obj_init: initialize UVM memory object. */ void uvm_obj_init(struct uvm_object *uobj, const struct uvm_pagerops *pgops, int refs) { + int alock; + + alock = ((pgops != NULL) && (pgops != &pmap_pager) && + (pgops != &bufcache_pager) && (refs != UVM_OBJ_KERN)); + + if (alock) { + /* Allocate and assign a lock. */ + rw_obj_alloc(&uobj->vmobjlock, "uobjlk"); + } else { + /* The lock will need to be set via uvm_obj_setlock(). */ + uobj->vmobjlock = NULL; + } uobj->pgops = pgops; RBT_INIT(uvm_objtree, &uobj->memt); uobj->uo_npages = 0; @@ -73,12 +86,38 @@ void uvm_obj_destroy(struct uvm_object *uo) { KASSERT(RBT_EMPTY(uvm_objtree, &uo->memt)); + + rw_obj_free(uo->vmobjlock); +} + +/* + * uvm_obj_setlock: assign a vmobjlock to the UVM object. + * + * => Caller is responsible to ensure that UVM objects is not use. + * => Only dynamic lock may be previously set. We drop the reference then. + */ +void +uvm_obj_setlock(struct uvm_object *uo, struct rwlock *lockptr) +{ + struct rwlock *olockptr = uo->vmobjlock; + + if (olockptr) { + /* Drop the reference on the old lock. */ + rw_obj_free(olockptr); + } + if (lockptr == NULL) { + /* If new lock is not passed - allocate default one. */ + rw_obj_alloc(&lockptr, "uobjlk"); + } + uo->vmobjlock = lockptr; } #ifndef SMALL_KERNEL /* - * uvm_obj_wire: wire the pages of entire uobj + * uvm_obj_wire: wire the pages of entire UVM object. * + * => NOTE: this function should only be used for types of objects + * where PG_RELEASED flag is never set (aobj objects) * => caller must pass page-aligned start and end values * => if the caller passes in a pageq pointer, we'll return a list of * wired pages. @@ -94,6 +133,7 @@ uvm_obj_wire(struct uvm_object *uobj, voff_t start, voff_t end, left = (end - start) >> PAGE_SHIFT; + rw_enter(uobj->vmobjlock, RW_WRITE); while (left) { npages = MIN(FETCH_PAGECOUNT, left); @@ -107,6 +147,7 @@ uvm_obj_wire(struct uvm_object *uobj, voff_t start, voff_t end, if (error) goto error; + rw_enter(uobj->vmobjlock, RW_WRITE); for (i = 0; i < npages; i++) { KASSERT(pgs[i] != NULL); @@ -134,6 +175,7 @@ uvm_obj_wire(struct uvm_object *uobj, voff_t start, voff_t end, left -= npages; offset += (voff_t)npages << PAGE_SHIFT; } + rw_exit(uobj->vmobjlock); return 0; @@ -145,17 +187,17 @@ error: } /* - * uobj_unwirepages: unwire the pages of entire uobj + * uvm_obj_unwire: unwire the pages of entire UVM object. * * => caller must pass page-aligned start and end values */ - void uvm_obj_unwire(struct uvm_object *uobj, voff_t start, voff_t end) { struct vm_page *pg; off_t offset; + rw_enter(uobj->vmobjlock, RW_WRITE); uvm_lock_pageq(); for (offset = start; offset < end; offset += PAGE_SIZE) { pg = uvm_pagelookup(uobj, offset); @@ -166,6 +208,7 @@ uvm_obj_unwire(struct uvm_object *uobj, voff_t start, voff_t end) uvm_pageunwire(pg); } uvm_unlock_pageq(); + rw_exit(uobj->vmobjlock); } #endif /* !SMALL_KERNEL */ diff --git sys/uvm/uvm_object.h sys/uvm/uvm_object.h index 9a74600c9df..5fc32ca3eb8 100644 --- sys/uvm/uvm_object.h +++ sys/uvm/uvm_object.h @@ -32,14 +32,25 @@ #define _UVM_UVM_OBJECT_H_ /* - * uvm_object.h - */ - -/* - * uvm_object: all that is left of mach objects. + * The UVM memory object interface. Notes: + * + * A UVM memory object represents a list of pages, which are managed by + * the object's pager operations (uvm_object::pgops). All pages belonging + * to an object are owned by it and thus protected by the object lock. 
+ * + * The lock (uvm_object::vmobjlock) may be shared amongst the UVM objects. + * By default, the lock is allocated dynamically using rw_obj_init() cache. + * Lock sharing is normally used when there is an underlying object. For + * example, vnode representing a file may have an underlying node, which + * is the case for tmpfs and layered file systems. In such case, vnode's + * UVM object and the underlying UVM object shares the lock. + * + * The reference count is managed atomically for the anonymous UVM objects. + * For other objects, it is arbitrary (may use the lock or atomics). */ struct uvm_object { + struct rwlock *vmobjlock; /* lock on object */ const struct uvm_pagerops *pgops; /* pager ops */ RBT_HEAD(uvm_objtree, vm_page) memt; /* pages in object */ int uo_npages; /* # of pages in memt */ @@ -52,10 +63,10 @@ struct uvm_object { * memory objects don't have reference counts -- they never die). * * this value is used to detected kernel object mappings at uvm_unmap() - * time. normally when an object is unmapped its pages eventually become - * deactivated and then paged out and/or freed. this is not useful + * time. normally when an object is unmapped its pages eventaully become + * deactivated and then paged out and/or freed. this is not useful * for kernel objects... when a kernel object is unmapped we always want - * to free the resources associated with the mapping. UVM_OBJ_KERN + * to free the resources associated with the mapping. UVM_OBJ_KERN * allows us to decide which type of unmapping we want to do. * * in addition, we have kernel objects which may be used in an @@ -100,8 +111,12 @@ RBT_PROTOTYPE(uvm_objtree, vm_page, objt, uvm_pagecmp) #define UVM_OBJ_IS_BUFCACHE(uobj) \ ((uobj)->pgops == &bufcache_pager) +#define UVM_OBJ_IS_DUMMY(uobj) \ + (UVM_OBJ_IS_PMAP(uobj) || UVM_OBJ_IS_BUFCACHE(uobj)) + void uvm_obj_init(struct uvm_object *, const struct uvm_pagerops *, int); void uvm_obj_destroy(struct uvm_object *); +void uvm_obj_setlock(struct uvm_object *, struct rwlock *); int uvm_obj_wire(struct uvm_object *, voff_t, voff_t, struct pglist *); void uvm_obj_unwire(struct uvm_object *, voff_t, voff_t); void uvm_obj_free(struct uvm_object *); diff --git sys/uvm/uvm_page.c sys/uvm/uvm_page.c index a90b23af6df..b0d705994d1 100644 --- sys/uvm/uvm_page.c +++ sys/uvm/uvm_page.c @@ -118,6 +118,7 @@ static vaddr_t virtual_space_end; */ static void uvm_pageinsert(struct vm_page *); static void uvm_pageremove(struct vm_page *); +int uvm_page_owner_locked_p(struct vm_page *); /* * inline functions @@ -125,7 +126,7 @@ static void uvm_pageremove(struct vm_page *); /* * uvm_pageinsert: insert a page in the object * - * => caller must lock page queues XXX questionable + * => caller must lock object * => call should have already set pg's object and offset pointers * and bumped the version counter */ @@ -134,7 +135,10 @@ uvm_pageinsert(struct vm_page *pg) { struct vm_page *dupe; + KASSERT(UVM_OBJ_IS_DUMMY(pg->uobject) || + rw_write_held(pg->uobject->vmobjlock)); KASSERT((pg->pg_flags & PG_TABLED) == 0); + dupe = RBT_INSERT(uvm_objtree, &pg->uobject->memt, pg); /* not allowed to insert over another page */ KASSERT(dupe == NULL); @@ -145,12 +149,15 @@ uvm_pageinsert(struct vm_page *pg) /* * uvm_page_remove: remove page from object * - * => caller must lock page queues + * => caller must lock object */ static inline void uvm_pageremove(struct vm_page *pg) { + KASSERT(UVM_OBJ_IS_DUMMY(pg->uobject) || + rw_write_held(pg->uobject->vmobjlock)); KASSERT(pg->pg_flags & PG_TABLED); + 
RBT_REMOVE(uvm_objtree, &pg->uobject->memt, pg); atomic_clearbits_int(&pg->pg_flags, PG_TABLED); @@ -683,11 +690,19 @@ uvm_pagealloc_pg(struct vm_page *pg, struct uvm_object *obj, voff_t off, { int flags; + KASSERT(obj == NULL || anon == NULL); + KASSERT(anon == NULL || off == 0); + KASSERT(off == trunc_page(off)); + KASSERT(obj == NULL || UVM_OBJ_IS_DUMMY(obj) || + rw_write_held(obj->vmobjlock)); + KASSERT(anon == NULL || anon->an_lock == NULL || + rw_write_held(anon->an_lock)); + flags = PG_BUSY | PG_FAKE; pg->offset = off; pg->uobject = obj; pg->uanon = anon; - + KASSERT(uvm_page_owner_locked_p(pg)); if (anon) { anon->an_page = pg; flags |= PQ_ANON; @@ -846,7 +861,9 @@ uvm_pagerealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size, uvm_pagecopy(tpg, pg); KASSERT(tpg->wire_count == 1); tpg->wire_count = 0; + uvm_lock_pageq(); uvm_pagefree(tpg); + uvm_unlock_pageq(); uvm_pagealloc_pg(pg, obj, offset, NULL); } } @@ -873,6 +890,10 @@ uvm_pagealloc(struct uvm_object *obj, voff_t off, struct vm_anon *anon, KASSERT(obj == NULL || anon == NULL); KASSERT(anon == NULL || off == 0); KASSERT(off == trunc_page(off)); + KASSERT(obj == NULL || UVM_OBJ_IS_DUMMY(obj) || + rw_write_held(obj->vmobjlock)); + KASSERT(anon == NULL || anon->an_lock == NULL || + rw_write_held(anon->an_lock)); pmr_flags = UVM_PLA_NOWAIT; @@ -940,10 +961,9 @@ uvm_pageclean(struct vm_page *pg) { u_int flags_to_clear = 0; -#if all_pmap_are_fixed - if (pg->pg_flags & (PG_TABLED|PQ_ACTIVE|PQ_INACTIVE)) + if ((pg->pg_flags & (PG_TABLED|PQ_ACTIVE|PQ_INACTIVE)) && + (pg->uobject == NULL || !UVM_OBJ_IS_PMAP(pg->uobject))) MUTEX_ASSERT_LOCKED(&uvm.pageqlock); -#endif #ifdef DEBUG if (pg->uobject == (void *)0xdeadbeef && @@ -953,6 +973,10 @@ uvm_pageclean(struct vm_page *pg) #endif KASSERT((pg->pg_flags & PG_DEV) == 0); + KASSERT(pg->uobject == NULL || UVM_OBJ_IS_DUMMY(pg->uobject) || + rw_write_held(pg->uobject->vmobjlock)); + KASSERT(pg->uobject != NULL || pg->uanon == NULL || + rw_write_held(pg->uanon->an_lock)); /* * if the page was an object page (and thus "TABLED"), remove it @@ -1009,10 +1033,9 @@ uvm_pageclean(struct vm_page *pg) void uvm_pagefree(struct vm_page *pg) { -#if all_pmap_are_fixed - if (pg->pg_flags & (PG_TABLED|PQ_ACTIVE|PQ_INACTIVE)) + if ((pg->pg_flags & (PG_TABLED|PQ_ACTIVE|PQ_INACTIVE)) && + (pg->uobject == NULL || !UVM_OBJ_IS_PMAP(pg->uobject))) MUTEX_ASSERT_LOCKED(&uvm.pageqlock); -#endif uvm_pageclean(pg); uvm_pmr_freepages(pg, 1); @@ -1037,6 +1060,10 @@ uvm_page_unbusy(struct vm_page **pgs, int npgs) if (pg == NULL || pg == PGO_DONTCARE) { continue; } + + KASSERT(uvm_page_owner_locked_p(pg)); + KASSERT(pg->pg_flags & PG_BUSY); + if (pg->pg_flags & PG_WANTED) { wakeup(pg); } @@ -1207,6 +1234,7 @@ uvm_pagelookup(struct uvm_object *obj, voff_t off) void uvm_pagewire(struct vm_page *pg) { + KASSERT(uvm_page_owner_locked_p(pg)); MUTEX_ASSERT_LOCKED(&uvm.pageqlock); if (pg->wire_count == 0) { @@ -1237,6 +1265,7 @@ uvm_pagewire(struct vm_page *pg) void uvm_pageunwire(struct vm_page *pg) { + KASSERT(uvm_page_owner_locked_p(pg)); MUTEX_ASSERT_LOCKED(&uvm.pageqlock); pg->wire_count--; @@ -1258,6 +1287,7 @@ uvm_pageunwire(struct vm_page *pg) void uvm_pagedeactivate(struct vm_page *pg) { + KASSERT(uvm_page_owner_locked_p(pg)); MUTEX_ASSERT_LOCKED(&uvm.pageqlock); if (pg->pg_flags & PQ_ACTIVE) { @@ -1294,6 +1324,7 @@ uvm_pagedeactivate(struct vm_page *pg) void uvm_pageactivate(struct vm_page *pg) { + KASSERT(uvm_page_owner_locked_p(pg)); MUTEX_ASSERT_LOCKED(&uvm.pageqlock); if (pg->pg_flags & PQ_INACTIVE) { @@ 
-1341,6 +1372,24 @@ uvm_pagecopy(struct vm_page *src, struct vm_page *dst) pmap_copy_page(src, dst); } +/* + * uvm_page_owner_locked_p: return true if object associated with page is + * locked. this is a weak check for runtime assertions only. + */ +int +uvm_page_owner_locked_p(struct vm_page *pg) +{ + if (pg->uobject != NULL) { + if (UVM_OBJ_IS_DUMMY(pg->uobject)) + return 1; + return rw_write_held(pg->uobject->vmobjlock); + } + if (pg->uanon != NULL) { + return rw_write_held(pg->uanon->an_lock); + } + return 1; +} + /* * uvm_pagecount: count the number of physical pages in the address range. */ diff --git sys/uvm/uvm_pager.c sys/uvm/uvm_pager.c index 286e7c2a025..46ba9cfab84 100644 --- sys/uvm/uvm_pager.c +++ sys/uvm/uvm_pager.c @@ -543,11 +543,15 @@ ReTry: /* XXX daddr_t -> int */ int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0; if (pg->pg_flags & PQ_ANON) { + rw_enter(pg->uanon->an_lock, RW_WRITE); pg->uanon->an_swslot = nswblk; + rw_exit(pg->uanon->an_lock); } else { + rw_enter(pg->uobject->vmobjlock, RW_WRITE); uao_set_swslot(pg->uobject, pg->offset >> PAGE_SHIFT, nswblk); + rw_exit(pg->uobject->vmobjlock); } } if (result == VM_PAGER_AGAIN) { @@ -612,6 +616,8 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, { int lcv; + KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); + /* drop all pages but "pg" */ for (lcv = 0 ; lcv < *npages ; lcv++) { /* skip "pg" or empty slot */ @@ -625,10 +631,13 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, */ if (!uobj) { if (ppsp[lcv]->pg_flags & PQ_ANON) { + rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE); if (flags & PGO_REALLOCSWAP) /* zap swap block */ ppsp[lcv]->uanon->an_swslot = 0; } else { + rw_enter(ppsp[lcv]->uobject->vmobjlock, + RW_WRITE); if (flags & PGO_REALLOCSWAP) uao_set_swslot(ppsp[lcv]->uobject, ppsp[lcv]->offset >> PAGE_SHIFT, 0); @@ -649,7 +658,6 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, UVM_PAGE_OWN(ppsp[lcv], NULL); /* kills anon and frees pg */ - rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE); uvm_anon_release(ppsp[lcv]->uanon); continue; @@ -672,6 +680,14 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, pmap_clear_modify(ppsp[lcv]); atomic_setbits_int(&ppsp[lcv]->pg_flags, PG_CLEAN); } + + /* if anonymous cluster, unlock object and move on */ + if (!uobj) { + if (ppsp[lcv]->pg_flags & PQ_ANON) + rw_exit(ppsp[lcv]->uanon->an_lock); + else + rw_exit(ppsp[lcv]->uobject->vmobjlock); + } } } @@ -736,6 +752,7 @@ uvm_aio_aiodone(struct buf *bp) swap = (pg->pg_flags & PQ_SWAPBACKED) != 0; if (!swap) { uobj = pg->uobject; + rw_enter(uobj->vmobjlock, RW_WRITE); } } KASSERT(swap || pg->uobject == uobj); @@ -763,6 +780,9 @@ uvm_aio_aiodone(struct buf *bp) } } uvm_page_unbusy(pgs, npages); + if (!swap) { + rw_exit(uobj->vmobjlock); + } #ifdef UVM_SWAP_ENCRYPT freed: diff --git sys/uvm/uvm_pdaemon.c sys/uvm/uvm_pdaemon.c index e0ab150cddc..1ac4b29d256 100644 --- sys/uvm/uvm_pdaemon.c +++ sys/uvm/uvm_pdaemon.c @@ -440,19 +440,6 @@ uvmpd_scan_inactive(struct pglist *pglst) uvmexp.pdscans++; nextpg = TAILQ_NEXT(p, pageq); - /* - * move referenced pages back to active queue and - * skip to next page (unlikely to happen since - * inactive pages shouldn't have any valid mappings - * and we cleared reference before deactivating). 
- */ - - if (pmap_is_referenced(p)) { - uvm_pageactivate(p); - uvmexp.pdreact++; - continue; - } - if (p->pg_flags & PQ_ANON) { anon = p->uanon; KASSERT(anon != NULL); @@ -461,6 +448,16 @@ uvmpd_scan_inactive(struct pglist *pglst) /* lock failed, skip this page */ continue; } + /* + * move referenced pages back to active queue + * and skip to next page. + */ + if (pmap_is_referenced(p)) { + uvm_pageactivate(p); + rw_exit(anon->an_lock); + uvmexp.pdreact++; + continue; + } if (p->pg_flags & PG_BUSY) { rw_exit(anon->an_lock); uvmexp.pdbusy++; @@ -471,7 +468,23 @@ uvmpd_scan_inactive(struct pglist *pglst) } else { uobj = p->uobject; KASSERT(uobj != NULL); + if (rw_enter(uobj->vmobjlock, + RW_WRITE|RW_NOSLEEP)) { + /* lock failed, skip this page */ + continue; + } + /* + * move referenced pages back to active queue + * and skip to next page. + */ + if (pmap_is_referenced(p)) { + uvm_pageactivate(p); + rw_exit(uobj->vmobjlock); + uvmexp.pdreact++; + continue; + } if (p->pg_flags & PG_BUSY) { + rw_exit(uobj->vmobjlock); uvmexp.pdbusy++; /* someone else owns page, skip it */ continue; @@ -507,6 +520,8 @@ uvmpd_scan_inactive(struct pglist *pglst) /* remove from object */ anon->an_page = NULL; rw_exit(anon->an_lock); + } else { + rw_exit(uobj->vmobjlock); } continue; } @@ -518,6 +533,8 @@ uvmpd_scan_inactive(struct pglist *pglst) if (free + uvmexp.paging > uvmexp.freetarg << 2) { if (anon) { rw_exit(anon->an_lock); + } else { + rw_exit(uobj->vmobjlock); } continue; } @@ -533,6 +550,8 @@ uvmpd_scan_inactive(struct pglist *pglst) uvm_pageactivate(p); if (anon) { rw_exit(anon->an_lock); + } else { + rw_exit(uobj->vmobjlock); } continue; } @@ -602,6 +621,9 @@ uvmpd_scan_inactive(struct pglist *pglst) UVM_PAGE_OWN(p, NULL); if (anon) rw_exit(anon->an_lock); + else + rw_exit( + uobj->vmobjlock); continue; } swcpages = 0; /* cluster is empty */ @@ -635,6 +657,8 @@ uvmpd_scan_inactive(struct pglist *pglst) if (p) { /* if we just added a page to cluster */ if (anon) rw_exit(anon->an_lock); + else + rw_exit(uobj->vmobjlock); /* cluster not full yet? */ if (swcpages < swnpages) @@ -748,6 +772,8 @@ uvmpd_scan_inactive(struct pglist *pglst) if (swap_backed) { if (anon) rw_enter(anon->an_lock, RW_WRITE); + else + rw_enter(uobj->vmobjlock, RW_WRITE); } #ifdef DIAGNOSTIC @@ -810,6 +836,8 @@ uvmpd_scan_inactive(struct pglist *pglst) */ if (anon) rw_exit(anon->an_lock); + else if (uobj) + rw_exit(uobj->vmobjlock); if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) { nextpg = TAILQ_FIRST(pglst); /* reload! */ @@ -920,8 +948,12 @@ uvmpd_scan(void) KASSERT(p->uanon != NULL); if (rw_enter(p->uanon->an_lock, RW_WRITE|RW_NOSLEEP)) continue; - } else + } else { KASSERT(p->uobject != NULL); + if (rw_enter(p->uobject->vmobjlock, + RW_WRITE|RW_NOSLEEP)) + continue; + } /* * if there's a shortage of swap, free any swap allocated @@ -959,6 +991,8 @@ uvmpd_scan(void) } if (p->pg_flags & PQ_ANON) rw_exit(p->uanon->an_lock); + else + rw_exit(p->uobject->vmobjlock); } } @@ -982,6 +1016,10 @@ uvmpd_drop(struct pglist *pglst) continue; if (p->pg_flags & PG_CLEAN) { + struct uvm_object * uobj = p->uobject; + + rw_enter(uobj->vmobjlock, RW_WRITE); + uvm_lock_pageq(); /* * we now have the page queues locked. * the page is not busy. 
if the page is clean we @@ -997,6 +1035,8 @@ uvmpd_drop(struct pglist *pglst) pmap_page_protect(p, PROT_NONE); uvm_pagefree(p); } + uvm_unlock_pageq(); + rw_exit(uobj->vmobjlock); } } } @@ -1004,13 +1044,9 @@ uvmpd_drop(struct pglist *pglst) void uvmpd_hibernate(void) { - uvm_lock_pageq(); - uvmpd_drop(&uvm.page_inactive_swp); uvmpd_drop(&uvm.page_inactive_obj); uvmpd_drop(&uvm.page_active); - - uvm_unlock_pageq(); } #endif diff --git sys/uvm/uvm_vnode.c sys/uvm/uvm_vnode.c index 3cbdd5222b6..af69e8352ed 100644 --- sys/uvm/uvm_vnode.c +++ sys/uvm/uvm_vnode.c @@ -280,8 +280,9 @@ uvn_reference(struct uvm_object *uobj) panic("uvn_reference: invalid state"); } #endif - KERNEL_ASSERT_LOCKED(); + rw_enter(uobj->vmobjlock, RW_WRITE); uobj->uo_refs++; + rw_exit(uobj->vmobjlock); } /* @@ -300,9 +301,10 @@ uvn_detach(struct uvm_object *uobj) struct vnode *vp; int oldflags; - KERNEL_ASSERT_LOCKED(); + rw_enter(uobj->vmobjlock, RW_WRITE); uobj->uo_refs--; /* drop ref! */ if (uobj->uo_refs) { /* still more refs */ + rw_exit(uobj->vmobjlock); return; } @@ -323,8 +325,7 @@ uvn_detach(struct uvm_object *uobj) if (uvn->u_flags & UVM_VNODE_CANPERSIST) { /* won't block */ uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES); - vrele(vp); /* drop vnode reference */ - return; + goto out; } /* its a goner! */ @@ -353,7 +354,8 @@ uvn_detach(struct uvm_object *uobj) /* wait on any outstanding io */ while (uobj->uo_npages && uvn->u_flags & UVM_VNODE_RELKILL) { uvn->u_flags |= UVM_VNODE_IOSYNC; - tsleep_nsec(&uvn->u_nio, PVM, "uvn_term", INFSLP); + rwsleep_nsec(&uvn->u_nio, uobj->vmobjlock, PVM, "uvn_term", + INFSLP); } if ((uvn->u_flags & UVM_VNODE_RELKILL) == 0) @@ -373,6 +375,8 @@ uvn_detach(struct uvm_object *uobj) /* wake up any sleepers */ if (oldflags & UVM_VNODE_WANTED) wakeup(uvn); +out: + rw_exit(uobj->vmobjlock); /* drop our reference to the vnode. 
*/ vrele(vp); @@ -409,10 +413,13 @@ void uvm_vnp_terminate(struct vnode *vp) { struct uvm_vnode *uvn = vp->v_uvm; + struct uvm_object *uobj = &uvn->u_obj; int oldflags; /* check if it is valid */ + rw_enter(uobj->vmobjlock, RW_WRITE); if ((uvn->u_flags & UVM_VNODE_VALID) == 0) { + rw_exit(uobj->vmobjlock); return; } @@ -479,7 +486,8 @@ uvm_vnp_terminate(struct vnode *vp) */ #endif uvn->u_flags |= UVM_VNODE_IOSYNC; - tsleep_nsec(&uvn->u_nio, PVM, "uvn_term", INFSLP); + rwsleep_nsec(&uvn->u_nio, uobj->vmobjlock, PVM, "uvn_term", + INFSLP); } /* @@ -512,6 +520,8 @@ uvm_vnp_terminate(struct vnode *vp) if (oldflags & UVM_VNODE_WANTED) wakeup(uvn); + + rw_exit(uobj->vmobjlock); } /* @@ -589,7 +599,7 @@ uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) boolean_t retval, need_iosync, needs_clean; voff_t curoff; - KERNEL_ASSERT_LOCKED(); + KASSERT(rw_write_held(uobj->vmobjlock)); TAILQ_INIT(&dead); /* get init vals and determine how we are going to traverse object */ @@ -673,8 +683,8 @@ uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) atomic_setbits_int(&pp->pg_flags, PG_WANTED); uvm_unlock_pageq(); - tsleep_nsec(pp, PVM, "uvn_flsh", - INFSLP); + rwsleep_nsec(pp, uobj->vmobjlock, PVM, + "uvn_flsh", INFSLP); uvm_lock_pageq(); curoff -= PAGE_SIZE; continue; @@ -824,7 +834,8 @@ ReTry: if (need_iosync) { while (uvn->u_nio != 0) { uvn->u_flags |= UVM_VNODE_IOSYNC; - tsleep_nsec(&uvn->u_nio, PVM, "uvn_flush", INFSLP); + rwsleep_nsec(&uvn->u_nio, uobj->vmobjlock, PVM, + "uvn_flush", INFSLP); } if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED) wakeup(&uvn->u_flags); @@ -878,7 +889,7 @@ uvn_put(struct uvm_object *uobj, struct vm_page **pps, int npages, int flags) { int retval; - KERNEL_ASSERT_LOCKED(); + KASSERT(rw_write_held(uobj->vmobjlock)); retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE); @@ -903,7 +914,8 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, int lcv, result, gotpages; boolean_t done; - KERNEL_ASSERT_LOCKED(); + KASSERT(((flags & PGO_LOCKED) != 0 && rw_lock_held(uobj->vmobjlock)) || + (flags & PGO_LOCKED) == 0); /* step 1: handled the case where fault data structures are locked. */ if (flags & PGO_LOCKED) { @@ -1033,7 +1045,8 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, /* page is there, see if we need to wait on it */ if ((ptmp->pg_flags & PG_BUSY) != 0) { atomic_setbits_int(&ptmp->pg_flags, PG_WANTED); - tsleep_nsec(ptmp, PVM, "uvn_get", INFSLP); + rwsleep_nsec(ptmp, uobj->vmobjlock, PVM, + "uvn_get", INFSLP); continue; /* goto top of pps while loop */ } @@ -1077,6 +1090,7 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, uvm_lock_pageq(); uvm_pagefree(ptmp); uvm_unlock_pageq(); + rw_exit(uobj->vmobjlock); return result; } @@ -1098,6 +1112,8 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, } + + rw_exit(uobj->vmobjlock); return (VM_PAGER_OK); } @@ -1113,6 +1129,7 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, int uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw) { + struct uvm_object *uobj = &uvn->u_obj; struct vnode *vn; struct uio uio; struct iovec iov; @@ -1123,6 +1140,8 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw) int netunlocked = 0; int lkflags = (flags & PGO_NOWAIT) ? LK_NOWAIT : 0; + KASSERT(rw_write_held(uobj->vmobjlock)); + /* init values */ waitf = (flags & PGO_SYNCIO) ? 
M_WAITOK : M_NOWAIT; vn = uvn->u_vnode; @@ -1134,7 +1153,8 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw) return VM_PAGER_AGAIN; } uvn->u_flags |= UVM_VNODE_IOSYNCWANTED; - tsleep_nsec(&uvn->u_flags, PVM, "uvn_iosync", INFSLP); + rwsleep_nsec(&uvn->u_flags, uobj->vmobjlock, PVM, "uvn_iosync", + INFSLP); } /* check size */ @@ -1157,6 +1177,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw) * (this time with sleep ok). */ uvn->u_nio++; /* we have an I/O in progress! */ + rw_exit(uobj->vmobjlock); if (kva == 0) kva = uvm_pagermapin(pps, npages, mapinflags | UVMPAGER_MAPIN_WAITOK); @@ -1200,6 +1221,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw) * Ideally, this kind of operation *should* work. */ result = 0; + KERNEL_LOCK(); if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL | lkflags); if (result == 0) { @@ -1215,6 +1237,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw) VOP_UNLOCK(vn); } + KERNEL_UNLOCK(); if (netunlocked) NET_LOCK(); @@ -1241,6 +1264,7 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw) uvm_pagermapout(kva, npages); /* now clean up the object (i.e. drop I/O count) */ + rw_enter(uobj->vmobjlock, RW_WRITE); uvn->u_nio--; /* I/O DONE! */ if ((uvn->u_flags & UVM_VNODE_IOSYNC) != 0 && uvn->u_nio == 0) { wakeup(&uvn->u_nio); @@ -1252,8 +1276,12 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw) KASSERT(flags & PGO_NOWAIT); return VM_PAGER_AGAIN; } else { - while (rebooting) - tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP); + if (rebooting) { + KERNEL_LOCK(); + while (rebooting) + tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP); + KERNEL_UNLOCK(); + } return VM_PAGER_ERROR; } } @@ -1300,11 +1328,14 @@ int uvm_vnp_uncache(struct vnode *vp) { struct uvm_vnode *uvn = vp->v_uvm; + struct uvm_object *uobj = &uvn->u_obj; /* lock uvn part of the vnode and check if we need to do anything */ + rw_enter(uobj->vmobjlock, RW_WRITE); if ((uvn->u_flags & UVM_VNODE_VALID) == 0 || (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) { + rw_exit(uobj->vmobjlock); return TRUE; } @@ -1314,6 +1345,7 @@ uvm_vnp_uncache(struct vnode *vp) */ uvn->u_flags &= ~UVM_VNODE_CANPERSIST; if (uvn->u_obj.uo_refs) { + rw_exit(uobj->vmobjlock); return FALSE; } @@ -1323,6 +1355,7 @@ uvm_vnp_uncache(struct vnode *vp) */ vref(vp); /* seems ok, even with VOP_LOCK */ uvn->u_obj.uo_refs++; /* value is now 1 */ + rw_exit(uobj->vmobjlock); #ifdef VFSLCKDEBUG /* @@ -1374,6 +1407,11 @@ void uvm_vnp_setsize(struct vnode *vp, off_t newsize) { struct uvm_vnode *uvn = vp->v_uvm; + struct uvm_object *uobj = &uvn->u_obj; + + KERNEL_ASSERT_LOCKED(); + + rw_enter(uobj->vmobjlock, RW_WRITE); /* lock uvn and check for valid object, and if valid: do it! */ if (uvn->u_flags & UVM_VNODE_VALID) { @@ -1389,6 +1427,7 @@ uvm_vnp_setsize(struct vnode *vp, off_t newsize) } uvn->u_size = newsize; } + rw_exit(uobj->vmobjlock); } /* @@ -1447,6 +1486,7 @@ uvm_vnp_sync(struct mount *mp) /* step 3: we now have a list of uvn's that may need cleaning. 
*/ SIMPLEQ_FOREACH(uvn, &uvn_sync_q, u_syncq) { + rw_enter(uvn->u_obj.vmobjlock, RW_WRITE); #ifdef DEBUG if (uvn->u_flags & UVM_VNODE_DYING) { printf("uvm_vnp_sync: dying vnode on sync list\n"); @@ -1465,6 +1505,7 @@ uvm_vnp_sync(struct mount *mp) LIST_REMOVE(uvn, u_wlist); uvn->u_flags &= ~UVM_VNODE_WRITEABLE; } + rw_exit(uvn->u_obj.vmobjlock); /* now drop our reference to the uvn */ uvn_detach(&uvn->u_obj);