> Op 29-06-2022 16:17 schreef Martin Pieuchot <m...@openbsd.org>: > > > The aiodone daemon accounts for and frees/releases pages that were > written to swap. It is only used for asynchronous write. The diff > below uses this knowledge to: > > - Stop suggesting that uvm_swap_get() can be asynchronous. There's an > assert for PGO_SYNCIO 3 lines above. > > - Remove unused support for asynchronous read, including error > conditions, from uvm_aio_aiodone_pages(). > > - Grab the proper lock for each page that has been written to swap. > This allows us to enable an assert in uvm_page_unbusy(). > > - Move the uvm_anon_release() call outside of uvm_page_unbusy() and > assert for the different anon cases. This will allow us to unify > code paths waiting for busy pages. > > This is adapted/simplified from what is in NetBSD. > > ok?
I don't fully understand the old code, but the diff makes sense combined with what you state above. Two small nits below. ok kettenis@ > Index: uvm/uvm_aobj.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v > retrieving revision 1.103 > diff -u -p -r1.103 uvm_aobj.c > --- uvm/uvm_aobj.c 29 Dec 2021 20:22:06 -0000 1.103 > +++ uvm/uvm_aobj.c 29 Jun 2022 11:16:35 -0000 > @@ -143,7 +143,6 @@ struct pool uvm_aobj_pool; > > static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int, > boolean_t); > -static int uao_find_swslot(struct uvm_object *, int); > static boolean_t uao_flush(struct uvm_object *, voff_t, > voff_t, int); > static void uao_free(struct uvm_aobj *); > @@ -241,7 +240,7 @@ uao_find_swhash_elt(struct uvm_aobj *aob > /* > * uao_find_swslot: find the swap slot number for an aobj/pageidx > */ > -inline static int > +int > uao_find_swslot(struct uvm_object *uobj, int pageidx) > { > struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; > Index: uvm/uvm_aobj.h > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_aobj.h,v > retrieving revision 1.17 > diff -u -p -r1.17 uvm_aobj.h > --- uvm/uvm_aobj.h 21 Oct 2020 09:08:14 -0000 1.17 > +++ uvm/uvm_aobj.h 29 Jun 2022 11:16:35 -0000 > @@ -60,6 +60,7 @@ > > void uao_init(void); > int uao_set_swslot(struct uvm_object *, int, int); > +int uao_find_swslot (struct uvm_object *, int); Spurious space > int uao_dropswap(struct uvm_object *, int); > int uao_swap_off(int, int); > int uao_shrink(struct uvm_object *, int); > Index: uvm/uvm_page.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_page.c,v > retrieving revision 1.166 > diff -u -p -r1.166 uvm_page.c > --- uvm/uvm_page.c 12 May 2022 12:48:36 -0000 1.166 > +++ uvm/uvm_page.c 29 Jun 2022 11:47:55 -0000 > @@ -1036,13 +1036,14 @@ uvm_pagefree(struct vm_page *pg) > * uvm_page_unbusy: unbusy an array 
of pages. > * > * => pages must either all belong to the same object, or all belong to > anons. > + * => if pages are object-owned, object must be locked. > * => if pages are anon-owned, anons must have 0 refcount. > + * => caller must make sure that anon-owned pages are not PG_RELEASED. > */ > void > uvm_page_unbusy(struct vm_page **pgs, int npgs) > { > struct vm_page *pg; > - struct uvm_object *uobj; > int i; > > for (i = 0; i < npgs; i++) { > @@ -1052,35 +1053,19 @@ uvm_page_unbusy(struct vm_page **pgs, in > continue; > } > > -#if notyet > - /* > - * XXX swap case in uvm_aio_aiodone() is not holding the > lock. > - * > - * This isn't compatible with the PG_RELEASED anon case below. > - */ > KASSERT(uvm_page_owner_locked_p(pg)); > -#endif > KASSERT(pg->pg_flags & PG_BUSY); > > if (pg->pg_flags & PG_WANTED) { > wakeup(pg); > } > if (pg->pg_flags & PG_RELEASED) { > - uobj = pg->uobject; > - if (uobj != NULL) { > - uvm_lock_pageq(); > - pmap_page_protect(pg, PROT_NONE); > - /* XXX won't happen right now */ > - if (pg->pg_flags & PQ_AOBJ) > - uao_dropswap(uobj, > - pg->offset >> PAGE_SHIFT); > - uvm_pagefree(pg); > - uvm_unlock_pageq(); > - } else { > - rw_enter(pg->uanon->an_lock, RW_WRITE); > - uvm_anon_release(pg->uanon); > - } > + KASSERT(pg->uobject != NULL || > + (pg->uanon != NULL && pg->uanon->an_ref > 0)); > + atomic_clearbits_int(&pg->pg_flags, PG_RELEASED); > + uvm_pagefree(pg); > } else { > + KASSERT((pg->pg_flags & PG_FAKE) == 0); > atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY); > UVM_PAGE_OWN(pg, NULL); > } > Index: uvm/uvm_pager.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_pager.c,v > retrieving revision 1.81 > diff -u -p -r1.81 uvm_pager.c > --- uvm/uvm_pager.c 28 Jun 2022 19:07:40 -0000 1.81 > +++ uvm/uvm_pager.c 29 Jun 2022 11:16:35 -0000 > @@ -735,50 +735,77 @@ void > uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, boolean_t write, > int error) > { > - struct vm_page *pg; 
> struct uvm_object *uobj; > + struct vm_page *pg; > + struct rwlock *slock; > boolean_t swap; > - int i; > + int i, swslot; > > + slock = NULL; > uobj = NULL; > + pg = pgs[0]; > + swap = (pg->uanon != NULL && pg->uobject == NULL) || > + (pg->pg_flags & PQ_AOBJ) != 0; > + > + KASSERT(swap); > + KASSERT(write); > + > + if (error) { > + if (pg->uobject != NULL) { > + swslot = uao_find_swslot(pg->uobject, > + pg->offset >> PAGE_SHIFT); > + } else { > + swslot = pg->uanon->an_swslot; > + } > + KASSERT(swslot); > + } > > for (i = 0; i < npages; i++) { > + int anon_disposed = 0; > + > pg = pgs[i]; > + KASSERT((pg->pg_flags & PG_FAKE) == 0); > > - if (i == 0) { > - swap = (pg->pg_flags & PQ_SWAPBACKED) != 0; > - if (!swap) { > - uobj = pg->uobject; > - rw_enter(uobj->vmobjlock, RW_WRITE); > - } > + /* > + * lock each page's object (or anon) individually since > + * each page may need a different lock. > + */ > + if (pg->uobject != NULL) { > + slock = pg->uobject->vmobjlock; > + } else { > + slock = pg->uanon->an_lock; > } > - KASSERT(swap || pg->uobject == uobj); > + rw_enter(slock, RW_WRITE); > + anon_disposed = (pg->pg_flags & PG_RELEASED) != 0; > + KASSERT(!anon_disposed || pg->uobject != NULL || > + pg->uanon->an_ref == 0); > + uvm_lock_pageq(); > > /* > - * if this is a read and we got an error, mark the pages > - * PG_RELEASED so that uvm_page_unbusy() will free them. > + * this was a successful write, Add the word "if" at the start? > + * mark the page PG_CLEAN. > */ > - if (!write && error) { > - atomic_setbits_int(&pg->pg_flags, PG_RELEASED); > - continue; > + if (!error) { > + pmap_clear_reference(pg); > + pmap_clear_modify(pg); > + atomic_setbits_int(&pg->pg_flags, PG_CLEAN); > } > - KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0); > > /* > - * if this is a read and the page is PG_FAKE, > - * or this was a successful write, > - * mark the page PG_CLEAN and not PG_FAKE. > + * unlock everything for this page now. 
> */ > - if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM)) > { > - pmap_clear_reference(pgs[i]); > - pmap_clear_modify(pgs[i]); > - atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN); > - atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE); > + if (pg->uobject == NULL && anon_disposed) { > + uvm_unlock_pageq(); > + uvm_anon_release(pg->uanon); > + } else { > + uvm_page_unbusy(&pg, 1); > + uvm_unlock_pageq(); > + rw_exit(slock); > } > } > - uvm_page_unbusy(pgs, npages); > - if (!swap) { > - rw_exit(uobj->vmobjlock); > + > + if (error) { > + uvm_swap_markbad(swslot, npages); > } > } > > Index: uvm/uvm_swap.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_swap.c,v > retrieving revision 1.158 > diff -u -p -r1.158 uvm_swap.c > --- uvm/uvm_swap.c 28 Jun 2022 19:39:54 -0000 1.158 > +++ uvm/uvm_swap.c 29 Jun 2022 11:18:05 -0000 > @@ -1603,8 +1603,7 @@ uvm_swap_get(struct vm_page *page, int s > } > > KERNEL_LOCK(); > - result = uvm_swap_io(&page, swslot, 1, B_READ | > - ((flags & PGO_SYNCIO) ? 0 : B_ASYNC)); > + result = uvm_swap_io(&page, swslot, 1, B_READ); > KERNEL_UNLOCK(); > > if (result == VM_PAGER_OK || result == VM_PAGER_PEND) {