> Op 29-06-2022 16:17 schreef Martin Pieuchot <[email protected]>:
>
>
> The aiodone daemon accounts for and frees/releases pages that were
> written to swap. It is only used for asynchronous write. The diff
> below uses this knowledge to:
>
> - Stop suggesting that uvm_swap_get() can be asynchronous. There's an
> assert for PGO_SYNCIO 3 lines above.
>
> - Remove unused support for asynchronous read, including error
> conditions, from uvm_aio_aiodone_pages().
>
> - Grab the proper lock for each page that has been written to swap.
> This allows us to enable an assert in uvm_page_unbusy().
>
> - Move the uvm_anon_release() call outside of uvm_page_unbusy() and
> assert for the different anon cases. This will allow us to unify
> code paths waiting for busy pages.
>
> This is adapted/simplified from what is in NetBSD.
>
> ok?
I don't fully understand the old code, but the diff makes sense combined with
what you state above. Two small nits below.
ok kettenis@
> Index: uvm/uvm_aobj.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
> retrieving revision 1.103
> diff -u -p -r1.103 uvm_aobj.c
> --- uvm/uvm_aobj.c 29 Dec 2021 20:22:06 -0000 1.103
> +++ uvm/uvm_aobj.c 29 Jun 2022 11:16:35 -0000
> @@ -143,7 +143,6 @@ struct pool uvm_aobj_pool;
>
> static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int,
> boolean_t);
> -static int uao_find_swslot(struct uvm_object *, int);
> static boolean_t uao_flush(struct uvm_object *, voff_t,
> voff_t, int);
> static void uao_free(struct uvm_aobj *);
> @@ -241,7 +240,7 @@ uao_find_swhash_elt(struct uvm_aobj *aob
> /*
> * uao_find_swslot: find the swap slot number for an aobj/pageidx
> */
> -inline static int
> +int
> uao_find_swslot(struct uvm_object *uobj, int pageidx)
> {
> struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
> Index: uvm/uvm_aobj.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_aobj.h,v
> retrieving revision 1.17
> diff -u -p -r1.17 uvm_aobj.h
> --- uvm/uvm_aobj.h 21 Oct 2020 09:08:14 -0000 1.17
> +++ uvm/uvm_aobj.h 29 Jun 2022 11:16:35 -0000
> @@ -60,6 +60,7 @@
>
> void uao_init(void);
> int uao_set_swslot(struct uvm_object *, int, int);
> +int uao_find_swslot (struct uvm_object *, int);
Spurious space
> int uao_dropswap(struct uvm_object *, int);
> int uao_swap_off(int, int);
> int uao_shrink(struct uvm_object *, int);
> Index: uvm/uvm_page.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_page.c,v
> retrieving revision 1.166
> diff -u -p -r1.166 uvm_page.c
> --- uvm/uvm_page.c 12 May 2022 12:48:36 -0000 1.166
> +++ uvm/uvm_page.c 29 Jun 2022 11:47:55 -0000
> @@ -1036,13 +1036,14 @@ uvm_pagefree(struct vm_page *pg)
> * uvm_page_unbusy: unbusy an array of pages.
> *
> * => pages must either all belong to the same object, or all belong to
> anons.
> + * => if pages are object-owned, object must be locked.
> * => if pages are anon-owned, anons must have 0 refcount.
> + * => caller must make sure that anon-owned pages are not PG_RELEASED.
> */
> void
> uvm_page_unbusy(struct vm_page **pgs, int npgs)
> {
> struct vm_page *pg;
> - struct uvm_object *uobj;
> int i;
>
> for (i = 0; i < npgs; i++) {
> @@ -1052,35 +1053,19 @@ uvm_page_unbusy(struct vm_page **pgs, in
> continue;
> }
>
> -#if notyet
> - /*
> - * XXX swap case in uvm_aio_aiodone() is not holding the
> lock.
> - *
> - * This isn't compatible with the PG_RELEASED anon case below.
> - */
> KASSERT(uvm_page_owner_locked_p(pg));
> -#endif
> KASSERT(pg->pg_flags & PG_BUSY);
>
> if (pg->pg_flags & PG_WANTED) {
> wakeup(pg);
> }
> if (pg->pg_flags & PG_RELEASED) {
> - uobj = pg->uobject;
> - if (uobj != NULL) {
> - uvm_lock_pageq();
> - pmap_page_protect(pg, PROT_NONE);
> - /* XXX won't happen right now */
> - if (pg->pg_flags & PQ_AOBJ)
> - uao_dropswap(uobj,
> - pg->offset >> PAGE_SHIFT);
> - uvm_pagefree(pg);
> - uvm_unlock_pageq();
> - } else {
> - rw_enter(pg->uanon->an_lock, RW_WRITE);
> - uvm_anon_release(pg->uanon);
> - }
> + KASSERT(pg->uobject != NULL ||
> + (pg->uanon != NULL && pg->uanon->an_ref > 0));
> + atomic_clearbits_int(&pg->pg_flags, PG_RELEASED);
> + uvm_pagefree(pg);
> } else {
> + KASSERT((pg->pg_flags & PG_FAKE) == 0);
> atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY);
> UVM_PAGE_OWN(pg, NULL);
> }
> Index: uvm/uvm_pager.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_pager.c,v
> retrieving revision 1.81
> diff -u -p -r1.81 uvm_pager.c
> --- uvm/uvm_pager.c 28 Jun 2022 19:07:40 -0000 1.81
> +++ uvm/uvm_pager.c 29 Jun 2022 11:16:35 -0000
> @@ -735,50 +735,77 @@ void
> uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, boolean_t write,
> int error)
> {
> - struct vm_page *pg;
> struct uvm_object *uobj;
> + struct vm_page *pg;
> + struct rwlock *slock;
> boolean_t swap;
> - int i;
> + int i, swslot;
>
> + slock = NULL;
> uobj = NULL;
> + pg = pgs[0];
> + swap = (pg->uanon != NULL && pg->uobject == NULL) ||
> + (pg->pg_flags & PQ_AOBJ) != 0;
> +
> + KASSERT(swap);
> + KASSERT(write);
> +
> + if (error) {
> + if (pg->uobject != NULL) {
> + swslot = uao_find_swslot(pg->uobject,
> + pg->offset >> PAGE_SHIFT);
> + } else {
> + swslot = pg->uanon->an_swslot;
> + }
> + KASSERT(swslot);
> + }
>
> for (i = 0; i < npages; i++) {
> + int anon_disposed = 0;
> +
> pg = pgs[i];
> + KASSERT((pg->pg_flags & PG_FAKE) == 0);
>
> - if (i == 0) {
> - swap = (pg->pg_flags & PQ_SWAPBACKED) != 0;
> - if (!swap) {
> - uobj = pg->uobject;
> - rw_enter(uobj->vmobjlock, RW_WRITE);
> - }
> + /*
> + * lock each page's object (or anon) individually since
> + * each page may need a different lock.
> + */
> + if (pg->uobject != NULL) {
> + slock = pg->uobject->vmobjlock;
> + } else {
> + slock = pg->uanon->an_lock;
> }
> - KASSERT(swap || pg->uobject == uobj);
> + rw_enter(slock, RW_WRITE);
> + anon_disposed = (pg->pg_flags & PG_RELEASED) != 0;
> + KASSERT(!anon_disposed || pg->uobject != NULL ||
> + pg->uanon->an_ref == 0);
> + uvm_lock_pageq();
>
> /*
> - * if this is a read and we got an error, mark the pages
> - * PG_RELEASED so that uvm_page_unbusy() will free them.
> + * this was a successful write,
Add the word "if" at the start?
> + * mark the page PG_CLEAN.
> */
> - if (!write && error) {
> - atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
> - continue;
> + if (!error) {
> + pmap_clear_reference(pg);
> + pmap_clear_modify(pg);
> + atomic_setbits_int(&pg->pg_flags, PG_CLEAN);
> }
> - KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0);
>
> /*
> - * if this is a read and the page is PG_FAKE,
> - * or this was a successful write,
> - * mark the page PG_CLEAN and not PG_FAKE.
> + * unlock everything for this page now.
> */
> - if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM))
> {
> - pmap_clear_reference(pgs[i]);
> - pmap_clear_modify(pgs[i]);
> - atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN);
> - atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE);
> + if (pg->uobject == NULL && anon_disposed) {
> + uvm_unlock_pageq();
> + uvm_anon_release(pg->uanon);
> + } else {
> + uvm_page_unbusy(&pg, 1);
> + uvm_unlock_pageq();
> + rw_exit(slock);
> }
> }
> - uvm_page_unbusy(pgs, npages);
> - if (!swap) {
> - rw_exit(uobj->vmobjlock);
> +
> + if (error) {
> + uvm_swap_markbad(swslot, npages);
> }
> }
>
> Index: uvm/uvm_swap.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_swap.c,v
> retrieving revision 1.158
> diff -u -p -r1.158 uvm_swap.c
> --- uvm/uvm_swap.c 28 Jun 2022 19:39:54 -0000 1.158
> +++ uvm/uvm_swap.c 29 Jun 2022 11:18:05 -0000
> @@ -1603,8 +1603,7 @@ uvm_swap_get(struct vm_page *page, int s
> }
>
> KERNEL_LOCK();
> - result = uvm_swap_io(&page, swslot, 1, B_READ |
> - ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
> + result = uvm_swap_io(&page, swslot, 1, B_READ);
> KERNEL_UNLOCK();
>
> if (result == VM_PAGER_OK || result == VM_PAGER_PEND) {