> Op 29-06-2022 16:17 schreef Martin Pieuchot <m...@openbsd.org>:
> 
>  
> The aiodone daemon accounts for and frees/releases pages that have been
> written to swap.  It is only used for asynchronous writes.  The diff
> below uses this knowledge to:
> 
> - Stop suggesting that uvm_swap_get() can be asynchronous.  There's an
>   assert for PGO_SYNCIO 3 lines above.
> 
> - Remove unused support for asynchronous read, including error
>   conditions, from uvm_aio_aiodone_pages().
> 
> - Grab the proper lock for each page that has been written to swap.
>   This allows us to enable an assert in uvm_page_unbusy().
> 
> - Move the uvm_anon_release() call outside of uvm_page_unbusy() and
>   assert for the different anon cases.  This will allow us to unify
>   code paths waiting for busy pages.
> 
> This is adapted/simplified from what is in NetBSD.
> 
> ok?

I don't fully understand the old code, but the diff makes sense in combination
with what you state above.  Two small nits below.

ok kettenis@
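
If I read the new uvm_aio_aiodone_pages() loop right, the per-page locking
boils down to the sketch below (my own condensed reading of the diff, using
its variable names; pageq and error handling omitted), which is what makes
the KASSERT(uvm_page_owner_locked_p(pg)) in uvm_page_unbusy() safe to enable:

	for (i = 0; i < npages; i++) {
		pg = pgs[i];

		/* pick the lock that protects this page's owner */
		if (pg->uobject != NULL)
			slock = pg->uobject->vmobjlock;
		else
			slock = pg->uanon->an_lock;
		rw_enter(slock, RW_WRITE);

		/* ... mark the page clean on success ... */

		if (pg->uobject == NULL && (pg->pg_flags & PG_RELEASED)) {
			/* anon with 0 refs; no rw_exit() here, as in the diff */
			uvm_anon_release(pg->uanon);
		} else {
			uvm_page_unbusy(&pg, 1);
			rw_exit(slock);
		}
	}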
 
> Index: uvm/uvm_aobj.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
> retrieving revision 1.103
> diff -u -p -r1.103 uvm_aobj.c
> --- uvm/uvm_aobj.c    29 Dec 2021 20:22:06 -0000      1.103
> +++ uvm/uvm_aobj.c    29 Jun 2022 11:16:35 -0000
> @@ -143,7 +143,6 @@ struct pool uvm_aobj_pool;
>  
>  static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int,
>                                    boolean_t);
> -static int                    uao_find_swslot(struct uvm_object *, int);
>  static boolean_t              uao_flush(struct uvm_object *, voff_t,
>                                    voff_t, int);
>  static void                   uao_free(struct uvm_aobj *);
> @@ -241,7 +240,7 @@ uao_find_swhash_elt(struct uvm_aobj *aob
>  /*
>   * uao_find_swslot: find the swap slot number for an aobj/pageidx
>   */
> -inline static int
> +int
>  uao_find_swslot(struct uvm_object *uobj, int pageidx)
>  {
>       struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
> Index: uvm/uvm_aobj.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_aobj.h,v
> retrieving revision 1.17
> diff -u -p -r1.17 uvm_aobj.h
> --- uvm/uvm_aobj.h    21 Oct 2020 09:08:14 -0000      1.17
> +++ uvm/uvm_aobj.h    29 Jun 2022 11:16:35 -0000
> @@ -60,6 +60,7 @@
>  
>  void uao_init(void);
>  int uao_set_swslot(struct uvm_object *, int, int);
> +int uao_find_swslot (struct uvm_object *, int);

Spurious space

>  int uao_dropswap(struct uvm_object *, int);
>  int uao_swap_off(int, int);
>  int uao_shrink(struct uvm_object *, int);
> Index: uvm/uvm_page.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_page.c,v
> retrieving revision 1.166
> diff -u -p -r1.166 uvm_page.c
> --- uvm/uvm_page.c    12 May 2022 12:48:36 -0000      1.166
> +++ uvm/uvm_page.c    29 Jun 2022 11:47:55 -0000
> @@ -1036,13 +1036,14 @@ uvm_pagefree(struct vm_page *pg)
>   * uvm_page_unbusy: unbusy an array of pages.
>   *
>  * => pages must either all belong to the same object, or all belong to anons.
> + * => if pages are object-owned, object must be locked.
>   * => if pages are anon-owned, anons must have 0 refcount.
> + * => caller must make sure that anon-owned pages are not PG_RELEASED.
>   */
>  void
>  uvm_page_unbusy(struct vm_page **pgs, int npgs)
>  {
>       struct vm_page *pg;
> -     struct uvm_object *uobj;
>       int i;
>  
>       for (i = 0; i < npgs; i++) {
> @@ -1052,35 +1053,19 @@ uvm_page_unbusy(struct vm_page **pgs, in
>                       continue;
>               }
>  
> -#if notyet
> -             /*
> -              * XXX swap case in uvm_aio_aiodone() is not holding the lock.
> -              *
> -              * This isn't compatible with the PG_RELEASED anon case below.
> -              */
>               KASSERT(uvm_page_owner_locked_p(pg));
> -#endif
>               KASSERT(pg->pg_flags & PG_BUSY);
>  
>               if (pg->pg_flags & PG_WANTED) {
>                       wakeup(pg);
>               }
>               if (pg->pg_flags & PG_RELEASED) {
> -                     uobj = pg->uobject;
> -                     if (uobj != NULL) {
> -                             uvm_lock_pageq();
> -                             pmap_page_protect(pg, PROT_NONE);
> -                             /* XXX won't happen right now */
> -                             if (pg->pg_flags & PQ_AOBJ)
> -                                     uao_dropswap(uobj,
> -                                         pg->offset >> PAGE_SHIFT);
> -                             uvm_pagefree(pg);
> -                             uvm_unlock_pageq();
> -                     } else {
> -                             rw_enter(pg->uanon->an_lock, RW_WRITE);
> -                             uvm_anon_release(pg->uanon);
> -                     }
> +                     KASSERT(pg->uobject != NULL ||
> +                         (pg->uanon != NULL && pg->uanon->an_ref > 0));
> +                     atomic_clearbits_int(&pg->pg_flags, PG_RELEASED);
> +                     uvm_pagefree(pg);
>               } else {
> +                     KASSERT((pg->pg_flags & PG_FAKE) == 0);
>                       atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY);
>                       UVM_PAGE_OWN(pg, NULL);
>               }
> Index: uvm/uvm_pager.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_pager.c,v
> retrieving revision 1.81
> diff -u -p -r1.81 uvm_pager.c
> --- uvm/uvm_pager.c   28 Jun 2022 19:07:40 -0000      1.81
> +++ uvm/uvm_pager.c   29 Jun 2022 11:16:35 -0000
> @@ -735,50 +735,77 @@ void
>  uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, boolean_t write,
>      int error)
>  {
> -     struct vm_page *pg;
>       struct uvm_object *uobj;
> +     struct vm_page *pg;
> +     struct rwlock *slock;
>       boolean_t swap;
> -     int i;
> +     int i, swslot;
>  
> +     slock = NULL;
>       uobj = NULL;
> +     pg = pgs[0];
> +     swap = (pg->uanon != NULL && pg->uobject == NULL) ||
> +             (pg->pg_flags & PQ_AOBJ) != 0;
> +
> +     KASSERT(swap);
> +     KASSERT(write);
> +
> +     if (error) {
> +             if (pg->uobject != NULL) {
> +                     swslot = uao_find_swslot(pg->uobject,
> +                         pg->offset >> PAGE_SHIFT);
> +             } else {
> +                     swslot = pg->uanon->an_swslot;
> +             }
> +             KASSERT(swslot);
> +     }
>  
>       for (i = 0; i < npages; i++) {
> +             int anon_disposed = 0;
> +
>               pg = pgs[i];
> +             KASSERT((pg->pg_flags & PG_FAKE) == 0);
>  
> -             if (i == 0) {
> -                     swap = (pg->pg_flags & PQ_SWAPBACKED) != 0;
> -                     if (!swap) {
> -                             uobj = pg->uobject;
> -                             rw_enter(uobj->vmobjlock, RW_WRITE);
> -                     }
> +             /*
> +              * lock each page's object (or anon) individually since
> +              * each page may need a different lock.
> +              */
> +             if (pg->uobject != NULL) {
> +                     slock = pg->uobject->vmobjlock;
> +             } else {
> +                     slock = pg->uanon->an_lock;
>               }
> -             KASSERT(swap || pg->uobject == uobj);
> +             rw_enter(slock, RW_WRITE);
> +             anon_disposed = (pg->pg_flags & PG_RELEASED) != 0;
> +             KASSERT(!anon_disposed || pg->uobject != NULL ||
> +                 pg->uanon->an_ref == 0);
> +             uvm_lock_pageq();
>  
>               /*
> -              * if this is a read and we got an error, mark the pages
> -              * PG_RELEASED so that uvm_page_unbusy() will free them.
> +              * this was a successful write,

Add the word "if" at the start?

> +              * mark the page PG_CLEAN.
>                */
> -             if (!write && error) {
> -                     atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
> -                     continue;
> +             if (!error) {
> +                     pmap_clear_reference(pg);
> +                     pmap_clear_modify(pg);
> +                     atomic_setbits_int(&pg->pg_flags, PG_CLEAN);
>               }
> -             KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0);
>  
>               /*
> -              * if this is a read and the page is PG_FAKE,
> -              * or this was a successful write,
> -              * mark the page PG_CLEAN and not PG_FAKE.
> +              * unlock everything for this page now.
>                */
> -             if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM)) {
> -                     pmap_clear_reference(pgs[i]);
> -                     pmap_clear_modify(pgs[i]);
> -                     atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN);
> -                     atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE);
> +             if (pg->uobject == NULL && anon_disposed) {
> +                     uvm_unlock_pageq();
> +                     uvm_anon_release(pg->uanon);
> +             } else {
> +                     uvm_page_unbusy(&pg, 1);
> +                     uvm_unlock_pageq();
> +                     rw_exit(slock);
>               }
>       }
> -     uvm_page_unbusy(pgs, npages);
> -     if (!swap) {
> -             rw_exit(uobj->vmobjlock);
> +
> +     if (error) {
> +             uvm_swap_markbad(swslot, npages);
>       }
>  }
>  
> Index: uvm/uvm_swap.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_swap.c,v
> retrieving revision 1.158
> diff -u -p -r1.158 uvm_swap.c
> --- uvm/uvm_swap.c    28 Jun 2022 19:39:54 -0000      1.158
> +++ uvm/uvm_swap.c    29 Jun 2022 11:18:05 -0000
> @@ -1603,8 +1603,7 @@ uvm_swap_get(struct vm_page *page, int s
>       }
>  
>       KERNEL_LOCK();
> -     result = uvm_swap_io(&page, swslot, 1, B_READ |
> -         ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
> +     result = uvm_swap_io(&page, swslot, 1, B_READ);
>       KERNEL_UNLOCK();
>  
>       if (result == VM_PAGER_OK || result == VM_PAGER_PEND) {
