The aiodone daemon accounts for and frees/releases pages after they have
been written to swap.  It is only used for asynchronous writes.  The diff
below uses this knowledge to:

- Stop suggesting that uvm_swap_get() can be asynchronous.  There's an
  assert for PGO_SYNCIO 3 lines above.

- Remove unused support for asynchronous read, including error
  conditions, from uvm_aio_aiodone_pages().

- Grab the proper lock for each page that has been written to swap.
  This allows to enable an assert in uvm_page_unbusy().

- Move the uvm_anon_release() call outside of uvm_page_unbusy() and
  assert for the different anon cases.  This will allow us to unify
  code paths waiting for busy pages.

This is adapted/simplified from what is in NetBSD.

ok?

Index: uvm/uvm_aobj.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
retrieving revision 1.103
diff -u -p -r1.103 uvm_aobj.c
--- uvm/uvm_aobj.c      29 Dec 2021 20:22:06 -0000      1.103
+++ uvm/uvm_aobj.c      29 Jun 2022 11:16:35 -0000
@@ -143,7 +143,6 @@ struct pool uvm_aobj_pool;
 
 static struct uao_swhash_elt   *uao_find_swhash_elt(struct uvm_aobj *, int,
                                     boolean_t);
-static int                      uao_find_swslot(struct uvm_object *, int);
 static boolean_t                uao_flush(struct uvm_object *, voff_t,
                                     voff_t, int);
 static void                     uao_free(struct uvm_aobj *);
@@ -241,7 +240,7 @@ uao_find_swhash_elt(struct uvm_aobj *aob
 /*
  * uao_find_swslot: find the swap slot number for an aobj/pageidx
  */
-inline static int
+int
 uao_find_swslot(struct uvm_object *uobj, int pageidx)
 {
        struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
Index: uvm/uvm_aobj.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_aobj.h,v
retrieving revision 1.17
diff -u -p -r1.17 uvm_aobj.h
--- uvm/uvm_aobj.h      21 Oct 2020 09:08:14 -0000      1.17
+++ uvm/uvm_aobj.h      29 Jun 2022 11:16:35 -0000
@@ -60,6 +60,7 @@
 
 void uao_init(void);
 int uao_set_swslot(struct uvm_object *, int, int);
+int uao_find_swslot (struct uvm_object *, int);
 int uao_dropswap(struct uvm_object *, int);
 int uao_swap_off(int, int);
 int uao_shrink(struct uvm_object *, int);
Index: uvm/uvm_page.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_page.c,v
retrieving revision 1.166
diff -u -p -r1.166 uvm_page.c
--- uvm/uvm_page.c      12 May 2022 12:48:36 -0000      1.166
+++ uvm/uvm_page.c      29 Jun 2022 11:47:55 -0000
@@ -1036,13 +1036,14 @@ uvm_pagefree(struct vm_page *pg)
  * uvm_page_unbusy: unbusy an array of pages.
  *
  * => pages must either all belong to the same object, or all belong to anons.
+ * => if pages are object-owned, object must be locked.
  * => if pages are anon-owned, anons must have 0 refcount.
+ * => caller must make sure that anon-owned pages are not PG_RELEASED.
  */
 void
 uvm_page_unbusy(struct vm_page **pgs, int npgs)
 {
        struct vm_page *pg;
-       struct uvm_object *uobj;
        int i;
 
        for (i = 0; i < npgs; i++) {
@@ -1052,35 +1053,19 @@ uvm_page_unbusy(struct vm_page **pgs, in
                        continue;
                }
 
-#if notyet
-               /*
-                 * XXX swap case in uvm_aio_aiodone() is not holding the lock.
-                *
-                * This isn't compatible with the PG_RELEASED anon case below.
-                */
                KASSERT(uvm_page_owner_locked_p(pg));
-#endif
                KASSERT(pg->pg_flags & PG_BUSY);
 
                if (pg->pg_flags & PG_WANTED) {
                        wakeup(pg);
                }
                if (pg->pg_flags & PG_RELEASED) {
-                       uobj = pg->uobject;
-                       if (uobj != NULL) {
-                               uvm_lock_pageq();
-                               pmap_page_protect(pg, PROT_NONE);
-                               /* XXX won't happen right now */
-                               if (pg->pg_flags & PQ_AOBJ)
-                                       uao_dropswap(uobj,
-                                           pg->offset >> PAGE_SHIFT);
-                               uvm_pagefree(pg);
-                               uvm_unlock_pageq();
-                       } else {
-                               rw_enter(pg->uanon->an_lock, RW_WRITE);
-                               uvm_anon_release(pg->uanon);
-                       }
+                       KASSERT(pg->uobject != NULL ||
+                           (pg->uanon != NULL && pg->uanon->an_ref > 0));
+                       atomic_clearbits_int(&pg->pg_flags, PG_RELEASED);
+                       uvm_pagefree(pg);
                } else {
+                       KASSERT((pg->pg_flags & PG_FAKE) == 0);
                        atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY);
                        UVM_PAGE_OWN(pg, NULL);
                }
Index: uvm/uvm_pager.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.81
diff -u -p -r1.81 uvm_pager.c
--- uvm/uvm_pager.c     28 Jun 2022 19:07:40 -0000      1.81
+++ uvm/uvm_pager.c     29 Jun 2022 11:16:35 -0000
@@ -735,50 +735,77 @@ void
 uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, boolean_t write,
     int error)
 {
-       struct vm_page *pg;
        struct uvm_object *uobj;
+       struct vm_page *pg;
+       struct rwlock *slock;
        boolean_t swap;
-       int i;
+       int i, swslot;
 
+       slock = NULL;
        uobj = NULL;
+       pg = pgs[0];
+       swap = (pg->uanon != NULL && pg->uobject == NULL) ||
+               (pg->pg_flags & PQ_AOBJ) != 0;
+
+       KASSERT(swap);
+       KASSERT(write);
+
+       if (error) {
+               if (pg->uobject != NULL) {
+                       swslot = uao_find_swslot(pg->uobject,
+                           pg->offset >> PAGE_SHIFT);
+               } else {
+                       swslot = pg->uanon->an_swslot;
+               }
+               KASSERT(swslot);
+       }
 
        for (i = 0; i < npages; i++) {
+               int anon_disposed = 0;
+
                pg = pgs[i];
+               KASSERT((pg->pg_flags & PG_FAKE) == 0);
 
-               if (i == 0) {
-                       swap = (pg->pg_flags & PQ_SWAPBACKED) != 0;
-                       if (!swap) {
-                               uobj = pg->uobject;
-                               rw_enter(uobj->vmobjlock, RW_WRITE);
-                       }
+               /*
+                * lock each page's object (or anon) individually since
+                * each page may need a different lock.
+                */
+               if (pg->uobject != NULL) {
+                       slock = pg->uobject->vmobjlock;
+               } else {
+                       slock = pg->uanon->an_lock;
                }
-               KASSERT(swap || pg->uobject == uobj);
+               rw_enter(slock, RW_WRITE);
+               anon_disposed = (pg->pg_flags & PG_RELEASED) != 0;
+               KASSERT(!anon_disposed || pg->uobject != NULL ||
+                   pg->uanon->an_ref == 0);
+               uvm_lock_pageq();
 
                /*
-                * if this is a read and we got an error, mark the pages
-                * PG_RELEASED so that uvm_page_unbusy() will free them.
+                * this was a successful write,
+                * mark the page PG_CLEAN.
                 */
-               if (!write && error) {
-                       atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
-                       continue;
+               if (!error) {
+                       pmap_clear_reference(pg);
+                       pmap_clear_modify(pg);
+                       atomic_setbits_int(&pg->pg_flags, PG_CLEAN);
                }
-               KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0);
 
                /*
-                * if this is a read and the page is PG_FAKE,
-                * or this was a successful write,
-                * mark the page PG_CLEAN and not PG_FAKE.
+                * unlock everything for this page now.
                 */
-               if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM)) {
-                       pmap_clear_reference(pgs[i]);
-                       pmap_clear_modify(pgs[i]);
-                       atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN);
-                       atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE);
+               if (pg->uobject == NULL && anon_disposed) {
+                       uvm_unlock_pageq();
+                       uvm_anon_release(pg->uanon);
+               } else {
+                       uvm_page_unbusy(&pg, 1);
+                       uvm_unlock_pageq();
+                       rw_exit(slock);
                }
        }
-       uvm_page_unbusy(pgs, npages);
-       if (!swap) {
-               rw_exit(uobj->vmobjlock);
+
+       if (error) {
+               uvm_swap_markbad(swslot, npages);
        }
 }
 
Index: uvm/uvm_swap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.158
diff -u -p -r1.158 uvm_swap.c
--- uvm/uvm_swap.c      28 Jun 2022 19:39:54 -0000      1.158
+++ uvm/uvm_swap.c      29 Jun 2022 11:18:05 -0000
@@ -1603,8 +1603,7 @@ uvm_swap_get(struct vm_page *page, int s
        }
 
        KERNEL_LOCK();
-       result = uvm_swap_io(&page, swslot, 1, B_READ |
-           ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
+       result = uvm_swap_io(&page, swslot, 1, B_READ);
        KERNEL_UNLOCK();
 
        if (result == VM_PAGER_OK || result == VM_PAGER_PEND) {

Reply via email to