Diff below brings in & adapts most of the changes from NetBSD's r1.37 of
uvm_pdaemon.c.  My motivation for doing this is to untangle the inner
loop of uvmpd_scan_inactive() which will allow us to split the global
`pageqlock' mutex as a next step.

The idea behind this change is to get rid of the too-complex uvm_pager*
abstraction by checking early whether a page is going to be flushed or
swapped to disk.  The loop is then clearly divided into two cases, which
makes it more readable.

This also opens the door to a better integration between UVM's vnode
layer and the buffer cache.

The main loop of uvmpd_scan_inactive() can be understood as below:

. If a page can be flushed we can call "uvn_flush()" directly and pass the
  PGO_ALLPAGES flag instead of building a cluster beforehand.  Note that,
  in its current form, uvn_flush() is synchronous.

. If the page needs to be swapped, mark it as PG_PAGEOUT, build a cluster
  and once it is full call uvm_swap_put(). 

Please test this diff, do not hesitate to play with the `vm.swapencrypt.enable'
sysctl(2).

Index: uvm/uvm_aobj.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
retrieving revision 1.103
diff -u -p -r1.103 uvm_aobj.c
--- uvm/uvm_aobj.c      29 Dec 2021 20:22:06 -0000      1.103
+++ uvm/uvm_aobj.c      24 May 2022 12:31:34 -0000
@@ -143,7 +143,7 @@ struct pool uvm_aobj_pool;
 
 static struct uao_swhash_elt   *uao_find_swhash_elt(struct uvm_aobj *, int,
                                     boolean_t);
-static int                      uao_find_swslot(struct uvm_object *, int);
+int                             uao_find_swslot(struct uvm_object *, int);
 static boolean_t                uao_flush(struct uvm_object *, voff_t,
                                     voff_t, int);
 static void                     uao_free(struct uvm_aobj *);
@@ -241,7 +241,7 @@ uao_find_swhash_elt(struct uvm_aobj *aob
 /*
  * uao_find_swslot: find the swap slot number for an aobj/pageidx
  */
-inline static int
+int
 uao_find_swslot(struct uvm_object *uobj, int pageidx)
 {
        struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
Index: uvm/uvm_aobj.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_aobj.h,v
retrieving revision 1.17
diff -u -p -r1.17 uvm_aobj.h
--- uvm/uvm_aobj.h      21 Oct 2020 09:08:14 -0000      1.17
+++ uvm/uvm_aobj.h      24 May 2022 12:31:34 -0000
@@ -60,6 +60,7 @@
 
 void uao_init(void);
 int uao_set_swslot(struct uvm_object *, int, int);
+int uao_find_swslot (struct uvm_object *, int);
 int uao_dropswap(struct uvm_object *, int);
 int uao_swap_off(int, int);
 int uao_shrink(struct uvm_object *, int);
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
retrieving revision 1.291
diff -u -p -r1.291 uvm_map.c
--- uvm/uvm_map.c       4 May 2022 14:58:26 -0000       1.291
+++ uvm/uvm_map.c       24 May 2022 12:31:34 -0000
@@ -3215,8 +3215,9 @@ uvm_object_printit(struct uvm_object *uo
  * uvm_page_printit: actually print the page
  */
 static const char page_flagbits[] =
-       "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
-       "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
+       "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5PAGEOUT\6RELEASED\7FAKE\10RDONLY"
+       "\11ZERO\12DEV\13CLEANCHK"
+       "\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
        "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
 
 void
Index: uvm/uvm_page.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_page.c,v
retrieving revision 1.166
diff -u -p -r1.166 uvm_page.c
--- uvm/uvm_page.c      12 May 2022 12:48:36 -0000      1.166
+++ uvm/uvm_page.c      24 May 2022 12:32:54 -0000
@@ -960,6 +960,7 @@ uvm_pageclean(struct vm_page *pg)
 {
        u_int flags_to_clear = 0;
 
+       KASSERT((pg->pg_flags & PG_PAGEOUT) == 0);
        if ((pg->pg_flags & (PG_TABLED|PQ_ACTIVE|PQ_INACTIVE)) &&
            (pg->uobject == NULL || !UVM_OBJ_IS_PMAP(pg->uobject)))
                MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
@@ -978,11 +979,14 @@ uvm_pageclean(struct vm_page *pg)
            rw_write_held(pg->uanon->an_lock));
 
        /*
-        * if the page was an object page (and thus "TABLED"), remove it
-        * from the object.
+        * remove page from its object or anon.
         */
-       if (pg->pg_flags & PG_TABLED)
+       if (pg->pg_flags & PG_TABLED) {
                uvm_pageremove(pg);
+       } else if (pg->uanon != NULL) {
+               pg->uanon->an_page = NULL;
+               pg->uanon = NULL;
+       }
 
        /*
         * now remove the page from the queues
@@ -996,10 +1000,6 @@ uvm_pageclean(struct vm_page *pg)
                pg->wire_count = 0;
                uvmexp.wired--;
        }
-       if (pg->uanon) {
-               pg->uanon->an_page = NULL;
-               pg->uanon = NULL;
-       }
 
        /* Clean page state bits. */
        flags_to_clear |= PQ_ANON|PQ_AOBJ|PQ_ENCRYPT|PG_ZERO|PG_FAKE|PG_BUSY|
@@ -1042,7 +1042,6 @@ void
 uvm_page_unbusy(struct vm_page **pgs, int npgs)
 {
        struct vm_page *pg;
-       struct uvm_object *uobj;
        int i;
 
        for (i = 0; i < npgs; i++) {
@@ -1052,35 +1051,17 @@ uvm_page_unbusy(struct vm_page **pgs, in
                        continue;
                }
 
-#if notyet
-               /*
-                 * XXX swap case in uvm_aio_aiodone() is not holding the lock.
-                *
-                * This isn't compatible with the PG_RELEASED anon case below.
-                */
                KASSERT(uvm_page_owner_locked_p(pg));
-#endif
                KASSERT(pg->pg_flags & PG_BUSY);
 
                if (pg->pg_flags & PG_WANTED) {
                        wakeup(pg);
                }
                if (pg->pg_flags & PG_RELEASED) {
-                       uobj = pg->uobject;
-                       if (uobj != NULL) {
-                               uvm_lock_pageq();
-                               pmap_page_protect(pg, PROT_NONE);
-                               /* XXX won't happen right now */
-                               if (pg->pg_flags & PQ_AOBJ)
-                                       uao_dropswap(uobj,
-                                           pg->offset >> PAGE_SHIFT);
-                               uvm_pagefree(pg);
-                               uvm_unlock_pageq();
-                       } else {
-                               rw_enter(pg->uanon->an_lock, RW_WRITE);
-                               uvm_anon_release(pg->uanon);
-                       }
+                       atomic_clearbits_int(&pg->pg_flags, PG_RELEASED);
+                       uvm_pagefree(pg);
                } else {
+                       KASSERT((pg->pg_flags & PG_FAKE) == 0);
                        atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY);
                        UVM_PAGE_OWN(pg, NULL);
                }
@@ -1099,6 +1080,8 @@ uvm_page_unbusy(struct vm_page **pgs, in
 void
 uvm_page_own(struct vm_page *pg, char *tag)
 {
+       KASSERT((pg->pg_flags & PG_PAGEOUT) == 0);
+
        /* gain ownership? */
        if (tag) {
                if (pg->owner_tag) {
@@ -1216,10 +1199,15 @@ struct vm_page *
 uvm_pagelookup(struct uvm_object *obj, voff_t off)
 {
        /* XXX if stack is too much, handroll */
-       struct vm_page pg;
+       struct vm_page p, *pg;
+
+       p.offset = off;
+       pg = RBT_FIND(uvm_objtree, &obj->memt, &p);
 
-       pg.offset = off;
-       return RBT_FIND(uvm_objtree, &obj->memt, &pg);
+       KASSERT(pg == NULL || obj->uo_npages != 0);
+       KASSERT(pg == NULL || (pg->pg_flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
+           (pg->pg_flags & PG_BUSY) != 0);
+       return (pg);
 }
 
 /*
@@ -1303,7 +1291,9 @@ uvm_pagedeactivate(struct vm_page *pg)
 void
 uvm_pageactivate(struct vm_page *pg)
 {
+#ifdef notyet
        KASSERT(uvm_page_owner_locked_p(pg));
+#endif
        MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
 
        uvm_pagedequeue(pg);
Index: uvm/uvm_page.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_page.h,v
retrieving revision 1.68
diff -u -p -r1.68 uvm_page.h
--- uvm/uvm_page.h      12 May 2022 12:48:36 -0000      1.68
+++ uvm/uvm_page.h      24 May 2022 12:31:34 -0000
@@ -138,12 +138,13 @@ struct vm_page {
 #define        PG_WANTED       0x00000002      /* someone is waiting for page 
*/
 #define        PG_TABLED       0x00000004      /* page is in VP table  */
 #define        PG_CLEAN        0x00000008      /* page has not been modified */
-#define PG_CLEANCHK    0x00000010      /* clean bit has been checked */
+#define PG_PAGEOUT     0x00000010      /* page to be freed for pagedaemon */
 #define PG_RELEASED    0x00000020      /* page released while paging */
 #define        PG_FAKE         0x00000040      /* page is not yet initialized 
*/
 #define PG_RDONLY      0x00000080      /* page must be mapped read-only */
 #define PG_ZERO                0x00000100      /* page is pre-zero'd */
 #define PG_DEV         0x00000200      /* page is in device space, lay off */
+#define PG_CLEANCHK    0x00000400      /* clean bit has been checked */
 
 #define PG_PAGER1      0x00001000      /* pager-specific flag */
 #define PG_MASK                0x0000ffff
Index: uvm/uvm_pager.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.78
diff -u -p -r1.78 uvm_pager.c
--- uvm/uvm_pager.c     18 Feb 2022 09:04:38 -0000      1.78
+++ uvm/uvm_pager.c     24 May 2022 12:33:25 -0000
@@ -745,8 +745,9 @@ uvm_aio_aiodone(struct buf *bp)
        int npages = bp->b_bufsize >> PAGE_SHIFT;
        struct vm_page *pg, *pgs[MAXPHYS >> PAGE_SHIFT];
        struct uvm_object *uobj;
-       int i, error;
-       boolean_t write, swap;
+       struct rwlock *slock;
+       int i, error, swslot, wanted = 0;
+       boolean_t write, swap, pageout;
 
        KASSERT(npages <= MAXPHYS >> PAGE_SHIFT);
        splassert(IPL_BIO);
@@ -768,45 +769,128 @@ uvm_aio_aiodone(struct buf *bp)
                goto freed;
        }
 #endif /* UVM_SWAP_ENCRYPT */
+
+       swslot = 0;
+       slock = NULL;
+       pg = pgs[0];
+       swap = (pg->uanon != NULL && pg->uobject == NULL) ||
+               (pg->pg_flags & PQ_AOBJ) != 0;
+       pageout = (pg->pg_flags & PG_PAGEOUT) != 0;
+       if (!swap) {
+               uobj = pg->uobject;
+               slock = uobj->vmobjlock;
+               rw_enter(slock, RW_WRITE);
+               uvm_lock_pageq();
+       } else if (error) {
+               if (pg->uobject != NULL) {
+                       swslot = uao_find_swslot(pg->uobject,
+                           pg->offset >> PAGE_SHIFT);
+               } else {
+                       swslot = pg->uanon->an_swslot;
+               }
+               KASSERT(swslot);
+       }
+
        for (i = 0; i < npages; i++) {
                pg = pgs[i];
+               KASSERT(swap || pg->uobject == uobj);
+               KASSERT(pageout ^ ((pg->pg_flags & PG_PAGEOUT) == 0));
 
-               if (i == 0) {
-                       swap = (pg->pg_flags & PQ_SWAPBACKED) != 0;
-                       if (!swap) {
-                               uobj = pg->uobject;
-                               rw_enter(uobj->vmobjlock, RW_WRITE);
+               /*
+                * for swap i/os, lock each page's object (or anon)
+                * individually since each page may need a different lock.
+                */
+               if (swap) {
+                       if (pg->uobject != NULL) {
+                               slock = pg->uobject->vmobjlock;
+                       } else {
+                               slock = pg->uanon->an_lock;
                        }
+                       rw_enter(slock, RW_WRITE);
+                       uvm_lock_pageq();
                }
-               KASSERT(swap || pg->uobject == uobj);
 
                /*
-                * if this is a read and we got an error, mark the pages
-                * PG_RELEASED so that uvm_page_unbusy() will free them.
+                * process errors.  for reads, just mark the page to be freed.
+                * for writes, if the error was ENOMEM, we assume this was
+                * a transient failure so we mark the page dirty so that
+                * we'll try to write it again later.  for all other write
+                * errors, we assume the error is permanent, thus the data
+                * in the page is lost.  bummer.
                 */
-               if (!write && error) {
-                       atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
-                       continue;
+               if (error) {
+                       if (!write) {
+                               atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
+                               continue;
+                       } else if (error == ENOMEM) {
+                               if (pg->pg_flags & PG_PAGEOUT) {
+                                       atomic_clearbits_int(&pg->pg_flags,
+                                           PG_PAGEOUT);
+                                       uvmexp.paging--;
+                               }
+                               atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
+                               uvm_pageactivate(pg);
+                       }
                }
-               KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0);
 
                /*
-                * if this is a read and the page is PG_FAKE,
-                * or this was a successful write,
-                * mark the page PG_CLEAN and not PG_FAKE.
+                * if the page is PG_FAKE, this must have been a read to
+                * initialize the page.  clear PG_FAKE and activate the page.
+                * we must also clear the pmap "modified" flag since it may
+                * still be set from the page's previous identity.
                 */
-               if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM)) 
{
-                       pmap_clear_reference(pgs[i]);
-                       pmap_clear_modify(pgs[i]);
-                       atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN);
+               if (pg->pg_flags & PG_FAKE) {
+                       KASSERT(!write);
                        atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE);
+                       uvm_pageactivate(pg);
+                       pmap_clear_modify(pg);
+               }
+
+               /*
+                * do accounting for pagedaemon i/o and arrange to free
+                * the pages instead of just unbusying them.
+                */
+               if (pg->pg_flags & PG_PAGEOUT) {
+                       atomic_clearbits_int(&pg->pg_flags, PG_PAGEOUT);
+                       uvmexp.paging--;
+
+                       /*
+                        * If a process faulted on a page of the anon being
+                        * swapped out it is waiting and we cannot release
+                        * it.
+                        */
+                       if (pg->pg_flags & PG_WANTED) {
+                               KASSERT(swap);
+                               wanted++;
+                       } else {
+                               atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
+                       }
+               }
+
+               /*
+                * for swap pages, unlock everything for this page now.
+                */
+               if (swap) {
+                       uvm_page_unbusy(&pg, 1);
+                       uvm_unlock_pageq();
+                       rw_exit(slock);
                }
        }
-       uvm_page_unbusy(pgs, npages);
        if (!swap) {
-               rw_exit(uobj->vmobjlock);
+               uvm_page_unbusy(pgs, npages);
+               uvm_unlock_pageq();
+               rw_exit(slock);
+       } else {
+               KASSERT(write);
+               KASSERT(pageout);
+
+               /* these pages are now only in swap. */
+               KASSERT(uvmexp.swpgonly + (npages-wanted) <= uvmexp.swpginuse);
+               atomic_add_int(&uvmexp.swpgonly, (npages-wanted));
+               if (error) {
+                       uvm_swap_markbad(swslot, npages);
+               }
        }
-
 #ifdef UVM_SWAP_ENCRYPT
 freed:
 #endif
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
retrieving revision 1.99
diff -u -p -r1.99 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c   12 May 2022 12:49:31 -0000      1.99
+++ uvm/uvm_pdaemon.c   24 May 2022 12:35:54 -0000
@@ -1,5 +1,5 @@
 /*     $OpenBSD: uvm_pdaemon.c,v 1.99 2022/05/12 12:49:31 mpi Exp $    */
-/*     $NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $   */
+/*     $NetBSD: uvm_pdaemon.c,v 1.37 2001/09/15 20:36:47 chs Exp $     */
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -101,10 +101,14 @@ extern void drmbackoff(long);
  * local prototypes
  */
 
+struct rwlock  *uvmpd_trylockowner(struct vm_page *);
 void           uvmpd_scan(void);
-boolean_t      uvmpd_scan_inactive(struct pglist *);
+void           uvmpd_balancequeue(int);
+void           uvmpd_scan_inactive(struct pglist *);
 void           uvmpd_tune(void);
 void           uvmpd_drop(struct pglist *);
+void           uvmpd_dropswap(struct vm_page *);
+
 
 /*
  * uvm_wait: wait (sleep) for the page daemon to free some pages
@@ -208,7 +212,7 @@ uvm_pageout(void *arg)
 {
        struct uvm_constraint_range constraint;
        struct uvm_pmalloc *pma;
-       int npages = 0;
+       int npages = 0, free;
 
        /* ensure correct priority and set paging parameters... */
        uvm.pagedaemon_proc = curproc;
@@ -263,9 +267,10 @@ uvm_pageout(void *arg)
                size = 0;
                if (pma != NULL)
                        size += pma->pm_size >> PAGE_SHIFT;
-               if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
-                       size += uvmexp.freetarg - (uvmexp.free -
-                           BUFPAGES_DEFICIT);
+
+               free = uvmexp.free - BUFPAGES_DEFICIT;
+               if (free < uvmexp.freetarg)
+                       size += uvmexp.freetarg - free;
                if (size == 0)
                        size = 16; /* XXX */
                uvm_unlock_pageq();
@@ -278,8 +283,9 @@ uvm_pageout(void *arg)
                /*
                 * scan if needed
                 */
+               free = uvmexp.free - BUFPAGES_DEFICIT;
                if (pma != NULL ||
-                   ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
+                   (free < uvmexp.freetarg) ||
                    ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
                        uvmpd_scan();
                }
@@ -348,9 +354,6 @@ uvm_aiodone_daemon(void *arg)
                /* process each i/o that's done. */
                free = uvmexp.free;
                while (bp != NULL) {
-                       if (bp->b_flags & B_PDAEMON) {
-                               uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
-                       }
                        nbp = TAILQ_NEXT(bp, b_freelist);
                        s = splbio();   /* b_iodone must by called at splbio */
                        (*bp->b_iodone)(bp);
@@ -366,7 +369,198 @@ uvm_aiodone_daemon(void *arg)
        }
 }
 
+/*
+ * uvmpd_trylockowner: trylock the page's owner.
+ *
+ * => return the locked rwlock on success.  otherwise, return NULL.
+ */
+struct rwlock *
+uvmpd_trylockowner(struct vm_page *pg)
+{
+
+       struct uvm_object *uobj = pg->uobject;
+       struct rwlock *slock;
+
+       if (uobj != NULL) {
+               slock = uobj->vmobjlock;
+       } else {
+               struct vm_anon *anon = pg->uanon;
+
+               KASSERT(anon != NULL);
+               slock = anon->an_lock;
+       }
+
+       if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
+               return NULL;
+       }
+
+       return slock;
+}
+
+struct swapcluster {
+       int swc_slot;
+       int swc_nallocated;
+       int swc_nused;
+       struct vm_page *swc_pages[round_page(MAXPHYS) >> PAGE_SHIFT];
+};
+
+void
+swapcluster_init(struct swapcluster *swc)
+{
+
+       swc->swc_slot = 0;
+       swc->swc_nused = 0;
+}
+
+int
+swapcluster_allocslots(struct swapcluster *swc)
+{
+       int slot;
+       int npages;
+
+       if (swc->swc_slot != 0) {
+               return 0;
+       }
+
+       /* Even with strange MAXPHYS, the shift
+          implicitly rounds down to a page. */
+       npages = MAXPHYS >> PAGE_SHIFT;
+       slot = uvm_swap_alloc(&npages, TRUE);
+       if (slot == 0) {
+               return ENOMEM;
+       }
+       swc->swc_slot = slot;
+       swc->swc_nallocated = npages;
+       swc->swc_nused = 0;
+
+       return 0;
+}
+
+int
+swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
+{
+       int slot;
+       struct uvm_object *uobj;
+
+       KASSERT(swc->swc_slot != 0);
+       KASSERT(swc->swc_nused < swc->swc_nallocated);
+       KASSERT((pg->pg_flags & PQ_SWAPBACKED) != 0);
+
+       slot = swc->swc_slot + swc->swc_nused;
+       uobj = pg->uobject;
+       if (uobj == NULL) {
+               KASSERT(rw_write_held(pg->uanon->an_lock));
+               pg->uanon->an_swslot = slot;
+       } else {
+               int result;
+
+               KASSERT(rw_write_held(uobj->vmobjlock));
+               result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
+               if (result == -1) {
+                       return ENOMEM;
+               }
+       }
+       swc->swc_pages[swc->swc_nused] = pg;
+       swc->swc_nused++;
+
+       return 0;
+}
+
+void
+swapcluster_flush(struct swapcluster *swc, boolean_t now)
+{
+       int slot;
+       int nused;
+       int nallocated;
+       int error = 0;
+
+       if (swc->swc_slot == 0) {
+               return;
+       }
+       KASSERT(swc->swc_nused <= swc->swc_nallocated);
+
+       slot = swc->swc_slot;
+       nused = swc->swc_nused;
+       nallocated = swc->swc_nallocated;
+
+       /*
+        * if this is the final pageout we could have a few
+        * unused swap blocks.  if so, free them now.
+        */
+       if (nused < nallocated) {
+               if (!now) {
+                       return;
+               }
+               uvm_swap_free(slot + nused, nallocated - nused);
+       }
+
+       /*
+        * now start the pageout.
+        */
+       if (nused > 0) {
+               uvmexp.pdpageouts++;
+               error = uvm_swap_put(slot, swc->swc_pages, nused, 0);
+               if (error != VM_PAGER_OK && error != VM_PAGER_PEND) {
+                       int i;
+
+                       KASSERT(error == VM_PAGER_AGAIN);
+
+                       for (i = 0; i < nused; i++) {
+                               struct rwlock *slock;
+                               struct vm_page *pg = swc->swc_pages[i];
+
+                               KASSERT(pg->pg_flags & PG_PAGEOUT);
+                               KASSERT((pg->pg_flags & PG_RELEASED) == 0);
+
+                               if (pg->pg_flags & PQ_ANON) {
+                                       slock = pg->uanon->an_lock;
+                               } else {
+                                       slock = pg->uobject->vmobjlock;
+                               }
+                               rw_enter(slock, RW_WRITE);
+                               uvm_lock_pageq();
+                               atomic_clearbits_int(&pg->pg_flags, PG_PAGEOUT);
+                               uvmexp.paging--;
+                               uvmpd_dropswap(pg);
+                               uvm_page_unbusy(&pg, 1);
+                               uvm_unlock_pageq();
+                               rw_exit(slock);
+                       }
+               }
+       }
+
+       /*
+        * zero swslot to indicate that we are
+        * no longer building a swap-backed cluster.
+        */
+       swc->swc_slot = 0;
+       swc->swc_nused = 0;
+}
+
+int
+swapcluster_nused(struct swapcluster *swc)
+{
 
+       return swc->swc_nused;
+}
+
+/*
+ * uvmpd_dropswap: free any swap allocated to this page.
+ *
+ * => called with owner locked.
+ */
+void
+uvmpd_dropswap(struct vm_page *pg)
+{
+       struct vm_anon *anon = pg->uanon;
+
+       if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
+               uvm_swap_free(anon->an_swslot, 1);
+               anon->an_swslot = 0;
+       } else if (pg->pg_flags & PQ_AOBJ) {
+               uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
+       }
+}
 
 /*
  * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
@@ -377,487 +571,223 @@ uvm_aiodone_daemon(void *arg)
  * => we handle the building of swap-backed clusters
  * => we return TRUE if we are exiting because we met our target
  */
-
-boolean_t
+void
 uvmpd_scan_inactive(struct pglist *pglst)
 {
-       boolean_t retval = FALSE;       /* assume we haven't hit target */
-       int free, result;
+       struct swapcluster swc;
        struct vm_page *p, *nextpg;
        struct uvm_object *uobj;
-       struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
-       int npages;
-       struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT];  /* XXX: see below */
-       int swnpages, swcpages;                         /* XXX: see below */
-       int swslot;
        struct vm_anon *anon;
-       boolean_t swap_backed;
-       vaddr_t start;
-       int dirtyreacts;
+       struct rwlock *slock;
+       int dirtyreacts, free, error;
 
        /*
         * swslot is non-zero if we are building a swap cluster.  we want
         * to stay in the loop while we have a page to scan or we have
         * a swap-cluster to build.
         */
-       swslot = 0;
-       swnpages = swcpages = 0;
-       free = 0;
+       swapcluster_init(&swc);
        dirtyreacts = 0;
-
-       for (p = TAILQ_FIRST(pglst); p != NULL || swslot != 0; p = nextpg) {
-               /*
-                * note that p can be NULL iff we have traversed the whole
-                * list and need to do one final swap-backed clustered pageout.
-                */
+       for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
                uobj = NULL;
                anon = NULL;
 
-               if (p) {
-                       /*
-                        * update our copy of "free" and see if we've met
-                        * our target
-                        */
-                       free = uvmexp.free - BUFPAGES_DEFICIT;
+               /*
+                * see if we've met the free target
+                */
+               free = uvmexp.free - BUFPAGES_DEFICIT;
+               if (free + uvmexp.paging
+                   + swapcluster_nused(&swc)
+                   >= uvmexp.freetarg << 2 ||
+                   dirtyreacts == UVMPD_NUMDIRTYREACTS) {
+                       break;
+               }
 
-                       if (free + uvmexp.paging >= uvmexp.freetarg << 2 ||
-                           dirtyreacts == UVMPD_NUMDIRTYREACTS) {
-                               retval = TRUE;
-
-                               if (swslot == 0) {
-                                       /* exit now if no swap-i/o pending */
-                                       break;
-                               }
+               /*
+                * we are below target and have a new page to consider.
+                */
+               uvmexp.pdscans++;
+               nextpg = TAILQ_NEXT(p, pageq);
 
-                               /* set p to null to signal final swap i/o */
-                               p = NULL;
-                       }
+               /*
+                * move referenced pages back to active queue
+                * and skip to next page.
+                */
+               if (pmap_is_referenced(p)) {
+                       uvm_pageactivate(p);
+                       uvmexp.pdreact++;
+                       continue;
                }
 
-               if (p) {        /* if (we have a new page to consider) */
-                       /*
-                        * we are below target and have a new page to consider.
-                        */
-                       uvmexp.pdscans++;
-                       nextpg = TAILQ_NEXT(p, pageq);
-
-                       if (p->pg_flags & PQ_ANON) {
-                               anon = p->uanon;
-                               KASSERT(anon != NULL);
-                               if (rw_enter(anon->an_lock,
-                                   RW_WRITE|RW_NOSLEEP)) {
-                                       /* lock failed, skip this page */
-                                       continue;
-                               }
-                               /*
-                                * move referenced pages back to active queue
-                                * and skip to next page.
-                                */
-                               if (pmap_is_referenced(p)) {
-                                       uvm_pageactivate(p);
-                                       rw_exit(anon->an_lock);
-                                       uvmexp.pdreact++;
-                                       continue;
-                               }
-                               if (p->pg_flags & PG_BUSY) {
-                                       rw_exit(anon->an_lock);
-                                       uvmexp.pdbusy++;
-                                       /* someone else owns page, skip it */
-                                       continue;
-                               }
-                               uvmexp.pdanscan++;
-                       } else {
-                               uobj = p->uobject;
-                               KASSERT(uobj != NULL);
-                               if (rw_enter(uobj->vmobjlock,
-                                   RW_WRITE|RW_NOSLEEP)) {
-                                       /* lock failed, skip this page */
-                                       continue;
-                               }
-                               /*
-                                * move referenced pages back to active queue
-                                * and skip to next page.
-                                */
-                               if (pmap_is_referenced(p)) {
-                                       uvm_pageactivate(p);
-                                       rw_exit(uobj->vmobjlock);
-                                       uvmexp.pdreact++;
-                                       continue;
-                               }
-                               if (p->pg_flags & PG_BUSY) {
-                                       rw_exit(uobj->vmobjlock);
-                                       uvmexp.pdbusy++;
-                                       /* someone else owns page, skip it */
-                                       continue;
-                               }
-                               uvmexp.pdobscan++;
-                       }
+               anon = p->uanon;
+               uobj = p->uobject;
 
-                       /*
-                        * we now have the page queues locked.
-                        * the page is not busy.   if the page is clean we
-                        * can free it now and continue.
-                        */
-                       if (p->pg_flags & PG_CLEAN) {
-                               if (p->pg_flags & PQ_SWAPBACKED) {
-                                       /* this page now lives only in swap */
-                                       atomic_inc_int(&uvmexp.swpgonly);
-                               }
+               /*
+                * first we attempt to lock the object that this page
+                * belongs to.  if our attempt fails we skip on to
+                * the next page (no harm done).  it is important to
+                * "try" locking the object as we are locking in the
+                * wrong order (pageq -> object) and we don't want to
+                * deadlock.
+                */
+               slock = uvmpd_trylockowner(p);
+               if (slock == NULL) {
+                       continue;
+               }
 
-                               /* zap all mappings with pmap_page_protect... */
-                               pmap_page_protect(p, PROT_NONE);
-                               uvm_pagefree(p);
-                               uvmexp.pdfreed++;
-
-                               if (anon) {
-
-                                       /*
-                                        * an anonymous page can only be clean
-                                        * if it has backing store assigned.
-                                        */
-
-                                       KASSERT(anon->an_swslot != 0);
-
-                                       /* remove from object */
-                                       anon->an_page = NULL;
-                                       rw_exit(anon->an_lock);
-                               } else {
-                                       rw_exit(uobj->vmobjlock);
-                               }
-                               continue;
-                       }
+               if (p->pg_flags & PG_BUSY) {
+                       rw_exit(slock);
+                       uvmexp.pdbusy++;
+                       continue;
+               }
 
-                       /*
-                        * this page is dirty, skip it if we'll have met our
-                        * free target when all the current pageouts complete.
-                        */
-                       if (free + uvmexp.paging > uvmexp.freetarg << 2) {
-                               if (anon) {
-                                       rw_exit(anon->an_lock);
-                               } else {
-                                       rw_exit(uobj->vmobjlock);
-                               }
-                               continue;
-                       }
+               /* does the page belong to an object? */
+               if (uobj != NULL) {
+                       uvmexp.pdobscan++;
+               } else {
+                       KASSERT(anon != NULL);
+                       uvmexp.pdanscan++;
+               }
 
-                       /*
-                        * this page is dirty, but we can't page it out
-                        * since all pages in swap are only in swap.
-                        * reactivate it so that we eventually cycle
-                        * all pages thru the inactive queue.
-                        */
-                       if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
-                               dirtyreacts++;
-                               uvm_pageactivate(p);
-                               if (anon) {
-                                       rw_exit(anon->an_lock);
-                               } else {
-                                       rw_exit(uobj->vmobjlock);
-                               }
-                               continue;
+               /*
+                * we now have the object and the page queues locked.
+                * if the page is not swap-backed, call the object's
+                * pager to flush and free the page.
+                */
+               if ((p->pg_flags & PQ_SWAPBACKED) == 0) {
+                       uvm_unlock_pageq();
+                       error = (uobj->pgops->pgo_flush)(uobj,
+                           p->offset, p->offset + PAGE_SIZE,
+                           PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES);
+                       rw_exit(uobj->vmobjlock);
+                       uvm_lock_pageq();
+                       if (nextpg &&
+                           (nextpg->pg_flags & PQ_INACTIVE) == 0) {
+                               nextpg = TAILQ_FIRST(pglst);
                        }
+                       continue;
+               }
 
-                       /*
-                        * if the page is swap-backed and dirty and swap space
-                        * is full, free any swap allocated to the page
-                        * so that other pages can be paged out.
+               /*
+                * the page is swap-backed.  remove all the permissions
+                * from the page so we can sync the modified info
+                * without any race conditions.  if the page is clean
+                * we can free it now and continue.
+                */
+               pmap_page_protect(p, PROT_NONE);
+               if ((p->pg_flags & PG_CLEAN) && pmap_clear_modify(p)) {
+                       atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
+               }
+               if (p->pg_flags & PG_CLEAN) {
+                       int slot;
+                       int pageidx;
+
+                       pageidx = p->offset >> PAGE_SHIFT;
+                       uvm_pagefree(p);
+                       uvmexp.pdfreed++;
+
+                       /*
+                        * for anons, we need to remove the page
+                        * from the anon ourselves.  for aobjs,
+                        * pagefree did that for us.
                         */
-                       KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
-                       if ((p->pg_flags & PQ_SWAPBACKED) &&
-                           uvmexp.swpginuse == uvmexp.swpages) {
-
-                               if ((p->pg_flags & PQ_ANON) &&
-                                   p->uanon->an_swslot) {
-                                       uvm_swap_free(p->uanon->an_swslot, 1);
-                                       p->uanon->an_swslot = 0;
-                               }
-                               if (p->pg_flags & PQ_AOBJ) {
-                                       uao_dropswap(p->uobject,
-                                                    p->offset >> PAGE_SHIFT);
-                               }
+                       if (anon) {
+                               KASSERT(anon->an_swslot != 0);
+                               anon->an_page = NULL;
+                               slot = anon->an_swslot;
+                       } else {
+                               slot = uao_find_swslot(uobj, pageidx);
                        }
+                       rw_exit(slock);
 
-                       /*
-                        * the page we are looking at is dirty.   we must
-                        * clean it before it can be freed.  to do this we
-                        * first mark the page busy so that no one else will
-                        * touch the page.   we write protect all the mappings
-                        * of the page so that no one touches it while it is
-                        * in I/O.
-                        */
-
-                       swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
-                       atomic_setbits_int(&p->pg_flags, PG_BUSY);
-                       UVM_PAGE_OWN(p, "scan_inactive");
-                       pmap_page_protect(p, PROT_READ);
-                       uvmexp.pgswapout++;
-
-                       /*
-                        * for swap-backed pages we need to (re)allocate
-                        * swap space.
-                        */
-                       if (swap_backed) {
-                               /* free old swap slot (if any) */
-                               if (anon) {
-                                       if (anon->an_swslot) {
-                                               uvm_swap_free(anon->an_swslot,
-                                                   1);
-                                               anon->an_swslot = 0;
-                                       }
-                               } else {
-                                       uao_dropswap(uobj,
-                                                    p->offset >> PAGE_SHIFT);
-                               }
-
-                               /* start new cluster (if necessary) */
-                               if (swslot == 0) {
-                                       swnpages = MAXBSIZE >> PAGE_SHIFT;
-                                       swslot = uvm_swap_alloc(&swnpages,
-                                           TRUE);
-                                       if (swslot == 0) {
-                                               /* no swap?  give up! */
-                                               atomic_clearbits_int(
-                                                   &p->pg_flags,
-                                                   PG_BUSY);
-                                               UVM_PAGE_OWN(p, NULL);
-                                               if (anon)
-                                                       rw_exit(anon->an_lock);
-                                               else
-                                                       rw_exit(
-                                                           uobj->vmobjlock);
-                                               continue;
-                                       }
-                                       swcpages = 0;   /* cluster is empty */
-                               }
-
-                               /* add block to cluster */
-                               swpps[swcpages] = p;
-                               if (anon)
-                                       anon->an_swslot = swslot + swcpages;
-                               else
-                                       uao_set_swslot(uobj,
-                                           p->offset >> PAGE_SHIFT,
-                                           swslot + swcpages);
-                               swcpages++;
+                       if (slot > 0) {
+                               /* this page is now only in swap */
+                               KASSERT(uvmexp.swpgonly <
+                                       uvmexp.swpginuse);
+                               atomic_inc_int(&uvmexp.swpgonly);
                        }
-               } else {
-                       /* if p == NULL we must be doing a last swap i/o */
-                       swap_backed = TRUE;
+                       continue;
                }
 
                /*
-                * now consider doing the pageout.
-                *
-                * for swap-backed pages, we do the pageout if we have either
-                * filled the cluster (in which case (swnpages == swcpages) or
-                * run out of pages (p == NULL).
-                *
-                * for object pages, we always do the pageout.
+                * this page is dirty, skip it if we'll have met our
+                * free target when all the current pageouts complete.
                 */
-               if (swap_backed) {
-                       if (p) {        /* if we just added a page to cluster */
-                               if (anon)
-                                       rw_exit(anon->an_lock);
-                               else
-                                       rw_exit(uobj->vmobjlock);
-
-                               /* cluster not full yet? */
-                               if (swcpages < swnpages)
-                                       continue;
-                       }
-
-                       /* starting I/O now... set up for it */
-                       npages = swcpages;
-                       ppsp = swpps;
-                       /* for swap-backed pages only */
-                       start = (vaddr_t) swslot;
-
-                       /* if this is final pageout we could have a few
-                        * extra swap blocks */
-                       if (swcpages < swnpages) {
-                               uvm_swap_free(swslot + swcpages,
-                                   (swnpages - swcpages));
-                       }
-               } else {
-                       /* normal object pageout */
-                       ppsp = pps;
-                       npages = sizeof(pps) / sizeof(struct vm_page *);
-                       /* not looked at because PGO_ALLPAGES is set */
-                       start = 0;
+               if (free + uvmexp.paging > uvmexp.freetarg << 2) {
+                       rw_exit(slock);
+                       continue;
                }
 
                /*
-                * now do the pageout.
-                *
-                * for swap_backed pages we have already built the cluster.
-                * for !swap_backed pages, uvm_pager_put will call the object's
-                * "make put cluster" function to build a cluster on our behalf.
-                *
-                * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
-                * it to free the cluster pages for us on a successful I/O (it
-                * always does this for un-successful I/O requests).  this
-                * allows us to do clustered pageout without having to deal
-                * with cluster pages at this level.
-                *
-                * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
-                *  IN: locked: page queues
-                * OUT: locked: 
-                *     !locked: pageqs
+                * free any swap allocated to the page since
+                * we'll have to write it again with its new data.
                 */
-
-               uvmexp.pdpageouts++;
-               result = uvm_pager_put(swap_backed ? NULL : uobj, p,
-                   &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
+               uvmpd_dropswap(p);
 
                /*
-                * if we did i/o to swap, zero swslot to indicate that we are
-                * no longer building a swap-backed cluster.
+                * if all pages in swap are only in swap,
+                * the swap space is full and we can't page out
+                * any more swap-backed pages.  reactivate this page
+                * so that we eventually cycle all pages through
+                * the inactive queue.
                 */
+               if (uvm_swapisfull()) {
+                       dirtyreacts++;
+                       uvm_pageactivate(p);
+                       rw_exit(slock);
+                       continue;
+               }
 
-               if (swap_backed)
-                       swslot = 0;             /* done with this cluster */
 
                /*
-                * first, we check for VM_PAGER_PEND which means that the
-                * async I/O is in progress and the async I/O done routine
-                * will clean up after us.   in this case we move on to the
-                * next page.
-                *
-                * there is a very remote chance that the pending async i/o can
-                * finish _before_ we get here.   if that happens, our page "p"
-                * may no longer be on the inactive queue.   so we verify this
-                * when determining the next page (starting over at the head if
-                * we've lost our inactive page).
+                * start new swap pageout cluster (if necessary).
                 */
-
-               if (result == VM_PAGER_PEND) {
-                       uvmexp.paging += npages;
-                       uvm_lock_pageq();
-                       uvmexp.pdpending++;
-                       if (p) {
-                               if (p->pg_flags & PQ_INACTIVE)
-                                       nextpg = TAILQ_NEXT(p, pageq);
-                               else
-                                       nextpg = TAILQ_FIRST(pglst);
-                       } else {
-                               nextpg = NULL;
-                       }
+               if (swapcluster_allocslots(&swc)) {
+                       rw_exit(slock);
                        continue;
                }
 
-               /* clean up "p" if we have one */
-               if (p) {
-                       /*
-                        * the I/O request to "p" is done and uvm_pager_put
-                        * has freed any cluster pages it may have allocated
-                        * during I/O.  all that is left for us to do is
-                        * clean up page "p" (which is still PG_BUSY).
-                        *
-                        * our result could be one of the following:
-                        *   VM_PAGER_OK: successful pageout
-                        *
-                        *   VM_PAGER_AGAIN: tmp resource shortage, we skip
-                        *     to next page
-                        *   VM_PAGER_{FAIL,ERROR,BAD}: an error.   we
-                        *     "reactivate" page to get it out of the way (it
-                        *     will eventually drift back into the inactive
-                        *     queue for a retry).
-                        *   VM_PAGER_UNLOCK: should never see this as it is
-                        *     only valid for "get" operations
-                        */
-
-                       /* relock p's object: page queues not lock yet, so
-                        * no need for "try" */
-
-                       /* !swap_backed case: already locked... */
-                       if (swap_backed) {
-                               if (anon)
-                                       rw_enter(anon->an_lock, RW_WRITE);
-                               else
-                                       rw_enter(uobj->vmobjlock, RW_WRITE);
-                       }
-
-#ifdef DIAGNOSTIC
-                       if (result == VM_PAGER_UNLOCK)
-                               panic("pagedaemon: pageout returned "
-                                   "invalid 'unlock' code");
-#endif
-
-                       /* handle PG_WANTED now */
-                       if (p->pg_flags & PG_WANTED)
-                               wakeup(p);
-
-                       atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
-                       UVM_PAGE_OWN(p, NULL);
-
-                       /* released during I/O? Can only happen for anons */
-                       if (p->pg_flags & PG_RELEASED) {
-                               KASSERT(anon != NULL);
-                               /*
-                                * remove page so we can get nextpg,
-                                * also zero out anon so we don't use
-                                * it after the free.
-                                */
-                               anon->an_page = NULL;
-                               p->uanon = NULL;
+               /*
+                * at this point, we're definitely going to reuse this
+                * page.  mark the page busy and delayed-free.
+                * we should remove the page from the page queues
+                * so we don't ever look at it again.
+                * adjust counters and such.
+                */
+               atomic_setbits_int(&p->pg_flags, PG_BUSY);
+               UVM_PAGE_OWN(p, "scan_inactive");
+
+               atomic_setbits_int(&p->pg_flags, PG_PAGEOUT);
+               uvmexp.paging++;
+               uvm_pagedequeue(p);
 
-                               rw_exit(anon->an_lock);
-                               uvm_anfree(anon);       /* kills anon */
-                               pmap_page_protect(p, PROT_NONE);
-                               anon = NULL;
-                               uvm_lock_pageq();
-                               nextpg = TAILQ_NEXT(p, pageq);
-                               /* free released page */
-                               uvm_pagefree(p);
-                       } else {        /* page was not released during I/O */
-                               uvm_lock_pageq();
-                               nextpg = TAILQ_NEXT(p, pageq);
-                               if (result != VM_PAGER_OK) {
-                                       /* pageout was a failure... */
-                                       if (result != VM_PAGER_AGAIN)
-                                               uvm_pageactivate(p);
-                                       pmap_clear_reference(p);
-                                       /* XXXCDC: if (swap_backed) FREE p's
-                                        * swap block? */
-                               } else {
-                                       /* pageout was a success... */
-                                       pmap_clear_reference(p);
-                                       pmap_clear_modify(p);
-                                       atomic_setbits_int(&p->pg_flags,
-                                           PG_CLEAN);
-                               }
-                       }
+               uvmexp.pgswapout++;
+               uvm_unlock_pageq();
 
-                       /*
-                        * drop object lock (if there is an object left).   do
-                        * a safety check of nextpg to make sure it is on the
-                        * inactive queue (it should be since PG_BUSY pages on
-                        * the inactive queue can't be re-queued [note: not
-                        * true for active queue]).
-                        */
-                       if (anon)
-                               rw_exit(anon->an_lock);
-                       else if (uobj)
-                               rw_exit(uobj->vmobjlock);
+               /*
+                * add the new page to the cluster.
+                */
+               error = swapcluster_add(&swc, p);
+               KASSERT(error == 0);
+               rw_exit(slock);
 
-                       if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
-                               nextpg = TAILQ_FIRST(pglst);    /* reload! */
-                       }
-               } else {
-                       /*
-                        * if p is null in this loop, make sure it stays null
-                        * in the next loop.
-                        */
-                       nextpg = NULL;
+               swapcluster_flush(&swc, FALSE);
+               uvm_lock_pageq();
 
-                       /*
-                        * lock page queues here just so they're always locked
-                        * at the end of the loop.
-                        */
-                       uvm_lock_pageq();
+               /*
+                * the pageout is in progress.  bump counters and set up
+                * for the next loop.
+                */
+               uvmexp.pdpending++;
+               if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
+                       nextpg = TAILQ_FIRST(pglst);
                }
+
        }
-       return (retval);
+
+       uvm_unlock_pageq();
+       swapcluster_flush(&swc, TRUE);
+       uvm_lock_pageq();
 }
 
 /*
@@ -869,16 +799,11 @@ uvmpd_scan_inactive(struct pglist *pglst
 void
 uvmpd_scan(void)
 {
-       int free, inactive_shortage, swap_shortage, pages_freed;
-       struct vm_page *p, *nextpg;
-       struct uvm_object *uobj;
-       struct vm_anon *anon;
-       struct rwlock *slock;
+       int free, swap_shortage, pages_freed;
 
        MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
 
        uvmexp.pdrevs++;                /* counter */
-       uobj = NULL;
 
        /*
         * get current "free" page count
@@ -905,26 +830,16 @@ uvmpd_scan(void)
         * to inactive ones.
         */
 
-       /*
-        * alternate starting queue between swap and object based on the
-        * low bit of uvmexp.pdrevs (which we bump by one each call).
-        */
        pages_freed = uvmexp.pdfreed;
        (void) uvmpd_scan_inactive(&uvm.page_inactive);
        pages_freed = uvmexp.pdfreed - pages_freed;
 
        /*
-        * we have done the scan to get free pages.   now we work on meeting
-        * our inactive target.
-        */
-       inactive_shortage = uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;
-
-       /*
         * detect if we're not going to be able to page anything out
         * until we free some swap resources from active pages.
         */
-       free = uvmexp.free - BUFPAGES_DEFICIT;
        swap_shortage = 0;
+       free = uvmexp.free - BUFPAGES_DEFICIT;
        if (free < uvmexp.freetarg &&
            uvmexp.swpginuse == uvmexp.swpages &&
            !uvm_swapisfull() &&
@@ -932,6 +847,23 @@ uvmpd_scan(void)
                swap_shortage = uvmexp.freetarg - free;
        }
 
+       uvmpd_balancequeue(swap_shortage);
+}
+
+void
+uvmpd_balancequeue(int swap_shortage)
+{
+       int inactive_shortage;
+       struct rwlock *slock;
+       struct vm_page *p, *nextpg;
+
+
+       /*
+        * we have done the scan to get free pages.   now we work on meeting
+        * our inactive target.
+        */
+       inactive_shortage = uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;
+
        for (p = TAILQ_FIRST(&uvm.page_active);
             p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
             p = nextpg) {
@@ -943,19 +875,9 @@ uvmpd_scan(void)
                /*
                 * lock the page's owner.
                 */
-               if (p->uobject != NULL) {
-                       uobj = p->uobject;
-                       slock = uobj->vmobjlock;
-                       if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
-                               continue;
-                       }
-               } else {
-                       anon = p->uanon;
-                       KASSERT(p->uanon != NULL);
-                       slock = anon->an_lock;
-                       if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
-                               continue;
-                       }
+               slock = uvmpd_trylockowner(p);
+               if (slock == NULL) {
+                       continue;
                }
 
                /*
@@ -976,8 +898,7 @@ uvmpd_scan(void)
                                p->uanon->an_swslot = 0;
                                atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
                                swap_shortage--;
-                       }
-                       if (p->pg_flags & PQ_AOBJ) {
+                       } else if (p->pg_flags & PQ_AOBJ) {
                                int slot = uao_set_swslot(p->uobject,
                                        p->offset >> PAGE_SHIFT, 0);
                                if (slot) {
@@ -990,8 +911,7 @@ uvmpd_scan(void)
                }
 
                /*
-                * deactivate this page if there's a shortage of
-                * inactive pages.
+                * if there's a shortage of inactive pages, deactivate.
                 */
                if (inactive_shortage > 0) {
                        pmap_page_protect(p, PROT_NONE);
Index: uvm/uvm_swap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.155
diff -u -p -r1.155 uvm_swap.c
--- uvm/uvm_swap.c      28 Apr 2022 09:58:11 -0000      1.155
+++ uvm/uvm_swap.c      24 May 2022 12:31:34 -0000
@@ -1711,7 +1711,7 @@ uvm_swap_io(struct vm_page **pps, int st
 
        /* encrypt to swap */
        if (write && bounce) {
-               int i, opages;
+               int i, wanted = 0;
                caddr_t src, dst;
                u_int64_t block;
 
@@ -1741,13 +1741,43 @@ uvm_swap_io(struct vm_page **pps, int st
                }
 
                uvm_pagermapout(kva, npages);
+               kva = bouncekva;
 
-               /* dispose of pages we dont use anymore */
-               opages = npages;
-               uvm_pager_dropcluster(NULL, NULL, pps, &opages,
-                                     PGO_PDFREECLUST);
+               /* dispose of the pages we don't use anymore */
+               for (i = 0; i < npages; i++) {
+                       struct rwlock *slock;
+                       struct vm_page *pg = pps[i];
 
-               kva = bouncekva;
+                       KASSERT(pg->pg_flags & PG_PAGEOUT);
+
+                       if (pg->pg_flags & PQ_ANON) {
+                               slock = pg->uanon->an_lock;
+                       } else {
+                               slock = pg->uobject->vmobjlock;
+                       }
+                       rw_enter(slock, RW_WRITE);
+                       uvm_lock_pageq();
+                       atomic_clearbits_int(&pg->pg_flags, PG_PAGEOUT);
+                       uvmexp.paging--;
+
+                       /*
+                        * If a process faulted on a page of the anon being
+                        * swapped out it is waiting and we cannot release
+                        * it.
+                        */
+                       if (pg->pg_flags & PG_WANTED) {
+                               wanted++;
+                       } else {
+                               atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
+                       }
+                       uvm_page_unbusy(&pg, 1);
+                       uvm_unlock_pageq();
+                       rw_exit(slock);
+               }
+
+               /* these pages are now only in swap. */
+               KASSERT(uvmexp.swpgonly + (npages-wanted) <= uvmexp.swpginuse);
+               atomic_add_int(&uvmexp.swpgonly, (npages-wanted));
        }
 
        /*

Reply via email to