The aiodone daemon accounts for and frees/releases pages after they have
been written to swap. It is only used for asynchronous writes. The diff
below uses this knowledge to:
- Stop suggesting that uvm_swap_get() can be asynchronous. There's an
assert for PGO_SYNCIO 3 lines above.
- Remove unused support for asynchronous read, including error
conditions, from uvm_aio_aiodone_pages().
- Grab the proper lock for each page that has been written to swap.
This allows to enable an assert in uvm_page_unbusy().
- Move the uvm_anon_release() call outside of uvm_page_unbusy() and
assert for the different anon cases. This will allow us to unify
code paths waiting for busy pages.
This is adapted/simplified from what is in NetBSD.
ok?
Index: uvm/uvm_aobj.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
retrieving revision 1.103
diff -u -p -r1.103 uvm_aobj.c
--- uvm/uvm_aobj.c 29 Dec 2021 20:22:06 -0000 1.103
+++ uvm/uvm_aobj.c 29 Jun 2022 11:16:35 -0000
@@ -143,7 +143,6 @@ struct pool uvm_aobj_pool;
static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int,
boolean_t);
-static int uao_find_swslot(struct uvm_object *, int);
static boolean_t uao_flush(struct uvm_object *, voff_t,
voff_t, int);
static void uao_free(struct uvm_aobj *);
@@ -241,7 +240,7 @@ uao_find_swhash_elt(struct uvm_aobj *aob
/*
* uao_find_swslot: find the swap slot number for an aobj/pageidx
*/
-inline static int
+int
uao_find_swslot(struct uvm_object *uobj, int pageidx)
{
struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
Index: uvm/uvm_aobj.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_aobj.h,v
retrieving revision 1.17
diff -u -p -r1.17 uvm_aobj.h
--- uvm/uvm_aobj.h 21 Oct 2020 09:08:14 -0000 1.17
+++ uvm/uvm_aobj.h 29 Jun 2022 11:16:35 -0000
@@ -60,6 +60,7 @@
void uao_init(void);
int uao_set_swslot(struct uvm_object *, int, int);
+int uao_find_swslot (struct uvm_object *, int);
int uao_dropswap(struct uvm_object *, int);
int uao_swap_off(int, int);
int uao_shrink(struct uvm_object *, int);
Index: uvm/uvm_page.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_page.c,v
retrieving revision 1.166
diff -u -p -r1.166 uvm_page.c
--- uvm/uvm_page.c 12 May 2022 12:48:36 -0000 1.166
+++ uvm/uvm_page.c 29 Jun 2022 11:47:55 -0000
@@ -1036,13 +1036,14 @@ uvm_pagefree(struct vm_page *pg)
* uvm_page_unbusy: unbusy an array of pages.
*
* => pages must either all belong to the same object, or all belong to anons.
+ * => if pages are object-owned, object must be locked.
* => if pages are anon-owned, anons must have 0 refcount.
+ * => caller must make sure that anon-owned pages are not PG_RELEASED.
*/
void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
struct vm_page *pg;
- struct uvm_object *uobj;
int i;
for (i = 0; i < npgs; i++) {
@@ -1052,35 +1053,19 @@ uvm_page_unbusy(struct vm_page **pgs, in
continue;
}
-#if notyet
- /*
- * XXX swap case in uvm_aio_aiodone() is not holding the lock.
- *
- * This isn't compatible with the PG_RELEASED anon case below.
- */
KASSERT(uvm_page_owner_locked_p(pg));
-#endif
KASSERT(pg->pg_flags & PG_BUSY);
if (pg->pg_flags & PG_WANTED) {
wakeup(pg);
}
if (pg->pg_flags & PG_RELEASED) {
- uobj = pg->uobject;
- if (uobj != NULL) {
- uvm_lock_pageq();
- pmap_page_protect(pg, PROT_NONE);
- /* XXX won't happen right now */
- if (pg->pg_flags & PQ_AOBJ)
- uao_dropswap(uobj,
- pg->offset >> PAGE_SHIFT);
- uvm_pagefree(pg);
- uvm_unlock_pageq();
- } else {
- rw_enter(pg->uanon->an_lock, RW_WRITE);
- uvm_anon_release(pg->uanon);
- }
+ KASSERT(pg->uobject != NULL ||
+ (pg->uanon != NULL && pg->uanon->an_ref > 0));
+ atomic_clearbits_int(&pg->pg_flags, PG_RELEASED);
+ uvm_pagefree(pg);
} else {
+ KASSERT((pg->pg_flags & PG_FAKE) == 0);
atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY);
UVM_PAGE_OWN(pg, NULL);
}
Index: uvm/uvm_pager.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.81
diff -u -p -r1.81 uvm_pager.c
--- uvm/uvm_pager.c 28 Jun 2022 19:07:40 -0000 1.81
+++ uvm/uvm_pager.c 29 Jun 2022 11:16:35 -0000
@@ -735,50 +735,77 @@ void
uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, boolean_t write,
int error)
{
- struct vm_page *pg;
struct uvm_object *uobj;
+ struct vm_page *pg;
+ struct rwlock *slock;
boolean_t swap;
- int i;
+ int i, swslot;
+ slock = NULL;
uobj = NULL;
+ pg = pgs[0];
+ swap = (pg->uanon != NULL && pg->uobject == NULL) ||
+ (pg->pg_flags & PQ_AOBJ) != 0;
+
+ KASSERT(swap);
+ KASSERT(write);
+
+ if (error) {
+ if (pg->uobject != NULL) {
+ swslot = uao_find_swslot(pg->uobject,
+ pg->offset >> PAGE_SHIFT);
+ } else {
+ swslot = pg->uanon->an_swslot;
+ }
+ KASSERT(swslot);
+ }
for (i = 0; i < npages; i++) {
+ int anon_disposed = 0;
+
pg = pgs[i];
+ KASSERT((pg->pg_flags & PG_FAKE) == 0);
- if (i == 0) {
- swap = (pg->pg_flags & PQ_SWAPBACKED) != 0;
- if (!swap) {
- uobj = pg->uobject;
- rw_enter(uobj->vmobjlock, RW_WRITE);
- }
+ /*
+ * lock each page's object (or anon) individually since
+ * each page may need a different lock.
+ */
+ if (pg->uobject != NULL) {
+ slock = pg->uobject->vmobjlock;
+ } else {
+ slock = pg->uanon->an_lock;
}
- KASSERT(swap || pg->uobject == uobj);
+ rw_enter(slock, RW_WRITE);
+ anon_disposed = (pg->pg_flags & PG_RELEASED) != 0;
+ KASSERT(!anon_disposed || pg->uobject != NULL ||
+ pg->uanon->an_ref == 0);
+ uvm_lock_pageq();
/*
- * if this is a read and we got an error, mark the pages
- * PG_RELEASED so that uvm_page_unbusy() will free them.
+ * this was a successful write,
+ * mark the page PG_CLEAN.
*/
- if (!write && error) {
- atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
- continue;
+ if (!error) {
+ pmap_clear_reference(pg);
+ pmap_clear_modify(pg);
+ atomic_setbits_int(&pg->pg_flags, PG_CLEAN);
}
- KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0);
/*
- * if this is a read and the page is PG_FAKE,
- * or this was a successful write,
- * mark the page PG_CLEAN and not PG_FAKE.
+ * unlock everything for this page now.
*/
- if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM))
{
- pmap_clear_reference(pgs[i]);
- pmap_clear_modify(pgs[i]);
- atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN);
- atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE);
+ if (pg->uobject == NULL && anon_disposed) {
+ uvm_unlock_pageq();
+ uvm_anon_release(pg->uanon);
+ } else {
+ uvm_page_unbusy(&pg, 1);
+ uvm_unlock_pageq();
+ rw_exit(slock);
}
}
- uvm_page_unbusy(pgs, npages);
- if (!swap) {
- rw_exit(uobj->vmobjlock);
+
+ if (error) {
+ uvm_swap_markbad(swslot, npages);
}
}
Index: uvm/uvm_swap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.158
diff -u -p -r1.158 uvm_swap.c
--- uvm/uvm_swap.c 28 Jun 2022 19:39:54 -0000 1.158
+++ uvm/uvm_swap.c 29 Jun 2022 11:18:05 -0000
@@ -1603,8 +1603,7 @@ uvm_swap_get(struct vm_page *page, int s
}
KERNEL_LOCK();
- result = uvm_swap_io(&page, swslot, 1, B_READ |
- ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
+ result = uvm_swap_io(&page, swslot, 1, B_READ);
KERNEL_UNLOCK();
if (result == VM_PAGER_OK || result == VM_PAGER_PEND) {