Module Name:    src
Committed By:   ad
Date:           Sat Mar 14 20:23:51 UTC 2020

Modified Files:
        src/sys/kern: kern_synch.c
        src/sys/miscfs/genfs: genfs_io.c
        src/sys/rump/librump/rumpkern: ltsleep.c vm.c
        src/sys/sys: proc.h
        src/sys/ufs/lfs: lfs_pages.c lfs_vfsops.c
        src/sys/uvm: uvm.h uvm_amap.c uvm_anon.c uvm_aobj.c uvm_bio.c
            uvm_fault.c uvm_km.c uvm_loan.c uvm_page.c uvm_page.h uvm_vnode.c

Log Message:
Make page waits (WANTED vs BUSY) interlocked by pg->interlock.  Gets RW
locks out of the equation for sleep/wakeup, and allows observing+waiting
for busy pages when holding only a read lock.  Proposed on tech-kern.


To generate a diff of this commit:
cvs rdiff -u -r1.343 -r1.344 src/sys/kern/kern_synch.c
cvs rdiff -u -r1.91 -r1.92 src/sys/miscfs/genfs/genfs_io.c
cvs rdiff -u -r1.35 -r1.36 src/sys/rump/librump/rumpkern/ltsleep.c
cvs rdiff -u -r1.185 -r1.186 src/sys/rump/librump/rumpkern/vm.c
cvs rdiff -u -r1.359 -r1.360 src/sys/sys/proc.h
cvs rdiff -u -r1.22 -r1.23 src/sys/ufs/lfs/lfs_pages.c
cvs rdiff -u -r1.375 -r1.376 src/sys/ufs/lfs/lfs_vfsops.c
cvs rdiff -u -r1.75 -r1.76 src/sys/uvm/uvm.h
cvs rdiff -u -r1.117 -r1.118 src/sys/uvm/uvm_amap.c
cvs rdiff -u -r1.74 -r1.75 src/sys/uvm/uvm_anon.c
cvs rdiff -u -r1.136 -r1.137 src/sys/uvm/uvm_aobj.c
cvs rdiff -u -r1.104 -r1.105 src/sys/uvm/uvm_bio.c
cvs rdiff -u -r1.217 -r1.218 src/sys/uvm/uvm_fault.c
cvs rdiff -u -r1.156 -r1.157 src/sys/uvm/uvm_km.c
cvs rdiff -u -r1.96 -r1.97 src/sys/uvm/uvm_loan.c
cvs rdiff -u -r1.230 -r1.231 src/sys/uvm/uvm_page.c
cvs rdiff -u -r1.99 -r1.100 src/sys/uvm/uvm_page.h
cvs rdiff -u -r1.108 -r1.109 src/sys/uvm/uvm_vnode.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_synch.c
diff -u src/sys/kern/kern_synch.c:1.343 src/sys/kern/kern_synch.c:1.344
--- src/sys/kern/kern_synch.c:1.343	Sat Mar 14 18:08:39 2020
+++ src/sys/kern/kern_synch.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_synch.c,v 1.343 2020/03/14 18:08:39 ad Exp $	*/
+/*	$NetBSD: kern_synch.c,v 1.344 2020/03/14 20:23:51 ad Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009, 2019, 2020
@@ -69,7 +69,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.343 2020/03/14 18:08:39 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.344 2020/03/14 20:23:51 ad Exp $");
 
 #include "opt_kstack.h"
 #include "opt_dtrace.h"
@@ -220,46 +220,6 @@ mtsleep(wchan_t ident, pri_t priority, c
 }
 
 /*
- * XXXAD Temporary - for use of UVM only.  PLEASE DO NOT USE ELSEWHERE. 
- * Will go once there is a better solution, eg waits interlocked by
- * pg->interlock.  To wake an LWP sleeping with this, you need to hold a
- * write lock.
- */
-int
-rwtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
-	 krwlock_t *rw)
-{
-	struct lwp *l = curlwp;
-	sleepq_t *sq;
-	kmutex_t *mp;
-	int error;
-	krw_t op;
-
-	KASSERT((l->l_pflag & LP_INTR) == 0);
-	KASSERT(ident != &lbolt);
-
-	if (sleepq_dontsleep(l)) {
-		(void)sleepq_abort(NULL, (priority & PNORELOCK) != 0);
-		if ((priority & PNORELOCK) != 0)
-			rw_exit(rw);
-		return 0;
-	}
-
-	l->l_kpriority = true;
-	sq = sleeptab_lookup(&sleeptab, ident, &mp);
-	sleepq_enter(sq, l, mp);
-	sleepq_enqueue(sq, ident, wmesg, &sleep_syncobj);
-	op = rw_lock_op(rw);
-	rw_exit(rw);
-	error = sleepq_block(timo, priority & PCATCH);
-
-	if ((priority & PNORELOCK) == 0)
-		rw_enter(rw, op);
-
-	return error;
-}
-
-/*
  * General sleep call for situations where a wake-up is not expected.
  */
 int

Index: src/sys/miscfs/genfs/genfs_io.c
diff -u src/sys/miscfs/genfs/genfs_io.c:1.91 src/sys/miscfs/genfs/genfs_io.c:1.92
--- src/sys/miscfs/genfs/genfs_io.c:1.91	Sat Mar 14 19:07:22 2020
+++ src/sys/miscfs/genfs/genfs_io.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: genfs_io.c,v 1.91 2020/03/14 19:07:22 ad Exp $	*/
+/*	$NetBSD: genfs_io.c,v 1.92 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.91 2020/03/14 19:07:22 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.92 2020/03/14 20:23:51 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -524,9 +524,6 @@ out:
 		if (i < ridx || i >= ridx + orignmempages || async) {
 			UVMHIST_LOG(ubchist, "unbusy pg %#jx offset 0x%jx",
 			    (uintptr_t)pg, pg->offset,0,0);
-			if (pg->flags & PG_WANTED) {
-				wakeup(pg);
-			}
 			if (pg->flags & PG_FAKE) {
 				KASSERT(overwrite);
 				uvm_pagezero(pg);
@@ -537,8 +534,9 @@ out:
 			}
 			uvm_pagelock(pg);
 			uvm_pageenqueue(pg);
+			uvm_pageunbusy(pg);
 			uvm_pageunlock(pg);
-			pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
+			pg->flags &= ~PG_FAKE;
 			UVM_PAGE_OWN(pg, NULL);
 		} else if (memwrite && !overwrite &&
 		    uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) {
@@ -1093,8 +1091,7 @@ retry:
 				continue;
 			}
 			nextoff = pg->offset; /* visit this page again */
-			pg->flags |= PG_WANTED;
-			UVM_UNLOCK_AND_WAIT_RW(pg, slock, 0, "genput", 0);
+			uvm_pagewait(pg, slock, "genput");
 			/*
 			 * as we dropped the object lock, our cached pages can
 			 * be stale.

Index: src/sys/rump/librump/rumpkern/ltsleep.c
diff -u src/sys/rump/librump/rumpkern/ltsleep.c:1.35 src/sys/rump/librump/rumpkern/ltsleep.c:1.36
--- src/sys/rump/librump/rumpkern/ltsleep.c:1.35	Sun Feb 23 15:46:42 2020
+++ src/sys/rump/librump/rumpkern/ltsleep.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: ltsleep.c,v 1.35 2020/02/23 15:46:42 ad Exp $	*/
+/*	$NetBSD: ltsleep.c,v 1.36 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 2009, 2010 Antti Kantee.  All Rights Reserved.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ltsleep.c,v 1.35 2020/02/23 15:46:42 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ltsleep.c,v 1.36 2020/03/14 20:23:51 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -151,21 +151,6 @@ mtsleep(wchan_t ident, pri_t prio, const
 	return rv;
 }
 
-int
-rwtsleep(wchan_t ident, pri_t prio, const char *wmesg, int timo, krwlock_t *lock)
-{
-	krw_t op = rw_write_held(lock) ? RW_WRITER : RW_READER;
-	int rv;
-
-	mutex_spin_enter(&qlock);
-	rw_exit(lock);
-	rv = sleeper(ident, timo, true);
-	if ((prio & PNORELOCK) == 0)
-		rw_enter(lock, op);
-
-	return rv;
-}
-
 void
 wakeup(wchan_t ident)
 {

Index: src/sys/rump/librump/rumpkern/vm.c
diff -u src/sys/rump/librump/rumpkern/vm.c:1.185 src/sys/rump/librump/rumpkern/vm.c:1.186
--- src/sys/rump/librump/rumpkern/vm.c:1.185	Sat Mar 14 19:54:06 2020
+++ src/sys/rump/librump/rumpkern/vm.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: vm.c,v 1.185 2020/03/14 19:54:06 ad Exp $	*/
+/*	$NetBSD: vm.c,v 1.186 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 2007-2011 Antti Kantee.  All Rights Reserved.
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.185 2020/03/14 19:54:06 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.186 2020/03/14 20:23:51 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
@@ -219,8 +219,12 @@ uvm_pagefree(struct vm_page *pg)
 
 	KASSERT(rw_write_held(uobj->vmobjlock));
 
-	if (pg->flags & PG_WANTED)
+	mutex_enter(&pg->interlock);
+	if (pg->pqflags & PQ_WANTED) {
+		pg->pqflags &= ~PQ_WANTED;
 		wakeup(pg);
+	}
+	mutex_exit(&pg->interlock);
 
 	uobj->uo_npages--;
 	pg2 = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT);
@@ -682,13 +686,41 @@ uvm_page_unbusy(struct vm_page **pgs, in
 			continue;
 
 		KASSERT(pg->flags & PG_BUSY);
-		if (pg->flags & PG_WANTED)
-			wakeup(pg);
-		if (pg->flags & PG_RELEASED)
+		if (pg->flags & PG_RELEASED) {
 			uvm_pagefree(pg);
-		else
-			pg->flags &= ~(PG_WANTED|PG_BUSY);
+		} else {
+			uvm_pagelock(pg);
+			uvm_pageunbusy(pg);
+			uvm_pageunlock(pg);
+		}
+	}
+}
+
+void
+uvm_pagewait(struct vm_page *pg, krwlock_t *lock, const char *wmesg)
+{
+
+	KASSERT(rw_lock_held(lock));
+	KASSERT((pg->flags & PG_BUSY) != 0);
+
+	mutex_enter(&pg->interlock);
+	pg->pqflags |= PQ_WANTED;
+	rw_exit(lock);
+	UVM_UNLOCK_AND_WAIT(pg, &pg->interlock, false, wmesg, 0);
+}
+
+void
+uvm_pageunbusy(struct vm_page *pg)
+{
+
+	KASSERT((pg->flags & PG_BUSY) != 0);
+	KASSERT(mutex_owned(&pg->interlock));
+
+	if ((pg->pqflags & PQ_WANTED) != 0) {
+		pg->pqflags &= ~PQ_WANTED;
+		wakeup(pg);
 	}
+	pg->flags &= ~PG_BUSY;
 }
 
 void

Index: src/sys/sys/proc.h
diff -u src/sys/sys/proc.h:1.359 src/sys/sys/proc.h:1.360
--- src/sys/sys/proc.h:1.359	Sun Feb 23 15:46:42 2020
+++ src/sys/sys/proc.h	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: proc.h,v 1.359 2020/02/23 15:46:42 ad Exp $	*/
+/*	$NetBSD: proc.h,v 1.360 2020/03/14 20:23:51 ad Exp $	*/
 
 /*-
  * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -513,7 +513,6 @@ void	fixjobc(struct proc *, struct pgrp 
 
 int	tsleep(wchan_t, pri_t, const char *, int);
 int	mtsleep(wchan_t, pri_t, const char *, int, kmutex_t *);
-int	rwtsleep(wchan_t, pri_t, const char *, int, krwlock_t *);
 void	wakeup(wchan_t);
 int	kpause(const char *, bool, int, kmutex_t *);
 void	exit1(struct lwp *, int, int) __dead;

Index: src/sys/ufs/lfs/lfs_pages.c
diff -u src/sys/ufs/lfs/lfs_pages.c:1.22 src/sys/ufs/lfs/lfs_pages.c:1.23
--- src/sys/ufs/lfs/lfs_pages.c:1.22	Sun Feb 23 15:46:42 2020
+++ src/sys/ufs/lfs/lfs_pages.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_pages.c,v 1.22 2020/02/23 15:46:42 ad Exp $	*/
+/*	$NetBSD: lfs_pages.c,v 1.23 2020/03/14 20:23:51 ad Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2019 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.22 2020/02/23 15:46:42 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.23 2020/03/14 20:23:51 ad Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd.h"
@@ -167,8 +167,7 @@ wait_for_page(struct vnode *vp, struct v
 	lastpg = pg;
 #endif
 
-	pg->flags |= PG_WANTED;
-	UVM_UNLOCK_AND_WAIT_RW(pg, vp->v_uobj.vmobjlock, 0, "lfsput", 0);
+	uvm_pagewait(pg, vp->v_uobj.vmobjlock, "lfsput");
 	rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
 }
 
@@ -349,9 +348,9 @@ check_dirty(struct lfs *fs, struct vnode
 					pg->flags |= PG_DELWRI;
 				}
 			}
-			if (pg->flags & PG_WANTED)
-				wakeup(pg);
-			pg->flags &= ~(PG_WANTED|PG_BUSY);
+			uvm_pagelock(pg);
+			uvm_pageunbusy(pg);
+			uvm_pageunlock(pg);
 			UVM_PAGE_OWN(pg, NULL);
 		}
 
@@ -495,9 +494,7 @@ retry:
 			pg = uvm_pagelookup(&vp->v_uobj, off);
 			KASSERT(pg != NULL);
 			while (pg->flags & PG_BUSY) {
-				pg->flags |= PG_WANTED;
-				UVM_UNLOCK_AND_WAIT_RW(pg, vp->v_uobj.vmobjlock, 0,
-						    "lfsput2", 0);
+				uvm_pagewait(pg, vp->v_uobj.vmobjlock, "lfsput2");
 				rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
 			}
 			uvm_pagelock(pg);

Index: src/sys/ufs/lfs/lfs_vfsops.c
diff -u src/sys/ufs/lfs/lfs_vfsops.c:1.375 src/sys/ufs/lfs/lfs_vfsops.c:1.376
--- src/sys/ufs/lfs/lfs_vfsops.c:1.375	Sat Mar 14 15:35:35 2020
+++ src/sys/ufs/lfs/lfs_vfsops.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_vfsops.c,v 1.375 2020/03/14 15:35:35 ad Exp $	*/
+/*	$NetBSD: lfs_vfsops.c,v 1.376 2020/03/14 20:23:51 ad Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2007
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.375 2020/03/14 15:35:35 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.376 2020/03/14 20:23:51 ad Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_lfs.h"
@@ -2277,7 +2277,6 @@ lfs_gop_write(struct vnode *vp, struct v
 		DLOG((DLOG_PAGE, "pg[%d]->loan_count = %d\n", i,
 		      pg->loan_count));
 	}
-	/* uvm_pageunbusy takes care of PG_BUSY, PG_WANTED */
 	uvm_page_unbusy(pgs, npages);
 	mutex_exit(vp->v_interlock);
 	return EAGAIN;

Index: src/sys/uvm/uvm.h
diff -u src/sys/uvm/uvm.h:1.75 src/sys/uvm/uvm.h:1.76
--- src/sys/uvm/uvm.h:1.75	Sun Feb 23 15:46:43 2020
+++ src/sys/uvm/uvm.h	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm.h,v 1.75 2020/02/23 15:46:43 ad Exp $	*/
+/*	$NetBSD: uvm.h,v 1.76 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -174,13 +174,6 @@ do {									\
 	    msg, timo, slock);						\
 } while (/*CONSTCOND*/ 0)
 
-/* XXX temporary */
-#define	UVM_UNLOCK_AND_WAIT_RW(event, slock, intr, msg, timo)		\
-do {									\
-	(void) rwtsleep(event, PVM | PNORELOCK | (intr ? PCATCH : 0),	\
-	    msg, timo, slock);						\
-} while (/*CONSTCOND*/ 0)
-
 void uvm_kick_pdaemon(void);
 
 /*

Index: src/sys/uvm/uvm_amap.c
diff -u src/sys/uvm/uvm_amap.c:1.117 src/sys/uvm/uvm_amap.c:1.118
--- src/sys/uvm/uvm_amap.c:1.117	Sat Mar 14 18:08:40 2020
+++ src/sys/uvm/uvm_amap.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_amap.c,v 1.117 2020/03/14 18:08:40 ad Exp $	*/
+/*	$NetBSD: uvm_amap.c,v 1.118 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.117 2020/03/14 18:08:40 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.118 2020/03/14 20:23:51 ad Exp $");
 
 #include "opt_uvmhist.h"
 
@@ -1056,9 +1056,7 @@ ReStart:
 		 */
 
 		if (pg->flags & PG_BUSY) {
-			pg->flags |= PG_WANTED;
-			UVM_UNLOCK_AND_WAIT_RW(pg, amap->am_lock, false,
-			    "cownow", 0);
+			uvm_pagewait(pg, amap->am_lock, "cownow");
 			goto ReStart;
 		}
 
@@ -1097,8 +1095,9 @@ ReStart:
 		amap->am_anon[slot] = nanon;
 
 		/*
-		 * Drop PG_BUSY on new page.  Since its owner was locked all
-		 * this time - it cannot be PG_RELEASED or PG_WANTED.
+		 * Drop PG_BUSY on new page.  Since its owner was write
+		 * locked all this time - it cannot be PG_RELEASED or
+		 * waited on.
 		 */
 		uvm_pagelock(npg);
 		uvm_pageactivate(npg);

Index: src/sys/uvm/uvm_anon.c
diff -u src/sys/uvm/uvm_anon.c:1.74 src/sys/uvm/uvm_anon.c:1.75
--- src/sys/uvm/uvm_anon.c:1.74	Mon Feb 24 12:38:57 2020
+++ src/sys/uvm/uvm_anon.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_anon.c,v 1.74 2020/02/24 12:38:57 rin Exp $	*/
+/*	$NetBSD: uvm_anon.c,v 1.75 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.74 2020/02/24 12:38:57 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.75 2020/03/14 20:23:51 ad Exp $");
 
 #include "opt_uvmhist.h"
 
@@ -341,7 +341,7 @@ uvm_anon_pagein(struct vm_amap *amap, st
 	}
 
 	/*
-	 * Mark the page as dirty, clear its swslot and un-busy it.
+	 * Mark the page as dirty and clear its swslot.
 	 */
 
 	pg = anon->an_page;
@@ -359,11 +359,6 @@ uvm_anon_pagein(struct vm_amap *amap, st
 	uvm_pagelock(pg);
 	uvm_pagedeactivate(pg);
 	uvm_pageunlock(pg);
-	if (pg->flags & PG_WANTED) {
-		pg->flags &= ~PG_WANTED;
-		wakeup(pg);
-	}
-
 	rw_exit(anon->an_lock);
 	if (uobj) {
 		rw_exit(uobj->vmobjlock);

Index: src/sys/uvm/uvm_aobj.c
diff -u src/sys/uvm/uvm_aobj.c:1.136 src/sys/uvm/uvm_aobj.c:1.137
--- src/sys/uvm/uvm_aobj.c:1.136	Mon Feb 24 12:38:57 2020
+++ src/sys/uvm/uvm_aobj.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_aobj.c,v 1.136 2020/02/24 12:38:57 rin Exp $	*/
+/*	$NetBSD: uvm_aobj.c,v 1.137 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.136 2020/02/24 12:38:57 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.137 2020/03/14 20:23:51 ad Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_uvmhist.h"
@@ -621,9 +621,7 @@ uao_detach(struct uvm_object *uobj)
 		uvm_page_array_advance(&a);
 		pmap_page_protect(pg, VM_PROT_NONE);
 		if (pg->flags & PG_BUSY) {
-			pg->flags |= PG_WANTED;
-			UVM_UNLOCK_AND_WAIT_RW(pg, uobj->vmobjlock, false,
-			    "uao_det", 0);
+			uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
 			uvm_page_array_clear(&a);
 			rw_enter(uobj->vmobjlock, RW_WRITER);
 			continue;
@@ -715,9 +713,7 @@ uao_put(struct uvm_object *uobj, voff_t 
 		 */
 
 		if (pg->flags & PG_BUSY) {
-			pg->flags |= PG_WANTED;
-			UVM_UNLOCK_AND_WAIT_RW(pg, uobj->vmobjlock, 0,
-			    "uao_put", 0);
+			uvm_pagewait(pg, uobj->vmobjlock, "uao_put");
 			uvm_page_array_clear(&a);
 			rw_enter(uobj->vmobjlock, RW_WRITER);
 			continue;
@@ -964,12 +960,10 @@ gotpage:
 
 			/* page is there, see if we need to wait on it */
 			if ((ptmp->flags & PG_BUSY) != 0) {
-				ptmp->flags |= PG_WANTED;
 				UVMHIST_LOG(pdhist,
 				    "sleeping, ptmp->flags %#jx\n",
 				    ptmp->flags,0,0,0);
-				UVM_UNLOCK_AND_WAIT_RW(ptmp, uobj->vmobjlock,
-				    false, "uao_get", 0);
+				uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
 				rw_enter(uobj->vmobjlock, RW_WRITER);
 				continue;
 			}
@@ -1038,8 +1032,6 @@ gotpage:
 			if (error != 0) {
 				UVMHIST_LOG(pdhist, "<- done (error=%jd)",
 				    error,0,0,0);
-				if (ptmp->flags & PG_WANTED)
-					wakeup(ptmp);
 
 				/*
 				 * remove the swap slot from the aobj
@@ -1308,14 +1300,11 @@ uao_pagein_page(struct uvm_aobj *aobj, i
 	 */
 	uvm_pagelock(pg);
 	uvm_pageenqueue(pg);
+	uvm_pageunbusy(pg);
 	uvm_pageunlock(pg);
 
-	if (pg->flags & PG_WANTED) {
-		wakeup(pg);
-	}
-	pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
+	pg->flags &= ~(PG_FAKE);
 	uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
-	UVM_PAGE_OWN(pg, NULL);
 
 	return false;
 }

Index: src/sys/uvm/uvm_bio.c
diff -u src/sys/uvm/uvm_bio.c:1.104 src/sys/uvm/uvm_bio.c:1.105
--- src/sys/uvm/uvm_bio.c:1.104	Sun Feb 23 15:46:43 2020
+++ src/sys/uvm/uvm_bio.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_bio.c,v 1.104 2020/02/23 15:46:43 ad Exp $	*/
+/*	$NetBSD: uvm_bio.c,v 1.105 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 1998 Chuck Silvers.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.104 2020/02/23 15:46:43 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.105 2020/03/14 20:23:51 ad Exp $");
 
 #include "opt_uvmhist.h"
 #include "opt_ubc.h"
@@ -236,9 +236,6 @@ ubc_fault_page(const struct uvm_faultinf
 
 	KASSERT(rw_write_held(pg->uobject->vmobjlock));
 
-	if (pg->flags & PG_WANTED) {
-		wakeup(pg);
-	}
 	KASSERT((pg->flags & PG_FAKE) == 0);
 	if (pg->flags & PG_RELEASED) {
 		uvm_pagefree(pg);
@@ -286,9 +283,8 @@ ubc_fault_page(const struct uvm_faultinf
 
 	uvm_pagelock(pg);
 	uvm_pageactivate(pg);
+	uvm_pageunbusy(pg);
 	uvm_pageunlock(pg);
-	pg->flags &= ~(PG_BUSY|PG_WANTED);
-	UVM_PAGE_OWN(pg, NULL);
 
 	return error;
 }

Index: src/sys/uvm/uvm_fault.c
diff -u src/sys/uvm/uvm_fault.c:1.217 src/sys/uvm/uvm_fault.c:1.218
--- src/sys/uvm/uvm_fault.c:1.217	Mon Feb 24 12:38:57 2020
+++ src/sys/uvm/uvm_fault.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_fault.c,v 1.217 2020/02/24 12:38:57 rin Exp $	*/
+/*	$NetBSD: uvm_fault.c,v 1.218 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.217 2020/02/24 12:38:57 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.218 2020/03/14 20:23:51 ad Exp $");
 
 #include "opt_uvmhist.h"
 
@@ -329,7 +329,6 @@ uvmfault_anonget(struct uvm_faultinfo *u
 				UVMHIST_LOG(maphist, "<- OK",0,0,0,0);
 				return 0;
 			}
-			pg->flags |= PG_WANTED;
 			cpu_count(CPU_COUNT_FLTPGWAIT, 1);
 
 			/*
@@ -342,16 +341,13 @@ uvmfault_anonget(struct uvm_faultinfo *u
 				uvmfault_unlockall(ufi, amap, NULL);
 				UVMHIST_LOG(maphist, " unlock+wait on uobj",0,
 				    0,0,0);
-				UVM_UNLOCK_AND_WAIT_RW(pg,
-				    pg->uobject->vmobjlock,
-				    false, "anonget1", 0);
+				uvm_pagewait(pg, pg->uobject->vmobjlock, "anonget1");
 			} else {
 				/* Owner of page is anon. */
 				uvmfault_unlockall(ufi, NULL, NULL);
 				UVMHIST_LOG(maphist, " unlock+wait on anon",0,
 				    0,0,0);
-				UVM_UNLOCK_AND_WAIT_RW(pg, anon->an_lock,
-				    false, "anonget2", 0);
+				uvm_pagewait(pg, anon->an_lock, "anonget2");
 			}
 		} else {
 #if defined(VMSWAP)
@@ -420,9 +416,6 @@ uvmfault_anonget(struct uvm_faultinfo *u
 
 		if (we_own) {
 #if defined(VMSWAP)
-			if (pg->flags & PG_WANTED) {
-				wakeup(pg);
-			}
 			if (error) {
 
 				/*
@@ -486,10 +479,10 @@ released:
 
 			uvm_pagelock(pg);
 			uvm_pageactivate(pg);
+			uvm_pageunbusy(pg);
 			uvm_pageunlock(pg);
-			pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
+			pg->flags &= ~PG_FAKE;
 			uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_UNKNOWN);
-			UVM_PAGE_OWN(pg, NULL);
 #else
 			panic("%s: we_own", __func__);
 #endif /* defined(VMSWAP) */
@@ -1745,7 +1738,7 @@ uvm_fault_lower(
 	 *  - at this point uobjpage can not be NULL
 	 *  - at this point uobjpage can not be PG_RELEASED (since we checked
 	 *  for it above)
-	 *  - at this point uobjpage could be PG_WANTED (handle later)
+	 *  - at this point uobjpage could be waited on (handle later)
 	 */
 
 	KASSERT(uobjpage != NULL);
@@ -1858,12 +1851,11 @@ uvm_fault_lower_neighbor(
 	 * Since this page isn't the page that's actually faulting,
 	 * ignore pmap_enter() failures; it's not critical that we
 	 * enter these right now.
-	 * NOTE: page can't be PG_WANTED or PG_RELEASED because we've
+	 * NOTE: page can't be PG_RELEASED because we've
 	 * held the lock the whole time we've had the handle.
 	 */
 	KASSERT((pg->flags & PG_PAGEOUT) == 0);
 	KASSERT((pg->flags & PG_RELEASED) == 0);
-	KASSERT((pg->flags & PG_WANTED) == 0);
 	KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) ||
 	    uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN);
 	pg->flags &= ~(PG_BUSY);
@@ -1995,12 +1987,10 @@ uvm_fault_lower_io(
 		UVMHIST_LOG(maphist,
 		    "  wasn't able to relock after fault: retry",
 		    0,0,0,0);
-		if (pg->flags & PG_WANTED) {
-			wakeup(pg);
-		}
 		if ((pg->flags & PG_RELEASED) == 0) {
-			pg->flags &= ~(PG_BUSY | PG_WANTED);
-			UVM_PAGE_OWN(pg, NULL);
+			uvm_pagelock(pg);
+			uvm_pageunbusy(pg);
+			uvm_pageunlock(pg);
 		} else {
 			cpu_count(CPU_COUNT_FLTPGRELE, 1);
 			uvm_pagefree(pg);
@@ -2100,10 +2090,9 @@ uvm_fault_lower_direct_loan(
 			 * drop ownership of page, it can't be released
 			 */
 
-			if (uobjpage->flags & PG_WANTED)
-				wakeup(uobjpage);
-			uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
-			UVM_PAGE_OWN(uobjpage, NULL);
+			uvm_pagelock(uobjpage);
+			uvm_pageunbusy(uobjpage);
+			uvm_pageunlock(uobjpage);
 
 			uvmfault_unlockall(ufi, amap, uobj);
 			UVMHIST_LOG(maphist,
@@ -2182,12 +2171,9 @@ uvm_fault_lower_promote(
 		 * since we still hold the object lock.
 		 */
 
-		if (uobjpage->flags & PG_WANTED) {
-			/* still have the obj lock */
-			wakeup(uobjpage);
-		}
-		uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
-		UVM_PAGE_OWN(uobjpage, NULL);
+		uvm_pagelock(uobjpage);
+		uvm_pageunbusy(uobjpage);
+		uvm_pageunlock(uobjpage);
 
 		UVMHIST_LOG(maphist,
 		    "  promote uobjpage %#jx to anon/page %#jx/%#jx",
@@ -2274,19 +2260,15 @@ uvm_fault_lower_enter(
 
 		uvm_pagelock(pg);
 		uvm_pageenqueue(pg);
+		uvm_pageunbusy(pg);
 		uvm_pageunlock(pg);
 
-		if (pg->flags & PG_WANTED)
-			wakeup(pg);
-
 		/*
 		 * note that pg can't be PG_RELEASED since we did not drop
 		 * the object lock since the last time we checked.
 		 */
 		KASSERT((pg->flags & PG_RELEASED) == 0);
-
-		pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED);
-		UVM_PAGE_OWN(pg, NULL);
+		pg->flags &= ~PG_FAKE;
 
 		uvmfault_unlockall(ufi, amap, uobj);
 		if (!uvm_reclaimable()) {
@@ -2308,10 +2290,10 @@ uvm_fault_lower_enter(
 	 * lock since the last time we checked.
 	 */
 	KASSERT((pg->flags & PG_RELEASED) == 0);
-	if (pg->flags & PG_WANTED)
-		wakeup(pg);
-	pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED);
-	UVM_PAGE_OWN(pg, NULL);
+	uvm_pagelock(pg);
+	uvm_pageunbusy(pg);
+	uvm_pageunlock(pg);
+	pg->flags &= ~PG_FAKE;
 
 	pmap_update(ufi->orig_map->pmap);
 	uvmfault_unlockall(ufi, amap, uobj);

Index: src/sys/uvm/uvm_km.c
diff -u src/sys/uvm/uvm_km.c:1.156 src/sys/uvm/uvm_km.c:1.157
--- src/sys/uvm/uvm_km.c:1.156	Mon Feb 24 12:38:57 2020
+++ src/sys/uvm/uvm_km.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_km.c,v 1.156 2020/02/24 12:38:57 rin Exp $	*/
+/*	$NetBSD: uvm_km.c,v 1.157 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -152,7 +152,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.156 2020/02/24 12:38:57 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.157 2020/03/14 20:23:51 ad Exp $");
 
 #include "opt_uvmhist.h"
 
@@ -456,9 +456,7 @@ uvm_km_pgremove(vaddr_t startva, vaddr_t
 		nextoff = curoff + PAGE_SIZE;
 		pg = uvm_pagelookup(uobj, curoff);
 		if (pg != NULL && pg->flags & PG_BUSY) {
-			pg->flags |= PG_WANTED;
-			UVM_UNLOCK_AND_WAIT_RW(pg, uobj->vmobjlock, 0,
-				    "km_pgrm", 0);
+			uvm_pagewait(pg, uobj->vmobjlock, "km_pgrm");
 			rw_enter(uobj->vmobjlock, RW_WRITER);
 			nextoff = curoff;
 			continue;
@@ -569,7 +567,7 @@ uvm_km_check_empty(struct vm_map *map, v
 		 * - we can recurse when allocating radix_node for
 		 *   kernel_object.
 		 */
-		if (rw_tryenter(uvm_kernel_object->vmobjlock, RW_WRITER)) {
+		if (rw_tryenter(uvm_kernel_object->vmobjlock, RW_READER)) {
 			struct vm_page *pg;
 
 			pg = uvm_pagelookup(uvm_kernel_object,

Index: src/sys/uvm/uvm_loan.c
diff -u src/sys/uvm/uvm_loan.c:1.96 src/sys/uvm/uvm_loan.c:1.97
--- src/sys/uvm/uvm_loan.c:1.96	Mon Feb 24 21:06:11 2020
+++ src/sys/uvm/uvm_loan.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_loan.c,v 1.96 2020/02/24 21:06:11 ad Exp $	*/
+/*	$NetBSD: uvm_loan.c,v 1.97 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.96 2020/02/24 21:06:11 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.97 2020/03/14 20:23:51 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -705,9 +705,6 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, 
 		 */
 
 		if (locked == false) {
-			if (pg->flags & PG_WANTED) {
-				wakeup(pg);
-			}
 			if (pg->flags & PG_RELEASED) {
 				uvm_pagefree(pg);
 				rw_exit(uobj->vmobjlock);
@@ -715,9 +712,8 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, 
 			}
 			uvm_pagelock(pg);
 			uvm_pageactivate(pg);
+			uvm_pageunbusy(pg);
 			uvm_pageunlock(pg);
-			pg->flags &= ~(PG_BUSY|PG_WANTED);
-			UVM_PAGE_OWN(pg, NULL);
 			rw_exit(uobj->vmobjlock);
 			return (0);
 		}
@@ -754,11 +750,9 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, 
 		/* XXX: locking */
 		anon = pg->uanon;
 		anon->an_ref++;
-		if (pg->flags & PG_WANTED) {
-			wakeup(pg);
-		}
-		pg->flags &= ~(PG_WANTED|PG_BUSY);
-		UVM_PAGE_OWN(pg, NULL);
+		uvm_pagelock(pg);
+		uvm_pageunbusy(pg);
+		uvm_pageunlock(pg);
 		rw_exit(uobj->vmobjlock);
 		**output = anon;
 		(*output)++;
@@ -787,12 +781,8 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, 
 	anon->an_page = pg;
 	anon->an_lock = /* TODO: share amap lock */
 	uvm_pageactivate(pg);
+	uvm_pageunbusy(pg);
 	uvm_pageunlock(pg);
-	if (pg->flags & PG_WANTED) {
-		wakeup(pg);
-	}
-	pg->flags &= ~(PG_WANTED|PG_BUSY);
-	UVM_PAGE_OWN(pg, NULL);
 	rw_exit(uobj->vmobjlock);
 	rw_exit(&anon->an_lock);
 	**output = anon;
@@ -804,11 +794,9 @@ fail:
 	/*
 	 * unlock everything and bail out.
 	 */
-	if (pg->flags & PG_WANTED) {
-		wakeup(pg);
-	}
-	pg->flags &= ~(PG_WANTED|PG_BUSY);
-	UVM_PAGE_OWN(pg, NULL);
+	uvm_pagelock(pg);
+	uvm_pageunbusy(pg);
+	uvm_pageunlock(pg);
 	uvmfault_unlockall(ufi, amap, uobj, NULL);
 	if (anon) {
 		anon->an_ref--;
@@ -863,12 +851,12 @@ again:
 		}
 
 		/* got a zero'd page. */
-		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
+		pg->flags &= ~PG_FAKE;
 		pg->flags |= PG_RDONLY;
 		uvm_pagelock(pg);
 		uvm_pageactivate(pg);
+		uvm_pageunbusy(pg);
 		uvm_pageunlock(pg);
-		UVM_PAGE_OWN(pg, NULL);
 	}
 
 	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
@@ -1133,11 +1121,7 @@ uvm_loanbreak(struct vm_page *uobjpage)
 	pg->flags &= ~PG_FAKE;
 	KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY);
 	pmap_page_protect(uobjpage, VM_PROT_NONE);
-	if (uobjpage->flags & PG_WANTED)
-		wakeup(uobjpage);
 	/* uobj still locked */
-	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
-	UVM_PAGE_OWN(uobjpage, NULL);
 
 	/*
 	 * if the page is no longer referenced by
@@ -1146,6 +1130,7 @@ uvm_loanbreak(struct vm_page *uobjpage)
 	 */
 
 	uvm_pagelock2(uobjpage, pg);
+	uvm_pageunbusy(uobjpage);
 	if (uobjpage->uanon == NULL)
 		uvm_pagedequeue(uobjpage);
 

Index: src/sys/uvm/uvm_page.c
diff -u src/sys/uvm/uvm_page.c:1.230 src/sys/uvm/uvm_page.c:1.231
--- src/sys/uvm/uvm_page.c:1.230	Tue Mar  3 08:13:44 2020
+++ src/sys/uvm/uvm_page.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_page.c,v 1.230 2020/03/03 08:13:44 skrll Exp $	*/
+/*	$NetBSD: uvm_page.c,v 1.231 2020/03/14 20:23:51 ad Exp $	*/
 
 /*-
  * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
@@ -95,7 +95,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.230 2020/03/03 08:13:44 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.231 2020/03/14 20:23:51 ad Exp $");
 
 #include "opt_ddb.h"
 #include "opt_uvm.h"
@@ -1572,10 +1572,11 @@ uvm_pagefree(struct vm_page *pg)
 			pg->uanon->an_page = NULL;
 			pg->uanon = NULL;
 		}
-		if (pg->flags & PG_WANTED) {
+		if (pg->pqflags & PQ_WANTED) {
 			wakeup(pg);
 		}
-		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
+		pg->pqflags &= ~PQ_WANTED;
+		pg->flags &= ~(PG_BUSY|PG_RELEASED|PG_PAGER1);
 #ifdef UVM_PAGE_TRKOWN
 		pg->owner_tag = NULL;
 #endif
@@ -1621,6 +1622,14 @@ uvm_pagefree(struct vm_page *pg)
 	}
 	if (locked) {
 		/*
+		 * wake anyone waiting on the page.
+		 */
+		if ((pg->pqflags & PQ_WANTED) != 0) {
+			pg->pqflags &= ~PQ_WANTED;
+			wakeup(pg);
+		}
+
+		/*
 		 * now remove the page from the queues.
 		 */
 		uvm_pagedequeue(pg);
@@ -1691,10 +1700,6 @@ uvm_page_unbusy(struct vm_page **pgs, in
 		KASSERT(uvm_page_owner_locked_p(pg, true));
 		KASSERT(pg->flags & PG_BUSY);
 		KASSERT((pg->flags & PG_PAGEOUT) == 0);
-		if (pg->flags & PG_WANTED) {
-			/* XXXAD thundering herd problem. */
-			wakeup(pg);
-		}
 		if (pg->flags & PG_RELEASED) {
 			UVMHIST_LOG(ubchist, "releasing pg %#jx",
 			    (uintptr_t)pg, 0, 0, 0);
@@ -1703,15 +1708,62 @@ uvm_page_unbusy(struct vm_page **pgs, in
 			pg->flags &= ~PG_RELEASED;
 			uvm_pagefree(pg);
 		} else {
-			UVMHIST_LOG(ubchist, "unbusying pg %#jx",
-			    (uintptr_t)pg, 0, 0, 0);
 			KASSERT((pg->flags & PG_FAKE) == 0);
-			pg->flags &= ~(PG_WANTED|PG_BUSY);
-			UVM_PAGE_OWN(pg, NULL);
+			uvm_pagelock(pg);
+			uvm_pageunbusy(pg);
+			uvm_pageunlock(pg);
 		}
 	}
 }
 
+/*
+ * uvm_pagewait: wait for a busy page
+ *
+ * => page must be known PG_BUSY
+ * => object must be read or write locked
+ * => object will be unlocked on return
+ */
+
+void
+uvm_pagewait(struct vm_page *pg, krwlock_t *lock, const char *wmesg)
+{
+
+	KASSERT(rw_lock_held(lock));
+	KASSERT((pg->flags & PG_BUSY) != 0);
+	KASSERT(uvm_page_owner_locked_p(pg, false));
+
+	mutex_enter(&pg->interlock);
+	pg->pqflags |= PQ_WANTED;
+	rw_exit(lock);
+	UVM_UNLOCK_AND_WAIT(pg, &pg->interlock, false, wmesg, 0);
+}
+
+/*
+ * uvm_pageunbusy: unbusy a single page
+ *
+ * => page must be known PG_BUSY
+ * => object must be write locked
+ * => page interlock must be held
+ */
+
+void
+uvm_pageunbusy(struct vm_page *pg)
+{
+
+	KASSERT((pg->flags & PG_BUSY) != 0);
+	KASSERT(uvm_page_owner_locked_p(pg, true));
+	KASSERT(mutex_owned(&pg->interlock));
+
+	UVMHIST_LOG(ubchist, "unbusying pg %#jx", (uintptr_t)pg, 0, 0, 0);
+
+	if ((pg->pqflags & PQ_WANTED) != 0) {
+		wakeup(pg);
+		pg->pqflags &= ~PQ_WANTED;
+	}
+	pg->flags &= ~PG_BUSY;
+	UVM_PAGE_OWN(pg, NULL);
+}
+
 #if defined(UVM_PAGE_TRKOWN)
 /*
  * uvm_page_own: set or release page ownership
@@ -1727,7 +1779,6 @@ uvm_page_own(struct vm_page *pg, const c
 {
 
 	KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
-	KASSERT((pg->flags & PG_WANTED) == 0);
 	KASSERT(uvm_page_owner_locked_p(pg, true));
 
 	/* gain ownership? */

Index: src/sys/uvm/uvm_page.h
diff -u src/sys/uvm/uvm_page.h:1.99 src/sys/uvm/uvm_page.h:1.100
--- src/sys/uvm/uvm_page.h:1.99	Fri Mar  6 02:46:17 2020
+++ src/sys/uvm/uvm_page.h	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_page.h,v 1.99 2020/03/06 02:46:17 riastradh Exp $	*/
+/*	$NetBSD: uvm_page.h,v 1.100 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -209,13 +209,8 @@ struct vm_page {
  * PG_BUSY:
  *	Page is long-term locked, usually because of I/O (transfer from the
  *	page memory to the backing store) is in progress.  LWP attempting
- *	to access the page shall set PG_WANTED and wait.
- *
- * PG_WANTED:
- *	Indicates that the page, which is currently PG_BUSY, is wanted by
- *	some other LWP.  The page owner (i.e. LWP which set PG_BUSY) is
- *	responsible to clear both flags and wake up any waiters once it has
- *	released the long-term lock (PG_BUSY).
+ *	to access the page shall set PQ_WANTED and wait.  PG_BUSY may only
+ *	be set with a write lock held on the object.
  *
  * PG_PAGEOUT:
  *	Indicates that the page is being paged-out in preparation for
@@ -251,7 +246,6 @@ struct vm_page {
 #define	PG_CLEAN	0x00000001	/* page is known clean */
 #define	PG_DIRTY	0x00000002	/* page is known dirty */
 #define	PG_BUSY		0x00000004	/* page is locked */
-#define	PG_WANTED	0x00000008	/* someone is waiting for page */
 #define	PG_PAGEOUT	0x00000010	/* page to be freed for pagedaemon */
 #define	PG_RELEASED	0x00000020	/* page to be freed when unbusied */
 #define	PG_FAKE		0x00000040	/* page is not yet initialized */
@@ -272,7 +266,7 @@ struct vm_page {
 #define	PG_SWAPBACKED	(PG_ANON|PG_AOBJ)
 
 #define	UVM_PGFLAGBITS \
-	"\20\1CLEAN\2DIRTY\3BUSY\4WANTED" \
+	"\20\1CLEAN\2DIRTY\3BUSY" \
 	"\5PAGEOUT\6RELEASED\7FAKE\10RDONLY" \
 	"\11ZERO\12TABLED\13AOBJ\14ANON" \
 	"\15FILE\16READAHEAD\17FREE\20MARKER" \
@@ -281,7 +275,21 @@ struct vm_page {
 /*
  * Flags stored in pg->pqflags, which is protected by pg->interlock.
  *
- * PQ_PRIVATE is for uvmpdpol to do whatever it wants with.
+ * PQ_PRIVATE:
+ *	... is for uvmpdpol to do whatever it wants with.
+ *
+ * PQ_INTENT_SET:
+ *	Indicates that the intent set on the page has not yet been realized.
+ *
+ * PQ_INTENT_QUEUED:
+ *	Indicates that the page is, or will soon be, on a per-CPU queue for
+ *	the intent to be realized.
+ *
+ * PQ_WANTED:
+ *	Indicates that the page, which is currently PG_BUSY, is wanted by
+ *	some other LWP.  The page owner (i.e. LWP which set PG_BUSY) is
+ *	responsible to clear both flags and wake up any waiters once it has
+ *	released the long-term lock (PG_BUSY).
  */
 
 #define	PQ_INTENT_A		0x00000000	/* intend activation */
@@ -292,11 +300,13 @@ struct vm_page {
 #define	PQ_INTENT_SET		0x00000004	/* not realized yet */
 #define	PQ_INTENT_QUEUED	0x00000008	/* queued for processing */
 #define	PQ_PRIVATE		0x00000ff0	/* private for pdpolicy */
+#define	PQ_WANTED		0x00001000	/* someone is waiting for page */
 
 #define	UVM_PQFLAGBITS \
 	"\20\1INTENT_0\2INTENT_1\3INTENT_SET\4INTENT_QUEUED" \
 	"\5PRIVATE1\6PRIVATE2\7PRIVATE3\10PRIVATE4" \
-	"\11PRIVATE5\12PRIVATE6\13PRIVATE7\14PRIVATE8"
+	"\11PRIVATE5\12PRIVATE6\13PRIVATE7\14PRIVATE8" \
+	"\15WANTED"
 
 /*
  * physical memory layout structure
@@ -363,6 +373,8 @@ void uvm_pagemarkdirty(struct vm_page *,
 bool uvm_pagecheckdirty(struct vm_page *, bool);
 bool uvm_pagereadonly_p(struct vm_page *);
 bool uvm_page_locked_p(struct vm_page *);
+void uvm_pageunbusy(struct vm_page *);
+void uvm_pagewait(struct vm_page *, krwlock_t *, const char *);
 
 int uvm_page_lookup_freelist(struct vm_page *);
 

Index: src/sys/uvm/uvm_vnode.c
diff -u src/sys/uvm/uvm_vnode.c:1.108 src/sys/uvm/uvm_vnode.c:1.109
--- src/sys/uvm/uvm_vnode.c:1.108	Tue Mar  3 13:32:44 2020
+++ src/sys/uvm/uvm_vnode.c	Sat Mar 14 20:23:51 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_vnode.c,v 1.108 2020/03/03 13:32:44 rjs Exp $	*/
+/*	$NetBSD: uvm_vnode.c,v 1.109 2020/03/14 20:23:51 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -45,7 +45,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.108 2020/03/03 13:32:44 rjs Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.109 2020/03/14 20:23:51 ad Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_uvmhist.h"
@@ -335,11 +335,9 @@ uvn_findpage(struct uvm_object *uobj, vo
 				UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
 				goto skip;
 			}
-			pg->flags |= PG_WANTED;
 			UVMHIST_LOG(ubchist, "wait %#jx (color %ju)",
 			    (uintptr_t)pg, VM_PGCOLOR(pg), 0, 0);
-			UVM_UNLOCK_AND_WAIT_RW(pg, uobj->vmobjlock, 0,
-					       "uvnfp2", 0);
+			uvm_pagewait(pg, uobj->vmobjlock, "uvnfp2");
 			uvm_page_array_clear(a);
 			rw_enter(uobj->vmobjlock, RW_WRITER);
 			continue;
