Module Name:    src
Committed By:   ad
Date:           Mon Dec 30 18:08:38 UTC 2019

Modified Files:
        src/sys/uvm: uvm_pdaemon.c uvm_pdaemon.h uvm_pdpolicy.h
            uvm_pdpolicy_clock.c uvm_pdpolicy_clockpro.c

Log Message:
pagedaemon:

- Use marker pages to keep place in the queue when scanning, rather than
  relying on assumptions about the next page remaining on the queue (a
  minimal sketch of the technique follows the PR list below).

- In uvmpdpol_balancequeue(), lock the object once instead of twice.

- When draining pools the situation is already desperate, but still try to
  avoid saturating the system with xcall, lock and interrupt activity: sleep
  for 1 clock tick if the drain thread is being continually awoken and all
  pools have been cycled through at least once (see the second sketch after
  the PR list below).

- Pause & resume the freelist cache during pool draining.

PR kern/54209: NetBSD 8 large memory performance extremely low
PR kern/54210: NetBSD-8 processes presumably not exiting
PR kern/54727: writing a large file causes unreasonable system behaviour
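
For anyone who has not seen the marker technique before, here is a minimal
userspace sketch of the idea behind the uvm_pdpolicy_clock.c changes.  The
"struct page", PG_MARKER flag and queue below are illustrative stand-ins
only, not the kernel's definitions; the point is simply that a dummy queue
entry records the scan position, so the scan can pick up where it left off
even if pages are removed or re-queued between visits.

/*
 * Minimal userspace analogue of scanning a page queue with a marker
 * entry.  "struct page", PG_MARKER and the queue are stand-ins, not
 * the kernel's definitions.
 */
#include <sys/queue.h>
#include <stdio.h>
#include <string.h>

#define	PG_MARKER	0x01

struct page {
	int			flags;
	int			id;
	TAILQ_ENTRY(page)	pageq;
};

TAILQ_HEAD(pagelist, page);

/*
 * Walk the queue via a marker: after looking at each page, re-insert
 * the marker just after it.  Because the marker is itself a queue
 * member, the scan position survives even if other pages are removed
 * or re-queued between visits (in the kernel this can happen whenever
 * the queue lock is dropped).
 */
static void
scan_queue(struct pagelist *q)
{
	struct page marker, *pg;

	memset(&marker, 0, sizeof(marker));
	marker.flags = PG_MARKER;
	TAILQ_INSERT_HEAD(q, &marker, pageq);

	while ((pg = TAILQ_NEXT(&marker, pageq)) != NULL) {
		/* advance the marker past the page we are visiting */
		TAILQ_REMOVE(q, &marker, pageq);
		TAILQ_INSERT_AFTER(q, pg, &marker, pageq);

		if (pg->flags & PG_MARKER)
			continue;	/* someone else's marker, skip it */
		printf("visiting page %d\n", pg->id);
	}

	TAILQ_REMOVE(q, &marker, pageq);
}

int
main(void)
{
	struct pagelist q = TAILQ_HEAD_INITIALIZER(q);
	struct page pages[4];
	int i;

	for (i = 0; i < 4; i++) {
		pages[i].flags = 0;
		pages[i].id = i;
		TAILQ_INSERT_TAIL(&q, &pages[i], pageq);
	}
	scan_queue(&q);
	return 0;
}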

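The rate limiting added to uvmpd_pool_drain_thread() can be sketched outside
the kernel as well.  In the hypothetical stand-ins below, ticks(),
wait_for_wakeup() and drain_one_pool() play the roles of hardclock_ticks,
the condvar wait and pool_drain(), and nanosleep() stands in for kpause();
none of them are real kernel interfaces.  The sketch only shows the pattern:
remember the first pool returned, treat seeing it again as having cycled
through every pool, and if that happens without ever managing to sleep, back
off for roughly one tick.

#define _POSIX_C_SOURCE 199309L
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* ---- hypothetical stand-ins for kernel facilities ---- */

static int
ticks(void)			/* plays the role of hardclock_ticks */
{
	return 0;		/* never advances: we are "continually awoken" */
}

static void
wait_for_wakeup(void)		/* plays the role of cv_wait() on the drain cv */
{
	/* pretend we were woken immediately, without ever sleeping */
}

static const char *
drain_one_pool(void)		/* plays the role of pool_drain(&curpool) */
{
	static const char *pools[] = { "poolA", "poolB", "poolC" };
	static unsigned i;

	return pools[i++ % (sizeof(pools) / sizeof(pools[0]))];
}

/* ---- the rate-limiting pattern from the new drain loop ---- */

int
main(void)
{
	const char *firstpool = NULL, *curpool;
	int lastslept, iter;
	bool cycled = true;

	for (iter = 0; iter < 10; iter++) {
		lastslept = ticks();
		wait_for_wakeup();
		if (ticks() != lastslept) {
			/* we really slept: pressure let up, start over */
			cycled = false;
			firstpool = NULL;
		}

		/*
		 * every pool has been drained without us ever getting to
		 * sleep: back off briefly (kpause() for one tick in the
		 * kernel) so the rest of the system is not swamped with
		 * xcall/lock/interrupt traffic.
		 */
		if (cycled) {
			struct timespec ts = { 0, 10 * 1000 * 1000 };

			(void)nanosleep(&ts, NULL);
			printf("paused for one \"tick\"\n");
			cycled = false;
			firstpool = NULL;
		}

		curpool = drain_one_pool();
		printf("drained %s\n", curpool);
		if (firstpool == NULL)
			firstpool = curpool;
		else if (firstpool == curpool)
			cycled = true;	/* wrapped around the pool list */
	}
	return 0;
}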

To generate a diff of this commit:
cvs rdiff -u -r1.118 -r1.119 src/sys/uvm/uvm_pdaemon.c
cvs rdiff -u -r1.17 -r1.18 src/sys/uvm/uvm_pdaemon.h
cvs rdiff -u -r1.4 -r1.5 src/sys/uvm/uvm_pdpolicy.h
cvs rdiff -u -r1.23 -r1.24 src/sys/uvm/uvm_pdpolicy_clock.c
cvs rdiff -u -r1.19 -r1.20 src/sys/uvm/uvm_pdpolicy_clockpro.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/uvm/uvm_pdaemon.c
diff -u src/sys/uvm/uvm_pdaemon.c:1.118 src/sys/uvm/uvm_pdaemon.c:1.119
--- src/sys/uvm/uvm_pdaemon.c:1.118	Sat Dec 21 16:10:20 2019
+++ src/sys/uvm/uvm_pdaemon.c	Mon Dec 30 18:08:37 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_pdaemon.c,v 1.118 2019/12/21 16:10:20 ad Exp $	*/
+/*	$NetBSD: uvm_pdaemon.c,v 1.119 2019/12/30 18:08:37 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.118 2019/12/21 16:10:20 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.119 2019/12/30 18:08:37 ad Exp $");
 
 #include "opt_uvmhist.h"
 #include "opt_readahead.h"
@@ -83,6 +83,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.
 
 #include <uvm/uvm.h>
 #include <uvm/uvm_pdpolicy.h>
+#include <uvm/uvm_pgflcache.h>
 
 #ifdef UVMHIST
 UVMHIST_DEFINE(pdhist);
@@ -598,7 +599,7 @@ swapcluster_nused(struct swapcluster *sw
  * => return true if a page had an associated slot.
  */
 
-static bool
+bool
 uvmpd_dropswap(struct vm_page *pg)
 {
 	bool result = false;
@@ -622,50 +623,6 @@ uvmpd_dropswap(struct vm_page *pg)
 	return result;
 }
 
-/*
- * uvmpd_trydropswap: try to free any swap allocated to this page.
- *
- * => return true if a slot is successfully freed.
- * => page interlock must be held, and will be dropped.
- */
-
-bool
-uvmpd_trydropswap(struct vm_page *pg)
-{
-	kmutex_t *slock;
-	bool result;
-
-	if ((pg->flags & PG_BUSY) != 0) {
-		mutex_exit(&pg->interlock);
-		return false;
-	}
-
-	/*
-	 * lock the page's owner.
-	 * this will drop pg->interlock.
-	 */
-
-	slock = uvmpd_trylockowner(pg);
-	if (slock == NULL) {
-		return false;
-	}
-
-	/*
-	 * skip this page if it's busy.
-	 */
-
-	if ((pg->flags & PG_BUSY) != 0) {
-		mutex_exit(slock);
-		return false;
-	}
-
-	result = uvmpd_dropswap(pg);
-
-	mutex_exit(slock);
-
-	return result;
-}
-
 #endif /* defined(VMSWAP) */
 
 /*
@@ -909,6 +866,8 @@ uvmpd_scan_queue(void)
 #endif /* defined(VMSWAP) */
 	}
 
+	uvmpdpol_scanfini();
+
 #if defined(VMSWAP)
 	swapcluster_flush(&swc, true);
 #endif /* defined(VMSWAP) */
@@ -1031,17 +990,44 @@ uvm_estimatepageable(int *active, int *i
 static void
 uvmpd_pool_drain_thread(void *arg)
 {
-	int bufcnt;
+	struct pool *firstpool, *curpool;
+	int bufcnt, lastslept;
+	bool cycled;
 
+	firstpool = NULL;
+	cycled = true;
 	for (;;) {
+		/*
+		 * sleep until awoken by the pagedaemon.
+		 */
 		mutex_enter(&uvmpd_lock);
 		if (!uvmpd_pool_drain_run) {
+			lastslept = hardclock_ticks;
 			cv_wait(&uvmpd_pool_drain_cv, &uvmpd_lock);
+			if (hardclock_ticks != lastslept) {
+				cycled = false;
+				firstpool = NULL;
+			}
 		}
 		uvmpd_pool_drain_run = false;
 		mutex_exit(&uvmpd_lock);
 
 		/*
+		 * rate limit draining, otherwise in desperate circumstances
+		 * this can totally saturate the system with xcall activity.
+		 */
+		if (cycled) {
+			kpause("uvmpdlmt", false, 1, NULL);
+			cycled = false;
+			firstpool = NULL;
+		}
+
+		/*
+		 * drain and temporarily disable the freelist cache.
+		 */
+		uvm_pgflcache_pause();
+
+		/*
 		 * kill unused metadata buffers.
 		 */
 		bufcnt = uvmexp.freetarg - uvm_free();
@@ -1053,9 +1039,16 @@ uvmpd_pool_drain_thread(void *arg)
 		mutex_exit(&bufcache_lock);
 
 		/*
-		 * drain a pool.
+		 * drain a pool, and then re-enable the freelist cache. 
 		 */
-		pool_drain(NULL);
+		(void)pool_drain(&curpool);
+		KASSERT(curpool != NULL);
+		if (firstpool == NULL) {
+			firstpool = curpool;
+		} else if (firstpool == curpool) {
+			cycled = true;
+		}
+		uvm_pgflcache_resume();
 	}
 	/*NOTREACHED*/
 }

Index: src/sys/uvm/uvm_pdaemon.h
diff -u src/sys/uvm/uvm_pdaemon.h:1.17 src/sys/uvm/uvm_pdaemon.h:1.18
--- src/sys/uvm/uvm_pdaemon.h:1.17	Wed Feb  2 15:25:27 2011
+++ src/sys/uvm/uvm_pdaemon.h	Mon Dec 30 18:08:38 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_pdaemon.h,v 1.17 2011/02/02 15:25:27 chuck Exp $	*/
+/*	$NetBSD: uvm_pdaemon.h,v 1.18 2019/12/30 18:08:38 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -79,9 +79,9 @@ bool uvm_reclaimable(void);
 
 kmutex_t *uvmpd_trylockowner(struct vm_page *);
 #ifdef VMSWAP
-bool uvmpd_trydropswap(struct vm_page *);
+bool uvmpd_dropswap(struct vm_page *);
 #else
-#define uvmpd_trydropswap(_a_) (/*CONSTCOND*/false)
+#define uvmpd_dropswap(_a_) (/*CONSTCOND*/false)
 #endif
 
 #endif /* _KERNEL */

Index: src/sys/uvm/uvm_pdpolicy.h
diff -u src/sys/uvm/uvm_pdpolicy.h:1.4 src/sys/uvm/uvm_pdpolicy.h:1.5
--- src/sys/uvm/uvm_pdpolicy.h:1.4	Fri Dec 13 20:10:22 2019
+++ src/sys/uvm/uvm_pdpolicy.h	Mon Dec 30 18:08:38 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_pdpolicy.h,v 1.4 2019/12/13 20:10:22 ad Exp $	*/
+/*	$NetBSD: uvm_pdpolicy.h,v 1.5 2019/12/30 18:08:38 ad Exp $	*/
 
 /*-
  * Copyright (c)2005, 2006 YAMAMOTO Takashi,
@@ -51,6 +51,7 @@ void uvmpdpol_anfree(struct vm_anon *);
 
 void uvmpdpol_tune(void);
 void uvmpdpol_scaninit(void);
+void uvmpdpol_scanfini(void);
 struct vm_page *uvmpdpol_selectvictim(kmutex_t **lock);
 void uvmpdpol_balancequeue(int);
 

Index: src/sys/uvm/uvm_pdpolicy_clock.c
diff -u src/sys/uvm/uvm_pdpolicy_clock.c:1.23 src/sys/uvm/uvm_pdpolicy_clock.c:1.24
--- src/sys/uvm/uvm_pdpolicy_clock.c:1.23	Fri Dec 27 13:13:17 2019
+++ src/sys/uvm/uvm_pdpolicy_clock.c	Mon Dec 30 18:08:38 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.23 2019/12/27 13:13:17 ad Exp $	*/
+/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.24 2019/12/30 18:08:38 ad Exp $	*/
 /*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/
 
 /*
@@ -69,7 +69,7 @@
 #else /* defined(PDSIM) */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.23 2019/12/27 13:13:17 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.24 2019/12/30 18:08:38 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
@@ -110,9 +110,8 @@ struct uvmpdpol_globalstate {
 };
 
 struct uvmpdpol_scanstate {
-	bool ss_first;
 	bool ss_anonreact, ss_filereact, ss_execreact;
-	struct vm_page *ss_nextpg;
+	struct vm_page ss_marker;
 };
 
 static void	uvmpdpol_pageactivate_locked(struct vm_page *);
@@ -177,8 +176,20 @@ uvmpdpol_scaninit(void)
 	ss->ss_anonreact = anonreact;
 	ss->ss_filereact = filereact;
 	ss->ss_execreact = execreact;
+	memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
+	ss->ss_marker.flags = PG_MARKER;
+	TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
+	mutex_exit(&s->lock);
+}
+
+void
+uvmpdpol_scanfini(void)
+{
+	struct uvmpdpol_globalstate *s = &pdpol_state;
+	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
 
-	ss->ss_first = true;
+	mutex_enter(&s->lock);
+	TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
 	mutex_exit(&s->lock);
 }
 
@@ -195,19 +206,11 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		struct vm_anon *anon;
 		struct uvm_object *uobj;
 
-		if (ss->ss_first) {
-			pg = TAILQ_FIRST(&pdpol_state.s_inactiveq);
-			ss->ss_first = false;
-		} else {
-			pg = ss->ss_nextpg;
-			if (pg != NULL && (pg->pqflags & PQ_INACTIVE) == 0) {
-				pg = TAILQ_FIRST(&pdpol_state.s_inactiveq);
-			}
-		}
+		pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
 		if (pg == NULL) {
 			break;
 		}
-		ss->ss_nextpg = TAILQ_NEXT(pg, pdqueue);
+		KASSERT((pg->flags & PG_MARKER) == 0);
 		uvmexp.pdscans++;
 
 		/*
@@ -225,6 +228,14 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		}
 
 		/*
+		 * now prepare to move on to the next page.
+		 */
+		TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
+		    pdqueue);
+		TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
+		    &ss->ss_marker, pdqueue);
+
+		/*
 		 * enforce the minimum thresholds on different
 		 * types of memory usage.  if reusing the current
 		 * page would reduce that type of usage below its
@@ -300,7 +311,7 @@ uvmpdpol_balancequeue(int swap_shortage)
 {
 	struct uvmpdpol_globalstate *s = &pdpol_state;
 	int inactive_shortage;
-	struct vm_page *p, *nextpg;
+	struct vm_page *p, marker;
 	kmutex_t *lock;
 
 	/*
@@ -308,34 +319,22 @@ uvmpdpol_balancequeue(int swap_shortage)
 	 * our inactive target.
 	 */
 
-	mutex_enter(&s->lock);
-	inactive_shortage = pdpol_state.s_inactarg - pdpol_state.s_inactive;
-	for (p = TAILQ_FIRST(&pdpol_state.s_activeq);
-	     p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
-	     p = nextpg) {
-		nextpg = TAILQ_NEXT(p, pdqueue);
+	memset(&marker, 0, sizeof(marker));
+	marker.flags = PG_MARKER;
 
-		/*
-		 * if there's a shortage of swap slots, try to free it.
-		 */
-
-		if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0) {
-			mutex_enter(&p->interlock);
-			mutex_exit(&s->lock);
-			if (uvmpd_trydropswap(p)) {
-				swap_shortage--;
-			}
-			/* p->interlock now released */
-			mutex_enter(&s->lock);
+	mutex_enter(&s->lock);
+	TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue);
+	for (;;) {
+		inactive_shortage =
+		    pdpol_state.s_inactarg - pdpol_state.s_inactive;
+		if (inactive_shortage <= 0 && swap_shortage <= 0) {
+			break;
 		}
-
-		/*
-		 * if there's a shortage of inactive pages, deactivate.
-		 */
-
-		if (inactive_shortage <= 0) {
-			continue;
+		p = TAILQ_NEXT(&marker, pdqueue);
+		if (p == NULL) {
+			break;
 		}
+		KASSERT((p->flags & PG_MARKER) == 0);
 
 		/*
 		 * acquire interlock to stablize page identity.
@@ -350,17 +349,50 @@ uvmpdpol_balancequeue(int swap_shortage)
 	            	uvmpdpol_pagedequeue_locked(p);
 	            	continue;
 		}
-		mutex_exit(&s->lock);
-		lock = uvmpd_trylockowner(p);
-		/* p->interlock now released */
-		mutex_enter(&s->lock);
-		if (lock != NULL) {
+
+		/*
+		 * now prepare to move on to the next page.
+		 */
+
+		TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
+		TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker,
+		    pdqueue);
+
+		/*
+		 * try to lock the object that owns the page.  see comments
+		 * in uvmpdol_selectvictim().
+	         */
+	        mutex_exit(&s->lock);
+        	lock = uvmpd_trylockowner(p);
+        	/* p->interlock now released */
+        	mutex_enter(&s->lock);
+		if (lock == NULL) {
+			/* didn't get it - try the next page. */
+			continue;
+		}
+
+		/*
+		 * if there's a shortage of swap slots, try to free it.
+		 */
+		if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 &&
+		    (p->flags & PG_BUSY) == 0) {
+			if (uvmpd_dropswap(p)) {
+				swap_shortage--;
+			}
+		}
+
+		/*
+		 * if there's a shortage of inactive pages, deactivate.
+		 */
+
+		if (inactive_shortage > 0) {
 			uvmpdpol_pagedeactivate_locked(p);
 			uvmexp.pddeact++;
 			inactive_shortage--;
-			mutex_exit(lock);
 		}
+		mutex_exit(lock);
 	}
+	TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
 	mutex_exit(&s->lock);
 }
 

Index: src/sys/uvm/uvm_pdpolicy_clockpro.c
diff -u src/sys/uvm/uvm_pdpolicy_clockpro.c:1.19 src/sys/uvm/uvm_pdpolicy_clockpro.c:1.20
--- src/sys/uvm/uvm_pdpolicy_clockpro.c:1.19	Fri Dec 27 13:13:17 2019
+++ src/sys/uvm/uvm_pdpolicy_clockpro.c	Mon Dec 30 18:08:38 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_pdpolicy_clockpro.c,v 1.19 2019/12/27 13:13:17 ad Exp $	*/
+/*	$NetBSD: uvm_pdpolicy_clockpro.c,v 1.20 2019/12/30 18:08:38 ad Exp $	*/
 
 /*-
  * Copyright (c)2005, 2006 YAMAMOTO Takashi,
@@ -43,7 +43,7 @@
 #else /* defined(PDSIM) */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.19 2019/12/27 13:13:17 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.20 2019/12/30 18:08:38 ad Exp $");
 
 #include "opt_ddb.h"
 
@@ -1258,6 +1258,12 @@ uvmpdpol_scaninit(void)
 	mutex_exit(&s->lock);
 }
 
+void
+uvmpdpol_scanfini(void)
+{
+
+}
+
 struct vm_page *
 uvmpdpol_selectvictim(kmutex_t **plock)
 {
@@ -1305,6 +1311,7 @@ static void
 clockpro_dropswap(pageq_t *q, int *todo)
 {
 	struct vm_page *pg;
+	kmutex_t *lock;
 
 	KASSERT(mutex_owned(&clockpro.lock));
 
@@ -1320,10 +1327,30 @@ clockpro_dropswap(pageq_t *q, int *todo)
 			mutex_exit(&pg->interlock);
 			continue;
 		}
-		if (uvmpd_trydropswap(pg)) {
-			(*todo)--;
+
+		/*
+		 * try to lock the object that owns the page.
+	         */
+	        mutex_exit(&clockpro.lock);
+        	lock = uvmpd_trylockowner(pg);
+        	/* pg->interlock now released */
+        	mutex_enter(&clockpro.lock);
+		if (lock == NULL) {
+			/* didn't get it - try the next page. */
+			/* XXXAD lost position in queue */
+			continue;
 		}
-		/* pg->interlock now dropped */
+
+		/*
+		 * if there's a shortage of swap slots, try to free it.
+		 */
+		if ((pg->flags & PG_SWAPBACKED) != 0 &&
+		    (pg->flags & PG_BUSY) == 0) {
+			if (uvmpd_dropswap(pg)) {
+				(*todo)--;
+			}
+		}
+		mutex_exit(lock);
 	}
 }
 
