Module Name:	src
Committed By:	ad
Date:		Mon Dec 30 18:08:38 UTC 2019
Modified Files:
	src/sys/uvm: uvm_pdaemon.c uvm_pdaemon.h uvm_pdpolicy.h
	    uvm_pdpolicy_clock.c uvm_pdpolicy_clockpro.c

Log Message:
pagedaemon:

- Use marker pages to keep place in the queue when scanning, rather than
  relying on assumptions.

- In uvmpdpol_balancequeue(), lock the object once instead of twice.

- When draining pools, the situation is getting desperate, but try to avoid
  saturating the system with xcall, lock and interrupt activity by sleeping
  for 1 clock tick if being continually awoken and all pools have been
  cycled through at least once.

- Pause & resume the freelist cache during pool draining.

PR kern/54209: NetBSD 8 large memory performance extremely low
PR kern/54210: NetBSD-8 processes presumably not exiting
PR kern/54727: writing a large file causes unreasonable system behaviour


To generate a diff of this commit:
cvs rdiff -u -r1.118 -r1.119 src/sys/uvm/uvm_pdaemon.c
cvs rdiff -u -r1.17 -r1.18 src/sys/uvm/uvm_pdaemon.h
cvs rdiff -u -r1.4 -r1.5 src/sys/uvm/uvm_pdpolicy.h
cvs rdiff -u -r1.23 -r1.24 src/sys/uvm/uvm_pdpolicy_clock.c
cvs rdiff -u -r1.19 -r1.20 src/sys/uvm/uvm_pdpolicy_clockpro.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
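The core of the change is the marker page: uvmpdpol_scaninit() inserts a dummy
page flagged PG_MARKER at the head of the inactive queue, uvmpdpol_selectvictim()
always resumes from TAILQ_NEXT() of the marker and re-inserts the marker after
each page it hands out, and uvmpdpol_scanfini() removes it again.  The same idea
in a minimal, self-contained userland sketch (TAILQ from <sys/queue.h>; struct
item, scan() and the sample values are invented for illustration, this is not
the kernel code):

/*
 * Toy illustration of the marker technique: a dummy "marker" entry lives
 * on the queue, the scan always resumes from the element after it, and the
 * marker is moved past each element as it is visited.  The scan position
 * therefore survives the queue being reshuffled, or the visited element
 * being removed, while the queue is unlocked.
 */
#include <sys/queue.h>
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct item {
	TAILQ_ENTRY(item) q;
	bool marker;		/* stands in for PG_MARKER */
	int value;
};

TAILQ_HEAD(itemq, item);

static void
scan(struct itemq *head)
{
	struct item marker = { .marker = true };
	struct item *it;

	TAILQ_INSERT_HEAD(head, &marker, q);
	for (;;) {
		it = TAILQ_NEXT(&marker, q);
		if (it == NULL)
			break;
		assert(!it->marker);	/* mirrors KASSERT(!PG_MARKER) */
		/* move the marker past the item we are about to visit. */
		TAILQ_REMOVE(head, &marker, q);
		TAILQ_INSERT_AFTER(head, it, &marker, q);
		/*
		 * ...here the real pagedaemon may drop the queue lock,
		 * lock the page's owner, even free the page; the marker
		 * keeps our place regardless.
		 */
		printf("visiting %d\n", it->value);
	}
	TAILQ_REMOVE(head, &marker, q);
}

int
main(void)
{
	struct itemq head = TAILQ_HEAD_INITIALIZER(head);
	struct item items[4];

	for (int i = 0; i < 4; i++) {
		items[i].marker = false;
		items[i].value = i;
		TAILQ_INSERT_TAIL(&head, &items[i], q);
	}
	scan(&head);
	return 0;
}

Because the marker itself stays on the queue, no assumption needs to be made
about the previously visited page still being inactive when the scan resumes.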
Modified files:

Index: src/sys/uvm/uvm_pdaemon.c
diff -u src/sys/uvm/uvm_pdaemon.c:1.118 src/sys/uvm/uvm_pdaemon.c:1.119
--- src/sys/uvm/uvm_pdaemon.c:1.118	Sat Dec 21 16:10:20 2019
+++ src/sys/uvm/uvm_pdaemon.c	Mon Dec 30 18:08:37 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pdaemon.c,v 1.118 2019/12/21 16:10:20 ad Exp $ */
+/* $NetBSD: uvm_pdaemon.c,v 1.119 2019/12/30 18:08:37 ad Exp $ */
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.118 2019/12/21 16:10:20 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.119 2019/12/30 18:08:37 ad Exp $");
 
 #include "opt_uvmhist.h"
 #include "opt_readahead.h"
@@ -83,6 +83,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.
 
 #include <uvm/uvm.h>
 #include <uvm/uvm_pdpolicy.h>
+#include <uvm/uvm_pgflcache.h>
 
 #ifdef UVMHIST
 UVMHIST_DEFINE(pdhist);
@@ -598,7 +599,7 @@ swapcluster_nused(struct swapcluster *sw
  * => return true if a page had an associated slot.
  */
 
-static bool
+bool
 uvmpd_dropswap(struct vm_page *pg)
 {
 	bool result = false;
@@ -622,50 +623,6 @@ uvmpd_dropswap(struct vm_page *pg)
 	return result;
 }
 
-/*
- * uvmpd_trydropswap: try to free any swap allocated to this page.
- *
- * => return true if a slot is successfully freed.
- * => page interlock must be held, and will be dropped.
- */
-
-bool
-uvmpd_trydropswap(struct vm_page *pg)
-{
-	kmutex_t *slock;
-	bool result;
-
-	if ((pg->flags & PG_BUSY) != 0) {
-		mutex_exit(&pg->interlock);
-		return false;
-	}
-
-	/*
-	 * lock the page's owner.
-	 * this will drop pg->interlock.
-	 */
-
-	slock = uvmpd_trylockowner(pg);
-	if (slock == NULL) {
-		return false;
-	}
-
-	/*
-	 * skip this page if it's busy.
-	 */
-
-	if ((pg->flags & PG_BUSY) != 0) {
-		mutex_exit(slock);
-		return false;
-	}
-
-	result = uvmpd_dropswap(pg);
-
-	mutex_exit(slock);
-
-	return result;
-}
-
 #endif /* defined(VMSWAP) */
 
 /*
@@ -909,6 +866,8 @@ uvmpd_scan_queue(void)
 #endif /* defined(VMSWAP) */
 	}
 
+	uvmpdpol_scanfini();
+
 #if defined(VMSWAP)
 	swapcluster_flush(&swc, true);
 #endif /* defined(VMSWAP) */
@@ -1031,17 +990,44 @@ uvm_estimatepageable(int *active, int *i
 static void
 uvmpd_pool_drain_thread(void *arg)
 {
-	int bufcnt;
+	struct pool *firstpool, *curpool;
+	int bufcnt, lastslept;
+	bool cycled;
 
+	firstpool = NULL;
+	cycled = true;
 	for (;;) {
+		/*
+		 * sleep until awoken by the pagedaemon.
+		 */
 		mutex_enter(&uvmpd_lock);
 		if (!uvmpd_pool_drain_run) {
+			lastslept = hardclock_ticks;
 			cv_wait(&uvmpd_pool_drain_cv, &uvmpd_lock);
+			if (hardclock_ticks != lastslept) {
+				cycled = false;
+				firstpool = NULL;
+			}
 		}
 		uvmpd_pool_drain_run = false;
 		mutex_exit(&uvmpd_lock);
 
 		/*
+		 * rate limit draining, otherwise in desperate circumstances
+		 * this can totally saturate the system with xcall activity.
+		 */
+		if (cycled) {
+			kpause("uvmpdlmt", false, 1, NULL);
+			cycled = false;
+			firstpool = NULL;
+		}
+
+		/*
+		 * drain and temporarily disable the freelist cache.
+		 */
+		uvm_pgflcache_pause();
+
+		/*
 		 * kill unused metadata buffers.
 		 */
 		bufcnt = uvmexp.freetarg - uvm_free();
@@ -1053,9 +1039,16 @@ uvmpd_pool_drain_thread(void *arg)
 		mutex_exit(&bufcache_lock);
 
 		/*
-		 * drain a pool.
+		 * drain a pool, and then re-enable the freelist cache.
 		 */
-		pool_drain(NULL);
+		(void)pool_drain(&curpool);
+		KASSERT(curpool != NULL);
+		if (firstpool == NULL) {
+			firstpool = curpool;
+		} else if (firstpool == curpool) {
+			cycled = true;
+		}
+		uvm_pgflcache_resume();
 	}
 	/*NOTREACHED*/
 }

Index: src/sys/uvm/uvm_pdaemon.h
diff -u src/sys/uvm/uvm_pdaemon.h:1.17 src/sys/uvm/uvm_pdaemon.h:1.18
--- src/sys/uvm/uvm_pdaemon.h:1.17	Wed Feb 2 15:25:27 2011
+++ src/sys/uvm/uvm_pdaemon.h	Mon Dec 30 18:08:38 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pdaemon.h,v 1.17 2011/02/02 15:25:27 chuck Exp $ */
+/* $NetBSD: uvm_pdaemon.h,v 1.18 2019/12/30 18:08:38 ad Exp $ */
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -79,9 +79,9 @@ bool uvm_reclaimable(void);
 
 kmutex_t *uvmpd_trylockowner(struct vm_page *);
 #ifdef VMSWAP
-bool uvmpd_trydropswap(struct vm_page *);
+bool uvmpd_dropswap(struct vm_page *);
 #else
-#define uvmpd_trydropswap(_a_) (/*CONSTCOND*/false)
+#define uvmpd_dropswap(_a_) (/*CONSTCOND*/false)
 #endif
 
 #endif /* _KERNEL */

Index: src/sys/uvm/uvm_pdpolicy.h
diff -u src/sys/uvm/uvm_pdpolicy.h:1.4 src/sys/uvm/uvm_pdpolicy.h:1.5
--- src/sys/uvm/uvm_pdpolicy.h:1.4	Fri Dec 13 20:10:22 2019
+++ src/sys/uvm/uvm_pdpolicy.h	Mon Dec 30 18:08:38 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pdpolicy.h,v 1.4 2019/12/13 20:10:22 ad Exp $ */
+/* $NetBSD: uvm_pdpolicy.h,v 1.5 2019/12/30 18:08:38 ad Exp $ */
 
 /*-
  * Copyright (c)2005, 2006 YAMAMOTO Takashi,
@@ -51,6 +51,7 @@ void uvmpdpol_anfree(struct vm_anon *);
 
 void uvmpdpol_tune(void);
 void uvmpdpol_scaninit(void);
+void uvmpdpol_scanfini(void);
 struct vm_page *uvmpdpol_selectvictim(kmutex_t **lock);
 void uvmpdpol_balancequeue(int);

Index: src/sys/uvm/uvm_pdpolicy_clock.c
diff -u src/sys/uvm/uvm_pdpolicy_clock.c:1.23 src/sys/uvm/uvm_pdpolicy_clock.c:1.24
--- src/sys/uvm/uvm_pdpolicy_clock.c:1.23	Fri Dec 27 13:13:17 2019
+++ src/sys/uvm/uvm_pdpolicy_clock.c	Mon Dec 30 18:08:38 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pdpolicy_clock.c,v 1.23 2019/12/27 13:13:17 ad Exp $ */
+/* $NetBSD: uvm_pdpolicy_clock.c,v 1.24 2019/12/30 18:08:38 ad Exp $ */
 /* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $ */
 
 /*
@@ -69,7 +69,7 @@
 #else /* defined(PDSIM) */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.23 2019/12/27 13:13:17 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.24 2019/12/30 18:08:38 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
@@ -110,9 +110,8 @@ struct uvmpdpol_globalstate {
 };
 
 struct uvmpdpol_scanstate {
-	bool ss_first;
 	bool ss_anonreact, ss_filereact, ss_execreact;
-	struct vm_page *ss_nextpg;
+	struct vm_page ss_marker;
 };
 
 static void uvmpdpol_pageactivate_locked(struct vm_page *);
@@ -177,8 +176,20 @@ uvmpdpol_scaninit(void)
 	ss->ss_anonreact = anonreact;
 	ss->ss_filereact = filereact;
 	ss->ss_execreact = execreact;
+	memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
+	ss->ss_marker.flags = PG_MARKER;
+	TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
+	mutex_exit(&s->lock);
+}
+
+void
+uvmpdpol_scanfini(void)
+{
+	struct uvmpdpol_globalstate *s = &pdpol_state;
+	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
 
-	ss->ss_first = true;
+	mutex_enter(&s->lock);
+	TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
 	mutex_exit(&s->lock);
 }
 
@@ -195,19 +206,11 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		struct vm_anon *anon;
 		struct uvm_object *uobj;
 
-		if (ss->ss_first) {
-			pg = TAILQ_FIRST(&pdpol_state.s_inactiveq);
-			ss->ss_first = false;
-		} else {
-			pg = ss->ss_nextpg;
-			if (pg != NULL && (pg->pqflags & PQ_INACTIVE) == 0) {
-				pg = TAILQ_FIRST(&pdpol_state.s_inactiveq);
-			}
-		}
+		pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
 		if (pg == NULL) {
 			break;
 		}
-		ss->ss_nextpg = TAILQ_NEXT(pg, pdqueue);
+		KASSERT((pg->flags & PG_MARKER) == 0);
 		uvmexp.pdscans++;
 
 		/*
@@ -225,6 +228,14 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		}
 
 		/*
+		 * now prepare to move on to the next page.
+		 */
+		TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
+		    pdqueue);
+		TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
+		    &ss->ss_marker, pdqueue);
+
+		/*
 		 * enforce the minimum thresholds on different
 		 * types of memory usage.  if reusing the current
 		 * page would reduce that type of usage below its
@@ -300,7 +311,7 @@ uvmpdpol_balancequeue(int swap_shortage)
 {
 	struct uvmpdpol_globalstate *s = &pdpol_state;
 	int inactive_shortage;
-	struct vm_page *p, *nextpg;
+	struct vm_page *p, marker;
 	kmutex_t *lock;
 
 	/*
@@ -308,34 +319,22 @@ uvmpdpol_balancequeue(int swap_shortage)
 	 * our inactive target.
 	 */
 
-	mutex_enter(&s->lock);
-	inactive_shortage = pdpol_state.s_inactarg - pdpol_state.s_inactive;
-	for (p = TAILQ_FIRST(&pdpol_state.s_activeq);
-	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
-	    p = nextpg) {
-		nextpg = TAILQ_NEXT(p, pdqueue);
+	memset(&marker, 0, sizeof(marker));
+	marker.flags = PG_MARKER;
 
-		/*
-		 * if there's a shortage of swap slots, try to free it.
-		 */
-
-		if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0) {
-			mutex_enter(&p->interlock);
-			mutex_exit(&s->lock);
-			if (uvmpd_trydropswap(p)) {
-				swap_shortage--;
-			}
-			/* p->interlock now released */
-			mutex_enter(&s->lock);
+	mutex_enter(&s->lock);
+	TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue);
+	for (;;) {
+		inactive_shortage =
+		    pdpol_state.s_inactarg - pdpol_state.s_inactive;
+		if (inactive_shortage <= 0 && swap_shortage <= 0) {
+			break;
 		}
-
-		/*
-		 * if there's a shortage of inactive pages, deactivate.
-		 */
-
-		if (inactive_shortage <= 0) {
-			continue;
+		p = TAILQ_NEXT(&marker, pdqueue);
+		if (p == NULL) {
+			break;
 		}
+		KASSERT((p->flags & PG_MARKER) == 0);
 
 		/*
 		 * acquire interlock to stablize page identity.
@@ -350,17 +349,50 @@ uvmpdpol_balancequeue(int swap_shortage)
 			uvmpdpol_pagedequeue_locked(p);
 			continue;
 		}
-		mutex_exit(&s->lock);
-		lock = uvmpd_trylockowner(p);
-		/* p->interlock now released */
-		mutex_enter(&s->lock);
-		if (lock != NULL) {
+
+		/*
+		 * now prepare to move on to the next page.
+		 */
+
+		TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
+		TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker,
+		    pdqueue);
+
+		/*
+		 * try to lock the object that owns the page.  see comments
+		 * in uvmpdol_selectvictim().
+		 */
+		mutex_exit(&s->lock);
+		lock = uvmpd_trylockowner(p);
+		/* p->interlock now released */
+		mutex_enter(&s->lock);
+		if (lock == NULL) {
+			/* didn't get it - try the next page. */
+			continue;
+		}
+
+		/*
+		 * if there's a shortage of swap slots, try to free it.
+		 */
+		if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 &&
+		    (p->flags & PG_BUSY) == 0) {
+			if (uvmpd_dropswap(p)) {
+				swap_shortage--;
+			}
+		}
+
+		/*
+		 * if there's a shortage of inactive pages, deactivate.
+		 */
+
+		if (inactive_shortage > 0) {
 			uvmpdpol_pagedeactivate_locked(p);
 			uvmexp.pddeact++;
 			inactive_shortage--;
-			mutex_exit(lock);
 		}
+		mutex_exit(lock);
 	}
+	TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
 	mutex_exit(&s->lock);
 }

Index: src/sys/uvm/uvm_pdpolicy_clockpro.c
diff -u src/sys/uvm/uvm_pdpolicy_clockpro.c:1.19 src/sys/uvm/uvm_pdpolicy_clockpro.c:1.20
--- src/sys/uvm/uvm_pdpolicy_clockpro.c:1.19	Fri Dec 27 13:13:17 2019
+++ src/sys/uvm/uvm_pdpolicy_clockpro.c	Mon Dec 30 18:08:38 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.19 2019/12/27 13:13:17 ad Exp $ */
+/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.20 2019/12/30 18:08:38 ad Exp $ */
 
 /*-
  * Copyright (c)2005, 2006 YAMAMOTO Takashi,
@@ -43,7 +43,7 @@
 #else /* defined(PDSIM) */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.19 2019/12/27 13:13:17 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.20 2019/12/30 18:08:38 ad Exp $");
 
 #include "opt_ddb.h"
 
@@ -1258,6 +1258,12 @@ uvmpdpol_scaninit(void)
 	mutex_exit(&s->lock);
 }
 
+void
+uvmpdpol_scanfini(void)
+{
+
+}
+
 struct vm_page *
 uvmpdpol_selectvictim(kmutex_t **plock)
 {
@@ -1305,6 +1311,7 @@ static void
 clockpro_dropswap(pageq_t *q, int *todo)
 {
 	struct vm_page *pg;
+	kmutex_t *lock;
 
 	KASSERT(mutex_owned(&clockpro.lock));
 
@@ -1320,10 +1327,30 @@ clockpro_dropswap(pageq_t *q, int *todo)
 			mutex_exit(&pg->interlock);
 			continue;
 		}
-		if (uvmpd_trydropswap(pg)) {
-			(*todo)--;
+
+		/*
+		 * try to lock the object that owns the page.
+		 */
+		mutex_exit(&clockpro.lock);
+		lock = uvmpd_trylockowner(pg);
+		/* pg->interlock now released */
+		mutex_enter(&clockpro.lock);
+		if (lock == NULL) {
+			/* didn't get it - try the next page. */
+			/* XXXAD lost position in queue */
+			continue;
 		}
-		/* pg->interlock now dropped */
+
+		/*
+		 * if there's a shortage of swap slots, try to free it.
+		 */
+		if ((pg->flags & PG_SWAPBACKED) != 0 &&
+		    (pg->flags & PG_BUSY) == 0) {
+			if (uvmpd_dropswap(pg)) {
+				(*todo)--;
+			}
+		}
+		mutex_exit(lock);
 	}
 }
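For reference, the rate limiting added to uvmpd_pool_drain_thread() in the
uvm_pdaemon.c change above boils down to: remember the first pool that
pool_drain() reports, and once that same pool comes around again every pool
has been drained at least once, so pause briefly before starting another
cycle.  A rough userland sketch of that bookkeeping (pick_next_pool() and the
pool names are invented for the example; usleep() stands in for kpause()):

/*
 * Toy sketch of cycle detection for round-robin draining: drain one
 * "pool" per wakeup, and back off once a full pass over all pools has
 * been made.
 */
#include <stdio.h>
#include <unistd.h>

static const char *pools[] = { "buf1k", "mbuf", "vmpage", "namei" };
static unsigned cursor;

/* stand-in for pool_drain(): reports which pool it worked on. */
static const char *
pick_next_pool(void)
{
	const char *p = pools[cursor];

	cursor = (cursor + 1) % (sizeof(pools) / sizeof(pools[0]));
	return p;
}

int
main(void)
{
	const char *firstpool = NULL, *curpool;
	int cycles = 0;

	while (cycles < 3) {
		curpool = pick_next_pool();
		printf("draining %s\n", curpool);
		if (firstpool == NULL) {
			firstpool = curpool;
		} else if (firstpool == curpool) {
			/* a full pass over all pools: back off briefly. */
			printf("cycled through all pools, pausing\n");
			usleep(10000);
			firstpool = NULL;
			cycles++;
		}
	}
	return 0;
}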