On Fri, Dec 11, 2020 at 09:49:07AM -0300, Martin Pieuchot wrote: > On 11/12/20(Fri) 12:52, Mark Kettenis wrote: > > > Date: Thu, 10 Dec 2020 16:13:22 -0600 > > > From: Scott Cheloha <scottchel...@gmail.com> > > > > > > Hi, > > > > > > We looked at removing the ticks from subr_pool.c a while back but it > > > got shelved. That may or may not have been my fault. I don't > > > remember. > > > > > > Anyway, I would normally suggest switching to getuptime(9) here, but > > > getuptime(9) counts in seconds and we're working with a 1 second > > > timeout in this code (pool_wait_free) so that's too coarse an > > > interface for this job. > > > > > > The next best thing I could come up with was introducing a coarse > > > sub-second interface for use in this file, "getnsecuptime()", which > > > calls getnanouptime(9) and converts the result to a 64-bit count of > > > nanoseconds. This is relatively fast (we don't read the underlying > > > timecounter hardware) and causes a minimal amount of code change (we > > > can use it inline because it returns an integral value). > > > > > > >From there the changes are simple: > > > > > > - Renames: ph_tick -> ph_timestamp, pr_cache_tick -> pr_cache_timestamp > > > > > > - Call getnsecuptime(9) wherever we read 'ticks'. > > > > > > - Change pool_wait_gc and pool_wait_free to counts of nanoseconds. > > > They could be macros, e.g. > > > > > > #define POOL_WAIT_GC 8000000000ULL > > > > > > but I'll leave that for a second diff to keep things simple. > > > > > > This compiles and I haven't changed any logic so I assume it isn't > > > broken. > > > > > > We could move getnsecuptime() into kern_tc.c but it isn't used > > > anywhere else yet so I'm hesitant to do so. > > > > > > Thoughts? > > > > Specifying the timeouts in nanoseconds isn't particularly useful I'd > > say. But I see we can't use SEC_TO_NSEC here because of the overflow > > check... > > I'm not sure to understand, can't we do: > > pool_wait_free = SEC_TO_NSEC(1); > pool_wait_gc = SEC_TO_NSEC(8); > > or are you pointing at something else?
We can do that at runtime but not at compile time. SEC_TO_NSEC(1) isn't a constant so that won't compile (I just tried). We _could_ do something like this: #define POOL_WAIT_FREE SEC_TO_NSEC(1) I think the compiler will probably inline the result and elide the overflow check because the input is a constant. I don't know how to verify this, but my limited understanding of compilers suggests that this is totally possible. > One comment below > > [...] > > > > Index: sys/pool.h > > > =================================================================== > > > RCS file: /cvs/src/sys/sys/pool.h,v > > > retrieving revision 1.77 > > > diff -u -p -r1.77 pool.h > > > --- sys/pool.h 19 Jul 2019 09:03:03 -0000 1.77 > > > +++ sys/pool.h 10 Dec 2020 22:08:33 -0000 > > > @@ -201,7 +201,7 @@ struct pool { > > > u_int pr_cache_items; /* target list length */ > > > u_int pr_cache_contention; > > > u_int pr_cache_contention_prev; > > > - int pr_cache_tick; /* time idle list was empty */ > > > + uint64_t pr_cache_timestamp; /* when idle list was empty */ > > > int pr_cache_nout; > > Do you see a change in of size in the struct? If so, does moving > `pr_cache_nout' after `pr_cache_ngc' helps? Right, the structs. We're changing both struct pool_page_header and struct pool. For the unmodified (-current) binary on amd64: $ gdb -q obj/bsd.gdb (gdb) p sizeof(struct pool_page_header) $1 = 112 (gdb) p sizeof(struct pool) $2 = 424 For the patched binary on amd64: (gdb) p sizeof(struct pool_page_header) $1 = 112 (gdb) p sizeof(struct pool) $2 = 432 kettenis@: The pool_page_header does not grow. That's good, right? If do as mpi@ suggested and we move pr_cache_nout after pr_cache_ngc... $ gdb -q obj/bsd.gdb (gdb) p sizeof(struct pool_page_header) $1 = 112 (gdb) p sizeof(struct pool) $2 = 424 Bingo! Same size. mpi@, how did you know? Patch attached with the struct shuffling and the new macros below. Index: kern/subr_pool.c =================================================================== RCS file: /cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.230 diff -u -p -r1.230 subr_pool.c --- kern/subr_pool.c 24 Jan 2020 06:31:17 -0000 1.230 +++ kern/subr_pool.c 11 Dec 2020 17:50:46 -0000 @@ -41,6 +41,7 @@ #include <sys/syslog.h> #include <sys/sysctl.h> #include <sys/task.h> +#include <sys/time.h> #include <sys/timeout.h> #include <sys/percpu.h> @@ -148,7 +149,7 @@ struct pool_page_header { caddr_t ph_page; /* this page's address */ caddr_t ph_colored; /* page's colored address */ unsigned long ph_magic; - int ph_tick; + uint64_t ph_timestamp; }; #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */ #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT) @@ -266,8 +267,19 @@ void pool_gc_sched(void *); struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL); void pool_gc_pages(void *); struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL); -int pool_wait_free = 1; -int pool_wait_gc = 8; + +#define POOL_WAIT_FREE SEC_TO_NSEC(1) +#define POOL_WAIT_GC SEC_TO_NSEC(8) + +/* XXX where do I put this? */ +uint64_t +getnsecuptime(void) +{ + struct timespec now; + + getnanouptime(&now); + return TIMESPEC_TO_NSEC(&now); +} RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare); @@ -797,7 +809,7 @@ pool_put(struct pool *pp, void *v) /* is it time to free a page? */ if (pp->pr_nidle > pp->pr_maxpages && (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && - (ticks - ph->ph_tick) > (hz * pool_wait_free)) { + getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) { freeph = ph; pool_p_remove(pp, freeph); } @@ -864,7 +876,7 @@ pool_do_put(struct pool *pp, void *v) */ pp->pr_nidle++; - ph->ph_tick = ticks; + ph->ph_timestamp = getnsecuptime(); TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry); TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); pool_update_curpage(pp); @@ -1566,7 +1578,7 @@ pool_gc_pages(void *null) /* is it time to free a page? */ if (pp->pr_nidle > pp->pr_minpages && (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && - (ticks - ph->ph_tick) > (hz * pool_wait_gc)) { + getnsecuptime() - ph->ph_timestamp > POOL_WAIT_GC) { freeph = ph; pool_p_remove(pp, freeph); } else @@ -1726,7 +1738,7 @@ pool_cache_init(struct pool *pp) arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic)); TAILQ_INIT(&pp->pr_cache_lists); pp->pr_cache_nitems = 0; - pp->pr_cache_tick = ticks; + pp->pr_cache_timestamp = getnsecuptime(); pp->pr_cache_items = 8; pp->pr_cache_contention = 0; pp->pr_cache_ngc = 0; @@ -1829,7 +1841,7 @@ pool_cache_list_free(struct pool *pp, st { pool_list_enter(pp); if (TAILQ_EMPTY(&pp->pr_cache_lists)) - pp->pr_cache_tick = ticks; + pp->pr_cache_timestamp = getnsecuptime(); pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci); TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl); @@ -2006,7 +2018,7 @@ pool_cache_gc(struct pool *pp) { unsigned int contention, delta; - if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) && + if (getnsecuptime() - pp->pr_cache_timestamp > POOL_WAIT_GC && !TAILQ_EMPTY(&pp->pr_cache_lists) && pl_enter_try(pp, &pp->pr_cache_lock)) { struct pool_cache_item *pl = NULL; @@ -2015,7 +2027,7 @@ pool_cache_gc(struct pool *pp) if (pl != NULL) { TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl); - pp->pr_cache_tick = ticks; + pp->pr_cache_timestamp = getnsecuptime(); pp->pr_cache_ngc++; } Index: sys/pool.h =================================================================== RCS file: /cvs/src/sys/sys/pool.h,v retrieving revision 1.77 diff -u -p -r1.77 pool.h --- sys/pool.h 19 Jul 2019 09:03:03 -0000 1.77 +++ sys/pool.h 11 Dec 2020 17:50:46 -0000 @@ -201,9 +201,9 @@ struct pool { u_int pr_cache_items; /* target list length */ u_int pr_cache_contention; u_int pr_cache_contention_prev; - int pr_cache_tick; /* time idle list was empty */ - int pr_cache_nout; + uint64_t pr_cache_timestamp; /* time idle list was empty */ uint64_t pr_cache_ngc; /* # of times the gc released a list */ + int pr_cache_nout; u_int pr_align; u_int pr_maxcolors; /* Cache coloring */