We have iterated on various designs over time to reduce pressure on the kmem map. One of the problems is that sometimes you want to allocate something in intr context (NOWAIT), and other times in process context (WAITOK), but at free time they all get glommed together. This happens in pool.
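To make that concrete, here is a small userland toy model of the idea (none of this is the actual kernel code and all the names are made up): the page header remembers how the page was allocated, so the free path can hand it back to the matching "map" no matter what context the free happens from.

/* toy model only: two counters stand in for the intr-safe kmem map
 * and the big kernel map; the PR_* values here are illustrative. */
#include <stdio.h>
#include <stdlib.h>

#define PR_NOWAIT	0x0001		/* "allocated from intr context" */
#define PR_WAITOK	0x0002		/* "allocated from process context" */

static int kmem_map_pages;		/* small, interrupt safe */
static int kernel_map_pages;		/* big, allocations may sleep */

struct page_header {
	void	*ph_page;
	int	 ph_flags;		/* how the page was allocated */
};

static void
page_alloc(struct page_header *ph, int flags)
{
	ph->ph_page = malloc(4096);
	ph->ph_flags = flags;		/* remember for free time */
	if (flags & PR_WAITOK)
		kernel_map_pages++;
	else
		kmem_map_pages++;
}

static void
page_free(struct page_header *ph)
{
	/* free to wherever the page came from, whoever calls us */
	if (ph->ph_flags & PR_WAITOK)
		kernel_map_pages--;
	else
		kmem_map_pages--;
	free(ph->ph_page);
}

int
main(void)
{
	struct page_header a, b;

	page_alloc(&a, PR_NOWAIT);
	page_alloc(&b, PR_WAITOK);
	page_free(&a);
	page_free(&b);
	printf("kmem map pages %d, kernel map pages %d\n",
	    kmem_map_pages, kernel_map_pages);
	return (0);
}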
Since pool always has a little extra info about the page it has allocated, in the form of the pool page header, we can remember how a page was allocated and free it back to the right place. That lets us make better choices when allocating, and avoids grabbing mbufs and clusters from the tiny kmem map for waitok callers.

What this diff does: add a flags argument to the pool allocator free functions. The default multi-page allocator uses it to defer to the non-intr code path for WAITOK pages. The addition of the pool_gc task means we can always free from process context. (A sketch of what an allocator backend looks like with the new signature follows after the diff.)

This diff could use testing on some weird machines, or on systems that have experienced kmem pressure, etc.

Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.223
diff -u -p -r1.223 subr_pool.c
--- kern/subr_pool.c	8 Jun 2018 15:38:15 -0000	1.223
+++ kern/subr_pool.c	21 Jan 2019 04:15:13 -0000
@@ -155,6 +155,7 @@ struct pool_page_header {
 	caddr_t		ph_colored;	/* page's colored address */
 	unsigned long	ph_magic;
 	int		ph_tick;
+	int		ph_flags;
 };
 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
@@ -225,13 +226,13 @@ void	pool_get_done(struct pool *, void
 void	 pool_runqueue(struct pool *, int);
 
 void	*pool_allocator_alloc(struct pool *, int, int *);
-void	 pool_allocator_free(struct pool *, void *);
+void	 pool_allocator_free(struct pool *, int, void *);
 
 /*
  * The default pool allocator.
  */
 void	*pool_page_alloc(struct pool *, int, int *);
-void	pool_page_free(struct pool *, void *);
+void	pool_page_free(struct pool *, int, void *);
 
 /*
  * safe for interrupts; this is the default allocator
@@ -243,7 +244,7 @@ struct pool_allocator pool_allocator_sin
 };
 
 void	*pool_multi_alloc(struct pool *, int, int *);
-void	pool_multi_free(struct pool *, void *);
+void	pool_multi_free(struct pool *, int, void *);
 
 struct pool_allocator pool_allocator_multi = {
 	pool_multi_alloc,
@@ -252,7 +253,7 @@ struct pool_allocator pool_allocator_mul
 };
 
 void	*pool_multi_alloc_ni(struct pool *, int, int *);
-void	pool_multi_free_ni(struct pool *, void *);
+void	pool_multi_free_ni(struct pool *, int, void *);
 
 struct pool_allocator pool_allocator_multi_ni = {
 	pool_multi_alloc_ni,
@@ -787,7 +788,6 @@ pool_do_get(struct pool *pp, int flags,
 void
 pool_put(struct pool *pp, void *v)
 {
-	struct pool_page_header *ph, *freeph = NULL;
 
 #ifdef DIAGNOSTIC
 	if (v == NULL)
@@ -808,19 +808,8 @@ pool_put(struct pool *pp, void *v)
 	pp->pr_nout--;
 	pp->pr_nput++;
 
-	/* is it time to free a page? */
-	if (pp->pr_nidle > pp->pr_maxpages &&
-	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
-	    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
-		freeph = ph;
-		pool_p_remove(pp, freeph);
-	}
-
 	pl_leave(pp, &pp->pr_lock);
 
-	if (freeph != NULL)
-		pool_p_free(pp, freeph);
-
 	if (!TAILQ_EMPTY(&pp->pr_requests)) {
 		pl_enter(pp, &pp->pr_requests_lock);
 		pool_runqueue(pp, PR_NOWAIT);
@@ -933,10 +922,11 @@ pool_p_alloc(struct pool *pp, int flags,
 	else {
 		ph = pool_get(&phpool, flags);
 		if (ph == NULL) {
-			pool_allocator_free(pp, addr);
+			pool_allocator_free(pp, flags, addr);
 			return (NULL);
 		}
 	}
+	ph->ph_flags = flags;
 
 	XSIMPLEQ_INIT(&ph->ph_items);
 	ph->ph_page = addr;
@@ -1010,7 +1000,7 @@ pool_p_free(struct pool *pp, struct pool
 #endif
 	}
 
-	pool_allocator_free(pp, ph->ph_page);
+	pool_allocator_free(pp, ph->ph_flags, ph->ph_page);
 
 	if (!POOL_INPGHDR(pp))
 		pool_put(&phpool, ph);
@@ -1616,11 +1606,11 @@ pool_allocator_alloc(struct pool *pp, in
 }
 
 void
-pool_allocator_free(struct pool *pp, void *v)
+pool_allocator_free(struct pool *pp, int flags, void *v)
 {
 	struct pool_allocator *pa = pp->pr_alloc;
 
-	(*pa->pa_free)(pp, v);
+	(*pa->pa_free)(pp, flags, v);
 }
 
 void *
@@ -1635,7 +1625,7 @@ pool_page_alloc(struct pool *pp, int fla
 }
 
 void
-pool_page_free(struct pool *pp, void *v)
+pool_page_free(struct pool *pp, int flags, void *v)
 {
 	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
 }
@@ -1648,6 +1638,9 @@ pool_multi_alloc(struct pool *pp, int fl
 	void *v;
 	int s;
 
+	if (flags & PR_WAITOK)
+		return pool_multi_alloc_ni(pp, flags, slowdown);
+
 	if (POOL_INPGHDR(pp))
 		kv.kv_align = pp->pr_pgsize;
 
@@ -1662,11 +1655,16 @@ pool_multi_alloc(struct pool *pp, int fl
 }
 
 void
-pool_multi_free(struct pool *pp, void *v)
+pool_multi_free(struct pool *pp, int flags, void *v)
 {
 	struct kmem_va_mode kv = kv_intrsafe;
 	int s;
 
+	if (flags & PR_WAITOK) {
+		pool_multi_free_ni(pp, flags, v);
+		return;
+	}
+
 	if (POOL_INPGHDR(pp))
 		kv.kv_align = pp->pr_pgsize;
 
@@ -1696,7 +1694,7 @@ pool_multi_alloc_ni(struct pool *pp, int
 }
 
 void
-pool_multi_free_ni(struct pool *pp, void *v)
+pool_multi_free_ni(struct pool *pp, int flags, void *v)
 {
 	struct kmem_va_mode kv = kv_any;
 
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.265
diff -u -p -r1.265 uipc_mbuf.c
--- kern/uipc_mbuf.c	9 Jan 2019 16:37:27 -0000	1.265
+++ kern/uipc_mbuf.c	21 Jan 2019 04:14:45 -0000
@@ -139,7 +139,7 @@ unsigned int mbuf_mem_limit;	/* how much
 unsigned int mbuf_mem_alloc;	/* how much memory has been allocated */
 
 void	*m_pool_alloc(struct pool *, int, int *);
-void	m_pool_free(struct pool *, void *);
+void	m_pool_free(struct pool *, int, void *);
 
 struct pool_allocator m_pool_allocator = {
 	m_pool_alloc,
@@ -1454,9 +1454,9 @@ m_pool_alloc(struct pool *pp, int flags,
 }
 
 void
-m_pool_free(struct pool *pp, void *v)
+m_pool_free(struct pool *pp, int flags, void *v)
 {
-	(*pool_allocator_multi.pa_free)(pp, v);
+	(*pool_allocator_multi.pa_free)(pp, flags, v);
 
 	mtx_enter(&m_pool_mtx);
 	mbuf_mem_alloc -= pp->pr_pgsize;
Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.74
diff -u -p -r1.74 pool.h
--- sys/pool.h	13 Aug 2017 20:26:33 -0000	1.74
+++ sys/pool.h	21 Jan 2019 04:12:35 -0000
@@ -107,7 +107,7 @@ TAILQ_HEAD(pool_requests, pool_request);
 
 struct pool_allocator {
 	void *(*pa_alloc)(struct pool *, int, int *);
-	void  (*pa_free)(struct pool *, void *);
+	void  (*pa_free)(struct pool *, int, void *);
 	size_t pa_pagesz;
 };
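For anyone with an out-of-tree pool allocator backend: the pa_free callback grows a flags argument, and the flags passed in are the ones the page was allocated with (now remembered in ph_flags). A rough sketch of an adapted backend, with entirely hypothetical my_* names, not part of the diff:

void	*my_page_alloc(struct pool *, int, int *);
void	 my_page_free(struct pool *, int, void *);
void	 my_free_waitok(struct pool *, void *);	/* hypothetical */
void	 my_free_nowait(struct pool *, void *);	/* hypothetical */

struct pool_allocator my_pool_allocator = {
	my_page_alloc,
	my_page_free,
};

void
my_page_free(struct pool *pp, int flags, void *v)
{
	/* dispatch on how the page was allocated, not on caller context */
	if (flags & PR_WAITOK)
		my_free_waitok(pp, v);		/* sleeping is fine */
	else
		my_free_nowait(pp, v);		/* must stay intr safe */
}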