Over time we have iterated on various designs to reduce pressure on the kmem
map. One of the problems is that sometimes you want to allocate something in
intr context (NOWAIT), and other times in process context (WAITOK), but at
free time the two get glommed together and freed the same way. This happens
in pool.

Since pool always keeps a little extra info about each page it allocates, in
the form of the page header, we can remember how a page was allocated and
free it back to the right place. That also lets us make better choices at
allocation time, and avoid grabbing mbufs and clusters from the tiny kmem map
for waitok callers.
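
To make the pattern concrete, here is a userland analogy (the names here are
illustrative, not the kernel code): stash the flags in the per-page header at
alloc time, and dispatch on them at free time instead of on the caller's
context.

#include <stdlib.h>
#include <stdio.h>

#define PR_WAITOK	0x01	/* may sleep */
#define PR_NOWAIT	0x02	/* called from intr, must not sleep */

struct page_header {
	int	ph_flags;	/* flags the page was allocated with */
};

struct page_header *
page_alloc(int flags)
{
	struct page_header *ph;

	if ((ph = malloc(sizeof(*ph))) == NULL)
		return (NULL);
	ph->ph_flags = flags;	/* remember how we allocated */
	return (ph);
}

void
page_free(struct page_header *ph)
{
	/* free to the place it came from, whatever context we're in now */
	if (ph->ph_flags & PR_WAITOK)
		printf("free via the non-intr (process context) path\n");
	else
		printf("free via the intrsafe path\n");
	free(ph);
}

int
main(void)
{
	struct page_header *ph = page_alloc(PR_WAITOK);

	if (ph != NULL)
		page_free(ph);	/* takes the process-context path */
	return (0);
}

In the diff this is the new ph_flags member of struct pool_page_header, set
in pool_p_alloc and consulted in pool_p_free.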

What this diff does:

Add a flags argument to the pool allocator free functions. The default
multi-page allocator uses this to defer to the non-intr code path if the page
was allocated WAITOK. The addition of the pool_gc task means we can always
free from process context, so pool_put no longer frees pages itself; see the
sketch below.
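
The gc task itself is not in the hunks below. Roughly (a sketch only: the
names pool_gc_sched, pool_gc_tick, pool_gc_task, and the locking details are
illustrative), it is a timeout that schedules a task on the system taskq,
which walks the pools in process context and frees idle empty pages, using
the same check this diff removes from pool_put:

void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);

void
pool_gc_sched(void *null)
{
	/* hand the work to the system taskq: always process context */
	task_add(systq, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_page_header *ph, *freeph;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		freeph = NULL;

		pl_enter(pp, &pp->pr_lock);
		/* the "is it time to free a page?" test pool_put had */
		if (pp->pr_nidle > pp->pr_maxpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		}
		pl_leave(pp, &pp->pr_lock);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	rw_exit_read(&pool_lock);

	/* come back around in a second */
	timeout_add_sec(&pool_gc_tick, 1);
}

Since pool_p_free now runs from the task rather than from the caller of
pool_put, the ph_flags stashed at alloc time is what ensures the page still
goes back to the allocator it came from.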

This diff could use testing on some weird machines, or on systems that have
experienced kmem pressure.


Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.223
diff -u -p -r1.223 subr_pool.c
--- kern/subr_pool.c    8 Jun 2018 15:38:15 -0000       1.223
+++ kern/subr_pool.c    21 Jan 2019 04:15:13 -0000
@@ -155,6 +155,7 @@ struct pool_page_header {
        caddr_t                 ph_colored;     /* page's colored address */
        unsigned long           ph_magic;
        int                     ph_tick;
+       int                     ph_flags;
 };
 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
@@ -225,13 +226,13 @@ void       pool_get_done(struct pool *, void 
 void    pool_runqueue(struct pool *, int);
 
 void   *pool_allocator_alloc(struct pool *, int, int *);
-void    pool_allocator_free(struct pool *, void *);
+void    pool_allocator_free(struct pool *, int, void *);
 
 /*
  * The default pool allocator.
  */
 void   *pool_page_alloc(struct pool *, int, int *);
-void   pool_page_free(struct pool *, void *);
+void   pool_page_free(struct pool *, int, void *);
 
 /*
  * safe for interrupts; this is the default allocator
@@ -243,7 +244,7 @@ struct pool_allocator pool_allocator_sin
 };
 
 void   *pool_multi_alloc(struct pool *, int, int *);
-void   pool_multi_free(struct pool *, void *);
+void   pool_multi_free(struct pool *, int, void *);
 
 struct pool_allocator pool_allocator_multi = {
        pool_multi_alloc,
@@ -252,7 +253,7 @@ struct pool_allocator pool_allocator_mul
 };
 
 void   *pool_multi_alloc_ni(struct pool *, int, int *);
-void   pool_multi_free_ni(struct pool *, void *);
+void   pool_multi_free_ni(struct pool *, int, void *);
 
 struct pool_allocator pool_allocator_multi_ni = {
        pool_multi_alloc_ni,
@@ -787,7 +788,6 @@ pool_do_get(struct pool *pp, int flags, 
 void
 pool_put(struct pool *pp, void *v)
 {
-       struct pool_page_header *ph, *freeph = NULL;
 
 #ifdef DIAGNOSTIC
        if (v == NULL)
@@ -808,19 +808,8 @@ pool_put(struct pool *pp, void *v)
        pp->pr_nout--;
        pp->pr_nput++;
 
-       /* is it time to free a page? */
-       if (pp->pr_nidle > pp->pr_maxpages &&
-           (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
-           (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
-               freeph = ph;
-               pool_p_remove(pp, freeph);
-       }
-
        pl_leave(pp, &pp->pr_lock);
 
-       if (freeph != NULL)
-               pool_p_free(pp, freeph);
-
        if (!TAILQ_EMPTY(&pp->pr_requests)) {
                pl_enter(pp, &pp->pr_requests_lock);
                pool_runqueue(pp, PR_NOWAIT);
@@ -933,10 +922,11 @@ pool_p_alloc(struct pool *pp, int flags,
        else {
                ph = pool_get(&phpool, flags);
                if (ph == NULL) {
-                       pool_allocator_free(pp, addr);
+                       pool_allocator_free(pp, flags, addr);
                        return (NULL);
                }
        }
+       ph->ph_flags = flags;
 
        XSIMPLEQ_INIT(&ph->ph_items);
        ph->ph_page = addr;
@@ -1010,7 +1000,7 @@ pool_p_free(struct pool *pp, struct pool
 #endif
        }
 
-       pool_allocator_free(pp, ph->ph_page);
+       pool_allocator_free(pp, ph->ph_flags, ph->ph_page);
 
        if (!POOL_INPGHDR(pp))
                pool_put(&phpool, ph);
@@ -1616,11 +1606,11 @@ pool_allocator_alloc(struct pool *pp, in
 }
 
 void
-pool_allocator_free(struct pool *pp, void *v)
+pool_allocator_free(struct pool *pp, int flags, void *v)
 {
        struct pool_allocator *pa = pp->pr_alloc;
 
-       (*pa->pa_free)(pp, v);
+       (*pa->pa_free)(pp, flags, v);
 }
 
 void *
@@ -1635,7 +1625,7 @@ pool_page_alloc(struct pool *pp, int fla
 }
 
 void
-pool_page_free(struct pool *pp, void *v)
+pool_page_free(struct pool *pp, int flags, void *v)
 {
        km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
 }
@@ -1648,6 +1638,9 @@ pool_multi_alloc(struct pool *pp, int fl
        void *v;
        int s;
 
+       if (flags & PR_WAITOK)
+               return pool_multi_alloc_ni(pp, flags, slowdown);
+
        if (POOL_INPGHDR(pp))
                kv.kv_align = pp->pr_pgsize;
 
@@ -1662,11 +1655,16 @@ pool_multi_alloc(struct pool *pp, int fl
 }
 
 void
-pool_multi_free(struct pool *pp, void *v)
+pool_multi_free(struct pool *pp, int flags, void *v)
 {
        struct kmem_va_mode kv = kv_intrsafe;
        int s;
 
+       if (flags & PR_WAITOK) {
+               pool_multi_free_ni(pp, flags, v);
+               return;
+       }
+
        if (POOL_INPGHDR(pp))
                kv.kv_align = pp->pr_pgsize;
 
@@ -1696,7 +1694,7 @@ pool_multi_alloc_ni(struct pool *pp, int
 }
 
 void
-pool_multi_free_ni(struct pool *pp, void *v)
+pool_multi_free_ni(struct pool *pp, int flags, void *v)
 {
        struct kmem_va_mode kv = kv_any;
 
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.265
diff -u -p -r1.265 uipc_mbuf.c
--- kern/uipc_mbuf.c    9 Jan 2019 16:37:27 -0000       1.265
+++ kern/uipc_mbuf.c    21 Jan 2019 04:14:45 -0000
@@ -139,7 +139,7 @@ unsigned int mbuf_mem_limit; /* how much
 unsigned int mbuf_mem_alloc; /* how much memory has been allocated */
 
 void   *m_pool_alloc(struct pool *, int, int *);
-void   m_pool_free(struct pool *, void *);
+void   m_pool_free(struct pool *, int, void *);
 
 struct pool_allocator m_pool_allocator = {
        m_pool_alloc,
@@ -1454,9 +1454,9 @@ m_pool_alloc(struct pool *pp, int flags,
 }
 
 void
-m_pool_free(struct pool *pp, void *v)
+m_pool_free(struct pool *pp, int flags, void *v)
 {
-       (*pool_allocator_multi.pa_free)(pp, v);
+       (*pool_allocator_multi.pa_free)(pp, flags, v);
 
        mtx_enter(&m_pool_mtx);
        mbuf_mem_alloc -= pp->pr_pgsize;
Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.74
diff -u -p -r1.74 pool.h
--- sys/pool.h  13 Aug 2017 20:26:33 -0000      1.74
+++ sys/pool.h  21 Jan 2019 04:12:35 -0000
@@ -107,7 +107,7 @@ TAILQ_HEAD(pool_requests, pool_request);
 
 struct pool_allocator {
        void            *(*pa_alloc)(struct pool *, int, int *);
-       void             (*pa_free)(struct pool *, void *);
+       void             (*pa_free)(struct pool *, int, void *);
        size_t             pa_pagesz;
 };
 
