> Date: Tue, 22 Nov 2016 12:42:39 +1000
> From: David Gwynne <da...@gwynne.id.au>
> 
> right now the pools that make up mbufs are each limited individually.
> 
> the following diff instead gives the mbuf layer a global limit on
> the amount of memory that can be allocated to these pools. this
> is enforced by wrapping the multi-page pool allocator with something
> that checks the mbuf memory limit first.
> 
> this means all mbufs together will use a max of 2k * nmbclust bytes,
> instead of each pool being able to use that amount on its own.
> 
> ok?

Mostly makes sense to me.  Not sure copying the supported page sizes
from the multi-page pool allocator is worth the additional complexity.
I'd probably just initialize it the same way using
POOL_ALLOC_SIZES(PAGE_SIZE, 1UL<<31, POOL_ALLOC_ALIGNED).
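
Something like the sketch below, which just mirrors how
pool_allocator_multi itself is set up (sizes assumed from its
definition in subr_pool.c); the pa_pagesz copy in mbinit() could
then go away:

	struct pool_allocator m_pool_allocator = {
		m_pool_alloc,
		m_pool_free,
		POOL_ALLOC_SIZES(PAGE_SIZE, 1UL << 31, POOL_ALLOC_ALIGNED)
	};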

Wouldn't it make sense to use atomic operations to keep track of the
amount of memory that was allocated?
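
A compare-and-swap loop on the alloc side and atomic_sub_int() on
the free side would avoid taking m_pool_mtx for every page.  Rough
sketch only; m_pool_reserve() is a made-up name:

	static inline int
	m_pool_reserve(u_int pgsz)
	{
		u_int alloc;

		do {
			/* give up without touching the counter if over the limit */
			alloc = mbuf_mem_alloc;
			if (alloc + pgsz > mbuf_mem_limit)
				return (0);
		} while (atomic_cas_uint(&mbuf_mem_alloc,
		    alloc, alloc + pgsz) != alloc);

		return (1);
	}

m_pool_free() would then just do
atomic_sub_int(&mbuf_mem_alloc, pp->pr_pgsize).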

In the long run I suppose we want to drop nmbclust, let users tune
the total amount of memory available for clusters, and set the
initial amount to a percentage of physical memory?
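
I.e. something like this at init time (the divisor is pulled out of
thin air, just for illustration), with a sysctl to adjust the limit
at runtime:

	/* hypothetical default: let mbufs use up to 1/8 of physical memory */
	mbuf_mem_limit = ptoa((psize_t)physmem) / 8;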


> Index: sys/pool.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/pool.h,v
> retrieving revision 1.68
> diff -u -p -r1.68 pool.h
> --- sys/pool.h        21 Nov 2016 01:44:06 -0000      1.68
> +++ sys/pool.h        22 Nov 2016 02:31:47 -0000
> @@ -205,6 +205,7 @@ struct pool {
>  #ifdef _KERNEL
>  
>  extern struct pool_allocator pool_allocator_single;
> +extern struct pool_allocator pool_allocator_multi;
>  
>  struct pool_request {
>       TAILQ_ENTRY(pool_request) pr_entry;
> Index: sys/mbuf.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.222
> diff -u -p -r1.222 mbuf.h
> --- sys/mbuf.h        24 Oct 2016 04:38:44 -0000      1.222
> +++ sys/mbuf.h        22 Nov 2016 02:31:47 -0000
> @@ -416,6 +416,7 @@ struct mbuf_queue {
>  };
>  
>  #ifdef       _KERNEL
> +struct pool;
>  
>  extern       int nmbclust;                   /* limit on the # of clusters */
>  extern       int mblowat;                    /* mbuf low water mark */
> @@ -444,6 +445,7 @@ int       m_leadingspace(struct mbuf *);
>  int  m_trailingspace(struct mbuf *);
>  struct mbuf *m_clget(struct mbuf *, int, u_int);
>  void m_extref(struct mbuf *, struct mbuf *);
> +void m_pool_init(struct pool *, u_int, u_int, const char *);
>  void m_extfree_pool(caddr_t, u_int, void *);
>  void m_adj(struct mbuf *, int);
>  int  m_copyback(struct mbuf *, int, int, const void *, int);
> Index: kern/uipc_mbuf.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
> retrieving revision 1.238
> diff -u -p -r1.238 uipc_mbuf.c
> --- kern/uipc_mbuf.c  9 Nov 2016 08:55:11 -0000       1.238
> +++ kern/uipc_mbuf.c  22 Nov 2016 02:31:47 -0000
> @@ -133,6 +133,19 @@ void     m_extfree(struct mbuf *);
>  void nmbclust_update(void);
>  void m_zero(struct mbuf *);
>  
> +struct mutex m_pool_mtx = MUTEX_INITIALIZER(IPL_NET);
> +unsigned int mbuf_mem_limit; /* how much memory we can allocate */
> +unsigned int mbuf_mem_alloc; /* how much memory has been allocated */
> +
> +void *m_pool_alloc(struct pool *, int, int *);
> +void m_pool_free(struct pool *, void *);
> +
> +struct pool_allocator m_pool_allocator = {
> +     m_pool_alloc,
> +     m_pool_free,
> +     0 /* will be copied from pool_allocator_multi */
> +};
> +
>  static void (*mextfree_fns[4])(caddr_t, u_int, void *);
>  static u_int num_extfree_fns;
>  
> @@ -148,6 +161,11 @@ mbinit(void)
>       int i;
>       unsigned int lowbits;
>  
> +     m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz;
> +
> +     nmbclust_update();
> +     mbuf_mem_alloc = 0;
> +
>  #if DIAGNOSTIC
>       if (mclsizes[0] != MCLBYTES)
>               panic("mbinit: the smallest cluster size != MCLBYTES");
> @@ -155,9 +173,7 @@ mbinit(void)
>               panic("mbinit: the largest cluster size != MAXMCLBYTES");
>  #endif
>  
> -     pool_init(&mbpool, MSIZE, 0, IPL_NET, 0, "mbufpl", NULL);
> -     pool_set_constraints(&mbpool, &kp_dma_contig);
> -     pool_setlowat(&mbpool, mblowat);
> +     m_pool_init(&mbpool, MSIZE, 64, "mbufpl");
>  
>       pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
>           IPL_NET, 0, "mtagpl", NULL);
> @@ -171,47 +187,32 @@ mbinit(void)
>                       snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
>                           mclsizes[i] >> 10);
>               }
> -             pool_init(&mclpools[i], mclsizes[i], 64, IPL_NET, 0,
> -                 mclnames[i], NULL);
> -             pool_set_constraints(&mclpools[i], &kp_dma_contig);
> -             pool_setlowat(&mclpools[i], mcllowat);
> +
> +             m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]);
>       }
>  
>       (void)mextfree_register(m_extfree_pool);
>       KASSERT(num_extfree_fns == 1);
> -
> -     nmbclust_update();
>  }
>  
>  void
>  mbcpuinit()
>  {
> +     int i;
> +
>       mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT, M_DEVBUF);
> +
> +     pool_cache_init(&mbpool);
> +     pool_cache_init(&mtagpool);
> +
> +     for (i = 0; i < nitems(mclsizes); i++)
> +             pool_cache_init(&mclpools[i]);
>  }
>  
>  void
>  nmbclust_update(void)
>  {
> -     unsigned int i, n;
> -
> -     /*
> -      * Set the hard limit on the mclpools to the number of
> -      * mbuf clusters the kernel is to support.  Log the limit
> -      * reached message max once a minute.
> -      */
> -     for (i = 0; i < nitems(mclsizes); i++) {
> -             n = (unsigned long long)nmbclust * MCLBYTES / mclsizes[i];
> -             (void)pool_sethardlimit(&mclpools[i], n, mclpool_warnmsg, 60);
> -             /*
> -              * XXX this needs to be reconsidered.
> -              * Setting the high water mark to nmbclust is too high
> -              * but we need to have enough spare buffers around so that
> -              * allocations in interrupt context don't fail or mclgeti()
> -              * drivers may end up with empty rings.
> -              */
> -             pool_sethiwat(&mclpools[i], n);
> -     }
> -     pool_sethiwat(&mbpool, nmbclust);
> +     mbuf_mem_limit = nmbclust * MCLBYTES;
>  }
>  
>  /*
> @@ -1377,6 +1378,52 @@ m_dup_pkt(struct mbuf *m0, unsigned int 
>  fail:
>       m_freem(m);
>       return (NULL);
> +}
> +
> +void *
> +m_pool_alloc(struct pool *pp, int flags, int *slowdown)
> +{
> +     void *v = NULL;
> +     int avail = 1;
> +
> +     if (mbuf_mem_alloc + pp->pr_pgsize > mbuf_mem_limit)
> +             return (NULL);
> +
> +     mtx_enter(&m_pool_mtx);
> +     if (mbuf_mem_alloc + pp->pr_pgsize > mbuf_mem_limit)
> +             avail = 0;
> +     else
> +             mbuf_mem_alloc += pp->pr_pgsize;
> +     mtx_leave(&m_pool_mtx);
> +
> +     if (avail) {
> +             v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown);
> +
> +             if (v == NULL) {
> +                     mtx_enter(&m_pool_mtx);
> +                     mbuf_mem_alloc -= pp->pr_pgsize;
> +                     mtx_leave(&m_pool_mtx);
> +             }
> +     }
> +
> +     return (v);
> +}
> +
> +void
> +m_pool_free(struct pool *pp, void *v)
> +{
> +     (*pool_allocator_multi.pa_free)(pp, v);
> +
> +     mtx_enter(&m_pool_mtx);
> +     mbuf_mem_alloc -= pp->pr_pgsize;
> +     mtx_leave(&m_pool_mtx);
> +}
> +
> +void
> +m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
> +{
> +     pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
> +     pool_set_constraints(pp, &kp_dma_contig);
>  }
>  
>  #ifdef DDB
> Index: dev/pci/if_myx.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
> retrieving revision 1.99
> diff -u -p -r1.99 if_myx.c
> --- dev/pci/if_myx.c  31 Oct 2016 01:38:57 -0000      1.99
> +++ dev/pci/if_myx.c  22 Nov 2016 02:31:47 -0000
> @@ -294,8 +294,6 @@ myx_attach(struct device *parent, struct
>  
>       /* this is sort of racy */
>       if (myx_mcl_pool == NULL) {
> -             extern struct kmem_pa_mode kp_dma_contig;
> -
>               myx_mcl_pool = malloc(sizeof(*myx_mcl_pool), M_DEVBUF,
>                   M_WAITOK);
>               if (myx_mcl_pool == NULL) {
> @@ -303,9 +301,9 @@ myx_attach(struct device *parent, struct
>                           DEVNAME(sc));
>                       goto unmap;
>               }
> -             pool_init(myx_mcl_pool, MYX_RXBIG_SIZE, MYX_BOUNDARY, IPL_NET,
> -                 0, "myxmcl", NULL);
> -             pool_set_constraints(myx_mcl_pool, &kp_dma_contig);
> +
> +             m_pool_init(myx_mcl_pool, MYX_RXBIG_SIZE, MYX_BOUNDARY,
> +                 "myxmcl");
>       }
>  
>       if (myx_pcie_dc(sc, pa) != 0)
> 
> 
