On Tue, Feb 01, 2022 at 08:00:36AM +0100, Otto Moerbeek wrote:

> On Fri, Jan 28, 2022 at 05:17:48PM +0100, Otto Moerbeek wrote:
> 
> > On Fri, Jan 28, 2022 at 04:33:28PM +0100, Alexander Bluhm wrote:
> > 
> > > On Sun, Jan 09, 2022 at 02:54:43PM +0100, Otto Moerbeek wrote:
> > > > Currently malloc caches a number of freed regions up to 128k in
> > > > size. This cache is indexed by size (in # of pages), so it is
> > > > very quick to check.
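
A minimal sketch of that size-indexed lookup, to make the idea concrete
(the struct mirrors the diff below; the helper and its name are made up
for illustration):

    #include <sys/types.h>
    #include <stddef.h>

    struct smallcache {
            void **pages;           /* cached free regions of one size */
            ushort length;          /* slots currently filled */
            ushort max;             /* capacity of this size class */
    };

    /*
     * Fetch a cached region of exactly psz pages, if any.
     * Indexing by psz - 1 makes the hit check O(1).
     */
    static void *
    smallcache_get(struct smallcache *caches, size_t psz)
    {
            struct smallcache *c = &caches[psz - 1];

            return c->length > 0 ? c->pages[--c->length] : NULL;
    }
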
> > > >
> > > > Some programs allocate and deallocate larger allocations in a
> > > > frantic way.  Accommodate those programs by also keeping a cache
> > > > of regions between 128k and 2M, implemented as a single cache of
> > > > variable-sized regions.
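
Because a slot's index no longer follows from its size, lookups scan the
array, starting at a random slot so reuse stays unpredictable; roughly,
unmap() also starts evicting occupied slots once more than a quarter of
the cache's maximal page count is in use (the BIGCACHE_FILL check in the
diff). A sketch of the lookup side, again with a made-up helper name:

    #include <sys/types.h>
    #include <stddef.h>

    struct bigcache {
            void *page;             /* start of the cached region */
            size_t psize;           /* size in pages, 0 if slot unused */
    };

    /* Search for a cached region of exactly psz pages. */
    static void *
    bigcache_get(struct bigcache *bc, size_t nslots, size_t psz,
        u_short rnd)
    {
            size_t i, j;

            for (j = 0; j < nslots; j++) {
                    i = (rnd + j) % nslots;
                    if (bc[i].psize == psz) {
                            void *p = bc[i].page;

                            bc[i].page = NULL;
                            bc[i].psize = 0;
                            return p;
                    }
            }
            return NULL;
    }
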
> > > >
> > > > My test case speeds up by about a factor of two. A make build
> > > > gets a small speedup.
> > > >
> > > > I have tested this quite intensively on amd64 myself. I am
> > > > asking for more tests, especially on more "exotic" platforms; I
> > > > will do arm64 soon. Tests can consist of running your favorite
> > > > programs, doing make builds, or running the tests in
> > > > regress/lib/libc/malloc.
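
For anyone who wants to run those regress tests: they use the standard
bsd.regress.mk machinery, so from a source checkout something like this
should do (paths assume /usr/src):

    $ cd /usr/src/regress/lib/libc/malloc
    $ make regress
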
> > > 
> > > I see openssl and tmux crash with this diff.
> > > /usr/src/regress/usr.sbin/openssl reproduces it on arm64, amd64,
> > > i386.
> > 
> > Are you running with any malloc flags?
> 
> This bug report enabled me to find a bug that pops up when the G
> (guard pages) malloc option is enabled.
> 
> New diff below. New tests appreciated.

This has been in snaps for a while.

Anybody willing to review and OK?

        -Otto


> Index: stdlib/malloc.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
> retrieving revision 1.272
> diff -u -p -r1.272 malloc.c
> --- stdlib/malloc.c   19 Sep 2021 09:15:22 -0000      1.272
> +++ stdlib/malloc.c   31 Jan 2022 16:27:31 -0000
> @@ -113,13 +113,27 @@ struct region_info {
>  
>  LIST_HEAD(chunk_head, chunk_info);
>  
> -#define MAX_CACHEABLE_SIZE   32
> -struct cache {
> -     void *pages[MALLOC_MAXCACHE];
> +/*
> + * Two caches, one for "small" regions, one for "big".
> + * Small cache is an array per size, big cache is one array with different
> + * sized regions
> + */
> +#define MAX_SMALLCACHEABLE_SIZE      32
> +#define MAX_BIGCACHEABLE_SIZE        512
> +/* If the total # of pages is larger than this, evict before inserting */
> +#define BIGCACHE_FILL(sz)    (MAX_BIGCACHEABLE_SIZE * (sz) / 4)
> +
> +struct smallcache {
> +     void **pages;
>       ushort length;
>       ushort max;
>  };
>  
> +struct bigcache {
> +     void *page;
> +     size_t psize;
> +};
> +
>  struct dir_info {
>       u_int32_t canary1;
>       int active;                     /* status of malloc */
> @@ -139,7 +153,10 @@ struct dir_info {
>       void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1];
>       u_char rbytes[32];              /* random bytes */
>                                       /* free pages cache */
> -     struct cache cache[MAX_CACHEABLE_SIZE];
> +     struct smallcache smallcache[MAX_SMALLCACHEABLE_SIZE];
> +     size_t bigcache_used;
> +     size_t bigcache_size;
> +     struct bigcache *bigcache;
>  #ifdef MALLOC_STATS
>       size_t inserts;
>       size_t insert_collisions;
> @@ -207,7 +224,7 @@ struct malloc_readonly {
>  #ifdef MALLOC_STATS
>       int     malloc_stats;           /* dump statistics at end */
>  #endif
> -     u_int32_t malloc_canary;        /* Matched against ones in malloc_pool */
> +     u_int32_t malloc_canary;        /* Matched against ones in pool */
>  };
>  
>  /* This object is mapped PROT_READ after initialisation to prevent tampering */
> @@ -714,18 +731,61 @@ unmap(struct dir_info *d, void *p, size_
>       size_t psz = sz >> MALLOC_PAGESHIFT;
>       void *r;
>       u_short i;
> -     struct cache *cache;
> +     struct smallcache *cache;
>  
>       if (sz != PAGEROUND(sz) || psz == 0)
>               wrterror(d, "munmap round");
>  
> -     if (psz > MAX_CACHEABLE_SIZE || d->cache[psz - 1].max == 0) {
> +     if (d->bigcache_size > 0 && psz > MAX_SMALLCACHEABLE_SIZE &&
> +         psz <= MAX_BIGCACHEABLE_SIZE) {
> +             u_short base = getrbyte(d);
> +             u_short j;
> +
> +             /* don't look through all slots */
> +             for (j = 0; j < d->bigcache_size / 4; j++) {
> +                     i = (base + j) % d->bigcache_size;
> +                     if (d->bigcache_used <
> +                         BIGCACHE_FILL(d->bigcache_size))  {
> +                             if (d->bigcache[i].psize == 0)
> +                                     break;
> +                     } else {
> +                             if (d->bigcache[i].psize != 0)
> +                                     break;
> +                     }
> +             }
> +             /* if we didn't find a preferred slot, use random one */
> +             if (d->bigcache[i].psize != 0) {
> +                     size_t tmp;
> +
> +                     r = d->bigcache[i].page;
> +                     d->bigcache_used -= d->bigcache[i].psize;
> +                     tmp = d->bigcache[i].psize << MALLOC_PAGESHIFT;
> +                     if (!mopts.malloc_freeunmap)
> +                             validate_junk(d, r, tmp);
> +                     if (munmap(r, tmp))
> +                              wrterror(d, "munmap %p", r);
> +                     STATS_SUB(d->malloc_used, tmp);
> +             }
> +             
> +             if (clear > 0)
> +                     explicit_bzero(p, clear);
> +             if (mopts.malloc_freeunmap) {
> +                     if (mprotect(p, sz, PROT_NONE))
> +                             wrterror(d, "mprotect %p", r);
> +             } else
> +                     junk_free(d->malloc_junk, p, sz);
> +             d->bigcache[i].page = p;
> +             d->bigcache[i].psize = psz;
> +             d->bigcache_used += psz;
> +             return;
> +     }
> +     if (psz > MAX_SMALLCACHEABLE_SIZE || d->smallcache[psz - 1].max == 0) {
>               if (munmap(p, sz))
>                       wrterror(d, "munmap %p", p);
>               STATS_SUB(d->malloc_used, sz);
>               return;
>       }
> -     cache = &d->cache[psz - 1];
> +     cache = &d->smallcache[psz - 1];
>       if (cache->length == cache->max) {
>               /* use a random slot */
>               i = getrbyte(d) % cache->max;
> @@ -753,9 +813,10 @@ unmap(struct dir_info *d, void *p, size_
>  static void *
>  map(struct dir_info *d, size_t sz, int zero_fill)
>  {
> -     size_t i, psz = sz >> MALLOC_PAGESHIFT;
> +     size_t psz = sz >> MALLOC_PAGESHIFT;
> +     u_short i;
>       void *p;
> -     struct cache *cache;
> +     struct smallcache *cache;
>  
>       if (mopts.malloc_canary != (d->canary1 ^ (u_int32_t)(uintptr_t)d) ||
>           d->canary1 != ~d->canary2)
> @@ -764,8 +825,35 @@ map(struct dir_info *d, size_t sz, int z
>               wrterror(d, "map round");
>  
>       
> -     if (psz <= MAX_CACHEABLE_SIZE && d->cache[psz - 1].max > 0) {
> -             cache = &d->cache[psz - 1];
> +     if (d->bigcache_size > 0 && psz > MAX_SMALLCACHEABLE_SIZE &&
> +         psz <= MAX_BIGCACHEABLE_SIZE) {
> +             size_t base = getrbyte(d);
> +             size_t cached = d->bigcache_used;
> +             ushort j;
> +
> +             for (j = 0; j < d->bigcache_size && cached >= psz; j++) {
> +                     i = (j + base) % d->bigcache_size;
> +                     if (d->bigcache[i].psize == psz) {
> +                             p = d->bigcache[i].page;
> +                             d->bigcache_used -= psz;
> +                             d->bigcache[i].page = NULL;
> +                             d->bigcache[i].psize = 0;
> +
> +                             if (!mopts.malloc_freeunmap)
> +                                     validate_junk(d, p, sz);
> +                             if (mopts.malloc_freeunmap)
> +                                     mprotect(p, sz, PROT_READ | PROT_WRITE);
> +                             if (zero_fill)
> +                                     memset(p, 0, sz);
> +                             else if (mopts.malloc_freeunmap)
> +                                     junk_free(d->malloc_junk, p, sz);
> +                             return p;
> +                     }
> +                     cached -= d->bigcache[i].psize;
> +             }
> +     }
> +     if (psz <= MAX_SMALLCACHEABLE_SIZE && d->smallcache[psz - 1].max > 0) {
> +             cache = &d->smallcache[psz - 1];
>               if (cache->length > 0) {
>                       if (cache->length == 1)
>                               p = cache->pages[--cache->length];
> @@ -795,10 +883,12 @@ map(struct dir_info *d, size_t sz, int z
>                                       void *q = (char*)p + i * sz;
>                                       cache->pages[i] = q;
>                                       if (!mopts.malloc_freeunmap)
> -                                             junk_free(d->malloc_junk, q, sz);
> +                                             junk_free(d->malloc_junk, q,
> +                                                 sz);
>                               }
>                               if (mopts.malloc_freeunmap)
> -                                     mprotect(p, (cache->max - 1) * sz, PROT_NONE);
> +                                     mprotect(p, (cache->max - 1) * sz,
> +                                         PROT_NONE);
>                               p = (char*)p + (cache->max - 1) * sz;
>                               /* zero fill not needed */
>                               return p;
> @@ -1161,8 +1251,9 @@ omalloc(struct dir_info *pool, size_t sz
>               } else {
>                       if (pool->malloc_junk == 2) {
>                               if (zero_fill)
> -                                     memset((char *)p + sz - mopts.malloc_guard,
> -                                         SOME_JUNK, psz - sz);
> +                                     memset((char *)p + sz -
> +                                         mopts.malloc_guard, SOME_JUNK,
> +                                         psz - sz);
>                               else
>                                       memset(p, SOME_JUNK,
>                                           psz - mopts.malloc_guard);
> @@ -1224,13 +1315,33 @@ _malloc_init(int from_rthreads)
>               if (i == 0) {
>                       omalloc_poolinit(&d, MAP_CONCEAL);
>                       d->malloc_junk = 2;
> -                     for (j = 0; j < MAX_CACHEABLE_SIZE; j++)
> -                             d->cache[j].max = 0;
> +                     d->bigcache_size = 0;
> +                     for (j = 0; j < MAX_SMALLCACHEABLE_SIZE; j++)
> +                             d->smallcache[j].max = 0;
>               } else {
> +                     size_t sz = 0;
> +
>                       omalloc_poolinit(&d, 0);
>                       d->malloc_junk = mopts.def_malloc_junk;
> -                     for (j = 0; j < MAX_CACHEABLE_SIZE; j++)
> -                             d->cache[j].max = mopts.def_maxcache >> (j / 8);
> +                     d->bigcache_size = mopts.def_maxcache;
> +                     for (j = 0; j < MAX_SMALLCACHEABLE_SIZE; j++) {
> +                             d->smallcache[j].max =
> +                                 mopts.def_maxcache >> (j / 8);
> +                             sz += d->smallcache[j].max * sizeof(void *);
> +                     }
> +                     sz += d->bigcache_size * sizeof(struct bigcache);
> +                     if (sz > 0) {
> +                             void *p = MMAP(sz, 0);
> +                             if (p == MAP_FAILED)
> +                                     wrterror(NULL,
> +                                         "malloc init mmap failed");
> +                             for (j = 0; j < MAX_SMALLCACHEABLE_SIZE; j++) {
> +                                     d->smallcache[j].pages = p;
> +                                     p = (char *)p + d->smallcache[j].max *
> +                                         sizeof(void *);
> +                             }
> +                             d->bigcache = p;
> +                     }
>               }
>               d->mutex = i;
>               mopts.malloc_pool[i] = d;
> @@ -1348,8 +1459,11 @@ ofree(struct dir_info **argpool, void *p
>       REALSIZE(sz, r);
>       if (pool->mmap_flag) {
>               clear = 1;
> -             if (!check)
> +             if (!check) {
>                       argsz = sz;
> +                     if (sz > MALLOC_MAXCHUNK)
> +                             argsz -= mopts.malloc_guard;
> +             }
>       }
>       if (check) {
>               if (sz <= MALLOC_MAXCHUNK) {
> @@ -1609,7 +1723,8 @@ orealloc(struct dir_info **argpool, void
>                                       wrterror(pool, "mprotect");
>                       }
>                       if (munmap((char *)r->p + rnewsz, roldsz - rnewsz))
> -                             wrterror(pool, "munmap %p", (char *)r->p + rnewsz);
> +                             wrterror(pool, "munmap %p", (char *)r->p +
> +                                 rnewsz);
>                       STATS_SUB(pool->malloc_used, roldsz - rnewsz);
>                       r->size = gnewsz;
>                       if (MALLOC_MOVE_COND(gnewsz)) {
> @@ -2091,7 +2206,7 @@ putleakinfo(void *f, size_t sz, int cnt)
>  {
>       struct leaknode key, *p;
>       static struct leaknode *page;
> -     static int used;
> +     static unsigned int used;
>  
>       if (cnt == 0 || page == MAP_FAILED)
>               return;
> @@ -2123,7 +2238,7 @@ static void
>  dump_leaks(int fd)
>  {
>       struct leaknode *p;
> -     int i = 0;
> +     unsigned int i = 0;
>  
>       dprintf(fd, "Leak report\n");
>       dprintf(fd, "                 f     sum      #    avg\n");
> @@ -2196,16 +2311,25 @@ dump_free_chunk_info(int fd, struct dir_
>  static void
>  dump_free_page_info(int fd, struct dir_info *d)
>  {
> -     struct cache *cache;
> +     struct smallcache *cache;
>       size_t i, total = 0;
>  
> -     dprintf(fd, "Cached:\n");
> -     for (i = 0; i < MAX_CACHEABLE_SIZE; i++) {
> -             cache = &d->cache[i];
> +     dprintf(fd, "Cached in small cache:\n");
> +     for (i = 0; i < MAX_SMALLCACHEABLE_SIZE; i++) {
> +             cache = &d->smallcache[i];
>               if (cache->length != 0)
> -                     dprintf(fd, "%zu(%u): %u = %zu\n", i + 1, cache->max, cache->length, cache->length * (i + 1));
> +                     dprintf(fd, "%zu(%u): %u = %zu\n", i + 1, cache->max,
> +                         cache->length, cache->length * (i + 1));
>               total += cache->length * (i + 1);
>       }
> +
> +     dprintf(fd, "Cached in big cache: %zu/%zu\n", d->bigcache_used,
> +         d->bigcache_size);
> +     for (i = 0; i < d->bigcache_size; i++) {
> +             if (d->bigcache[i].psize != 0)
> +                     dprintf(fd, "%zu: %zu\n", i, d->bigcache[i].psize);
> +             total += d->bigcache[i].psize;
> +     }
>       dprintf(fd, "Free pages cached: %zu\n", total);
>  }
>  
> @@ -2232,7 +2356,8 @@ malloc_dump1(int fd, int poolno, struct 
>       dump_free_chunk_info(fd, d);
>       dump_free_page_info(fd, d);
>       dprintf(fd,
> -         "slot)  hash d  type               page                  f size [free/n]\n");
> +         "slot)  hash d  type               page                  f "
> +         "size [free/n]\n");
>       for (i = 0; i < d->regions_total; i++) {
>               if (d->r[i].p != NULL) {
>                       size_t h = hash(d->r[i].p) &
> @@ -2298,13 +2423,15 @@ DEF_WEAK(malloc_gdump);
>  static void
>  malloc_exit(void)
>  {
> -     int save_errno = errno, fd, i;
> +     int save_errno = errno, fd;
> +     unsigned i;
>  
>       fd = open("malloc.out", O_RDWR|O_APPEND);
>       if (fd != -1) {
>               dprintf(fd, "******** Start dump %s *******\n", __progname);
>               dprintf(fd,
> -                 "MT=%d M=%u I=%d F=%d U=%d J=%d R=%d X=%d C=%d cache=%u G=%zu\n",
> +                 "MT=%d M=%u I=%d F=%d U=%d J=%d R=%d X=%d C=%d cache=%u "
> +                 "G=%zu\n",
>                   mopts.malloc_mt, mopts.malloc_mutexes,
>                   mopts.internal_funcs, mopts.malloc_freecheck,
>                   mopts.malloc_freeunmap, mopts.def_malloc_junk,
> 
