On 05/07/17(Wed) 09:44, David Gwynne wrote:
> the following adds a mutex to malloc and free to protect their
> internal state. this should be enough to make the api mpsafe,
> assuming the way they interact with uvm is mpsafe.
> 
> this only uses a single mutex around the entire malloc subsystem,
> but shuffles the code slightly to avoid holding it around calls
> into uvm. the shuffling is mostly to account for an allocation (ie,
> bump memuse early) before releasing the mutex to try and allocate
> it from uvm. if uvm fails, it rolls this back.
> 
> this is modelled on how the item accounting and locking is done in
> pools.
> 
> can anyone comment on the uvm safety?
> 
> also, it is obvious that the malloc debug code has started to rot
> a bit. i've fixed it up, but there's also a possible mp race between
> moving an item from the used list to the free list and unmapping
> it. i'd like to remove the malloc debug code and improve the
> robustness of malloc itself. pools have shown they can be fast and
> strict at the same time.

I'd welcome an mpsafe malloc(9), but I'm not sure it is worth spending
too much time making it shine.  I thought the plan was to base it on
pool(9) as soon as all the free(xx, 0) calls are converted.  Only ~700 left!

That said, we're being slowed down by a non-MP-safe malloc(9) and free(9).
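
For anyone reviewing the accounting change, the core of the diff is the
pattern described above: reserve memuse under the mutex, drop the mutex
around the slow uvm allocation, and roll the reservation back (with a
wakeup if we crossed back under the limit) when uvm fails.  Here's a
minimal userland sketch of that pattern, with pthreads standing in for
mtx_enter(9)/msleep(9) and malloc(3) standing in for uvm; acct_alloc(),
acct_mtx and the counters are hypothetical names, not part of the diff:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t acct_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t acct_cv = PTHREAD_COND_INITIALIZER;
static size_t memuse;
static size_t memlimit = 1024 * 1024;

void *
acct_alloc(size_t allocsize)
{
	void *p;
	int wake;

	pthread_mutex_lock(&acct_mtx);
	while (memuse >= memlimit)	/* cf. msleep(ksp, &malloc_mtx, ...) */
		pthread_cond_wait(&acct_cv, &acct_mtx);
	memuse += allocsize;		/* account for this early */
	pthread_mutex_unlock(&acct_mtx);

	/* the slow path runs unlocked, like uvm_km_kmemalloc_pla() */
	p = malloc(allocsize);
	if (p == NULL) {
		pthread_mutex_lock(&acct_mtx);
		memuse -= allocsize;	/* roll the early accounting back */
		/* wake sleepers only if we crossed back under the limit */
		wake = memuse + allocsize >= memlimit && memuse < memlimit;
		pthread_mutex_unlock(&acct_mtx);
		if (wake)
			pthread_cond_broadcast(&acct_cv);
	}
	return (p);
}

The matching free path would subtract from memuse under the lock and do
the same wakeup test, mirroring the KMEMSTATS block in free() below.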

> Index: kern_malloc.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_malloc.c,v
> retrieving revision 1.129
> diff -u -p -r1.129 kern_malloc.c
> --- kern_malloc.c     7 Jun 2017 13:30:36 -0000       1.129
> +++ kern_malloc.c     3 Jul 2017 10:14:49 -0000
> @@ -39,6 +39,7 @@
>  #include <sys/systm.h>
>  #include <sys/sysctl.h>
>  #include <sys/time.h>
> +#include <sys/mutex.h>
>  #include <sys/rwlock.h>
>  
>  #include <uvm/uvm_extern.h>
> @@ -94,6 +95,7 @@ u_int       nkmempages_min = 0;
>  #endif
>  u_int        nkmempages_max = 0;
>  
> +struct mutex malloc_mtx = MUTEX_INITIALIZER(IPL_VM);
>  struct kmembuckets bucket[MINBUCKET + 16];
>  #ifdef KMEMSTATS
>  struct kmemstats kmemstats[M_LAST];
> @@ -151,8 +153,8 @@ malloc(size_t size, int type, int flags)
>       struct kmemusage *kup;
>       struct kmem_freelist *freep;
>       long indx, npg, allocsize;
> -     int s;
>       caddr_t va, cp;
> +     int s;
>  #ifdef DIAGNOSTIC
>       int freshalloc;
>       char *savedtype;
> @@ -164,9 +166,6 @@ malloc(size_t size, int type, int flags)
>               panic("malloc: bogus type %d", type);
>  #endif
>  
> -     if (!cold)
> -             KERNEL_ASSERT_LOCKED();
> -
>       KASSERT(flags & (M_WAITOK | M_NOWAIT));
>  
>       if ((flags & M_NOWAIT) == 0) {
> @@ -176,10 +175,6 @@ malloc(size_t size, int type, int flags)
>               if (pool_debug == 2)
>                       yield();
>  #endif
> -             if (!cold && pool_debug) {
> -                     KERNEL_UNLOCK();
> -                     KERNEL_LOCK();
> -             }
>       }
>  
>  #ifdef MALLOC_DEBUG
> @@ -193,6 +188,7 @@ malloc(size_t size, int type, int flags)
>       if (size > 65535 * PAGE_SIZE) {
>               if (flags & M_CANFAIL) {
>  #ifndef SMALL_KERNEL
> +                     /* XXX lock */
>                       if (ratecheck(&malloc_lasterr, &malloc_errintvl))
>                               printf("malloc(): allocation too large, "
>                                   "type = %d, size = %lu\n", type, size);
> @@ -204,33 +200,38 @@ malloc(size_t size, int type, int flags)
>       }
>  
>       indx = BUCKETINDX(size);
> +     if (size > MAXALLOCSAVE)
> +             allocsize = round_page(size);
> +     else
> +             allocsize = 1 << indx;
>       kbp = &bucket[indx];
> -     s = splvm();
> +     mtx_enter(&malloc_mtx);
>  #ifdef KMEMSTATS
>       while (ksp->ks_memuse >= ksp->ks_limit) {
>               if (flags & M_NOWAIT) {
> -                     splx(s);
> +                     mtx_leave(&malloc_mtx);
>                       return (NULL);
>               }
>               if (ksp->ks_limblocks < 65535)
>                       ksp->ks_limblocks++;
> -             tsleep(ksp, PSWP+2, memname[type], 0);
> +             msleep(ksp, &malloc_mtx, PSWP+2, memname[type], 0);
>       }
> +     ksp->ks_memuse += allocsize; /* account for this early */
>       ksp->ks_size |= 1 << indx;
>  #endif
> -     if (size > MAXALLOCSAVE)
> -             allocsize = round_page(size);
> -     else
> -             allocsize = 1 << indx;
>       if (XSIMPLEQ_FIRST(&kbp->kb_freelist) == NULL) {
> +             mtx_leave(&malloc_mtx);
>               npg = atop(round_page(allocsize));
> +             s = splvm();
>               va = (caddr_t)uvm_km_kmemalloc_pla(kmem_map, NULL,
>                   (vsize_t)ptoa(npg), 0,
>                   ((flags & M_NOWAIT) ? UVM_KMF_NOWAIT : 0) |
>                   ((flags & M_CANFAIL) ? UVM_KMF_CANFAIL : 0),
>                   no_constraint.ucr_low, no_constraint.ucr_high,
>                   0, 0, 0);
> +             splx(s);
>               if (va == NULL) {
> +                     long wake;
>                       /*
>                        * Kmem_malloc() can return NULL, even if it can
>                        * wait, if there is no map space available, because
> @@ -241,9 +242,18 @@ malloc(size_t size, int type, int flags)
>                        */
>                       if ((flags & (M_NOWAIT|M_CANFAIL)) == 0)
>                               panic("malloc: out of space in kmem_map");
> -                     splx(s);
> +
> +                     mtx_enter(&malloc_mtx);
> +                     ksp->ks_memuse -= allocsize;
> +                     wake = ksp->ks_memuse + allocsize >= ksp->ks_limit &&
> +                         ksp->ks_memuse < ksp->ks_limit;
> +                     mtx_leave(&malloc_mtx);
> +                     if (wake)
> +                             wakeup(ksp);
> +                     
>                       return (NULL);
>               }
> +             mtx_enter(&malloc_mtx);
>  #ifdef KMEMSTATS
>               kbp->kb_total += kbp->kb_elmpercl;
>  #endif
> @@ -254,9 +264,6 @@ malloc(size_t size, int type, int flags)
>  #endif
>               if (allocsize > MAXALLOCSAVE) {
>                       kup->ku_pagecnt = npg;
> -#ifdef KMEMSTATS
> -                     ksp->ks_memuse += allocsize;
> -#endif
>                       goto out;
>               }
>  #ifdef KMEMSTATS
> @@ -334,17 +341,14 @@ malloc(size_t size, int type, int flags)
>               panic("malloc: lost data");
>       kup->ku_freecnt--;
>       kbp->kb_totalfree--;
> -     ksp->ks_memuse += 1 << indx;
>  out:
>       kbp->kb_calls++;
>       ksp->ks_inuse++;
>       ksp->ks_calls++;
> -     if (ksp->ks_memuse > ksp->ks_maxused)
> -             ksp->ks_maxused = ksp->ks_memuse;
>  #else
>  out:
>  #endif
> -     splx(s);
> +     mtx_leave(&malloc_mtx);
>  
>       if ((flags & M_ZERO) && va != NULL)
>               memset(va, 0, size);
> @@ -369,9 +373,6 @@ free(void *addr, int type, size_t freeds
>       struct kmemstats *ksp = &kmemstats[type];
>  #endif
>  
> -     if (!cold)
> -             KERNEL_ASSERT_LOCKED();
> -
>       if (addr == NULL)
>               return;
>  
> @@ -391,7 +392,6 @@ free(void *addr, int type, size_t freeds
>       kbp = &bucket[kup->ku_indx];
>       if (size > MAXALLOCSAVE)
>               size = kup->ku_pagecnt << PAGE_SHIFT;
> -     s = splvm();
>  #ifdef DIAGNOSTIC
>       if (freedsize != 0 && freedsize > size)
>               panic("free: size too large %zu > %ld (%p) type %s",
> @@ -412,8 +412,11 @@ free(void *addr, int type, size_t freeds
>                       addr, size, memname[type], alloc);
>  #endif /* DIAGNOSTIC */
>       if (size > MAXALLOCSAVE) {
> +             s = splvm();
>               uvm_km_free(kmem_map, (vaddr_t)addr, ptoa(kup->ku_pagecnt));
> +             splx(s);
>  #ifdef KMEMSTATS
> +             mtx_enter(&malloc_mtx);
>               ksp->ks_memuse -= size;
>               kup->ku_indx = 0;
>               kup->ku_pagecnt = 0;
> @@ -422,11 +425,12 @@ free(void *addr, int type, size_t freeds
>                       wakeup(ksp);
>               ksp->ks_inuse--;
>               kbp->kb_total -= 1;
> +             mtx_leave(&malloc_mtx);
>  #endif
> -             splx(s);
>               return;
>       }
>       freep = (struct kmem_freelist *)addr;
> +     mtx_enter(&malloc_mtx);
>  #ifdef DIAGNOSTIC
>       /*
>        * Check for multiple frees. Use a quick check to see if
> @@ -468,7 +472,7 @@ free(void *addr, int type, size_t freeds
>       ksp->ks_inuse--;
>  #endif
>       XSIMPLEQ_INSERT_TAIL(&kbp->kb_freelist, freep, kf_flist);
> -     splx(s);
> +     mtx_leave(&malloc_mtx);
>  }
>  
>  /*
> @@ -578,6 +582,9 @@ sysctl_malloc(int *name, u_int namelen, 
>      size_t newlen, struct proc *p)
>  {
>       struct kmembuckets kb;
> +#ifdef KMEMSTATS
> +     struct kmemstats km;
> +#endif
>  #if defined(KMEMSTATS) || defined(DIAGNOSTIC) || defined(FFS_SOFTUPDATES)
>       int error;
>  #endif
> @@ -606,15 +613,19 @@ sysctl_malloc(int *name, u_int namelen, 
>               return (sysctl_rdstring(oldp, oldlenp, newp, buckstring));
>  
>       case KERN_MALLOC_BUCKET:
> +             mtx_enter(&malloc_mtx);
>               memcpy(&kb, &bucket[BUCKETINDX(name[1])], sizeof(kb));
> +             mtx_leave(&malloc_mtx);
>               memset(&kb.kb_freelist, 0, sizeof(kb.kb_freelist));
>               return (sysctl_rdstruct(oldp, oldlenp, newp, &kb, sizeof(kb)));
>       case KERN_MALLOC_KMEMSTATS:
>  #ifdef KMEMSTATS
>               if ((name[1] < 0) || (name[1] >= M_LAST))
>                       return (EINVAL);
> -             return (sysctl_rdstruct(oldp, oldlenp, newp,
> -                 &kmemstats[name[1]], sizeof(struct kmemstats)));
> +             mtx_enter(&malloc_mtx);
> +             memcpy(&km, &kmemstats[name[1]], sizeof(km));
> +             mtx_leave(&malloc_mtx);
> +             return (sysctl_rdstruct(oldp, oldlenp, newp, &km, sizeof(km)));
>  #else
>               return (EOPNOTSUPP);
>  #endif
> Index: kern_malloc_debug.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_malloc_debug.c,v
> retrieving revision 1.34
> diff -u -p -r1.34 kern_malloc_debug.c
> --- kern_malloc_debug.c       16 Nov 2014 12:31:00 -0000      1.34
> +++ kern_malloc_debug.c       3 Jul 2017 10:14:49 -0000
> @@ -56,8 +56,10 @@
>  #include <sys/malloc.h>
>  #include <sys/systm.h>
>  #include <sys/pool.h>
> +#include <sys/mutex.h>
>  
>  #include <uvm/uvm_extern.h>
> +#include <uvm/uvm.h>
>  
>  /*
>   * debug_malloc_type and debug_malloc_size define the type and size of
> @@ -104,12 +106,13 @@ int debug_malloc_chunks_on_freelist;
>  int debug_malloc_initialized;
>  
>  struct pool debug_malloc_pool;
> +struct mutex debug_malloc_mtx = MUTEX_INITIALIZER(IPL_VM);
>  
>  int
>  debug_malloc(unsigned long size, int type, int flags, void **addr)
>  {
>       struct debug_malloc_entry *md = NULL;
> -     int s, wait = (flags & M_NOWAIT) == 0;
> +     int wait = (flags & M_NOWAIT) == 0;
>  
>       /* Careful not to compare unsigned long to int -1 */
>       if (((type != debug_malloc_type && debug_malloc_type != 0) ||
> @@ -123,13 +126,14 @@ debug_malloc(unsigned long size, int typ
>       if (size > PAGE_SIZE)
>               return (0);
>  
> -     s = splvm();
>       if (debug_malloc_chunks_on_freelist < MALLOC_DEBUG_CHUNKS)
>               debug_malloc_allocate_free(wait);
>  
> +     mtx_enter(&debug_malloc_mtx);
> +
>       md = TAILQ_FIRST(&debug_malloc_freelist);
>       if (md == NULL) {
> -             splx(s);
> +             mtx_leave(&debug_malloc_mtx);
>               return (0);
>       }
>       TAILQ_REMOVE(&debug_malloc_freelist, md, md_list);
> @@ -137,14 +141,15 @@ debug_malloc(unsigned long size, int typ
>  
>       TAILQ_INSERT_HEAD(&debug_malloc_usedlist, md, md_list);
>       debug_malloc_allocs++;
> -     splx(s);
> -
> -     pmap_kenter_pa(md->md_va, md->md_pa, PROT_READ | PROT_WRITE);
> -     pmap_update(pmap_kernel());
>  
>       md->md_size = size;
>       md->md_type = type;
>  
> +     mtx_leave(&debug_malloc_mtx);
> +
> +     pmap_kenter_pa(md->md_va, md->md_pa, PROT_READ | PROT_WRITE);
> +     pmap_update(pmap_kernel());
> +
>       /*
>        * Align the returned addr so that it ends where the first page
>        * ends. roundup to get decent alignment.
> @@ -158,7 +163,6 @@ debug_free(void *addr, int type)
>  {
>       struct debug_malloc_entry *md;
>       vaddr_t va;
> -     int s;
>  
>       if (type != debug_malloc_type && debug_malloc_type != 0 &&
>           type != M_DEBUG)
> @@ -169,7 +173,7 @@ debug_free(void *addr, int type)
>        */
>       va = trunc_page((vaddr_t)addr);
>  
> -     s = splvm();
> +     mtx_enter(&debug_malloc_mtx);
>       TAILQ_FOREACH(md, &debug_malloc_usedlist, md_list)
>               if (md->md_va == va)
>                       break;
> @@ -185,21 +189,24 @@ debug_free(void *addr, int type)
>               TAILQ_FOREACH(md, &debug_malloc_freelist, md_list)
>                       if (md->md_va == va)
>                               panic("debug_free: already free");
> -             splx(s);
> +             mtx_leave(&debug_malloc_mtx);
>               return (0);
>       }
>  
>       debug_malloc_frees++;
>       TAILQ_REMOVE(&debug_malloc_usedlist, md, md_list);
> +     mtx_leave(&debug_malloc_mtx);
>  
> -     TAILQ_INSERT_TAIL(&debug_malloc_freelist, md, md_list);
> -     debug_malloc_chunks_on_freelist++;
>       /*
>        * unmap the page.
>        */
>       pmap_kremove(md->md_va, PAGE_SIZE);
>       pmap_update(pmap_kernel());
> -     splx(s);
> +
> +     mtx_enter(&debug_malloc_mtx);
> +     TAILQ_INSERT_TAIL(&debug_malloc_freelist, md, md_list);
> +     debug_malloc_chunks_on_freelist++;
> +     mtx_leave(&debug_malloc_mtx);
>  
>       return (1);
>  }
> @@ -217,7 +224,7 @@ debug_malloc_init(void)
>       debug_malloc_chunks_on_freelist = 0;
>  
>       pool_init(&debug_malloc_pool, sizeof(struct debug_malloc_entry),
> -         0, 0, 0, "mdbepl", NULL);
> +         0, 0, IPL_VM, "mdbepl", NULL);
>  
>       debug_malloc_initialized = 1;
>  }
> @@ -233,19 +240,18 @@ debug_malloc_allocate_free(int wait)
>       vaddr_t va, offset;
>       struct vm_page *pg;
>       struct debug_malloc_entry *md;
> -
> -     splassert(IPL_VM);
> +     int s;
>  
>       md = pool_get(&debug_malloc_pool, wait ? PR_WAITOK : PR_NOWAIT);
>       if (md == NULL)
>               return;
>  
> +     s = splvm();
> +
>       va = uvm_km_kmemalloc(kmem_map, NULL, PAGE_SIZE * 2,
>           UVM_KMF_VALLOC | (wait ? 0: UVM_KMF_NOWAIT));
> -     if (va == 0) {
> -             pool_put(&debug_malloc_pool, md);
> -             return;
> -     }
> +     if (va == 0)
> +             goto put;
>  
>       offset = va - vm_map_min(kernel_map);
>       for (;;) {
> @@ -258,20 +264,31 @@ debug_malloc_allocate_free(int wait)
>               if (pg)
>                       break;
>  
> -             if (wait == 0) {
> -                     uvm_unmap(kmem_map, va, va + PAGE_SIZE * 2);
> -                     pool_put(&debug_malloc_pool, md);
> -                     return;
> -             }
> +             if (wait == 0)
> +                     goto unmap;
> +
>               uvm_wait("debug_malloc");
>       }
>  
> +     splx(s);
> +
>       md->md_va = va;
>       md->md_pa = VM_PAGE_TO_PHYS(pg);
>  
> +     mtx_enter(&debug_malloc_mtx);
>       debug_malloc_pages++;
>       TAILQ_INSERT_HEAD(&debug_malloc_freelist, md, md_list);
>       debug_malloc_chunks_on_freelist++;
> +     mtx_leave(&debug_malloc_mtx);
> +
> +     return;
> +
> +unmap:
> +     uvm_unmap(kmem_map, va, va + PAGE_SIZE * 2);
> +put:
> +     splx(s);
> +     pool_put(&debug_malloc_pool, md);
> +     return;
>  }
>  
>  void
> @@ -311,7 +328,7 @@ debug_malloc_printit(
>                       if (addr >= md->md_va &&
>                           addr < md->md_va + 2 * PAGE_SIZE) {
>                               (*pr)("Memory at address 0x%lx is in a freed "
> -                                   "area. type %d, size: %d\n ",
> +                                   "area. type %d, size: %zu\n ",
>                                     addr, md->md_type, md->md_size);
>                               return;
>                       }
> @@ -320,7 +337,7 @@ debug_malloc_printit(
>                       if (addr >= md->md_va + PAGE_SIZE &&
>                           addr < md->md_va + 2 * PAGE_SIZE) {
>                               (*pr)("Memory at address 0x%lx is just outside "
> -                                   "an allocated area. type %d, size: %d\n",
> +                                   "an allocated area. type %d, size: %zu\n",
>                                     addr, md->md_type, md->md_size);
>                               return;
>                       }
> 
