On Wed, Jul 05, 2017 at 09:44:19AM +1000, David Gwynne wrote:
> the following adds a mutex to malloc and free to protect their
> internal state. this should be enough to make the api mpsafe,
> assuming the way they interact with uvm is mpsafe.
> 
> this only uses a single mutex around the entire malloc subsystem,
> but shuffles the code slightly to avoid holding it around calls
> into uvm. the shuffling is mostly to account for an allocation up
> front (ie, bump memuse early) before releasing the mutex to try
> the allocation in uvm. if uvm fails, the accounting is rolled back.
> 
> this is modelled on how the item accounting and locking are done
> in pools.
> 
> can anyone comment on the uvm safety?
> 
> also, it is obvious that the malloc debug code has started to rot.
> ive fixed it up a bit, but there's now a possible mp race in the
> window where an item is moved off the used list, unmapped, and
> then put on the free list. id like to remove the malloc debug
> code and improve the robustness of malloc itself. pools have
> shown they can be fast and strict at the same time.

visa pointed out that the diff didnt work on RAMDISKs, and that it
dropped maintenance of ks_maxused. the one below fixes both issues.

i'm going to commit the new one tomorrow with oks from kettenis@
and visa@ unless anyone objects.

cheers,
dlg
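
for reference, this is roughly the pattern the diff follows (a
minimal sketch with made-up names; backend_alloc() is hypothetical
and stands in for the call into uvm, it is not a real kernel
interface): charge the allocation against the limit while holding
the mutex, drop the mutex across the call into the backend, and
roll the charge back if the backend fails.

/*
 * minimal sketch of the early-accounting pattern. backend_alloc()
 * is hypothetical; the mutex/msleep/wakeup calls are the normal
 * kernel locking api.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mutex.h>

void	*backend_alloc(u_long);		/* hypothetical, eg uvm */

struct acct {
	struct mutex	 mtx;
	u_long		 memuse;
	u_long		 limit;
};

void *
acct_alloc(struct acct *a, u_long sz)
{
	void *p;
	int wake;

	mtx_enter(&a->mtx);
	while (a->memuse >= a->limit) {
		/* wait for a free to bring us back under the limit */
		msleep(a, &a->mtx, PSWP+2, "acct", 0);
	}
	a->memuse += sz;	/* account for this early */
	mtx_leave(&a->mtx);

	p = backend_alloc(sz);
	if (p == NULL) {
		mtx_enter(&a->mtx);
		a->memuse -= sz;	/* roll the early accounting back */
		/* wake sleepers only if we crossed back under the limit */
		wake = a->memuse + sz >= a->limit &&
		    a->memuse < a->limit;
		mtx_leave(&a->mtx);
		if (wake)
			wakeup(a);
	}

	return (p);
}

the real diff does the same dance with ksp->ks_memuse in malloc(),
and free() does the subtract-and-wakeup half under the same mutex.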

Index: kern_malloc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_malloc.c,v
retrieving revision 1.129
diff -u -p -r1.129 kern_malloc.c
--- kern_malloc.c       7 Jun 2017 13:30:36 -0000       1.129
+++ kern_malloc.c       10 Jul 2017 06:33:38 -0000
@@ -39,6 +39,7 @@
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
+#include <sys/mutex.h>
 #include <sys/rwlock.h>
 
 #include <uvm/uvm_extern.h>
@@ -94,6 +95,7 @@ u_int nkmempages_min = 0;
 #endif
 u_int  nkmempages_max = 0;
 
+struct mutex malloc_mtx = MUTEX_INITIALIZER(IPL_VM);
 struct kmembuckets bucket[MINBUCKET + 16];
 #ifdef KMEMSTATS
 struct kmemstats kmemstats[M_LAST];
@@ -151,36 +153,30 @@ malloc(size_t size, int type, int flags)
        struct kmemusage *kup;
        struct kmem_freelist *freep;
        long indx, npg, allocsize;
-       int s;
        caddr_t va, cp;
+       int s;
 #ifdef DIAGNOSTIC
        int freshalloc;
        char *savedtype;
 #endif
 #ifdef KMEMSTATS
        struct kmemstats *ksp = &kmemstats[type];
+       int wake;
 
        if (((unsigned long)type) <= 1 || ((unsigned long)type) >= M_LAST)
                panic("malloc: bogus type %d", type);
 #endif
 
-       if (!cold)
-               KERNEL_ASSERT_LOCKED();
-
        KASSERT(flags & (M_WAITOK | M_NOWAIT));
 
+#ifdef DIAGNOSTIC
        if ((flags & M_NOWAIT) == 0) {
                extern int pool_debug;
-#ifdef DIAGNOSTIC
                assertwaitok();
                if (pool_debug == 2)
                        yield();
-#endif
-               if (!cold && pool_debug) {
-                       KERNEL_UNLOCK();
-                       KERNEL_LOCK();
-               }
        }
+#endif
 
 #ifdef MALLOC_DEBUG
        if (debug_malloc(size, type, flags, (void **)&va)) {
@@ -193,6 +189,7 @@ malloc(size_t size, int type, int flags)
        if (size > 65535 * PAGE_SIZE) {
                if (flags & M_CANFAIL) {
 #ifndef SMALL_KERNEL
+                       /* XXX lock */
                        if (ratecheck(&malloc_lasterr, &malloc_errintvl))
                                printf("malloc(): allocation too large, "
                                    "type = %d, size = %lu\n", type, size);
@@ -204,32 +201,36 @@ malloc(size_t size, int type, int flags)
        }
 
        indx = BUCKETINDX(size);
+       if (size > MAXALLOCSAVE)
+               allocsize = round_page(size);
+       else
+               allocsize = 1 << indx;
        kbp = &bucket[indx];
-       s = splvm();
+       mtx_enter(&malloc_mtx);
 #ifdef KMEMSTATS
        while (ksp->ks_memuse >= ksp->ks_limit) {
                if (flags & M_NOWAIT) {
-                       splx(s);
+                       mtx_leave(&malloc_mtx);
                        return (NULL);
                }
                if (ksp->ks_limblocks < 65535)
                        ksp->ks_limblocks++;
-               tsleep(ksp, PSWP+2, memname[type], 0);
+               msleep(ksp, &malloc_mtx, PSWP+2, memname[type], 0);
        }
+       ksp->ks_memuse += allocsize; /* account for this early */
        ksp->ks_size |= 1 << indx;
 #endif
-       if (size > MAXALLOCSAVE)
-               allocsize = round_page(size);
-       else
-               allocsize = 1 << indx;
        if (XSIMPLEQ_FIRST(&kbp->kb_freelist) == NULL) {
+               mtx_leave(&malloc_mtx);
                npg = atop(round_page(allocsize));
+               s = splvm();
                va = (caddr_t)uvm_km_kmemalloc_pla(kmem_map, NULL,
                    (vsize_t)ptoa(npg), 0,
                    ((flags & M_NOWAIT) ? UVM_KMF_NOWAIT : 0) |
                    ((flags & M_CANFAIL) ? UVM_KMF_CANFAIL : 0),
                    no_constraint.ucr_low, no_constraint.ucr_high,
                    0, 0, 0);
+               splx(s);
                if (va == NULL) {
                        /*
                         * Kmem_malloc() can return NULL, even if it can
@@ -241,9 +242,20 @@ malloc(size_t size, int type, int flags)
                         */
                        if ((flags & (M_NOWAIT|M_CANFAIL)) == 0)
                                panic("malloc: out of space in kmem_map");
-                       splx(s);
+
+#ifdef KMEMSTATS
+                       mtx_enter(&malloc_mtx);
+                       ksp->ks_memuse -= allocsize;
+                       wake = ksp->ks_memuse + allocsize >= ksp->ks_limit &&
+                           ksp->ks_memuse < ksp->ks_limit;
+                       mtx_leave(&malloc_mtx);
+                       if (wake)
+                               wakeup(ksp);
+#endif
+
                        return (NULL);
                }
+               mtx_enter(&malloc_mtx);
 #ifdef KMEMSTATS
                kbp->kb_total += kbp->kb_elmpercl;
 #endif
@@ -254,9 +266,6 @@ malloc(size_t size, int type, int flags)
 #endif
                if (allocsize > MAXALLOCSAVE) {
                        kup->ku_pagecnt = npg;
-#ifdef KMEMSTATS
-                       ksp->ks_memuse += allocsize;
-#endif
                        goto out;
                }
 #ifdef KMEMSTATS
@@ -334,7 +343,6 @@ malloc(size_t size, int type, int flags)
                panic("malloc: lost data");
        kup->ku_freecnt--;
        kbp->kb_totalfree--;
-       ksp->ks_memuse += 1 << indx;
 out:
        kbp->kb_calls++;
        ksp->ks_inuse++;
@@ -344,7 +352,7 @@ out:
 #else
 out:
 #endif
-       splx(s);
+       mtx_leave(&malloc_mtx);
 
        if ((flags & M_ZERO) && va != NULL)
                memset(va, 0, size);
@@ -369,9 +377,6 @@ free(void *addr, int type, size_t freeds
        struct kmemstats *ksp = &kmemstats[type];
 #endif
 
-       if (!cold)
-               KERNEL_ASSERT_LOCKED();
-
        if (addr == NULL)
                return;
 
@@ -391,7 +396,6 @@ free(void *addr, int type, size_t freeds
        kbp = &bucket[kup->ku_indx];
        if (size > MAXALLOCSAVE)
                size = kup->ku_pagecnt << PAGE_SHIFT;
-       s = splvm();
 #ifdef DIAGNOSTIC
        if (freedsize != 0 && freedsize > size)
                panic("free: size too large %zu > %ld (%p) type %s",
@@ -412,8 +416,11 @@ free(void *addr, int type, size_t freeds
                        addr, size, memname[type], alloc);
 #endif /* DIAGNOSTIC */
        if (size > MAXALLOCSAVE) {
+               s = splvm();
                uvm_km_free(kmem_map, (vaddr_t)addr, ptoa(kup->ku_pagecnt));
+               splx(s);
 #ifdef KMEMSTATS
+               mtx_enter(&malloc_mtx);
                ksp->ks_memuse -= size;
                kup->ku_indx = 0;
                kup->ku_pagecnt = 0;
@@ -422,11 +429,12 @@ free(void *addr, int type, size_t freeds
                        wakeup(ksp);
                ksp->ks_inuse--;
                kbp->kb_total -= 1;
+               mtx_leave(&malloc_mtx);
 #endif
-               splx(s);
                return;
        }
        freep = (struct kmem_freelist *)addr;
+       mtx_enter(&malloc_mtx);
 #ifdef DIAGNOSTIC
        /*
         * Check for multiple frees. Use a quick check to see if
@@ -468,7 +476,7 @@ free(void *addr, int type, size_t freeds
        ksp->ks_inuse--;
 #endif
        XSIMPLEQ_INSERT_TAIL(&kbp->kb_freelist, freep, kf_flist);
-       splx(s);
+       mtx_leave(&malloc_mtx);
 }
 
 /*
@@ -578,6 +586,9 @@ sysctl_malloc(int *name, u_int namelen, 
     size_t newlen, struct proc *p)
 {
        struct kmembuckets kb;
+#ifdef KMEMSTATS
+       struct kmemstats km;
+#endif
 #if defined(KMEMSTATS) || defined(DIAGNOSTIC) || defined(FFS_SOFTUPDATES)
        int error;
 #endif
@@ -606,15 +617,19 @@ sysctl_malloc(int *name, u_int namelen, 
                return (sysctl_rdstring(oldp, oldlenp, newp, buckstring));
 
        case KERN_MALLOC_BUCKET:
+               mtx_enter(&malloc_mtx);
                memcpy(&kb, &bucket[BUCKETINDX(name[1])], sizeof(kb));
+               mtx_leave(&malloc_mtx);
                memset(&kb.kb_freelist, 0, sizeof(kb.kb_freelist));
                return (sysctl_rdstruct(oldp, oldlenp, newp, &kb, sizeof(kb)));
        case KERN_MALLOC_KMEMSTATS:
 #ifdef KMEMSTATS
                if ((name[1] < 0) || (name[1] >= M_LAST))
                        return (EINVAL);
-               return (sysctl_rdstruct(oldp, oldlenp, newp,
-                   &kmemstats[name[1]], sizeof(struct kmemstats)));
+               mtx_enter(&malloc_mtx);
+               memcpy(&km, &kmemstats[name[1]], sizeof(km));
+               mtx_leave(&malloc_mtx);
+               return (sysctl_rdstruct(oldp, oldlenp, newp, &km, sizeof(km)));
 #else
                return (EOPNOTSUPP);
 #endif
Index: kern_malloc_debug.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_malloc_debug.c,v
retrieving revision 1.34
diff -u -p -r1.34 kern_malloc_debug.c
--- kern_malloc_debug.c 16 Nov 2014 12:31:00 -0000      1.34
+++ kern_malloc_debug.c 10 Jul 2017 06:33:38 -0000
@@ -56,8 +56,10 @@
 #include <sys/malloc.h>
 #include <sys/systm.h>
 #include <sys/pool.h>
+#include <sys/mutex.h>
 
 #include <uvm/uvm_extern.h>
+#include <uvm/uvm.h>
 
 /*
  * debug_malloc_type and debug_malloc_size define the type and size of
@@ -104,12 +106,13 @@ int debug_malloc_chunks_on_freelist;
 int debug_malloc_initialized;
 
 struct pool debug_malloc_pool;
+struct mutex debug_malloc_mtx = MUTEX_INITIALIZER(IPL_VM);
 
 int
 debug_malloc(unsigned long size, int type, int flags, void **addr)
 {
        struct debug_malloc_entry *md = NULL;
-       int s, wait = (flags & M_NOWAIT) == 0;
+       int wait = (flags & M_NOWAIT) == 0;
 
        /* Careful not to compare unsigned long to int -1 */
        if (((type != debug_malloc_type && debug_malloc_type != 0) ||
@@ -123,13 +126,14 @@ debug_malloc(unsigned long size, int typ
        if (size > PAGE_SIZE)
                return (0);
 
-       s = splvm();
        if (debug_malloc_chunks_on_freelist < MALLOC_DEBUG_CHUNKS)
                debug_malloc_allocate_free(wait);
 
+       mtx_enter(&debug_malloc_mtx);
+
        md = TAILQ_FIRST(&debug_malloc_freelist);
        if (md == NULL) {
-               splx(s);
+               mtx_leave(&debug_malloc_mtx);
                return (0);
        }
        TAILQ_REMOVE(&debug_malloc_freelist, md, md_list);
@@ -137,14 +141,15 @@ debug_malloc(unsigned long size, int typ
 
        TAILQ_INSERT_HEAD(&debug_malloc_usedlist, md, md_list);
        debug_malloc_allocs++;
-       splx(s);
-
-       pmap_kenter_pa(md->md_va, md->md_pa, PROT_READ | PROT_WRITE);
-       pmap_update(pmap_kernel());
 
        md->md_size = size;
        md->md_type = type;
 
+       mtx_leave(&debug_malloc_mtx);
+
+       pmap_kenter_pa(md->md_va, md->md_pa, PROT_READ | PROT_WRITE);
+       pmap_update(pmap_kernel());
+
        /*
         * Align the returned addr so that it ends where the first page
         * ends. roundup to get decent alignment.
@@ -158,7 +163,6 @@ debug_free(void *addr, int type)
 {
        struct debug_malloc_entry *md;
        vaddr_t va;
-       int s;
 
        if (type != debug_malloc_type && debug_malloc_type != 0 &&
            type != M_DEBUG)
@@ -169,7 +173,7 @@ debug_free(void *addr, int type)
         */
        va = trunc_page((vaddr_t)addr);
 
-       s = splvm();
+       mtx_enter(&debug_malloc_mtx);
        TAILQ_FOREACH(md, &debug_malloc_usedlist, md_list)
                if (md->md_va == va)
                        break;
@@ -185,21 +189,24 @@ debug_free(void *addr, int type)
                TAILQ_FOREACH(md, &debug_malloc_freelist, md_list)
                        if (md->md_va == va)
                                panic("debug_free: already free");
-               splx(s);
+               mtx_leave(&debug_malloc_mtx);
                return (0);
        }
 
        debug_malloc_frees++;
        TAILQ_REMOVE(&debug_malloc_usedlist, md, md_list);
+       mtx_leave(&debug_malloc_mtx);
 
-       TAILQ_INSERT_TAIL(&debug_malloc_freelist, md, md_list);
-       debug_malloc_chunks_on_freelist++;
        /*
         * unmap the page.
         */
        pmap_kremove(md->md_va, PAGE_SIZE);
        pmap_update(pmap_kernel());
-       splx(s);
+
+       mtx_enter(&debug_malloc_mtx);
+       TAILQ_INSERT_TAIL(&debug_malloc_freelist, md, md_list);
+       debug_malloc_chunks_on_freelist++;
+       mtx_leave(&debug_malloc_mtx);
 
        return (1);
 }
@@ -217,7 +224,7 @@ debug_malloc_init(void)
        debug_malloc_chunks_on_freelist = 0;
 
        pool_init(&debug_malloc_pool, sizeof(struct debug_malloc_entry),
-           0, 0, 0, "mdbepl", NULL);
+           0, 0, IPL_VM, "mdbepl", NULL);
 
        debug_malloc_initialized = 1;
 }
@@ -233,19 +240,18 @@ debug_malloc_allocate_free(int wait)
        vaddr_t va, offset;
        struct vm_page *pg;
        struct debug_malloc_entry *md;
-
-       splassert(IPL_VM);
+       int s;
 
        md = pool_get(&debug_malloc_pool, wait ? PR_WAITOK : PR_NOWAIT);
        if (md == NULL)
                return;
 
+       s = splvm();
+
        va = uvm_km_kmemalloc(kmem_map, NULL, PAGE_SIZE * 2,
            UVM_KMF_VALLOC | (wait ? 0: UVM_KMF_NOWAIT));
-       if (va == 0) {
-               pool_put(&debug_malloc_pool, md);
-               return;
-       }
+       if (va == 0)
+               goto put;
 
        offset = va - vm_map_min(kernel_map);
        for (;;) {
@@ -258,20 +264,31 @@ debug_malloc_allocate_free(int wait)
                if (pg)
                        break;
 
-               if (wait == 0) {
-                       uvm_unmap(kmem_map, va, va + PAGE_SIZE * 2);
-                       pool_put(&debug_malloc_pool, md);
-                       return;
-               }
+               if (wait == 0)
+                       goto unmap;
+
                uvm_wait("debug_malloc");
        }
 
+       splx(s);
+
        md->md_va = va;
        md->md_pa = VM_PAGE_TO_PHYS(pg);
 
+       mtx_enter(&debug_malloc_mtx);
        debug_malloc_pages++;
        TAILQ_INSERT_HEAD(&debug_malloc_freelist, md, md_list);
        debug_malloc_chunks_on_freelist++;
+       mtx_leave(&debug_malloc_mtx);
+
+       return;
+
+unmap:
+       uvm_unmap(kmem_map, va, va + PAGE_SIZE * 2);
+put:
+       splx(s);
+       pool_put(&debug_malloc_pool, md);
+       return;
 }
 
 void
@@ -311,7 +328,7 @@ debug_malloc_printit(
                        if (addr >= md->md_va &&
                            addr < md->md_va + 2 * PAGE_SIZE) {
                                (*pr)("Memory at address 0x%lx is in a freed "
-                                     "area. type %d, size: %d\n ",
+                                     "area. type %d, size: %zu\n ",
                                      addr, md->md_type, md->md_size);
                                return;
                        }
@@ -320,7 +337,7 @@ debug_malloc_printit(
                        if (addr >= md->md_va + PAGE_SIZE &&
                            addr < md->md_va + 2 * PAGE_SIZE) {
                                (*pr)("Memory at address 0x%lx is just outside "
-                                     "an allocated area. type %d, size: %d\n",
+                                     "an allocated area. type %d, size: %zu\n",
                                      addr, md->md_type, md->md_size);
                                return;
                        }
