We don't need to hold the malloc lock when making syscalls like mmap
and munmap if we're just a little careful about the order of
operations. This will allow other threads to concurrently allocate
perhaps smaller chunks while the first thread is in the kernel.

This makes a huge difference in a simple benchmark that allocates
chunks in one thread and pages in a second thread. The chunk thread
finishes almost immediately, instead of contending for the lock and
running as slowly as the page thread. It's admittedly a contrived
benchmark, but the changes are very simple, so I think it's worth it.

There are some other possibly expensive operations to tweak, but this
covers the smallest, simplest sections.

Index: malloc.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.170
diff -u -p -r1.170 malloc.c
--- malloc.c    9 Jul 2014 19:11:00 -0000       1.170
+++ malloc.c    11 Jul 2014 10:23:10 -0000
@@ -93,6 +93,15 @@
 #define MQUERY(a, sz)  mquery((a), (size_t)(sz), PROT_READ | PROT_WRITE, \
     MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, (off_t)0)
 
+#define KERNENTER() do { if (__isthreaded) {   \
+       malloc_active--;                        \
+       _MALLOC_UNLOCK();                       \
+} } while (0)
+#define KERNEXIT() do { if (__isthreaded) {    \
+       _MALLOC_LOCK();                         \
+       malloc_active++;                        \
+} } while (0)
+
 struct region_info {
        void *p;                /* page; low bits used to mark chunks */
        uintptr_t size;         /* size for pages, or chunk_info pointer */
@@ -312,7 +321,10 @@ unmap(struct dir_info *d, void *p, size_
        }
 
        if (psz > mopts.malloc_cache) {
-               if (munmap(p, sz))
+               KERNENTER();
+               i = munmap(p, sz);
+               KERNEXIT();
+               if (i)
                        wrterror("munmap", p);
                STATS_SUB(d->malloc_used, sz);
                return;
@@ -396,7 +408,9 @@ map(struct dir_info *d, size_t sz, int z
                return MAP_FAILED;
        }
        if (psz > d->free_regions_size) {
+               KERNENTER();
                p = MMAP(sz);
+               KERNEXIT();
                if (p != MAP_FAILED)
                        STATS_ADD(d->malloc_used, sz);
                /* zero fill not needed */
@@ -408,18 +422,20 @@ map(struct dir_info *d, size_t sz, int z
                if (r->p != NULL) {
                        if (r->size == psz) {
                                p = r->p;
+                               r->p = NULL;
+                               r->size = 0;
+                               d->free_regions_size -= psz;
+                               KERNENTER();
                                if (mopts.malloc_freeunmap)
                                        mprotect(p, sz, PROT_READ | PROT_WRITE);
                                if (mopts.malloc_hint)
                                        madvise(p, sz, MADV_NORMAL);
-                               r->p = NULL;
-                               r->size = 0;
-                               d->free_regions_size -= psz;
                                if (zero_fill)
                                        memset(p, 0, sz);
                                else if (mopts.malloc_junk == 2 &&
                                    mopts.malloc_freeunmap)
                                        memset(p, SOME_FREEJUNK, sz);
+                               KERNEXIT();
                                return p;
                        } else if (r->size > psz)
                                big = r;
@@ -1317,11 +1333,13 @@ orealloc(void *p, size_t newsz, void *f)
 
                                STATS_INC(pool->cheap_realloc_tries);
                                zapcacheregion(pool, hint, needed);
+                               KERNENTER();
                                q = MQUERY(hint, needed);
                                if (q == hint)
                                        q = MMAPA(hint, needed);
                                else
                                        q = MAP_FAILED;
+                               KERNEXIT();
                                if (q == hint) {
                                        STATS_ADD(pool->malloc_used, needed);
                                        if (mopts.malloc_junk == 2)


Reply via email to