I just tried out a simpler approach to fixing the mutex contention
within pool cleanups.  It seems to work reasonably well, so I'm
presenting it here for feedback.

This patch does three things:

 * Optimize away mutexes in the case of subrequest pool
   deletion (an important special case because it's in requests
   with a lot of subrequests, like SSI pages, that we see the
   worst mutex overhead)

 * Add support for a "free list cache" within a pool. If this is
   enabled (on a per-pool basis), blocks used for the pool's descendants
   will be put into this private free list rather than the global one
   when the descendants are destroyed.  Subsequent subpool creation
   for this parent pool can take advantage of the pool's free list cache
   to bypass the global free list and its mutex.  (This is useful for
   things like mod_include.)

 * Switch to the new lock API and use nonrecursive mutexes.
   (Thanks to Aaron for this suggestion.  According to profiling
   data, the old lock API spends literally half its time in
   apr_os_thread_equal() and apr_os_thread_current().)

This patch removes essentially all the pool-related mutex operations
in prefork, and a lot of the mutex ops in worker.

--Brian


Index: server/mpm/prefork/prefork.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/prefork/prefork.c,v
retrieving revision 1.223
diff -u -r1.223 prefork.c
--- server/mpm/prefork/prefork.c        2001/11/29 04:06:05     1.223
+++ server/mpm/prefork/prefork.c        2001/11/29 14:41:55
@@ -559,7 +559,8 @@
      * we can have cleanups occur when the child exits.
      */
     apr_pool_create(&pchild, pconf);
-
+    apr_pool_set_options(pchild, APR_POOL_OPT_SINGLE_THREADED |
+                         APR_POOL_OPT_CACHE_FREELIST);
     apr_pool_create(&ptrans, pchild);
 
     /* needs to be done before we switch UIDs so we have permissions */
Index: server/mpm/worker/worker.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/worker/worker.c,v
retrieving revision 1.43
diff -u -r1.43 worker.c
--- server/mpm/worker/worker.c  2001/11/22 05:13:29     1.43
+++ server/mpm/worker/worker.c  2001/11/29 14:41:56
@@ -641,6 +641,8 @@
         if (!workers_may_exit) {
             /* create a new transaction pool for each accepted socket */
             apr_pool_create(&ptrans, tpool);
+            apr_pool_set_options(ptrans, APR_POOL_OPT_SINGLE_THREADED |
+                                 APR_POOL_OPT_CACHE_FREELIST);
 
             rv = lr->accept_func(&csd, lr, ptrans);
 
Index: srclib/apr/include/apr_pools.h
===================================================================
RCS file: /home/cvspublic/apr/include/apr_pools.h,v
retrieving revision 1.63
diff -u -r1.63 apr_pools.h
--- srclib/apr/include/apr_pools.h      2001/11/09 17:50:48     1.63
+++ srclib/apr/include/apr_pools.h      2001/11/29 14:41:57
@@ -363,6 +363,32 @@
                                             apr_pool_t *pparent,
                                             int (*apr_abort)(int retcode));
 
+/* Options for use with apr_pool_set_options: */
+
+/** Optimize a pool for use with a single thread only */
+#define APR_POOL_OPT_SINGLE_THREADED    0x1
+/** Maintain a cache of free blocks in a pool for use in creating subpools */
+#define APR_POOL_OPT_CACHE_FREELIST     0x2
+
+/**
+ * Set performance tuning options for a pool
+ * @param p The pool
+ * @param flags The APR_POOL_OPT_* flags to enable for the pool,
+ *              OR'ed together
+ * @remark Options set with this function are inherited by subsequently
+ *         created child pools, although the children can override the
+ *         settings.
+ */
+APR_DECLARE(void) apr_pool_set_options(apr_pool_t *p, apr_uint32_t flags);
+
+/**
+ * Retrieve the performance tuning options for a pool
+ * @param p The pool
+ * @return The APR_POOL_OPT_* flags that are enabled for the pool,
+ *         OR'ed together
+ */
+APR_DECLARE(apr_uint32_t) apr_pool_get_options(const apr_pool_t *p);
+
 /**
  * Register a function to be called when a pool is cleared or destroyed
  * @param p The pool register the cleanup with 
Index: srclib/apr/memory/unix/apr_pools.c
===================================================================
RCS file: /home/cvspublic/apr/memory/unix/apr_pools.c,v
retrieving revision 1.117
diff -u -r1.117 apr_pools.c
--- srclib/apr/memory/unix/apr_pools.c  2001/11/23 16:47:52     1.117
+++ srclib/apr/memory/unix/apr_pools.c  2001/11/29 14:41:58
@@ -67,7 +67,7 @@
 #include "apr_general.h"
 #include "apr_pools.h"
 #include "apr_lib.h"
-#include "apr_lock.h"
+#include "apr_thread_mutex.h"
 #include "apr_hash.h"
 
 #if APR_HAVE_STDIO_H
@@ -207,6 +207,15 @@
     int (*apr_abort)(int retcode);
     /** A place to hold user data associated with this pool */
     struct apr_hash_t *prog_data;
+    /** Tuning options for this pool */
+    int flags;
+    /** Optional cache of free blocks to use when allocating child pools */
+    union block_hdr *free_list_cache;
+    /** If non-null, points to the pool whose free list should be used
+     *  when allocating blocks for this pool (will be either this pool
+     *  itself or an ancestor thereof)
+     */
+    struct apr_pool_t *free_list_cache_owner;
 };
 
 
@@ -253,7 +262,7 @@
 static union block_hdr *block_freelist = NULL;
 
 #if APR_HAS_THREADS
-static apr_lock_t *alloc_mutex;
+static apr_thread_mutex_t *alloc_mutex;
 #endif
 
 #ifdef APR_POOL_DEBUG
@@ -477,7 +486,8 @@
 
 /* Free a chain of blocks --- must be called with alarms blocked. */
 
-static void free_blocks(union block_hdr *blok)
+static void free_blocks(union block_hdr *blok, union block_hdr **free_list,
+                        int use_lock)
 {
 #ifdef ALLOC_USE_MALLOC
     union block_hdr *next;
@@ -492,6 +502,10 @@
     unsigned num_blocks;
 #endif /* ALLOC_STATS */
 
+#if APR_HAS_THREADS
+    int lock_needed = (use_lock && alloc_mutex);
+#endif /* APR_HAS_THREADS */
+
     /*
      * First, put new blocks at the head of the free list ---
      * we'll eventually bash the 'next' pointer of the last block
@@ -505,12 +519,12 @@
     }
 
 #if APR_HAS_THREADS
-    if (alloc_mutex) {
-        apr_lock_acquire(alloc_mutex);
+    if (lock_needed) {
+        apr_thread_mutex_lock(alloc_mutex);
     }
 #endif
-    old_free_list = block_freelist;
-    block_freelist = blok;
+    old_free_list = *free_list;
+    *free_list = blok;
 
     /*
      * Next, adjust first_avail pointers of each block --- have to do it
@@ -557,8 +571,8 @@
 #endif /* ALLOC_STATS */
 
 #if APR_HAS_THREADS
-    if (alloc_mutex) {
-        apr_lock_release(alloc_mutex);
+    if (lock_needed) {
+        apr_thread_mutex_unlock(alloc_mutex);
     }
 #endif /* APR_HAS_THREADS */
 #endif /* ALLOC_USE_MALLOC */
@@ -568,10 +582,11 @@
  * Get a new block, from our own free list if possible, from the system
  * if necessary.  Must be called with alarms blocked.
  */
-static union block_hdr *new_block(apr_size_t min_size, apr_abortfunc_t abortfunc)
+static union block_hdr *new_block(union block_hdr **free_list,
+                                  apr_size_t min_size, apr_abortfunc_t abortfunc)
 {
-    union block_hdr **lastptr = &block_freelist;
-    union block_hdr *blok = block_freelist;
+    union block_hdr **lastptr = free_list;
+    union block_hdr *blok = *free_list;
 
     /* First, see if we have anything of the required size
      * on the free list...
@@ -648,15 +663,48 @@
 {
     union block_hdr *blok;
     apr_pool_t *new_pool;
+    union block_hdr **free_list;
+#if APR_HAS_THREADS
+    int lock_needed;
+    int have_lock = 0;
+#endif /* APR_HAS_THREADS */
 
+    /* If the parent pool has its own free list, use it if it's non-empty.
+     * Otherwise, use the global free list.
+     */
+    if (parent && parent->free_list_cache_owner) {
+        free_list = &(parent->free_list_cache_owner->free_list_cache);
+#if APR_HAS_THREADS
+        lock_needed = (alloc_mutex &&
+                       (!(parent->flags & APR_POOL_OPT_SINGLE_THREADED) ||
+                        !(parent->free_list_cache_owner->flags &
+                          APR_POOL_OPT_SINGLE_THREADED)));
+        if (lock_needed) {
+            apr_thread_mutex_lock(alloc_mutex);
+            have_lock = 1;
+        }
+#endif /* APR_HAS_THREADS */
+        if (!parent->free_list_cache_owner->free_list_cache) {
+            free_list = &block_freelist;
+#if APR_HAS_THREADS
+            lock_needed = (alloc_mutex != NULL);
+#endif /* APR_HAS_THREADS */
+        }
+    }
+    else {
+        free_list = &block_freelist;
+#if APR_HAS_THREADS
+        lock_needed = (alloc_mutex != NULL);
+#endif /* APR_HAS_THREADS */
+    }
 
 #if APR_HAS_THREADS
-    if (alloc_mutex) {
-        apr_lock_acquire(alloc_mutex);
+    if (lock_needed && !have_lock) {
+        apr_thread_mutex_lock(alloc_mutex);
     }
 #endif
 
-    blok = new_block(POOL_HDR_BYTES, abortfunc);
+    blok = new_block(free_list, POOL_HDR_BYTES, abortfunc);
     new_pool = (apr_pool_t *) blok->h.first_avail;
     blok->h.first_avail += POOL_HDR_BYTES;
 #ifdef APR_POOL_DEBUG
@@ -674,11 +722,13 @@
            new_pool->sub_next->sub_prev = new_pool;
        }
        parent->sub_pools = new_pool;
+        new_pool->flags = parent->flags;
+        new_pool->free_list_cache_owner = parent->free_list_cache_owner;
     }
 
 #if APR_HAS_THREADS
-    if (alloc_mutex) {
-        apr_lock_release(alloc_mutex);
+    if (lock_needed) {
+        apr_thread_mutex_unlock(alloc_mutex);
     }
 #endif
 
@@ -752,6 +802,29 @@
     return pool->parent;
 }
 
+APR_DECLARE(void) apr_pool_set_options(apr_pool_t *p, apr_uint32_t flags)
+{
+    p->flags = flags;
+    if (flags & APR_POOL_OPT_CACHE_FREELIST) {
+        if (!p->parent || !p->parent->free_list_cache_owner) {
+            p->free_list_cache_owner = p;
+        }
+    }
+    else {
+        if (p->parent) {
+            p->free_list_cache_owner = p->parent->free_list_cache_owner;
+        }
+        else {
+            p->free_list_cache_owner = NULL;
+        }
+    }
+}
+
+APR_DECLARE(apr_uint32_t) apr_pool_get_options(const apr_pool_t *p)
+{
+    return p->flags;
+}
+
 /*****************************************************************
  *
  * Managing generic cleanups.  
@@ -887,8 +960,8 @@
     stack_var_init(&s);
 #endif
 #if APR_HAS_THREADS
-    status = apr_lock_create(&alloc_mutex, APR_MUTEX, APR_INTRAPROCESS,
-                   NULL, globalp);
+    status = apr_thread_mutex_create(&alloc_mutex, APR_THREAD_MUTEX_DEFAULT,
+                                     globalp);
     if (status != APR_SUCCESS) {
         return status;
     }
@@ -905,7 +978,7 @@
 APR_DECLARE(void) apr_pool_alloc_term(apr_pool_t *globalp)
 {
 #if APR_HAS_THREADS
-    apr_lock_destroy(alloc_mutex);
+    apr_thread_mutex_destroy(alloc_mutex);
     alloc_mutex = NULL;
 #endif
     apr_pool_destroy(globalp);
@@ -954,8 +1027,18 @@
     /* free the pool's blocks, *except* for the first one. the actual pool
        structure is contained in the first block. this also gives us some
        ready memory for reallocating within this pool. */
-    free_blocks(a->first->h.next);
-    a->first->h.next = NULL;
+    if (a->first->h.next) {
+        if (a->free_list_cache_owner) {
+            free_blocks(a->first->h.next,
+                        &(a->free_list_cache_owner->free_list_cache),
+                        !(a->free_list_cache_owner->flags &
+                          APR_POOL_OPT_SINGLE_THREADED));
+        }
+        else {
+            free_blocks(a->first->h.next, &block_freelist, 1);
+        }
+        a->first->h.next = NULL;
+    }
 
     /* this was allocated in self, or a subpool of self. it simply
        disappears, so forget the hash table. */
@@ -994,14 +1077,15 @@
     /* toss everything in the pool. */
     apr_pool_clear(a);
 
-#if APR_HAS_THREADS
-    if (alloc_mutex) {
-        apr_lock_acquire(alloc_mutex);
-    }
-#endif
-
     /* detach this pool from its parent. */
     if (a->parent) {
+#if APR_HAS_THREADS
+        int lock_required = (alloc_mutex &&
+                          !(a->parent->flags & APR_POOL_OPT_SINGLE_THREADED));
+        if (lock_required) {
+            apr_thread_mutex_lock(alloc_mutex);
+        }
+#endif
        if (a->parent->sub_pools == a) {
            a->parent->sub_pools = a->sub_next;
        }
@@ -1011,13 +1095,16 @@
        if (a->sub_next) {
            a->sub_next->sub_prev = a->sub_prev;
        }
+#if APR_HAS_THREADS
+        if (lock_required) {
+            apr_thread_mutex_unlock(alloc_mutex);
+        }
+#endif
     }
 
-#if APR_HAS_THREADS
-    if (alloc_mutex) {
-        apr_lock_release(alloc_mutex);
+    if (a->free_list_cache) {
+        free_blocks(a->free_list_cache, &block_freelist, 1);
     }
-#endif
 
     /* freeing the first block will include the pool structure. to prevent
        a double call to apr_pool_destroy, we want to fill a NULL into
@@ -1031,7 +1118,15 @@
     */
     blok = a->first;
     a->first = NULL;
-    free_blocks(blok);
+
+    if (a->free_list_cache_owner && (a->free_list_cache_owner != a)) {
+        free_blocks(blok, &(a->free_list_cache_owner->free_list_cache),
+                    !(a->free_list_cache_owner->flags &
+                      APR_POOL_OPT_SINGLE_THREADED));
+    }
+    else {
+        free_blocks(blok, &block_freelist, 1);
+    }
 }
 
 
@@ -1231,11 +1326,11 @@
 
 #if APR_HAS_THREADS
     if (alloc_mutex) {
-        apr_lock_acquire(alloc_mutex);
+        apr_thread_mutex_lock(alloc_mutex);
     }
 #endif
 
-    blok = new_block(size, a->apr_abort);
+    blok = new_block(&block_freelist, size, a->apr_abort);
     a->last->h.next = blok;
     a->last = blok;
 #ifdef APR_POOL_DEBUG
@@ -1244,7 +1339,7 @@
 
 #if APR_HAS_THREADS
     if (alloc_mutex) {
-        apr_lock_release(alloc_mutex);
+        apr_thread_mutex_unlock(alloc_mutex);
     }
 #endif
 
@@ -1370,11 +1465,11 @@
 
     /* must try another blok */
 #if APR_HAS_THREADS
-    apr_lock_acquire(alloc_mutex);
+    apr_thread_mutex_lock(alloc_mutex);
 #endif
-    nblok = new_block(2 * cur_len, NULL);
+    nblok = new_block(&block_freelist, 2 * cur_len, NULL);
 #if APR_HAS_THREADS
-    apr_lock_release(alloc_mutex);
+    apr_thread_mutex_unlock(alloc_mutex);
 #endif
     memcpy(nblok->h.first_avail, blok->h.first_avail, cur_len);
     ps->vbuff.curpos = nblok->h.first_avail + cur_len;
@@ -1385,12 +1480,12 @@
     if (ps->got_a_new_block) {
        debug_fill(blok->h.first_avail, blok->h.endp - blok->h.first_avail);
 #if APR_HAS_THREADS
-        apr_lock_acquire(alloc_mutex);
+        apr_thread_mutex_lock(alloc_mutex);
 #endif
        blok->h.next = block_freelist;
        block_freelist = blok;
 #if APR_HAS_THREADS
-        apr_lock_release(alloc_mutex);
+        apr_thread_mutex_unlock(alloc_mutex);
 #endif
     }
     ps->blok = nblok;

Reply via email to