Aaron Bannert wrote:
>On Fri, Nov 23, 2001 at 11:46:25AM -0800, Brian Pane wrote:
>
>>Sounds good. I think the "apr_pool_create_for_thread()" function that
>>I proposed earlier this morning will work well in combination with the
>>"time-space-tradeoff" worker design, so I'll continue with the prototyping
>>on the former.
>>
>
>Here's an updated version of my worker redesign. The "queue" is really a
>stack, but I didn't change the name for the sake of having a readable
>patch -- if we end up going with this patch I'll rename everything to
>"stack".
>
Thanks. Here's my patch to optimize away the mutex operations in
pools that have been designated thread-private. With the current
worker code, it can eliminate the mutex ops for subrequest pool
creation/destruction. By combining it with your worker redesign,
I think we may be able to eliminate the mutexes for the ptrans
pool.
--Brian
Index: server/mpm/prefork/prefork.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/prefork/prefork.c,v
retrieving revision 1.222
diff -u -r1.222 prefork.c
--- server/mpm/prefork/prefork.c 2001/11/20 18:18:45 1.222
+++ server/mpm/prefork/prefork.c 2001/11/23 23:01:29
@@ -558,7 +558,7 @@
/* Get a sub context for global allocations in this child, so that
* we can have cleanups occur when the child exits.
*/
- apr_pool_create(&pchild, pconf);
+ apr_pool_create_for_thread(&pchild, pconf);
apr_pool_create(&ptrans, pchild);
Index: server/mpm/worker/worker.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/worker/worker.c,v
retrieving revision 1.43
diff -u -r1.43 worker.c
--- server/mpm/worker/worker.c 2001/11/22 05:13:29 1.43
+++ server/mpm/worker/worker.c 2001/11/23 23:01:30
@@ -640,7 +640,7 @@
got_fd:
if (!workers_may_exit) {
/* create a new transaction pool for each accepted socket */
- apr_pool_create(&ptrans, tpool);
+ apr_pool_create_for_thread(&ptrans, tpool);
rv = lr->accept_func(&csd, lr, ptrans);
Index: srclib/apr/include/apr_pools.h
===================================================================
RCS file: /home/cvspublic/apr/include/apr_pools.h,v
retrieving revision 1.63
diff -u -r1.63 apr_pools.h
--- srclib/apr/include/apr_pools.h 2001/11/09 17:50:48 1.63
+++ srclib/apr/include/apr_pools.h 2001/11/23 23:01:31
@@ -233,6 +233,21 @@
apr_pool_t *cont);
/**
+ * Create a new pool that may be used by only one thread
+ * @param newcont The pool we have just created.
+ * @param cont The parent pool. If this is NULL, the new pool is a root
+ * pool. If it is non-NULL, the new pool will inherit all
+ * of its parent pool's attributes, except the apr_pool_t will
+ * be a sub-pool.
+ * @remark The new pool and any subpools created from it are
+ * optimized for use by a single thread. They skip
+ * the mutex locking that would normally protect
+ * allocation operations.
+ */
+APR_DECLARE(apr_status_t) apr_pool_create_for_thread(apr_pool_t **newcont,
+ apr_pool_t *cont);
+
+/**
* Set the function to be called when an allocation failure occurs.
* @tip If the program wants APR to exit on a memory allocation error,
* then this function can be called to set the callback to use (for
@@ -352,6 +367,7 @@
APR_DECLARE(void *) apr_pcalloc(apr_pool_t *p, apr_size_t size);
/**
+ * Create a new pool
* @param p The new sub-pool
* @param parent The pool to use as a parent pool
* @param apr_abort A function to use if the pool cannot allocate more memory.
@@ -362,6 +378,23 @@
APR_DECLARE(void) apr_pool_sub_make(apr_pool_t **p,
apr_pool_t *pparent,
int (*apr_abort)(int retcode));
+
+/**
+ * Create a new pool that may be used by only one thread
+ * @param p The new sub-pool
+ * @param parent The pool to use as a parent pool
+ * @param apr_abort A function to use if the pool cannot allocate more memory.
+ * @deffunc void apr_pool_sub_make_for_thread(apr_pool_t **p, apr_pool_t *parent, int
+(*apr_abort)(int retcode))
+ * @remark The @a apr_abort function provides a way to quit the program if the
+ * machine is out of memory. By default, APR will return on error.
+ * @remark The new pool and any subpools created from it are
+ * optimized for use by a single thread. They skip
+ * the mutex locking that would normally protect
+ * allocation operations.
+ */
+APR_DECLARE(void) apr_pool_sub_make_for_thread(apr_pool_t **p,
+ apr_pool_t *pparent,
+ int (*apr_abort)(int retcode));
/**
* Register a function to be called when a pool is cleared or destroyed
Index: srclib/apr/memory/unix/apr_pools.c
===================================================================
RCS file: /home/cvspublic/apr/memory/unix/apr_pools.c,v
retrieving revision 1.117
diff -u -r1.117 apr_pools.c
--- srclib/apr/memory/unix/apr_pools.c 2001/11/23 16:47:52 1.117
+++ srclib/apr/memory/unix/apr_pools.c 2001/11/23 23:01:32
@@ -174,6 +174,8 @@
#endif
+typedef union block_hdr block_list;
+
/** The memory allocation structure
*/
struct apr_pool_t {
@@ -207,6 +209,15 @@
int (*apr_abort)(int retcode);
/** A place to hold user data associated with this pool */
struct apr_hash_t *prog_data;
+ /** Whether this pool may be used by a single thread only */
+ int thread_private;
+ /** Free block list to use for this pool--NULL means use a global one */
+ block_list **free_list;
+ /** Free list owned by this pool--if this pool owns a free list,
+ * then p->free_list==&(p->managed_free_list)
+ * otherwise p->free_list==NULL
+ */
+ block_list *managed_free_list;
};
@@ -475,9 +486,10 @@
#define chk_on_blk_list(_x, _y)
#endif /* defined(ALLOC_DEBUG) && !defined(ALLOC_USE_MALLOC) */
-/* Free a chain of blocks --- must be called with alarms blocked. */
-
-static void free_blocks(union block_hdr *blok)
+/* Free a chain of blocks --- must be called with alarms blocked.
+ * WARNING: Caller is responsible for thread synchronization
+ */
+static void free_blocks(union block_hdr **free_list, union block_hdr *blok)
{
#ifdef ALLOC_USE_MALLOC
union block_hdr *next;
@@ -504,13 +516,14 @@
return; /* Sanity check --- freeing empty pool? */
}
-#if APR_HAS_THREADS
- if (alloc_mutex) {
- apr_lock_acquire(alloc_mutex);
+ if (free_list) {
+ old_free_list = *free_list;
+ *free_list = blok;
}
-#endif
- old_free_list = block_freelist;
- block_freelist = blok;
+ else {
+ old_free_list = block_freelist;
+ block_freelist = blok;
+ }
/*
* Next, adjust first_avail pointers of each block --- have to do it
@@ -556,23 +569,40 @@
num_blocks_freed += num_blocks;
#endif /* ALLOC_STATS */
-#if APR_HAS_THREADS
- if (alloc_mutex) {
- apr_lock_release(alloc_mutex);
- }
-#endif /* APR_HAS_THREADS */
#endif /* ALLOC_USE_MALLOC */
}
+static void destroy_free_list(union block_hdr *free_list)
+{
+ union block_hdr *blok;
+ union block_hdr *next;
+ for (blok = free_list; blok; blok = next) {
+ next = blok->h.next;
+ DO_FREE(blok);
+ }
+}
+
/*
* Get a new block, from our own free list if possible, from the system
* if necessary. Must be called with alarms blocked.
+ * WARNING: Caller is responsible for thread synchronization
*/
-static union block_hdr *new_block(apr_size_t min_size, apr_abortfunc_t abortfunc)
+static union block_hdr *new_block(union block_hdr **free_list,
+ apr_size_t min_size,
+ apr_abortfunc_t abortfunc)
{
- union block_hdr **lastptr = &block_freelist;
- union block_hdr *blok = block_freelist;
+ union block_hdr **lastptr;
+ union block_hdr *blok;
+ if (free_list) {
+ lastptr = free_list;
+ blok = *free_list;
+ }
+ else {
+ lastptr = &block_freelist;
+ blok = block_freelist;
+ }
+
/* First, see if we have anything of the required size
* on the free list...
*/
@@ -642,21 +672,46 @@
#define POOL_HDR_CLICKS (1 + ((sizeof(struct apr_pool_t) - 1) / CLICK_SZ))
#define POOL_HDR_BYTES (POOL_HDR_CLICKS * CLICK_SZ)
-APR_DECLARE(void) apr_pool_sub_make(apr_pool_t **p,
- apr_pool_t *parent,
- apr_abortfunc_t abortfunc)
-{
+APR_DECLARE(void) pool_sub_make(apr_pool_t **p,
+ apr_pool_t *parent,
+ apr_abortfunc_t abortfunc,
+ int thread_private)
+{
+ block_list **free_list;
+ block_list *managed_free_list;
+ int free_list_owner = 0;
union block_hdr *blok;
apr_pool_t *new_pool;
+#if APR_HAS_THREADS
+ int lock_needed;
+#endif /* APR_HAS_THREADS */
+ if (!thread_private) {
+ free_list = NULL;
+ }
+ else {
+ if (parent && parent->thread_private) {
+ free_list = parent->free_list;
+ }
+ else {
+ managed_free_list = NULL;
+ free_list = &managed_free_list;
+ free_list_owner = 1;
+ }
+ }
#if APR_HAS_THREADS
- if (alloc_mutex) {
+ lock_needed = (alloc_mutex &&
+ (!thread_private || (parent && !parent->thread_private)));
+#endif /* APR_HAS_THREADS */
+
+#if APR_HAS_THREADS
+ if (lock_needed) {
apr_lock_acquire(alloc_mutex);
}
#endif
- blok = new_block(POOL_HDR_BYTES, abortfunc);
+ blok = new_block(free_list, POOL_HDR_BYTES, abortfunc);
new_pool = (apr_pool_t *) blok->h.first_avail;
blok->h.first_avail += POOL_HDR_BYTES;
#ifdef APR_POOL_DEBUG
@@ -677,14 +732,38 @@
}
#if APR_HAS_THREADS
- if (alloc_mutex) {
+ if (lock_needed) {
apr_lock_release(alloc_mutex);
}
#endif
+ new_pool->thread_private = thread_private;
+ if (free_list_owner) {
+ new_pool->managed_free_list = managed_free_list;
+ new_pool->free_list = &(new_pool->managed_free_list);
+ }
+ else {
+ new_pool->free_list = free_list;
+ }
+
*p = new_pool;
}
+APR_DECLARE(void) apr_pool_sub_make(apr_pool_t **p,
+ apr_pool_t *parent,
+ apr_abortfunc_t abortfunc)
+{
+ pool_sub_make(p, parent, abortfunc,
+ parent ? parent->thread_private : 0);
+}
+
+APR_DECLARE(void) apr_pool_sub_make_for_thread(apr_pool_t **p,
+ apr_pool_t *parent,
+ apr_abortfunc_t abortfunc)
+{
+ pool_sub_make(p, parent, abortfunc, 1);
+}
+
#ifdef APR_POOL_DEBUG
static void stack_var_init(char *s)
{
@@ -724,8 +803,29 @@
abortfunc = parent_pool ? parent_pool->apr_abort : NULL;
ppool = parent_pool ? parent_pool : permanent_pool;
+
+ pool_sub_make(newpool, ppool, abortfunc,
+ ppool? ppool->thread_private : 0);
+ if (*newpool == NULL) {
+ return APR_ENOPOOL;
+ }
+
+ (*newpool)->prog_data = NULL;
+ (*newpool)->apr_abort = abortfunc;
+
+ return APR_SUCCESS;
+}
+
+APR_DECLARE(apr_status_t) apr_pool_create_for_thread(apr_pool_t **newpool,
+ apr_pool_t *parent_pool)
+{
+ apr_abortfunc_t abortfunc;
+ apr_pool_t *ppool;
+
+ abortfunc = parent_pool ? parent_pool->apr_abort : NULL;
+ ppool = parent_pool ? parent_pool : permanent_pool;
- apr_pool_sub_make(newpool, ppool, abortfunc);
+ pool_sub_make(newpool, ppool, abortfunc, 1);
if (*newpool == NULL) {
return APR_ENOPOOL;
}
@@ -939,6 +1039,10 @@
*/
APR_DECLARE(void) apr_pool_clear(apr_pool_t *a)
{
+#if APR_HAS_THREADS
+ int lock_needed = (alloc_mutex && !a->thread_private);
+#endif /* APR_HAS_THREADS */
+
/* free the subpools. we can just loop -- the subpools will detach
themselve from us, so this is easy. */
while (a->sub_pools) {
@@ -954,8 +1058,19 @@
/* free the pool's blocks, *except* for the first one. the actual pool
structure is contained in the first block. this also gives us some
ready memory for reallocating within this pool. */
- free_blocks(a->first->h.next);
+
+#if APR_HAS_THREADS
+ if (lock_needed) {
+ apr_lock_acquire(alloc_mutex);
+ }
+#endif /* APR_HAS_THREADS */
+ free_blocks(a->free_list, a->first->h.next);
a->first->h.next = NULL;
+#if APR_HAS_THREADS
+ if (lock_needed) {
+ apr_lock_release(alloc_mutex);
+ }
+#endif /* APR_HAS_THREADS */
/* this was allocated in self, or a subpool of self. it simply
disappears, so forget the hash table. */
@@ -990,12 +1105,17 @@
APR_DECLARE(void) apr_pool_destroy(apr_pool_t *a)
{
union block_hdr *blok;
+#if APR_HAS_THREADS
+ int lock_needed = (alloc_mutex &&
+ (!a->thread_private ||
+ (a->parent && !a->parent->thread_private)));
+#endif /* APR_HAS_THREADS */
/* toss everything in the pool. */
apr_pool_clear(a);
#if APR_HAS_THREADS
- if (alloc_mutex) {
+ if (lock_needed) {
apr_lock_acquire(alloc_mutex);
}
#endif
@@ -1013,12 +1133,6 @@
}
}
-#if APR_HAS_THREADS
- if (alloc_mutex) {
- apr_lock_release(alloc_mutex);
- }
-#endif
-
/* freeing the first block will include the pool structure. to prevent
a double call to apr_pool_destroy, we want to fill a NULL into
a->first so that the second call (or any attempted usage of the
@@ -1031,7 +1145,15 @@
*/
blok = a->first;
a->first = NULL;
- free_blocks(blok);
+ free_blocks(a->free_list, blok);
+#if APR_HAS_THREADS
+ if (alloc_mutex) {
+ apr_lock_release(alloc_mutex);
+ }
+#endif
+ if (a->managed_free_list) {
+ destroy_free_list(a->managed_free_list);
+ }
}
@@ -1202,6 +1324,9 @@
union block_hdr *blok;
char *first_avail;
char *new_first_avail;
+#if APR_HAS_THREADS
+ int lock_needed = (alloc_mutex && !a->thread_private);
+#endif /* APR_HAS_THREADS */
nclicks = 1 + ((reqsize - 1) / CLICK_SZ);
size = nclicks * CLICK_SZ;
@@ -1230,12 +1355,12 @@
/* Nope --- get a new one that's guaranteed to be big enough */
#if APR_HAS_THREADS
- if (alloc_mutex) {
+ if (lock_needed) {
apr_lock_acquire(alloc_mutex);
}
#endif
- blok = new_block(size, a->apr_abort);
+ blok = new_block(a->free_list, size, a->apr_abort);
a->last->h.next = blok;
a->last = blok;
#ifdef APR_POOL_DEBUG
@@ -1243,7 +1368,7 @@
#endif
#if APR_HAS_THREADS
- if (alloc_mutex) {
+ if (lock_needed) {
apr_lock_release(alloc_mutex);
}
#endif
@@ -1271,15 +1396,17 @@
apr_status_t (*cleanup) (void *),
apr_pool_t *cont)
{
+ apr_size_t keylen = strlen(key);
+
if (cont->prog_data == NULL)
cont->prog_data = apr_hash_make(cont);
- if (apr_hash_get(cont->prog_data, key, APR_HASH_KEY_STRING) == NULL){
+ if (apr_hash_get(cont->prog_data, key, keylen) == NULL){
char *new_key = apr_pstrdup(cont, key);
- apr_hash_set(cont->prog_data, new_key, APR_HASH_KEY_STRING, data);
+ apr_hash_set(cont->prog_data, new_key, keylen, data);
}
else {
- apr_hash_set(cont->prog_data, key, APR_HASH_KEY_STRING, data);
+ apr_hash_set(cont->prog_data, key, keylen, data);
}
if (cleanup) {
@@ -1333,6 +1460,7 @@
struct psprintf_data {
apr_vformatter_buff_t vbuff;
+ apr_pool_t *p;
#ifdef ALLOC_USE_MALLOC
char *base;
#else
@@ -1363,6 +1491,9 @@
union block_hdr *nblok;
apr_size_t cur_len;
char *strp;
+#if APR_HAS_THREADS
+ int lock_needed = (alloc_mutex && !ps->p->thread_private);
+#endif /* APR_HAS_THREADS */
blok = ps->blok;
strp = ps->vbuff.curpos;
@@ -1370,11 +1501,15 @@
/* must try another blok */
#if APR_HAS_THREADS
- apr_lock_acquire(alloc_mutex);
+ if (lock_needed) {
+ apr_lock_acquire(alloc_mutex);
+ }
#endif
- nblok = new_block(2 * cur_len, NULL);
+ nblok = new_block(ps->p->free_list, 2 * cur_len, NULL);
#if APR_HAS_THREADS
- apr_lock_release(alloc_mutex);
+ if (lock_needed) {
+ apr_lock_release(alloc_mutex);
+ }
#endif
memcpy(nblok->h.first_avail, blok->h.first_avail, cur_len);
ps->vbuff.curpos = nblok->h.first_avail + cur_len;
@@ -1385,12 +1520,22 @@
if (ps->got_a_new_block) {
debug_fill(blok->h.first_avail, blok->h.endp - blok->h.first_avail);
#if APR_HAS_THREADS
- apr_lock_acquire(alloc_mutex);
+ if (lock_needed) {
+ apr_lock_acquire(alloc_mutex);
+ }
#endif
- blok->h.next = block_freelist;
- block_freelist = blok;
+ if (ps->p->free_list) {
+ blok->h.next = *(ps->p->free_list);
+ *(ps->p->free_list) = blok;
+ }
+ else {
+ blok->h.next = block_freelist;
+ block_freelist = blok;
+ }
#if APR_HAS_THREADS
- apr_lock_release(alloc_mutex);
+ if (lock_needed) {
+ apr_lock_release(alloc_mutex);
+ }
#endif
}
ps->blok = nblok;
@@ -1409,6 +1554,7 @@
struct psprintf_data ps;
void *ptr;
+ ps.p = p;
ps.base = DO_MALLOC(512);
if (ps.base == NULL) {
fputs("Ouch! Out of memory!\n", stderr);
@@ -1434,6 +1580,7 @@
char *strp;
apr_size_t size;
+ ps.p = p;
ps.blok = p->last;
ps.vbuff.curpos = ps.blok->h.first_avail;
ps.vbuff.endpos = ps.blok->h.endp - 1; /* save one for NUL */