Hi there,

I am using apr as a system layer in the development of a cross-platform
financial system in London. While using it, I spotted an opportunity to
reduce the memory footprint of the allocator — to make it a bit greener.
Not to save a forest ;) — rather, to provide an extra 16 bytes (on a
32-bit system) to the user from each apr_memnode_t allocation.

In a test run, a newly created pool is now able to accommodate 1015 8-byte
objects before requesting a new memory node, instead of 1013.

Summary of changes:

* There is a bug in several memory allocation functions in apr_pools.c,
  where memory request size is compared with free memory available from the
  node this way:

    /* If the active node has enough bytes left, use it. */
    if (size < (apr_size_t)(active->endp - active->first_avail)) {

  Thus, if size is 8 and the active node has 8 bytes left free, the strict
  comparison yields false. The comparisons changed to <=. This change
  provides extra 8 bytes of memory to the user (since the minimum allocation
  size is 8).

* apr_memnode_t structure cleaned up.

  apr_memnode_t::endp member has been removed. Instead, the value is
  calculated this way:

    static inline char* memnode_end(apr_memnode_t* node)
    {
        /* Parenthesized explicitly: '+' binds tighter than '<<'.  The
         * index is widened before the shift so the node size is computed
         * in apr_size_t, as the original endp computation did. */
        return (char*)node + (((apr_size_t)node->index + 1) << BOUNDARY_INDEX);
    }

    static inline apr_size_t memnode_space_left(apr_memnode_t* node)
    {
        return memnode_end(node) - node->first_avail;
    }

  apr_memnode_t::free_index member is removed. The value is calculated this
  way:

    static inline apr_size_t memnode_free_index(apr_memnode_t* node)
    {
        return (APR_ALIGN(memnode_space_left(node) + 1, BOUNDARY_SIZE)
            - BOUNDARY_SIZE) >> BOUNDARY_INDEX;
    }

  Removing these two members yields another 8 extra bytes.

* allocator_alloc() has been refactored to eliminate race conditions. It
  used to hold the mutex while changing apr_allocator_t members, but not
  while reading.

* Some duplicate code was extracted into functions. apr_pool.c code is now
  63 lines shorter.

Please review and comment on it.

--
Max
Index: memory/unix/apr_pools.c
===================================================================
--- memory/unix/apr_pools.c	(revision 607616)
+++ memory/unix/apr_pools.c	(working copy)
@@ -60,6 +60,21 @@
 #define TIMEOUT_USECS    3000000
 #define TIMEOUT_INTERVAL   46875
 
+static inline char* memnode_end(apr_memnode_t* node)
+{
+    return (char*)node + (((apr_size_t)node->index + 1) << BOUNDARY_INDEX);
+}
+
+static inline apr_size_t memnode_space_left(apr_memnode_t* node)
+{
+    return memnode_end(node) - node->first_avail;
+}
+
+static inline apr_size_t memnode_free_index(apr_memnode_t* node)
+{
+    return (APR_ALIGN(memnode_space_left(node) + 1, BOUNDARY_SIZE) - BOUNDARY_SIZE) >> BOUNDARY_INDEX;
+}
+
 /*
  * Allocator
  *
@@ -193,16 +208,14 @@
 static APR_INLINE
 apr_memnode_t *allocator_alloc(apr_allocator_t *allocator, apr_size_t size)
 {
-    apr_memnode_t *node, **ref;
+    apr_memnode_t *node = NULL, **ref;
     apr_uint32_t max_index;
     apr_size_t i, index;
 
     /* Round up the block size to the next boundary, but always
      * allocate at least a certain size (MIN_ALLOC).
      */
-    size = APR_ALIGN(size + APR_MEMNODE_T_SIZE, BOUNDARY_SIZE);
-    if (size < MIN_ALLOC)
-        size = MIN_ALLOC;
+    size = APR_MAX(MIN_ALLOC, APR_ALIGN(size + APR_MEMNODE_T_SIZE, BOUNDARY_SIZE));
 
     /* Find the index for this node size by
      * dividing its size by the boundary size
@@ -213,15 +226,15 @@
         return NULL;
     }
 
+#if APR_HAS_THREADS
+    if (allocator->mutex)
+        apr_thread_mutex_lock(allocator->mutex);
+#endif /* APR_HAS_THREADS */
+
     /* First see if there are any nodes in the area we know
      * our node will fit into.
      */
     if (index <= allocator->max_index) {
-#if APR_HAS_THREADS
-        if (allocator->mutex)
-            apr_thread_mutex_lock(allocator->mutex);
-#endif /* APR_HAS_THREADS */
-
         /* Walk the free list to see if there are
          * any nodes on it of the requested size
          *
@@ -255,37 +268,12 @@
 
                 allocator->max_index = max_index;
             }
-
-            allocator->current_free_index += node->index;
-            if (allocator->current_free_index > allocator->max_free_index)
-                allocator->current_free_index = allocator->max_free_index;
-
-#if APR_HAS_THREADS
-            if (allocator->mutex)
-                apr_thread_mutex_unlock(allocator->mutex);
-#endif /* APR_HAS_THREADS */
-
-            node->next = NULL;
-            node->first_avail = (char *)node + APR_MEMNODE_T_SIZE;
-
-            return node;
         }
-
-#if APR_HAS_THREADS
-        if (allocator->mutex)
-            apr_thread_mutex_unlock(allocator->mutex);
-#endif /* APR_HAS_THREADS */
     }
-
     /* If we found nothing, seek the sink (at index 0), if
      * it is not empty.
      */
     else if (allocator->free[0]) {
-#if APR_HAS_THREADS
-        if (allocator->mutex)
-            apr_thread_mutex_lock(allocator->mutex);
-#endif /* APR_HAS_THREADS */
-
         /* Walk the free list to see if there are
          * any nodes on it of the requested size
          */
@@ -293,40 +281,33 @@
         while ((node = *ref) != NULL && index > node->index)
             ref = &node->next;
 
-        if (node) {
+        if (node)
             *ref = node->next;
+    }
 
-            allocator->current_free_index += node->index;
-            if (allocator->current_free_index > allocator->max_free_index)
-                allocator->current_free_index = allocator->max_free_index;
+    if (node) {
+        allocator->current_free_index += node->index;
+        if (allocator->current_free_index > allocator->max_free_index)
+            allocator->current_free_index = allocator->max_free_index;
+    }
 
 #if APR_HAS_THREADS
-            if (allocator->mutex)
-                apr_thread_mutex_unlock(allocator->mutex);
+    if (allocator->mutex)
+        apr_thread_mutex_unlock(allocator->mutex);
 #endif /* APR_HAS_THREADS */
 
-            node->next = NULL;
-            node->first_avail = (char *)node + APR_MEMNODE_T_SIZE;
+    if (!node) {
+        /* If we haven't got a suitable node, malloc a new one
+         * and initialize it.
+         */
+        if ((node = malloc(size)) == NULL)
+            return NULL;
 
-            return node;
-        }
-
-#if APR_HAS_THREADS
-        if (allocator->mutex)
-            apr_thread_mutex_unlock(allocator->mutex);
-#endif /* APR_HAS_THREADS */
+        node->index = (APR_UINT32_TRUNC_CAST)index;
     }
 
-    /* If we haven't got a suitable node, malloc a new one
-     * and initialize it.
-     */
-    if ((node = malloc(size)) == NULL)
-        return NULL;
-
     node->next = NULL;
-    node->index = (APR_UINT32_TRUNC_CAST)index;
     node->first_avail = (char *)node + APR_MEMNODE_T_SIZE;
-    node->endp = (char *)node + size;
 
     return node;
 }
@@ -353,37 +334,31 @@
     do {
         next = node->next;
         index = node->index;
+
 
         if (max_free_index != APR_ALLOCATOR_MAX_FREE_UNLIMITED
             && index > current_free_index) {
             node->next = freelist;
             freelist = node;
-        }
-        else if (index < MAX_INDEX) {
-            /* Add the node to the appropiate 'size' bucket.  Adjust
-             * the max_index when appropiate.
+        }
+        else {
+            current_free_index -= APR_MIN(current_free_index, index);
+
+            /* If this node is too large to keep in a specific size bucket,
+             * just add it to the sink (at index 0).
              */
-            if ((node->next = allocator->free[index]) == NULL
-                && index > max_index) {
-                max_index = index;
+            if (index >= MAX_INDEX) {
+                index = 0;
+                node->next = allocator->free[index];
             }
+            else {
+                node->next = allocator->free[index];
+                if (!(node->next) && index > max_index)
+                    max_index = index;
+            }
+
             allocator->free[index] = node;
-            if (current_free_index >= index)
-                current_free_index -= index;
-            else
-                current_free_index = 0;
         }
-        else {
-            /* This node is too large to keep in a specific size bucket,
-             * just add it to the sink (at index 0).
-             */
-            node->next = allocator->free[0];
-            allocator->free[0] = node;
-            if (current_free_index >= index)
-                current_free_index -= index;
-            else
-                current_free_index = 0;
-        }
     } while ((node = next) != NULL);
 
     allocator->max_index = max_index;
@@ -505,7 +480,6 @@
 
 #define SIZEOF_POOL_T       APR_ALIGN_DEFAULT(sizeof(apr_pool_t))
 
-
 /*
  * Variables
  */
@@ -538,6 +512,20 @@
  * Initialization
  */
 
+static void apr_pool_do_terminate(void)
+{
+    if (global_pool) {
+        /* it destroys global_allocator as well */
+        apr_pool_destroy(global_pool);
+        global_pool = NULL;
+    }
+    else if (global_allocator) {
+        apr_allocator_destroy(global_allocator);
+    }
+    global_allocator = NULL;
+    apr_pools_initialized = 0;
+}
+
 APR_DECLARE(apr_status_t) apr_pool_initialize(void)
 {
     apr_status_t rv;
@@ -545,18 +533,11 @@
     if (apr_pools_initialized++)
         return APR_SUCCESS;
 
-    if ((rv = apr_allocator_create(&global_allocator)) != APR_SUCCESS) {
-        apr_pools_initialized = 0;
-        return rv;
-    }
+    if ((rv = apr_allocator_create(&global_allocator)) != APR_SUCCESS)
+        goto error;
 
-    if ((rv = apr_pool_create_ex(&global_pool, NULL, NULL,
-                                 global_allocator)) != APR_SUCCESS) {
-        apr_allocator_destroy(global_allocator);
-        global_allocator = NULL;
-        apr_pools_initialized = 0;
-        return rv;
-    }
+    if ((rv = apr_pool_create_ex(&global_pool, NULL, NULL, global_allocator)) != APR_SUCCESS)
+        goto error;
 
     apr_pool_tag(global_pool, "apr_global_pool");
 
@@ -566,9 +547,8 @@
      * Warning: apr_atomic_init() must always be called, by any
      * means possible, from apr_initialize().
      */
-    if ((rv = apr_atomic_init(global_pool)) != APR_SUCCESS) {
-        return rv;
-    }
+    if ((rv = apr_atomic_init(global_pool)) != APR_SUCCESS)
+        goto error;
 
 #if APR_HAS_THREADS
     {
@@ -576,9 +556,8 @@
 
         if ((rv = apr_thread_mutex_create(&mutex,
                                           APR_THREAD_MUTEX_DEFAULT,
-                                          global_pool)) != APR_SUCCESS) {
-            return rv;
-        }
+                                          global_pool)) != APR_SUCCESS)
+            goto error;
 
         apr_allocator_mutex_set(global_allocator, mutex);
     }
@@ -587,23 +566,18 @@
     apr_allocator_owner_set(global_allocator, global_pool);
 
     return APR_SUCCESS;
+
+error:
+    apr_pool_do_terminate();
+    return rv;
 }
 
 APR_DECLARE(void) apr_pool_terminate(void)
 {
-    if (!apr_pools_initialized)
-        return;
-
-    if (--apr_pools_initialized)
-        return;
-
-    apr_pool_destroy(global_pool); /* This will also destroy the mutex */
-    global_pool = NULL;
-
-    global_allocator = NULL;
+    if (apr_pools_initialized && !--apr_pools_initialized)
+        apr_pool_do_terminate();
 }
 
-
 /* Node list management helper macros; list_insert() inserts 'node'
  * before 'point'. */
 #define list_insert(node, point) do {           \
@@ -619,6 +593,31 @@
     node->next->ref = node->ref;                \
 } while (0)
 
+static inline void pool_active_set(apr_pool_t* pool, apr_memnode_t* node)
+{
+    apr_memnode_t* old_active = pool->active;
+    apr_size_t old_active_free_index;
+
+    list_insert(node, old_active);
+    pool->active = node;
+
+    if (node == old_active->next) /* only two nodes in the list */
+        return;
+
+    /* put old_active in the list in the descending order of free_index */
+    old_active_free_index = memnode_free_index(old_active);
+    node = old_active->next;
+    if (old_active_free_index < memnode_free_index(node)) {
+        do {
+            node = node->next;
+        }
+        while (old_active_free_index < memnode_free_index(node));
+
+        list_remove(old_active);
+        list_insert(old_active, node);
+    }
+}
+
 /*
  * Memory allocation
  */
@@ -633,7 +632,7 @@
     active = pool->active;
 
     /* If the active node has enough bytes left, use it. */
-    if (size < (apr_size_t)(active->endp - active->first_avail)) {
+    if (size <= memnode_space_left(active)) {
         mem = active->first_avail;
         active->first_avail += size;
 
@@ -641,7 +640,7 @@
     }
 
     node = active->next;
-    if (size < (apr_size_t)(node->endp - node->first_avail)) {
+    if (size <= memnode_space_left(node)) {
         list_remove(node);
     }
     else {
@@ -653,31 +652,13 @@
         }
     }
 
-    node->free_index = 0;
+    /* At this point node has enough free bytes and it is not in the list */
 
+    pool_active_set(pool, node);
+
     mem = node->first_avail;
     node->first_avail += size;
 
-    list_insert(node, active);
-
-    pool->active = node;
-
-    free_index = (APR_ALIGN(active->endp - active->first_avail + 1,
-                            BOUNDARY_SIZE) - BOUNDARY_SIZE) >> BOUNDARY_INDEX;
-
-    active->free_index = (APR_UINT32_TRUNC_CAST)free_index;
-    node = active->next;
-    if (free_index >= node->free_index)
-        return mem;
-
-    do {
-        node = node->next;
-    }
-    while (free_index < node->free_index);
-
-    list_remove(active);
-    list_insert(active, node);
-
     return mem;
 }
 
@@ -943,32 +924,9 @@
         size = APR_PSPRINTF_MIN_STRINGSIZE;
 
     node = active->next;
-    if (!ps->got_a_new_node
-        && size < (apr_size_t)(node->endp - node->first_avail)) {
-
+    if (!ps->got_a_new_node && size <= memnode_space_left(node)) {
         list_remove(node);
-        list_insert(node, active);
-
-        node->free_index = 0;
-
-        pool->active = node;
-
-        free_index = (APR_ALIGN(active->endp - active->first_avail + 1,
-                                BOUNDARY_SIZE) - BOUNDARY_SIZE) >> BOUNDARY_INDEX;
-
-        active->free_index = (APR_UINT32_TRUNC_CAST)free_index;
-        node = active->next;
-        if (free_index < node->free_index) {
-            do {
-                node = node->next;
-            }
-            while (free_index < node->free_index);
-
-            list_remove(active);
-            list_insert(active, node);
-        }
-
-        node = pool->active;
+        pool_active_set(pool, node);
     }
     else {
         if ((node = allocator_alloc(pool->allocator, size)) == NULL)
@@ -986,7 +944,7 @@
 
     ps->node = node;
     ps->vbuff.curpos = node->first_avail + cur_len;
-    ps->vbuff.endpos = node->endp - 1; /* Save a byte for NUL terminator */
+    ps->vbuff.endpos = memnode_end(node) - 1; /* Save a byte for NUL terminator */
 
     return 0;
 }
@@ -1004,14 +962,14 @@
     ps.vbuff.curpos  = ps.node->first_avail;
 
     /* Save a byte for the NUL terminator */
-    ps.vbuff.endpos = ps.node->endp - 1;
+    ps.vbuff.endpos = memnode_end(ps.node) - 1;
     ps.got_a_new_node = 0;
     ps.free = NULL;
 
     /* Make sure that the first node passed to apr_vformatter has at least
      * room to hold the NUL terminator.
      */
-    if (ps.node->first_avail == ps.node->endp) {
+    if (!memnode_space_left(ps.node)) {
         if (psprintf_flush(&ps.vbuff) == -1) {
             if (pool->abort_fn) {
                 pool->abort_fn(APR_ENOMEM);
@@ -1048,29 +1006,8 @@
     active = pool->active;
     node = ps.node;
 
-    node->free_index = 0;
+    pool_active_set(pool, node);
 
-    list_insert(node, active);
-
-    pool->active = node;
-
-    free_index = (APR_ALIGN(active->endp - active->first_avail + 1,
-                            BOUNDARY_SIZE) - BOUNDARY_SIZE) >> BOUNDARY_INDEX;
-
-    active->free_index = (APR_UINT32_TRUNC_CAST)free_index;
-    node = active->next;
-
-    if (free_index >= node->free_index)
-        return strp;
-
-    do {
-        node = node->next;
-    }
-    while (free_index < node->free_index);
-
-    list_remove(active);
-    list_insert(active, node);
-
     return strp;
 }
 
Index: include/apr_general.h
===================================================================
--- include/apr_general.h	(revision 607616)
+++ include/apr_general.h	(working copy)
@@ -139,6 +139,9 @@
 /** Default alignment */
 #define APR_ALIGN_DEFAULT(size) APR_ALIGN(size, 8)
 
+/* min/max classic */
+#define APR_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define APR_MAX(a, b) (((a) < (b)) ? (b) : (a))
 
 /**
  * String and memory functions
Index: include/apr_allocator.h
===================================================================
--- include/apr_allocator.h	(revision 607616)
+++ include/apr_allocator.h	(working copy)
@@ -54,10 +54,8 @@
 struct apr_memnode_t {
     apr_memnode_t *next;            /**< next memnode */
     apr_memnode_t **ref;            /**< reference to self */
-    apr_uint32_t   index;           /**< size */
-    apr_uint32_t   free_index;      /**< how much free */
     char          *first_avail;     /**< pointer to first free memory */
-    char          *endp;            /**< pointer to end of free memory */
+    apr_uint32_t   index;           /**< size */
 };
 
 /** The base size of a memory node - aligned.  */

Reply via email to