Hi there,
I am using APR as a system layer in the development of a cross-platform
financial system in London. While using it, I spotted an opportunity to
reduce the memory footprint of the allocator and make it a bit greener —
not to save a forest ;), but to provide an extra 16 bytes (on a 32-bit
system) to the user from each apr_memnode_t allocation.
In a test run, a newly created pool is now able to accommodate 1015 8-byte
objects before requesting a new memory node, instead of 1013.
Summary of changes:
* There is a bug in several memory allocation functions in apr_pools.c,
where memory request size is compared with free memory available from the
node this way:
/* If the active node has enough bytes left, use it. */
if (size < (apr_size_t)(active->endp - active->first_avail)) {
Thus, if size is 8 and the active node has exactly 8 bytes left free, the
strict comparison yields false. The comparisons have been changed to <=. This change
provides extra 8 bytes of memory to the user (since the minimum allocation
size is 8).
* apr_memnode_t structure cleaned up.
apr_memnode_t::endp member has been removed. Instead, the value is
calculated this way:
static inline char* memnode_end(apr_memnode_t* node)
{
return (char*)node + ((1 + node->index) << BOUNDARY_INDEX);
}
static inline apr_size_t memnode_space_left(apr_memnode_t* node)
{
return memnode_end(node) - node->first_avail;
}
apr_memnode_t::free_index member is removed. The value is calculated this
way:
static inline apr_size_t memnode_free_index(apr_memnode_t* node)
{
return (APR_ALIGN(memnode_space_left(node) + 1, BOUNDARY_SIZE)
- BOUNDARY_SIZE) >> BOUNDARY_INDEX;
}
Removing these two members yields another 8 extra bytes.
* allocator_alloc() has been refactored to eliminate race conditions. It
used to hold the mutex while changing apr_allocator_t members, but not
while reading.
* Some duplicate code was extracted into functions. apr_pool.c code is now
63 lines shorter.
Please review and comment.
--
Max
Index: memory/unix/apr_pools.c
===================================================================
--- memory/unix/apr_pools.c (revision 607616)
+++ memory/unix/apr_pools.c (working copy)
@@ -60,6 +60,21 @@
#define TIMEOUT_USECS 3000000
#define TIMEOUT_INTERVAL 46875
+static inline char* memnode_end(apr_memnode_t* node)
+{
+ return (char*)node + ((1 + node->index) << BOUNDARY_INDEX);
+}
+
+static inline apr_size_t memnode_space_left(apr_memnode_t* node)
+{
+ return memnode_end(node) - node->first_avail;
+}
+
+static inline apr_size_t memnode_free_index(apr_memnode_t* node)
+{
+ return (APR_ALIGN(memnode_space_left(node) + 1, BOUNDARY_SIZE) - BOUNDARY_SIZE) >> BOUNDARY_INDEX;
+}
+
/*
* Allocator
*
@@ -193,16 +208,14 @@
static APR_INLINE
apr_memnode_t *allocator_alloc(apr_allocator_t *allocator, apr_size_t size)
{
- apr_memnode_t *node, **ref;
+ apr_memnode_t *node = NULL, **ref;
apr_uint32_t max_index;
apr_size_t i, index;
/* Round up the block size to the next boundary, but always
* allocate at least a certain size (MIN_ALLOC).
*/
- size = APR_ALIGN(size + APR_MEMNODE_T_SIZE, BOUNDARY_SIZE);
- if (size < MIN_ALLOC)
- size = MIN_ALLOC;
+ size = APR_MAX(MIN_ALLOC, APR_ALIGN(size + APR_MEMNODE_T_SIZE, BOUNDARY_SIZE));
/* Find the index for this node size by
* dividing its size by the boundary size
@@ -213,15 +226,15 @@
return NULL;
}
+#if APR_HAS_THREADS
+ if (allocator->mutex)
+ apr_thread_mutex_lock(allocator->mutex);
+#endif /* APR_HAS_THREADS */
+
/* First see if there are any nodes in the area we know
* our node will fit into.
*/
if (index <= allocator->max_index) {
-#if APR_HAS_THREADS
- if (allocator->mutex)
- apr_thread_mutex_lock(allocator->mutex);
-#endif /* APR_HAS_THREADS */
-
/* Walk the free list to see if there are
* any nodes on it of the requested size
*
@@ -255,37 +268,12 @@
allocator->max_index = max_index;
}
-
- allocator->current_free_index += node->index;
- if (allocator->current_free_index > allocator->max_free_index)
- allocator->current_free_index = allocator->max_free_index;
-
-#if APR_HAS_THREADS
- if (allocator->mutex)
- apr_thread_mutex_unlock(allocator->mutex);
-#endif /* APR_HAS_THREADS */
-
- node->next = NULL;
- node->first_avail = (char *)node + APR_MEMNODE_T_SIZE;
-
- return node;
}
-
-#if APR_HAS_THREADS
- if (allocator->mutex)
- apr_thread_mutex_unlock(allocator->mutex);
-#endif /* APR_HAS_THREADS */
}
-
/* If we found nothing, seek the sink (at index 0), if
* it is not empty.
*/
else if (allocator->free[0]) {
-#if APR_HAS_THREADS
- if (allocator->mutex)
- apr_thread_mutex_lock(allocator->mutex);
-#endif /* APR_HAS_THREADS */
-
/* Walk the free list to see if there are
* any nodes on it of the requested size
*/
@@ -293,40 +281,33 @@
while ((node = *ref) != NULL && index > node->index)
ref = &node->next;
- if (node) {
+ if (node)
*ref = node->next;
+ }
- allocator->current_free_index += node->index;
- if (allocator->current_free_index > allocator->max_free_index)
- allocator->current_free_index = allocator->max_free_index;
+ if (node) {
+ allocator->current_free_index += node->index;
+ if (allocator->current_free_index > allocator->max_free_index)
+ allocator->current_free_index = allocator->max_free_index;
+ }
#if APR_HAS_THREADS
- if (allocator->mutex)
- apr_thread_mutex_unlock(allocator->mutex);
+ if (allocator->mutex)
+ apr_thread_mutex_unlock(allocator->mutex);
#endif /* APR_HAS_THREADS */
- node->next = NULL;
- node->first_avail = (char *)node + APR_MEMNODE_T_SIZE;
+ if (!node) {
+ /* If we haven't got a suitable node, malloc a new one
+ * and initialize it.
+ */
+ if ((node = malloc(size)) == NULL)
+ return NULL;
- return node;
- }
-
-#if APR_HAS_THREADS
- if (allocator->mutex)
- apr_thread_mutex_unlock(allocator->mutex);
-#endif /* APR_HAS_THREADS */
+ node->index = (APR_UINT32_TRUNC_CAST)index;
}
- /* If we haven't got a suitable node, malloc a new one
- * and initialize it.
- */
- if ((node = malloc(size)) == NULL)
- return NULL;
-
node->next = NULL;
- node->index = (APR_UINT32_TRUNC_CAST)index;
node->first_avail = (char *)node + APR_MEMNODE_T_SIZE;
- node->endp = (char *)node + size;
return node;
}
@@ -353,37 +334,31 @@
do {
next = node->next;
index = node->index;
+
if (max_free_index != APR_ALLOCATOR_MAX_FREE_UNLIMITED
&& index > current_free_index) {
node->next = freelist;
freelist = node;
- }
- else if (index < MAX_INDEX) {
- /* Add the node to the appropiate 'size' bucket. Adjust
- * the max_index when appropiate.
+ }
+ else {
+ current_free_index -= APR_MIN(current_free_index, index);
+
+ /* If this node is too large to keep in a specific size bucket,
+ * just add it to the sink (at index 0).
*/
- if ((node->next = allocator->free[index]) == NULL
- && index > max_index) {
- max_index = index;
+ if (index >= MAX_INDEX) {
+ index = 0;
+ node->next = allocator->free[index];
}
+ else {
+ node->next = allocator->free[index];
+ if (!(node->next) && index > max_index)
+ max_index = index;
+ }
+
allocator->free[index] = node;
- if (current_free_index >= index)
- current_free_index -= index;
- else
- current_free_index = 0;
}
- else {
- /* This node is too large to keep in a specific size bucket,
- * just add it to the sink (at index 0).
- */
- node->next = allocator->free[0];
- allocator->free[0] = node;
- if (current_free_index >= index)
- current_free_index -= index;
- else
- current_free_index = 0;
- }
} while ((node = next) != NULL);
allocator->max_index = max_index;
@@ -505,7 +480,6 @@
#define SIZEOF_POOL_T APR_ALIGN_DEFAULT(sizeof(apr_pool_t))
-
/*
* Variables
*/
@@ -538,6 +512,20 @@
* Initialization
*/
+static void apr_pool_do_terminate(void)
+{
+ if (global_pool) {
+ /* it destroys global_allocator as well */
+ apr_pool_destroy(global_pool);
+ global_pool = NULL;
+ }
+ else if (global_allocator) {
+ apr_allocator_destroy(global_allocator);
+ }
+ global_allocator = NULL;
+ apr_pools_initialized = 0;
+}
+
APR_DECLARE(apr_status_t) apr_pool_initialize(void)
{
apr_status_t rv;
@@ -545,18 +533,11 @@
if (apr_pools_initialized++)
return APR_SUCCESS;
- if ((rv = apr_allocator_create(&global_allocator)) != APR_SUCCESS) {
- apr_pools_initialized = 0;
- return rv;
- }
+ if ((rv = apr_allocator_create(&global_allocator)) != APR_SUCCESS)
+ goto error;
- if ((rv = apr_pool_create_ex(&global_pool, NULL, NULL,
- global_allocator)) != APR_SUCCESS) {
- apr_allocator_destroy(global_allocator);
- global_allocator = NULL;
- apr_pools_initialized = 0;
- return rv;
- }
+ if ((rv = apr_pool_create_ex(&global_pool, NULL, NULL, global_allocator)) != APR_SUCCESS)
+ goto error;
apr_pool_tag(global_pool, "apr_global_pool");
@@ -566,9 +547,8 @@
* Warning: apr_atomic_init() must always be called, by any
* means possible, from apr_initialize().
*/
- if ((rv = apr_atomic_init(global_pool)) != APR_SUCCESS) {
- return rv;
- }
+ if ((rv = apr_atomic_init(global_pool)) != APR_SUCCESS)
+ goto error;
#if APR_HAS_THREADS
{
@@ -576,9 +556,8 @@
if ((rv = apr_thread_mutex_create(&mutex,
APR_THREAD_MUTEX_DEFAULT,
- global_pool)) != APR_SUCCESS) {
- return rv;
- }
+ global_pool)) != APR_SUCCESS)
+ goto error;
apr_allocator_mutex_set(global_allocator, mutex);
}
@@ -587,23 +566,18 @@
apr_allocator_owner_set(global_allocator, global_pool);
return APR_SUCCESS;
+
+error:
+ apr_pool_do_terminate();
+ return rv;
}
APR_DECLARE(void) apr_pool_terminate(void)
{
- if (!apr_pools_initialized)
- return;
-
- if (--apr_pools_initialized)
- return;
-
- apr_pool_destroy(global_pool); /* This will also destroy the mutex */
- global_pool = NULL;
-
- global_allocator = NULL;
+ if (apr_pools_initialized && !--apr_pools_initialized)
+ apr_pool_do_terminate();
}
-
/* Node list management helper macros; list_insert() inserts 'node'
* before 'point'. */
#define list_insert(node, point) do { \
@@ -619,6 +593,31 @@
node->next->ref = node->ref; \
} while (0)
+static inline void pool_active_set(apr_pool_t* pool, apr_memnode_t* node)
+{
+ apr_memnode_t* old_active = pool->active;
+ apr_size_t old_active_free_index;
+
+ list_insert(node, old_active);
+ pool->active = node;
+
+ if (node == old_active->next) /* only two nodes in the list */
+ return;
+
+ /* put old_active in the list in the descending order of free_index */
+ old_active_free_index = memnode_free_index(old_active);
+ node = old_active->next;
+ if (old_active_free_index < memnode_free_index(node)) {
+ do {
+ node = node->next;
+ }
+ while (old_active_free_index < memnode_free_index(node));
+
+ list_remove(old_active);
+ list_insert(old_active, node);
+ }
+}
+
/*
* Memory allocation
*/
@@ -633,7 +632,7 @@
active = pool->active;
/* If the active node has enough bytes left, use it. */
- if (size < (apr_size_t)(active->endp - active->first_avail)) {
+ if (size <= memnode_space_left(active)) {
mem = active->first_avail;
active->first_avail += size;
@@ -641,7 +640,7 @@
}
node = active->next;
- if (size < (apr_size_t)(node->endp - node->first_avail)) {
+ if (size <= memnode_space_left(node)) {
list_remove(node);
}
else {
@@ -653,31 +652,13 @@
}
}
- node->free_index = 0;
+ /* At this point node has enough free bytes and it is not in the list */
+ pool_active_set(pool, node);
+
mem = node->first_avail;
node->first_avail += size;
- list_insert(node, active);
-
- pool->active = node;
-
- free_index = (APR_ALIGN(active->endp - active->first_avail + 1,
- BOUNDARY_SIZE) - BOUNDARY_SIZE) >> BOUNDARY_INDEX;
-
- active->free_index = (APR_UINT32_TRUNC_CAST)free_index;
- node = active->next;
- if (free_index >= node->free_index)
- return mem;
-
- do {
- node = node->next;
- }
- while (free_index < node->free_index);
-
- list_remove(active);
- list_insert(active, node);
-
return mem;
}
@@ -943,32 +924,9 @@
size = APR_PSPRINTF_MIN_STRINGSIZE;
node = active->next;
- if (!ps->got_a_new_node
- && size < (apr_size_t)(node->endp - node->first_avail)) {
-
+ if (!ps->got_a_new_node && size <= memnode_space_left(node)) {
list_remove(node);
- list_insert(node, active);
-
- node->free_index = 0;
-
- pool->active = node;
-
- free_index = (APR_ALIGN(active->endp - active->first_avail + 1,
- BOUNDARY_SIZE) - BOUNDARY_SIZE) >> BOUNDARY_INDEX;
-
- active->free_index = (APR_UINT32_TRUNC_CAST)free_index;
- node = active->next;
- if (free_index < node->free_index) {
- do {
- node = node->next;
- }
- while (free_index < node->free_index);
-
- list_remove(active);
- list_insert(active, node);
- }
-
- node = pool->active;
+ pool_active_set(pool, node);
}
else {
if ((node = allocator_alloc(pool->allocator, size)) == NULL)
@@ -986,7 +944,7 @@
ps->node = node;
ps->vbuff.curpos = node->first_avail + cur_len;
- ps->vbuff.endpos = node->endp - 1; /* Save a byte for NUL terminator */
+ ps->vbuff.endpos = memnode_end(node) - 1; /* Save a byte for NUL terminator */
return 0;
}
@@ -1004,14 +962,14 @@
ps.vbuff.curpos = ps.node->first_avail;
/* Save a byte for the NUL terminator */
- ps.vbuff.endpos = ps.node->endp - 1;
+ ps.vbuff.endpos = memnode_end(ps.node) - 1;
ps.got_a_new_node = 0;
ps.free = NULL;
/* Make sure that the first node passed to apr_vformatter has at least
* room to hold the NUL terminator.
*/
- if (ps.node->first_avail == ps.node->endp) {
+ if (!memnode_space_left(ps.node)) {
if (psprintf_flush(&ps.vbuff) == -1) {
if (pool->abort_fn) {
pool->abort_fn(APR_ENOMEM);
@@ -1048,29 +1006,8 @@
active = pool->active;
node = ps.node;
- node->free_index = 0;
+ pool_active_set(pool, node);
- list_insert(node, active);
-
- pool->active = node;
-
- free_index = (APR_ALIGN(active->endp - active->first_avail + 1,
- BOUNDARY_SIZE) - BOUNDARY_SIZE) >> BOUNDARY_INDEX;
-
- active->free_index = (APR_UINT32_TRUNC_CAST)free_index;
- node = active->next;
-
- if (free_index >= node->free_index)
- return strp;
-
- do {
- node = node->next;
- }
- while (free_index < node->free_index);
-
- list_remove(active);
- list_insert(active, node);
-
return strp;
}
Index: include/apr_general.h
===================================================================
--- include/apr_general.h (revision 607616)
+++ include/apr_general.h (working copy)
@@ -139,6 +139,9 @@
/** Default alignment */
#define APR_ALIGN_DEFAULT(size) APR_ALIGN(size, 8)
+/* min/max classic */
+#define APR_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define APR_MAX(a, b) (((a) < (b)) ? (b) : (a))
/**
* String and memory functions
Index: include/apr_allocator.h
===================================================================
--- include/apr_allocator.h (revision 607616)
+++ include/apr_allocator.h (working copy)
@@ -54,10 +54,8 @@
struct apr_memnode_t {
apr_memnode_t *next; /**< next memnode */
apr_memnode_t **ref; /**< reference to self */
- apr_uint32_t index; /**< size */
- apr_uint32_t free_index; /**< how much free */
char *first_avail; /**< pointer to first free memory */
- char *endp; /**< pointer to end of free memory */
+ apr_uint32_t index; /**< size */
};
/** The base size of a memory node - aligned. */