[PATCH] speedups for mod_mime and apr_hash_t

Brian Pane 7 Nov 2001 08:11:18 -0000

The mod_mime per-dir-merge function is one of the most computationally
expensive parts of the httpd, due to all of the hash table merges that
it does.  It has accounted for about 15% of the total user CPU time in
recent benchmarks.

This patch attempts to solve the problem by adding two new functions to
apr_hash_t:
 apr_hash_copy()  /* fast shallow copy of a hash table */
 apr_hash_merge() /* like apr_hash_overlap, but it lets you supply a
                     callback function to handle collisions... */

The changes to mod_mime use these new functions to reduce the amount
of copying required for a dir-merge.

Note: depending on how effective this patch is, it may eliminate the
need for the "static pre-merge of per-dir configs" patch that I posted
a while back.

--Brian

Index: srclib/apr/include/apr_hash.h
===================================================================
RCS file: /home/cvspublic/apr/include/apr_hash.h,v
retrieving revision 1.30
diff -u -r1.30 apr_hash.h
--- srclib/apr/include/apr_hash.h       2001/08/24 17:55:45     1.30
+++ srclib/apr/include/apr_hash.h       2001/11/07 07:44:29
@@ -105,6 +105,16 @@
 APR_DECLARE(apr_hash_t *) apr_hash_make(apr_pool_t *pool);
 
 /**
+ * Make a copy of a hash table
+ * @param pool The pool from which to allocate the new hash table
+ * @param h The hash table to clone
+ * @return The hash table just created
+ * @remark Makes a shallow copy
+ */
+APR_DECLARE(apr_hash_t *) apr_hash_copy(apr_pool_t *pool,
+                                        const apr_hash_t *h);
+
+/**
  * Associate a value with a key in a hash table.
  * @param ht The hash table
  * @param key Pointer to the key
@@ -192,6 +202,24 @@
 APR_DECLARE(apr_hash_t *) apr_hash_overlay(apr_pool_t *p,
                                            const apr_hash_t *overlay, 
                                            const apr_hash_t *base);
+
+/**
+ * Merge two hash tables into one new hash table. If the same key
+ * is present in both tables, call the supplied merge function to
+ * produce a merged value for the key in the new table.
+ * @param p The pool to use for the new hash table
+ * @param overlay The table to add to the initial table
+ * @param base The table that represents the initial values of the new table
+ * @param merger A callback function to merge values
+ * @return A new hash table containing all of the data from the two passed in
+ */
+APR_DECLARE(apr_hash_t *) apr_hash_merge(apr_pool_t *p,
+                                         const apr_hash_t *overlay, 
+                                         const apr_hash_t *base,
+                                         void * (*merger)(apr_pool_t *p,
+                                                     const void *key,
+                                                     const void *overlay_val,
+                                                     const void *base_val));
 
 /**
  * Get a pointer to the pool which the hash table 
Index: srclib/apr/tables/apr_hash.c
===================================================================
RCS file: /home/cvspublic/apr/tables/apr_hash.c,v
retrieving revision 1.25
diff -u -r1.25 apr_hash.c
--- srclib/apr/tables/apr_hash.c        2001/09/06 06:34:59     1.25
+++ srclib/apr/tables/apr_hash.c        2001/11/07 07:44:29
@@ -289,6 +289,36 @@
     return hep;
 }
 
+APR_DECLARE(apr_hash_t *) apr_hash_copy(apr_pool_t *pool,
+                                        const apr_hash_t *orig)
+{
+    apr_hash_t *ht;
+    int i;
+
+    ht = apr_palloc(pool, sizeof(apr_hash_t));
+    ht->pool = pool;
+    ht->count = orig->count;
+    ht->max = orig->max;
+    ht->array = alloc_array(ht, ht->max);
+
+    for (i = 0; i <= ht->max; i++) {
+        apr_hash_entry_t **new_entry = &(ht->array[i]);
+        apr_hash_entry_t *orig_entry = orig->array[i];
+        while (orig_entry) {
+            *new_entry = (apr_hash_entry_t *)apr_palloc(ht->pool,
+                                                sizeof(apr_hash_entry_t));
+            (*new_entry)->hash = orig_entry->hash;
+            (*new_entry)->key = orig_entry->key;
+            (*new_entry)->klen = orig_entry->klen;
+            (*new_entry)->val = orig_entry->val;
+            new_entry = &((*new_entry)->next);
+            orig_entry = orig_entry->next;
+        }
+        *new_entry = NULL;
+    }
+    return ht;
+}
+
 APR_DECLARE(void *) apr_hash_get(apr_hash_t *ht,
                               const void *key,
                               apr_ssize_t klen)
@@ -335,10 +365,22 @@
                                           const apr_hash_t *overlay, 
                                           const apr_hash_t *base)
 {
+    return apr_hash_merge(p, overlay, base, NULL);
+}
+
+APR_DECLARE(apr_hash_t *) apr_hash_merge(apr_pool_t *p,
+                                         const apr_hash_t *overlay, 
+                                         const apr_hash_t *base,
+                                         void * (*merger)(apr_pool_t *p,
+                                                     const void *key,
+                                                     const void *overlay_val,
+                                                     const void *base_val))
+{
     apr_hash_t *res;
-    apr_hash_index_t *hi;
     apr_hash_entry_t *new_vals;
-    int i,j;
+    apr_hash_entry_t *iter;
+    apr_hash_entry_t *ent;
+    int i,j,k;
 
 #ifdef POOL_DEBUG
     /* we don't copy keys and values, so it's necessary that
@@ -361,27 +403,52 @@
     res->pool = p;
     res->count = base->count;
     res->max = (overlay->max > base->max) ? overlay->max : base->max;
+    if (base->count + overlay->count > res->max) {
+        res->max = res->max * 2 + 1;
+    }
     res->array = alloc_array(res, res->max);
-    new_vals = apr_palloc(p, sizeof(apr_hash_entry_t) * res->count);
+    new_vals = apr_palloc(p, sizeof(apr_hash_entry_t) *
+                          (base->count + overlay->count));
     j = 0;
-    for (hi = apr_hash_first(NULL, (apr_hash_t*)base); hi; hi = 
apr_hash_next(hi)) {
-        i = hi->this->hash & res->max;
-
-        new_vals[j].klen = hi->this->klen;
-        new_vals[j].key = hi->this->key;
-        new_vals[j].val = hi->this->val;
-        new_vals[j].hash = hi->this->hash;
-        new_vals[j].next = res->array[i];
-        res->array[i] = &new_vals[j];
-        j++;
+    for (k = 0; k <= base->max; k++) {
+        for (iter = base->array[k]; iter; iter = iter->next) {
+            i = iter->hash & res->max;
+            new_vals[j].klen = iter->klen;
+            new_vals[j].key = iter->key;
+            new_vals[j].val = iter->val;
+            new_vals[j].hash = iter->hash;
+            new_vals[j].next = res->array[i];
+            res->array[i] = &new_vals[j];
+            j++;
+        }
     }
 
-    /* can't simply copy the stuff over, need to set each one so as to
-     * increment the counts/array properly
-     */
-    for (hi = apr_hash_first(NULL, (apr_hash_t*)overlay); hi; 
-         hi = apr_hash_next(hi)) {
-        apr_hash_set(res, hi->this->key, hi->this->klen, hi->this->val);
+    for (k = 0; k < overlay->max; k++) {
+        for (iter = overlay->array[k]; iter; iter = iter->next) {
+            i = iter->hash & res->max;
+            for (ent = res->array[i]; ent; ent = ent->next) {
+                if ((ent->klen == iter->klen) &&
+                    (memcmp(ent->key, iter->key, iter->klen) == 0)) {
+                    if (merger) {
+                        ent->val = (*merger)(p, iter->key,
+                                             iter->val, ent->val);
+                    }
+                    else {
+                        ent->val = iter->val;
+                    }
+                    break;
+                }
+            }
+            if (!ent) {
+                new_vals[j].klen = iter->klen;
+                new_vals[j].key = iter->key;
+                new_vals[j].val = iter->val;
+                new_vals[j].hash = iter->hash;
+                new_vals[j].next = res->array[i];
+                res->array[i] = &new_vals[j];
+                j++;
+            }
+        }
     }
     return res;
 }
Index: modules/http/mod_mime.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/modules/http/mod_mime.c,v
retrieving revision 1.68
diff -u -r1.68 mod_mime.c
--- modules/http/mod_mime.c     2001/10/15 02:39:37     1.68
+++ modules/http/mod_mime.c     2001/11/07 07:44:30
@@ -166,52 +166,39 @@
 /*
  * Overlay one hash table of extension_mappings onto another
  */
-static void overlay_extension_mappings(apr_pool_t *p,
-                                       apr_hash_t *overlay, apr_hash_t *base)
+static void *overlay_extension_mappings(apr_pool_t *p,
+                                        const void *key,
+                                        const void *overlay_val,
+                                        const void *base_val)
 {
-    apr_hash_index_t *index;
-    for (index = apr_hash_first(p, overlay); index;
-         index = apr_hash_next(index)) {
-        char *key;
-        apr_ssize_t klen;
-        extension_info *overlay_info, *base_info;
-        
-        apr_hash_this(index, (const void**)&key, &klen, (void**)&overlay_info);
-
-        base_info = (extension_info*)apr_hash_get(base, key, klen);
-
-        if (base_info) {
-            extension_info *copyinfo = base_info;
-            base_info = (extension_info*)apr_palloc(p, sizeof(*base_info));
-            apr_hash_set(base, key, klen, base_info);
-            memcpy(base_info, copyinfo, sizeof(*base_info));
-
-            if (overlay_info->forced_type) {
-                base_info->forced_type = overlay_info->forced_type;
-            }
-            if (overlay_info->encoding_type) {
-                base_info->encoding_type = overlay_info->encoding_type;
-            }
-            if (overlay_info->language_type) {
-                base_info->language_type = overlay_info->language_type;
-            }
-            if (overlay_info->handler) {
-                base_info->handler = overlay_info->handler;
-            }
-            if (overlay_info->charset_type) {
-                base_info->charset_type = overlay_info->charset_type;
-            }
-            if (overlay_info->input_filters) {
-                base_info->input_filters = overlay_info->input_filters;
-            }
-            if (overlay_info->output_filters) {
-                base_info->output_filters = overlay_info->output_filters;
-            }
-        }
-        else {
-            apr_hash_set(base, key, klen, overlay_info);
-        }
+    extension_info *new_info = apr_palloc(p, sizeof(extension_info));
+    const extension_info *overlay_info = (const extension_info *)overlay_val;
+    const extension_info *base_info = (const extension_info *)base_val;
+
+    memcpy(new_info, base_info, sizeof(extension_info));
+    if (overlay_info->forced_type) {
+        new_info->forced_type = overlay_info->forced_type;
+    }
+    if (overlay_info->encoding_type) {
+        new_info->encoding_type = overlay_info->encoding_type;
+    }
+    if (overlay_info->language_type) {
+        new_info->language_type = overlay_info->language_type;
     }
+    if (overlay_info->handler) {
+        new_info->handler = overlay_info->handler;
+    }
+    if (overlay_info->charset_type) {
+        new_info->charset_type = overlay_info->charset_type;
+    }
+    if (overlay_info->input_filters) {
+        new_info->input_filters = overlay_info->input_filters;
+    }
+    if (overlay_info->output_filters) {
+        new_info->output_filters = overlay_info->output_filters;
+    }
+
+    return new_info;
 }
 
 /* Member is the offset within an extension_info of the pointer to reset 
@@ -244,11 +231,9 @@
     mime_dir_config *new = apr_palloc(p, sizeof(mime_dir_config));
 
     if (base->extension_mappings && add->extension_mappings) {
-        new->extension_mappings = apr_hash_make(p);
-        overlay_extension_mappings(p, base->extension_mappings,
-                                   new->extension_mappings);
-        overlay_extension_mappings(p, add->extension_mappings,
-                                   new->extension_mappings);
+        new->extension_mappings = apr_hash_merge(p, add->extension_mappings,
+                                                 base->extension_mappings,
+                                                 overlay_extension_mappings);
     }
     else {
         if (base->extension_mappings == NULL) {
@@ -262,9 +247,8 @@
          * We must have a copy for safety.
          */
         if (new->extension_mappings && add->remove_mappings) {
-            apr_hash_t *copyhash = new->extension_mappings;
-            new->extension_mappings = apr_hash_make(p);
-            overlay_extension_mappings(p, copyhash, new->extension_mappings);
+            new->extension_mappings =
+                apr_hash_copy(p, new->extension_mappings);
         }
     }
 
@@ -764,7 +748,7 @@
     /* Parse filename extensions which can be in any order 
      */
     while (*fn && (ext = ap_getword(r->pool, &fn, '.'))) {
-        extension_info *exinfo = NULL;
+        const extension_info *exinfo = NULL;
         int found;
 
         if (*ext == '\0')  /* ignore empty extensions "bad..html" */

[PATCH] speedups for mod_mime and apr_hash_t

Reply via email to