The mod_mime per-dir-merge function is one of the most computationally
expensive parts of the httpd, due to all of the hash table merges that
it does. It has accounted for about 15% of the total user CPU time in
recent benchmarks.
This patch attempts to solve the problem by adding two new functions to
apr_hash_t:
apr_hash_copy() /* fast shallow copy of a hash table */
apr_hash_merge() /* like apr_hash_overlap, but it lets you supply a
callback function to handle collisions... */
The changes to mod_mime use these new functions to reduce the amount
of copying required for a dir-merge.
Note: depending on how effective this patch is, it may eliminate the
need for the "static pre-merge of per-dir configs" patch that I posted
a while back.
--Brian
Index: srclib/apr/include/apr_hash.h
===================================================================
RCS file: /home/cvspublic/apr/include/apr_hash.h,v
retrieving revision 1.30
diff -u -r1.30 apr_hash.h
--- srclib/apr/include/apr_hash.h 2001/08/24 17:55:45 1.30
+++ srclib/apr/include/apr_hash.h 2001/11/07 07:44:29
@@ -105,6 +105,16 @@
APR_DECLARE(apr_hash_t *) apr_hash_make(apr_pool_t *pool);
/**
+ * Make a copy of a hash table
+ * @param pool The pool from which to allocate the new hash table
+ * @param h The hash table to clone
+ * @return The hash table just created
+ * @remark Makes a shallow copy
+ */
+APR_DECLARE(apr_hash_t *) apr_hash_copy(apr_pool_t *pool,
+ const apr_hash_t *h);
+
+/**
* Associate a value with a key in a hash table.
* @param ht The hash table
* @param key Pointer to the key
@@ -192,6 +202,24 @@
APR_DECLARE(apr_hash_t *) apr_hash_overlay(apr_pool_t *p,
const apr_hash_t *overlay,
const apr_hash_t *base);
+
+/**
+ * Merge two hash tables into one new hash table. If the same key
+ * is present in both tables, call the supplied merge function to
+ * produce a merged value for the key in the new table.
+ * @param p The pool to use for the new hash table
+ * @param overlay The table to add to the initial table
+ * @param base The table that represents the initial values of the new table
+ * @param merger A callback function to merge values
+ * @return A new hash table containing all of the data from the two passed in
+ */
+APR_DECLARE(apr_hash_t *) apr_hash_merge(apr_pool_t *p,
+ const apr_hash_t *overlay,
+ const apr_hash_t *base,
+ void * (*merger)(apr_pool_t *p,
+ const void *key,
+ const void *overlay_val,
+ const void *base_val));
/**
* Get a pointer to the pool which the hash table
Index: srclib/apr/tables/apr_hash.c
===================================================================
RCS file: /home/cvspublic/apr/tables/apr_hash.c,v
retrieving revision 1.25
diff -u -r1.25 apr_hash.c
--- srclib/apr/tables/apr_hash.c 2001/09/06 06:34:59 1.25
+++ srclib/apr/tables/apr_hash.c 2001/11/07 07:44:29
@@ -289,6 +289,36 @@
return hep;
}
+APR_DECLARE(apr_hash_t *) apr_hash_copy(apr_pool_t *pool,
+ const apr_hash_t *orig)
+{
+ apr_hash_t *ht;
+ int i;
+
+ ht = apr_palloc(pool, sizeof(apr_hash_t));
+ ht->pool = pool;
+ ht->count = orig->count;
+ ht->max = orig->max;
+ ht->array = alloc_array(ht, ht->max);
+
+ for (i = 0; i <= ht->max; i++) {
+ apr_hash_entry_t **new_entry = &(ht->array[i]);
+ apr_hash_entry_t *orig_entry = orig->array[i];
+ while (orig_entry) {
+ *new_entry = (apr_hash_entry_t *)apr_palloc(ht->pool,
+ sizeof(apr_hash_entry_t));
+ (*new_entry)->hash = orig_entry->hash;
+ (*new_entry)->key = orig_entry->key;
+ (*new_entry)->klen = orig_entry->klen;
+ (*new_entry)->val = orig_entry->val;
+ new_entry = &((*new_entry)->next);
+ orig_entry = orig_entry->next;
+ }
+ *new_entry = NULL;
+ }
+ return ht;
+}
+
APR_DECLARE(void *) apr_hash_get(apr_hash_t *ht,
const void *key,
apr_ssize_t klen)
@@ -335,10 +365,22 @@
const apr_hash_t *overlay,
const apr_hash_t *base)
{
+ return apr_hash_merge(p, overlay, base, NULL);
+}
+
+APR_DECLARE(apr_hash_t *) apr_hash_merge(apr_pool_t *p,
+ const apr_hash_t *overlay,
+ const apr_hash_t *base,
+ void * (*merger)(apr_pool_t *p,
+ const void *key,
+ const void *overlay_val,
+ const void *base_val))
+{
apr_hash_t *res;
- apr_hash_index_t *hi;
apr_hash_entry_t *new_vals;
- int i,j;
+ apr_hash_entry_t *iter;
+ apr_hash_entry_t *ent;
+ int i,j,k;
#ifdef POOL_DEBUG
/* we don't copy keys and values, so it's necessary that
@@ -361,27 +403,52 @@
res->pool = p;
res->count = base->count;
res->max = (overlay->max > base->max) ? overlay->max : base->max;
+ if (base->count + overlay->count > res->max) {
+ res->max = res->max * 2 + 1;
+ }
res->array = alloc_array(res, res->max);
- new_vals = apr_palloc(p, sizeof(apr_hash_entry_t) * res->count);
+ new_vals = apr_palloc(p, sizeof(apr_hash_entry_t) *
+ (base->count + overlay->count));
j = 0;
- for (hi = apr_hash_first(NULL, (apr_hash_t*)base); hi; hi =
apr_hash_next(hi)) {
- i = hi->this->hash & res->max;
-
- new_vals[j].klen = hi->this->klen;
- new_vals[j].key = hi->this->key;
- new_vals[j].val = hi->this->val;
- new_vals[j].hash = hi->this->hash;
- new_vals[j].next = res->array[i];
- res->array[i] = &new_vals[j];
- j++;
+ for (k = 0; k <= base->max; k++) {
+ for (iter = base->array[k]; iter; iter = iter->next) {
+ i = iter->hash & res->max;
+ new_vals[j].klen = iter->klen;
+ new_vals[j].key = iter->key;
+ new_vals[j].val = iter->val;
+ new_vals[j].hash = iter->hash;
+ new_vals[j].next = res->array[i];
+ res->array[i] = &new_vals[j];
+ j++;
+ }
}
- /* can't simply copy the stuff over, need to set each one so as to
- * increment the counts/array properly
- */
- for (hi = apr_hash_first(NULL, (apr_hash_t*)overlay); hi;
- hi = apr_hash_next(hi)) {
- apr_hash_set(res, hi->this->key, hi->this->klen, hi->this->val);
+ for (k = 0; k < overlay->max; k++) {
+ for (iter = overlay->array[k]; iter; iter = iter->next) {
+ i = iter->hash & res->max;
+ for (ent = res->array[i]; ent; ent = ent->next) {
+ if ((ent->klen == iter->klen) &&
+ (memcmp(ent->key, iter->key, iter->klen) == 0)) {
+ if (merger) {
+ ent->val = (*merger)(p, iter->key,
+ iter->val, ent->val);
+ }
+ else {
+ ent->val = iter->val;
+ }
+ break;
+ }
+ }
+ if (!ent) {
+ new_vals[j].klen = iter->klen;
+ new_vals[j].key = iter->key;
+ new_vals[j].val = iter->val;
+ new_vals[j].hash = iter->hash;
+ new_vals[j].next = res->array[i];
+ res->array[i] = &new_vals[j];
+ j++;
+ }
+ }
}
return res;
}
Index: modules/http/mod_mime.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/modules/http/mod_mime.c,v
retrieving revision 1.68
diff -u -r1.68 mod_mime.c
--- modules/http/mod_mime.c 2001/10/15 02:39:37 1.68
+++ modules/http/mod_mime.c 2001/11/07 07:44:30
@@ -166,52 +166,39 @@
/*
* Overlay one hash table of extension_mappings onto another
*/
-static void overlay_extension_mappings(apr_pool_t *p,
- apr_hash_t *overlay, apr_hash_t *base)
+static void *overlay_extension_mappings(apr_pool_t *p,
+ const void *key,
+ const void *overlay_val,
+ const void *base_val)
{
- apr_hash_index_t *index;
- for (index = apr_hash_first(p, overlay); index;
- index = apr_hash_next(index)) {
- char *key;
- apr_ssize_t klen;
- extension_info *overlay_info, *base_info;
-
- apr_hash_this(index, (const void**)&key, &klen, (void**)&overlay_info);
-
- base_info = (extension_info*)apr_hash_get(base, key, klen);
-
- if (base_info) {
- extension_info *copyinfo = base_info;
- base_info = (extension_info*)apr_palloc(p, sizeof(*base_info));
- apr_hash_set(base, key, klen, base_info);
- memcpy(base_info, copyinfo, sizeof(*base_info));
-
- if (overlay_info->forced_type) {
- base_info->forced_type = overlay_info->forced_type;
- }
- if (overlay_info->encoding_type) {
- base_info->encoding_type = overlay_info->encoding_type;
- }
- if (overlay_info->language_type) {
- base_info->language_type = overlay_info->language_type;
- }
- if (overlay_info->handler) {
- base_info->handler = overlay_info->handler;
- }
- if (overlay_info->charset_type) {
- base_info->charset_type = overlay_info->charset_type;
- }
- if (overlay_info->input_filters) {
- base_info->input_filters = overlay_info->input_filters;
- }
- if (overlay_info->output_filters) {
- base_info->output_filters = overlay_info->output_filters;
- }
- }
- else {
- apr_hash_set(base, key, klen, overlay_info);
- }
+ extension_info *new_info = apr_palloc(p, sizeof(extension_info));
+ const extension_info *overlay_info = (const extension_info *)overlay_val;
+ const extension_info *base_info = (const extension_info *)base_val;
+
+ memcpy(new_info, base_info, sizeof(extension_info));
+ if (overlay_info->forced_type) {
+ new_info->forced_type = overlay_info->forced_type;
+ }
+ if (overlay_info->encoding_type) {
+ new_info->encoding_type = overlay_info->encoding_type;
+ }
+ if (overlay_info->language_type) {
+ new_info->language_type = overlay_info->language_type;
}
+ if (overlay_info->handler) {
+ new_info->handler = overlay_info->handler;
+ }
+ if (overlay_info->charset_type) {
+ new_info->charset_type = overlay_info->charset_type;
+ }
+ if (overlay_info->input_filters) {
+ new_info->input_filters = overlay_info->input_filters;
+ }
+ if (overlay_info->output_filters) {
+ new_info->output_filters = overlay_info->output_filters;
+ }
+
+ return new_info;
}
/* Member is the offset within an extension_info of the pointer to reset
@@ -244,11 +231,9 @@
mime_dir_config *new = apr_palloc(p, sizeof(mime_dir_config));
if (base->extension_mappings && add->extension_mappings) {
- new->extension_mappings = apr_hash_make(p);
- overlay_extension_mappings(p, base->extension_mappings,
- new->extension_mappings);
- overlay_extension_mappings(p, add->extension_mappings,
- new->extension_mappings);
+ new->extension_mappings = apr_hash_merge(p, add->extension_mappings,
+ base->extension_mappings,
+ overlay_extension_mappings);
}
else {
if (base->extension_mappings == NULL) {
@@ -262,9 +247,8 @@
* We must have a copy for safety.
*/
if (new->extension_mappings && add->remove_mappings) {
- apr_hash_t *copyhash = new->extension_mappings;
- new->extension_mappings = apr_hash_make(p);
- overlay_extension_mappings(p, copyhash, new->extension_mappings);
+ new->extension_mappings =
+ apr_hash_copy(p, new->extension_mappings);
}
}
@@ -764,7 +748,7 @@
/* Parse filename extensions which can be in any order
*/
while (*fn && (ext = ap_getword(r->pool, &fn, '.'))) {
- extension_info *exinfo = NULL;
+ const extension_info *exinfo = NULL;
int found;
if (*ext == '\0') /* ignore empty extensions "bad..html" */