On Sat, 2002-07-13 at 18:59, Brian Pane wrote:

> This patch adds a simple index to apr_table_t.  The index maps

Here's an updated patch with two additional speedups:
  - remove some extraneous string copies in apr_table_addn()
  - don't initialize the "index_last" table to all -1
    values when creating a new table; it's sufficient
    to initialize just "index_first"

--Brian

Index: include/apr_tables.h
===================================================================
RCS file: /home/cvs/apr/include/apr_tables.h,v
retrieving revision 1.33
diff -u -r1.33 apr_tables.h
--- include/apr_tables.h        13 Jul 2002 18:16:33 -0000      1.33
+++ include/apr_tables.h        14 Jul 2002 22:31:10 -0000
@@ -115,9 +115,6 @@
                          */
     /** The value for the current table entry */
     char *val;
-
-    /** A checksum for the key, for use by the apr_table internals */
-    apr_uint32_t key_checksum;
 };
 
 /**
Index: tables/apr_tables.c
===================================================================
RCS file: /home/cvs/apr/tables/apr_tables.c,v
retrieving revision 1.36
diff -u -r1.36 apr_tables.c
--- tables/apr_tables.c 13 Jul 2002 18:16:33 -0000      1.36
+++ tables/apr_tables.c 14 Jul 2002 22:31:10 -0000
@@ -307,40 +307,8 @@
  * The "table" functions.
  */
 
-#if APR_CHARSET_EBCDIC
-#define CASE_MASK 0xbfbfbfbf
-#else
-#define CASE_MASK 0xdfdfdfdf
-#endif
-
-/* Compute the "checksum" for a key, consisting of the first
- * 4 bytes, normalized for case-insensitivity and packed into
- * an int...this checksum allows us to do a single integer
- * comparison as a fast check to determine whether we can
- * skip a strcasecmp
- */
-#define COMPUTE_KEY_CHECKSUM(key, checksum)    \
-{                                              \
-    const char *k = (key);                     \
-    apr_uint32_t c = (apr_uint32_t)*k;         \
-    (checksum) = c;                            \
-    (checksum) <<= 8;                          \
-    if (c) {                                   \
-        c = (apr_uint32_t)*++k;                \
-        checksum |= c;                         \
-    }                                          \
-    (checksum) <<= 8;                          \
-    if (c) {                                   \
-        c = (apr_uint32_t)*++k;                \
-        checksum |= c;                         \
-    }                                          \
-    (checksum) <<= 8;                          \
-    if (c) {                                   \
-        c = (apr_uint32_t)*++k;                \
-        checksum |= c;                         \
-    }                                          \
-    checksum &= CASE_MASK;                     \
-}
+#define TABLE_HASH_SIZE 32
+#define TABLE_INDEX_MASK 0x1f
 
 /** The opaque string-content table type */
 struct apr_table_t {
@@ -351,12 +319,21 @@
      */
     /** The underlying array for the table */
     apr_array_header_t a;
+
+    /* Indices within the table of the first and last
+     * instances of keys beginning with each character
+     */
+    int index_first[TABLE_HASH_SIZE];
+    int index_last[TABLE_HASH_SIZE];
+
 #ifdef MAKE_TABLE_PROFILE
     /** Who created the array. */
     void *creator;
 #endif
 };
 
+#define TABLE_HASH(k)  (int)(TABLE_INDEX_MASK & *(unsigned char *)(k))
+
 /*
  * XXX: if you tweak this you should look at is_empty_table() and table_elts()
  * in alloc.h
@@ -388,6 +365,8 @@
     apr_table_t *t = apr_palloc(p, sizeof(apr_table_t));
 
     make_array_core(&t->a, p, nelts, sizeof(apr_table_entry_t), 0);
+    /* Initialize all the indices with -1 (meaning "no match") */
+    memset(&(t->index_first), 0xff, sizeof(int) * TABLE_HASH_SIZE);
 #ifdef MAKE_TABLE_PROFILE
     t->creator = __builtin_return_address(0);
 #endif
@@ -410,28 +389,55 @@
     make_array_core(&new->a, p, t->a.nalloc, sizeof(apr_table_entry_t), 0);
     memcpy(new->a.elts, t->a.elts, t->a.nelts * sizeof(apr_table_entry_t));
     new->a.nelts = t->a.nelts;
+    memcpy(&(new->index_first), t->index_first, sizeof(int) * TABLE_HASH_SIZE);
+    memcpy(&(new->index_last), t->index_last, sizeof(int) * TABLE_HASH_SIZE);
     return new;
 }
 
+static void table_reindex(apr_table_t *t)
+{
+    int i;
+    int hash;
+    apr_table_entry_t *next_elt = (apr_table_entry_t *) t->a.elts;
+
+    memset(&(t->index_first), 0xff, sizeof(int) * TABLE_HASH_SIZE);
+    memset(&(t->index_last), 0xff, sizeof(int) * TABLE_HASH_SIZE);
+    for (i = 0; i < t->a.nelts; i++, next_elt++) {
+        hash = TABLE_HASH(next_elt->key);
+        t->index_last[hash] = i;
+        if (t->index_first[hash] < 0) {
+            t->index_first[hash] = i;
+        }
+    }
+}
+
 APR_DECLARE(void) apr_table_clear(apr_table_t *t)
 {
     t->a.nelts = 0;
+    memset(&(t->index_first), 0xff, sizeof(int) * TABLE_HASH_SIZE);
+    memset(&(t->index_last), 0xff, sizeof(int) * TABLE_HASH_SIZE);
 }
 
 APR_DECLARE(const char *) apr_table_get(const apr_table_t *t, const char *key)
 {
-    apr_table_entry_t *next_elt = (apr_table_entry_t *) t->a.elts;
-    apr_table_entry_t *end_elt = next_elt + t->a.nelts;
-    apr_uint32_t checksum;
+    apr_table_entry_t *next_elt;
+    int hash;
+    int i;
+    apr_table_entry_t *end_elt;
 
     if (key == NULL) {
        return NULL;
     }
+    hash = TABLE_HASH(key);
+    i = t->index_first[hash];
+    if (i < 0) {
+        return NULL;
+    }
+    next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+    end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
 
-    COMPUTE_KEY_CHECKSUM(key, checksum);
-    for (; next_elt < end_elt; next_elt++) {
-       if ((checksum == next_elt->key_checksum) &&
-            !strcasecmp(next_elt->key, key)) {
+    for (; next_elt <= end_elt; next_elt++) {
+       if (!strcasecmp(next_elt->key, key)) {
            return next_elt->val;
        }
     }
@@ -442,21 +448,29 @@
 APR_DECLARE(void) apr_table_set(apr_table_t *t, const char *key,
                               const char *val)
 {
-    apr_table_entry_t *next_elt = (apr_table_entry_t *) t->a.elts;
-    apr_table_entry_t *end_elt = next_elt + t->a.nelts;
-    apr_table_entry_t *dst_elt;
-    apr_uint32_t checksum;
+    apr_table_entry_t *next_elt;
+    int hash;
+    int i;
+    apr_table_entry_t *end_elt;
 
-    COMPUTE_KEY_CHECKSUM(key, checksum);
-    for (; next_elt < end_elt; next_elt++) {
-       if ((checksum == next_elt->key_checksum) &&
-            !strcasecmp(next_elt->key, key)) {
+    hash = TABLE_HASH(key);
+    i = t->index_first[hash];
+    if (i < 0) {
+        goto add_new_elt;
+    }
+    next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+    end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
+
+    for (; next_elt <= end_elt; next_elt++) {
+       if (!strcasecmp(next_elt->key, key)) {
+            int must_reindex = 0;
+            apr_table_entry_t *dst_elt = NULL;
+            apr_table_entry_t *table_end = ((apr_table_entry_t *) t->a.elts) +
+                t->a.nelts;
             next_elt->val = apr_pstrdup(t->a.pool, val);
             /* remove any other instances of this key */
-            dst_elt = NULL;
-            for (next_elt++; next_elt < end_elt; next_elt++) {
-                if ((checksum == next_elt->key_checksum) &&
-                    !strcasecmp(next_elt->key, key)) {
+            for (next_elt++; next_elt <= end_elt; next_elt++) {
+                if (!strcasecmp(next_elt->key, key)) {
                     t->a.nelts--;
                     if (!dst_elt) {
                         dst_elt = next_elt;
@@ -464,176 +478,279 @@
                 }
                 else if (dst_elt) {
                     *dst_elt++ = *next_elt;
+                    must_reindex = 1;
                 }
-
+            }
+            if (dst_elt) {
+                /* shift over the remainder of the table */
+                for (; next_elt < table_end; next_elt++) {
+                    *dst_elt++ = *next_elt;
+                }
+                must_reindex = 1;
+            }
+            if (must_reindex) {
+                table_reindex(t);
             }
             return;
         }
     }
 
+add_new_elt:
+    t->index_last[hash] = t->a.nelts;
+    if (t->index_first[hash] < 0) {
+        t->index_first[hash] = t->a.nelts;
+    }
     next_elt = (apr_table_entry_t *) table_push(t);
     next_elt->key = apr_pstrdup(t->a.pool, key);
     next_elt->val = apr_pstrdup(t->a.pool, val);
-    next_elt->key_checksum = checksum;
 }
 
 APR_DECLARE(void) apr_table_setn(apr_table_t *t, const char *key,
                                const char *val)
 {
-    apr_table_entry_t *next_elt = (apr_table_entry_t *) t->a.elts;
-    apr_table_entry_t *end_elt = next_elt + t->a.nelts;
-    apr_table_entry_t *dst_elt;
-    apr_uint32_t checksum;
+    apr_table_entry_t *next_elt;
+    int hash;
+    int i;
+    apr_table_entry_t *end_elt;
+
+#ifdef POOL_DEBUG
+    {
+       if (!apr_pool_is_ancestor(apr_pool_find(key), t->a.pool)) {
+           fprintf(stderr, "apr_table_setn: key not in ancestor pool of t\n");
+           abort();
+       }
+       if (!apr_pool_is_ancestor(apr_pool_find(val), t->a.pool)) {
+           fprintf(stderr, "apr_table_setn: key not in ancestor pool of t\n");
+           abort();
+       }
+    }
+#endif
 
-    COMPUTE_KEY_CHECKSUM(key, checksum);
-    for (; next_elt < end_elt; next_elt++) {
-       if ((checksum == next_elt->key_checksum) &&
-            !strcasecmp(next_elt->key, key)) {
+    hash = TABLE_HASH(key);
+    i = t->index_first[hash];
+    if (i < 0) {
+        goto add_new_elt;
+    }
+    next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+    end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
+
+    for (; next_elt <= end_elt; next_elt++) {
+       if (!strcasecmp(next_elt->key, key)) {
+            int must_reindex = 0;
+            apr_table_entry_t *dst_elt = NULL;
+            apr_table_entry_t *table_end = ((apr_table_entry_t *) t->a.elts) +
+                t->a.nelts;
             next_elt->val = (char *)val;
             /* remove any other instances of this key */
-            dst_elt = NULL;
-            for (next_elt++; next_elt < end_elt; next_elt++) {
-                if ((checksum == next_elt->key_checksum) &&
-                    !strcasecmp(next_elt->key, key)) {
+            for (next_elt++; next_elt <= end_elt; next_elt++) {
+                if (!strcasecmp(next_elt->key, key)) {
                     t->a.nelts--;
                     if (!dst_elt) {
                         dst_elt = next_elt;
                     }
+
                 }
                 else if (dst_elt) {
                     *dst_elt++ = *next_elt;
+                    must_reindex = 1;
                 }
-
+            }
+            if (dst_elt) {
+                /* shift over the remainder of the table */
+                for (; next_elt < table_end; next_elt++) {
+                    *dst_elt++ = *next_elt;
+                }
+                must_reindex = 1;
+            }
+            if (must_reindex) {
+                table_reindex(t);
             }
             return;
         }
     }
 
+add_new_elt:
+    t->index_last[hash] = t->a.nelts;
+    if (t->index_first[hash] < 0) {
+        t->index_first[hash] = t->a.nelts;
+    }
     next_elt = (apr_table_entry_t *) table_push(t);
     next_elt->key = (char *)key;
     next_elt->val = (char *)val;
-    next_elt->key_checksum = checksum;
 }
 
 APR_DECLARE(void) apr_table_unset(apr_table_t *t, const char *key)
 {
-    apr_table_entry_t *next_elt = (apr_table_entry_t *) t->a.elts;
-    apr_table_entry_t *end_elt = next_elt + t->a.nelts;
+    apr_table_entry_t *next_elt;
+    int hash;
+    int i;
+    apr_table_entry_t *end_elt;
     apr_table_entry_t *dst_elt;
-    apr_uint32_t checksum;
+    int must_reindex;
+
+    hash = TABLE_HASH(key);
+    i = t->index_first[hash];
+    if (i < 0) {
+        return;
+    }
+    next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+    end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
 
-    COMPUTE_KEY_CHECKSUM(key, checksum);
-    for (; next_elt < end_elt; next_elt++) {
-       if ((checksum == next_elt->key_checksum) &&
-            !strcasecmp(next_elt->key, key)) {
-            t->a.nelts--;
+    must_reindex = 0;
+    for (; next_elt <= end_elt; next_elt++) {
+       if (!strcasecmp(next_elt->key, key)) {
+            apr_table_entry_t *table_end = ((apr_table_entry_t *) t->a.elts) +
+                t->a.nelts;
+           t->a.nelts--;
             dst_elt = next_elt;
-            for (next_elt++; next_elt < end_elt; next_elt++) {
-                if ((checksum == next_elt->key_checksum) &&
-                    !strcasecmp(next_elt->key, key)) {
+            for (next_elt++; next_elt <= end_elt; next_elt++) {
+                if (!strcasecmp(next_elt->key, key)) {
                     t->a.nelts--;
                 }
                 else {
                     *dst_elt++ = *next_elt;
                 }
             }
+            /* shift over the remainder of the table */
+            for (; next_elt < table_end; next_elt++) {
+                *dst_elt++ = *next_elt;
+            }
+            must_reindex = 1;
+
             break;
-        }
+       }
+    }
+    if (must_reindex) {
+        table_reindex(t);
     }
 }
 
 APR_DECLARE(void) apr_table_merge(apr_table_t *t, const char *key,
                                 const char *val)
 {
-    apr_table_entry_t *elts = (apr_table_entry_t *) t->a.elts;
+    apr_table_entry_t *next_elt;
+    int hash;
     int i;
-    apr_uint32_t checksum;
+    apr_table_entry_t *end_elt;
 
-    COMPUTE_KEY_CHECKSUM(key, checksum);
-    for (i = 0; i < t->a.nelts; ++i) {
-       if ((checksum == elts[i].key_checksum) && !strcasecmp(elts[i].key, 
key)) {
-           elts[i].val = apr_pstrcat(t->a.pool, elts[i].val, ", ", val, NULL);
-           return;
-       }
+    hash = TABLE_HASH(key);
+    i = t->index_first[hash];
+    if (i < 0) {
+        goto add_new_elt;
+    }
+    next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+    end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
+
+    for (; next_elt <= end_elt; next_elt++) {
+       if (!strcasecmp(next_elt->key, key)) {
+            next_elt->val = apr_pstrcat(t->a.pool, next_elt->val, ", ",
+                                        val, NULL);
+            return;
+        }
     }
 
-    elts = (apr_table_entry_t *) table_push(t);
-    elts->key = apr_pstrdup(t->a.pool, key);
-    elts->val = apr_pstrdup(t->a.pool, val);
-    elts->key_checksum = checksum;
+add_new_elt:
+    t->index_last[hash] = t->a.nelts;
+    if (t->index_first[hash] < 0) {
+        t->index_first[hash] = t->a.nelts;
+    }
+    next_elt = (apr_table_entry_t *) table_push(t);
+    next_elt->key = apr_pstrdup(t->a.pool, key);
+    next_elt->val = apr_pstrdup(t->a.pool, val);
 }
 
 APR_DECLARE(void) apr_table_mergen(apr_table_t *t, const char *key,
                                  const char *val)
 {
-    apr_table_entry_t *elts = (apr_table_entry_t *) t->a.elts;
+    apr_table_entry_t *next_elt;
+    int hash;
     int i;
-    apr_uint32_t checksum;
+    apr_table_entry_t *end_elt;
 
 #ifdef POOL_DEBUG
     {
        if (!apr_pool_is_ancestor(apr_pool_find(key), t->a.pool)) {
-           fprintf(stderr, "table_set: key not in ancestor pool of t\n");
+           fprintf(stderr,
+                    "apr_table_mergen: key not in ancestor pool of t\n");
            abort();
        }
        if (!apr_pool_is_ancestor(apr_pool_find(val), t->a.pool)) {
-           fprintf(stderr, "table_set: key not in ancestor pool of t\n");
+           fprintf(stderr,
+                    "apr_table_mergen: key not in ancestor pool of t\n");
            abort();
        }
     }
 #endif
 
-    COMPUTE_KEY_CHECKSUM(key, checksum);
-    for (i = 0; i < t->a.nelts; ++i) {
-       if ((checksum == elts[i].key_checksum) && !strcasecmp(elts[i].key, 
key)) {
-           elts[i].val = apr_pstrcat(t->a.pool, elts[i].val, ", ", val, NULL);
-           return;
-       }
+    hash = TABLE_HASH(key);
+    i = t->index_first[hash];
+    if (i < 0) {
+        goto add_new_elt;
+    }
+    next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+    end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
+
+    for (; next_elt <= end_elt; next_elt++) {
+       if (!strcasecmp(next_elt->key, key)) {
+            next_elt->val = apr_pstrcat(t->a.pool, next_elt->val, ", ",
+                                        val, NULL);
+            return;
+        }
     }
 
-    elts = (apr_table_entry_t *) table_push(t);
-    elts->key = (char *)key;
-    elts->val = (char *)val;
-    elts->key_checksum = checksum;
+add_new_elt:
+    next_elt = (apr_table_entry_t *) table_push(t);
+    next_elt->key = (char *)key;
+    next_elt->val = (char *)val;
+    t->index_last[hash] = t->a.nelts - 1;
+    if (t->index_first[hash] < 0) {
+        t->index_first[hash] = t->a.nelts - 1;
+    }
 }
 
 APR_DECLARE(void) apr_table_add(apr_table_t *t, const char *key,
                               const char *val)
 {
-    apr_table_entry_t *elts;
-    apr_uint32_t checksum;
+    apr_table_entry_t *next_elt;
+    int hash;
 
-    COMPUTE_KEY_CHECKSUM(key, checksum);
-    elts = (apr_table_entry_t *) table_push(t);
-    elts->key = apr_pstrdup(t->a.pool, key);
-    elts->val = apr_pstrdup(t->a.pool, val);
-    elts->key_checksum = checksum;
+    hash = TABLE_HASH(key);
+    t->index_last[hash] = t->a.nelts;
+    if (t->index_first[hash] < 0) {
+        t->index_first[hash] = t->a.nelts;
+    }
+    next_elt = (apr_table_entry_t *) table_push(t);
+    next_elt->key = apr_pstrdup(t->a.pool, key);
+    next_elt->val = apr_pstrdup(t->a.pool, val);
 }
 
 APR_DECLARE(void) apr_table_addn(apr_table_t *t, const char *key,
                                const char *val)
 {
-    apr_table_entry_t *elts;
-    apr_uint32_t checksum;
+    apr_table_entry_t *next_elt;
+    int hash;
 
 #ifdef POOL_DEBUG
     {
        if (!apr_pool_is_ancestor(apr_pool_find(key), t->a.pool)) {
-           fprintf(stderr, "table_set: key not in ancestor pool of t\n");
+           fprintf(stderr, "apr_table_addn: key not in ancestor pool of t\n");
            abort();
        }
        if (!apr_pool_is_ancestor(apr_pool_find(val), t->a.pool)) {
-           fprintf(stderr, "table_set: key not in ancestor pool of t\n");
+           fprintf(stderr, "apr_table_addn: key not in ancestor pool of t\n");
            abort();
        }
     }
 #endif
 
-    COMPUTE_KEY_CHECKSUM(key, checksum);
-    elts = (apr_table_entry_t *) table_push(t);
-    elts->key = (char *)key;
-    elts->val = (char *)val;
-    elts->key_checksum = checksum;
+    hash = TABLE_HASH(key);
+    t->index_last[hash] = t->a.nelts;
+    if (t->index_first[hash] < 0) {
+        t->index_first[hash] = t->a.nelts;
+    }
+    next_elt = (apr_table_entry_t *) table_push(t);
+    next_elt->key = (char *)key;
+    next_elt->val = (char *)val;
 }
 
 APR_DECLARE(apr_table_t *) apr_table_overlay(apr_pool_t *p,
@@ -664,7 +781,7 @@
     res->a.pool = p;
     copy_array_hdr_core(&res->a, &overlay->a);
     apr_array_cat(&res->a, &base->a);
-
+    table_reindex(res);
     return res;
 }
 
@@ -763,14 +880,9 @@
 
     argp = va_arg(vp, char *);
     do {
-        apr_uint32_t checksum = 0;
-        if (argp) {
-            COMPUTE_KEY_CHECKSUM(argp, checksum);
-        }
         for (rv = 1, i = 0; rv && (i < t->a.nelts); ++i) {
             if (elts[i].key && (!argp ||
-                                ((checksum == elts[i].key_checksum) &&
-                                 !strcasecmp(elts[i].key, argp)))) {
+                                !strcasecmp(elts[i].key, argp))) {
                 rv = (*comp) (rec, elts[i].key, elts[i].val);
             }
         }
@@ -865,7 +977,7 @@
     /* Each bucket in the hash table holds a red-black tree
      * containing the overlap_keys that hash into that bucket
      */
-    overlap_key **child = &(hash_table[elt->elt->key_checksum & (nhash - 1)]);
+    overlap_key **child = &(hash_table[TABLE_HASH(elt->elt->key)]);
     overlap_key **root = child;
     overlap_key *parent = NULL;
     overlap_key *next;
@@ -993,9 +1105,6 @@
     (*root)->color = BLACK;
 }
 
-/* Must be a power of 2 */
-#define DEFAULT_HASH_SIZE 16
-
 APR_DECLARE(void) apr_table_overlap(apr_table_t *a, const apr_table_t *b,
                                    unsigned flags)
 {
@@ -1021,10 +1130,7 @@
         return;
     }
     cat_keys = apr_palloc(b->a.pool, sizeof(overlap_key) * max_keys);
-    nhash = DEFAULT_HASH_SIZE;
-    while (nhash < max_keys) {
-        nhash <<= 1;
-    }
+    nhash = TABLE_HASH_SIZE;
     hash_table = (overlap_key **)apr_pcalloc(b->a.pool,
                                              sizeof(overlap_key *) * nhash);
 
@@ -1099,14 +1205,13 @@
             elt = (apr_table_entry_t *)table_push(a);
             elt->key = cat_keys[i].elt->key;
             elt->val = new_val;
-            elt->key_checksum = cat_keys[i].elt->key_checksum;
         }
         else {
             apr_table_entry_t *elt = (apr_table_entry_t *)table_push(a);
             elt->key = cat_keys[i].elt->key;
             elt->val = cat_keys[i].elt->val;
-            elt->key_checksum = cat_keys[i].elt->key_checksum;
         }
     }
+    table_reindex(a);
 }
 

Reply via email to