On Sat, 2002-07-13 at 18:59, Brian Pane wrote:
> This patch adds a simple index to apr_table_t. The index maps
Here's an updated patch with two additional speedups:
- remove some extraneous string copies in apr_table_addn()
- don't initialize the "index_last" table to all -1
values when creating a new table; it's sufficient
to initialize just "index_first"
--Brian
Index: include/apr_tables.h
===================================================================
RCS file: /home/cvs/apr/include/apr_tables.h,v
retrieving revision 1.33
diff -u -r1.33 apr_tables.h
--- include/apr_tables.h 13 Jul 2002 18:16:33 -0000 1.33
+++ include/apr_tables.h 14 Jul 2002 22:31:10 -0000
@@ -115,9 +115,6 @@
*/
/** The value for the current table entry */
char *val;
-
- /** A checksum for the key, for use by the apr_table internals */
- apr_uint32_t key_checksum;
};
/**
Index: tables/apr_tables.c
===================================================================
RCS file: /home/cvs/apr/tables/apr_tables.c,v
retrieving revision 1.36
diff -u -r1.36 apr_tables.c
--- tables/apr_tables.c 13 Jul 2002 18:16:33 -0000 1.36
+++ tables/apr_tables.c 14 Jul 2002 22:31:10 -0000
@@ -307,40 +307,8 @@
* The "table" functions.
*/
-#if APR_CHARSET_EBCDIC
-#define CASE_MASK 0xbfbfbfbf
-#else
-#define CASE_MASK 0xdfdfdfdf
-#endif
-
-/* Compute the "checksum" for a key, consisting of the first
- * 4 bytes, normalized for case-insensitivity and packed into
- * an int...this checksum allows us to do a single integer
- * comparison as a fast check to determine whether we can
- * skip a strcasecmp
- */
-#define COMPUTE_KEY_CHECKSUM(key, checksum) \
-{ \
- const char *k = (key); \
- apr_uint32_t c = (apr_uint32_t)*k; \
- (checksum) = c; \
- (checksum) <<= 8; \
- if (c) { \
- c = (apr_uint32_t)*++k; \
- checksum |= c; \
- } \
- (checksum) <<= 8; \
- if (c) { \
- c = (apr_uint32_t)*++k; \
- checksum |= c; \
- } \
- (checksum) <<= 8; \
- if (c) { \
- c = (apr_uint32_t)*++k; \
- checksum |= c; \
- } \
- checksum &= CASE_MASK; \
-}
+#define TABLE_HASH_SIZE 32
+#define TABLE_INDEX_MASK 0x1f
/** The opaque string-content table type */
struct apr_table_t {
@@ -351,12 +319,21 @@
*/
/** The underlying array for the table */
apr_array_header_t a;
+
+ /* Indices within the table of the first and last
+ * instances of keys beginning with each character
+ */
+ int index_first[TABLE_HASH_SIZE];
+ int index_last[TABLE_HASH_SIZE];
+
#ifdef MAKE_TABLE_PROFILE
/** Who created the array. */
void *creator;
#endif
};
+#define TABLE_HASH(k) (int)(TABLE_INDEX_MASK & *(unsigned char *)(k))
+
/*
* XXX: if you tweak this you should look at is_empty_table() and table_elts()
* in alloc.h
@@ -388,6 +365,8 @@
apr_table_t *t = apr_palloc(p, sizeof(apr_table_t));
make_array_core(&t->a, p, nelts, sizeof(apr_table_entry_t), 0);
+ /* Initialize all the indices with -1 (meaning "no match") */
+ memset(&(t->index_first), 0xff, sizeof(int) * TABLE_HASH_SIZE);
#ifdef MAKE_TABLE_PROFILE
t->creator = __builtin_return_address(0);
#endif
@@ -410,28 +389,55 @@
make_array_core(&new->a, p, t->a.nalloc, sizeof(apr_table_entry_t), 0);
memcpy(new->a.elts, t->a.elts, t->a.nelts * sizeof(apr_table_entry_t));
new->a.nelts = t->a.nelts;
+ memcpy(&(new->index_first), t->index_first, sizeof(int) * TABLE_HASH_SIZE);
+ memcpy(&(new->index_last), t->index_last, sizeof(int) * TABLE_HASH_SIZE);
return new;
}
+static void table_reindex(apr_table_t *t)
+{
+ int i;
+ int hash;
+ apr_table_entry_t *next_elt = (apr_table_entry_t *) t->a.elts;
+
+ memset(&(t->index_first), 0xff, sizeof(int) * TABLE_HASH_SIZE);
+ memset(&(t->index_last), 0xff, sizeof(int) * TABLE_HASH_SIZE);
+ for (i = 0; i < t->a.nelts; i++, next_elt++) {
+ hash = TABLE_HASH(next_elt->key);
+ t->index_last[hash] = i;
+ if (t->index_first[hash] < 0) {
+ t->index_first[hash] = i;
+ }
+ }
+}
+
APR_DECLARE(void) apr_table_clear(apr_table_t *t)
{
t->a.nelts = 0;
+ memset(&(t->index_first), 0xff, sizeof(int) * TABLE_HASH_SIZE);
+ memset(&(t->index_last), 0xff, sizeof(int) * TABLE_HASH_SIZE);
}
APR_DECLARE(const char *) apr_table_get(const apr_table_t *t, const char *key)
{
- apr_table_entry_t *next_elt = (apr_table_entry_t *) t->a.elts;
- apr_table_entry_t *end_elt = next_elt + t->a.nelts;
- apr_uint32_t checksum;
+ apr_table_entry_t *next_elt;
+ int hash;
+ int i;
+ apr_table_entry_t *end_elt;
if (key == NULL) {
return NULL;
}
+ hash = TABLE_HASH(key);
+ i = t->index_first[hash];
+ if (i < 0) {
+ return NULL;
+ }
+ next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+ end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
- COMPUTE_KEY_CHECKSUM(key, checksum);
- for (; next_elt < end_elt; next_elt++) {
- if ((checksum == next_elt->key_checksum) &&
- !strcasecmp(next_elt->key, key)) {
+ for (; next_elt <= end_elt; next_elt++) {
+ if (!strcasecmp(next_elt->key, key)) {
return next_elt->val;
}
}
@@ -442,21 +448,29 @@
APR_DECLARE(void) apr_table_set(apr_table_t *t, const char *key,
const char *val)
{
- apr_table_entry_t *next_elt = (apr_table_entry_t *) t->a.elts;
- apr_table_entry_t *end_elt = next_elt + t->a.nelts;
- apr_table_entry_t *dst_elt;
- apr_uint32_t checksum;
+ apr_table_entry_t *next_elt;
+ int hash;
+ int i;
+ apr_table_entry_t *end_elt;
- COMPUTE_KEY_CHECKSUM(key, checksum);
- for (; next_elt < end_elt; next_elt++) {
- if ((checksum == next_elt->key_checksum) &&
- !strcasecmp(next_elt->key, key)) {
+ hash = TABLE_HASH(key);
+ i = t->index_first[hash];
+ if (i < 0) {
+ goto add_new_elt;
+ }
+ next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+ end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
+
+ for (; next_elt <= end_elt; next_elt++) {
+ if (!strcasecmp(next_elt->key, key)) {
+ int must_reindex = 0;
+ apr_table_entry_t *dst_elt = NULL;
+ apr_table_entry_t *table_end = ((apr_table_entry_t *) t->a.elts) +
+ t->a.nelts;
next_elt->val = apr_pstrdup(t->a.pool, val);
/* remove any other instances of this key */
- dst_elt = NULL;
- for (next_elt++; next_elt < end_elt; next_elt++) {
- if ((checksum == next_elt->key_checksum) &&
- !strcasecmp(next_elt->key, key)) {
+ for (next_elt++; next_elt <= end_elt; next_elt++) {
+ if (!strcasecmp(next_elt->key, key)) {
t->a.nelts--;
if (!dst_elt) {
dst_elt = next_elt;
@@ -464,176 +478,279 @@
}
else if (dst_elt) {
*dst_elt++ = *next_elt;
+ must_reindex = 1;
}
-
+ }
+ if (dst_elt) {
+ /* shift over the remainder of the table */
+ for (; next_elt < table_end; next_elt++) {
+ *dst_elt++ = *next_elt;
+ }
+ must_reindex = 1;
+ }
+ if (must_reindex) {
+ table_reindex(t);
}
return;
}
}
+add_new_elt:
+ t->index_last[hash] = t->a.nelts;
+ if (t->index_first[hash] < 0) {
+ t->index_first[hash] = t->a.nelts;
+ }
next_elt = (apr_table_entry_t *) table_push(t);
next_elt->key = apr_pstrdup(t->a.pool, key);
next_elt->val = apr_pstrdup(t->a.pool, val);
- next_elt->key_checksum = checksum;
}
APR_DECLARE(void) apr_table_setn(apr_table_t *t, const char *key,
const char *val)
{
- apr_table_entry_t *next_elt = (apr_table_entry_t *) t->a.elts;
- apr_table_entry_t *end_elt = next_elt + t->a.nelts;
- apr_table_entry_t *dst_elt;
- apr_uint32_t checksum;
+ apr_table_entry_t *next_elt;
+ int hash;
+ int i;
+ apr_table_entry_t *end_elt;
+
+#ifdef POOL_DEBUG
+ {
+ if (!apr_pool_is_ancestor(apr_pool_find(key), t->a.pool)) {
+ fprintf(stderr, "apr_table_setn: key not in ancestor pool of t\n");
+ abort();
+ }
+ if (!apr_pool_is_ancestor(apr_pool_find(val), t->a.pool)) {
+ fprintf(stderr, "apr_table_setn: key not in ancestor pool of t\n");
+ abort();
+ }
+ }
+#endif
- COMPUTE_KEY_CHECKSUM(key, checksum);
- for (; next_elt < end_elt; next_elt++) {
- if ((checksum == next_elt->key_checksum) &&
- !strcasecmp(next_elt->key, key)) {
+ hash = TABLE_HASH(key);
+ i = t->index_first[hash];
+ if (i < 0) {
+ goto add_new_elt;
+ }
+ next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+ end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
+
+ for (; next_elt <= end_elt; next_elt++) {
+ if (!strcasecmp(next_elt->key, key)) {
+ int must_reindex = 0;
+ apr_table_entry_t *dst_elt = NULL;
+ apr_table_entry_t *table_end = ((apr_table_entry_t *) t->a.elts) +
+ t->a.nelts;
next_elt->val = (char *)val;
/* remove any other instances of this key */
- dst_elt = NULL;
- for (next_elt++; next_elt < end_elt; next_elt++) {
- if ((checksum == next_elt->key_checksum) &&
- !strcasecmp(next_elt->key, key)) {
+ for (next_elt++; next_elt <= end_elt; next_elt++) {
+ if (!strcasecmp(next_elt->key, key)) {
t->a.nelts--;
if (!dst_elt) {
dst_elt = next_elt;
}
+
}
else if (dst_elt) {
*dst_elt++ = *next_elt;
+ must_reindex = 1;
}
-
+ }
+ if (dst_elt) {
+ /* shift over the remainder of the table */
+ for (; next_elt < table_end; next_elt++) {
+ *dst_elt++ = *next_elt;
+ }
+ must_reindex = 1;
+ }
+ if (must_reindex) {
+ table_reindex(t);
}
return;
}
}
+add_new_elt:
+ t->index_last[hash] = t->a.nelts;
+ if (t->index_first[hash] < 0) {
+ t->index_first[hash] = t->a.nelts;
+ }
next_elt = (apr_table_entry_t *) table_push(t);
next_elt->key = (char *)key;
next_elt->val = (char *)val;
- next_elt->key_checksum = checksum;
}
APR_DECLARE(void) apr_table_unset(apr_table_t *t, const char *key)
{
- apr_table_entry_t *next_elt = (apr_table_entry_t *) t->a.elts;
- apr_table_entry_t *end_elt = next_elt + t->a.nelts;
+ apr_table_entry_t *next_elt;
+ int hash;
+ int i;
+ apr_table_entry_t *end_elt;
apr_table_entry_t *dst_elt;
- apr_uint32_t checksum;
+ int must_reindex;
+
+ hash = TABLE_HASH(key);
+ i = t->index_first[hash];
+ if (i < 0) {
+ return;
+ }
+ next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+ end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
- COMPUTE_KEY_CHECKSUM(key, checksum);
- for (; next_elt < end_elt; next_elt++) {
- if ((checksum == next_elt->key_checksum) &&
- !strcasecmp(next_elt->key, key)) {
- t->a.nelts--;
+ must_reindex = 0;
+ for (; next_elt <= end_elt; next_elt++) {
+ if (!strcasecmp(next_elt->key, key)) {
+ apr_table_entry_t *table_end = ((apr_table_entry_t *) t->a.elts) +
+ t->a.nelts;
+ t->a.nelts--;
dst_elt = next_elt;
- for (next_elt++; next_elt < end_elt; next_elt++) {
- if ((checksum == next_elt->key_checksum) &&
- !strcasecmp(next_elt->key, key)) {
+ for (next_elt++; next_elt <= end_elt; next_elt++) {
+ if (!strcasecmp(next_elt->key, key)) {
t->a.nelts--;
}
else {
*dst_elt++ = *next_elt;
}
}
+ /* shift over the remainder of the table */
+ for (; next_elt < table_end; next_elt++) {
+ *dst_elt++ = *next_elt;
+ }
+ must_reindex = 1;
+
break;
- }
+ }
+ }
+ if (must_reindex) {
+ table_reindex(t);
}
}
APR_DECLARE(void) apr_table_merge(apr_table_t *t, const char *key,
const char *val)
{
- apr_table_entry_t *elts = (apr_table_entry_t *) t->a.elts;
+ apr_table_entry_t *next_elt;
+ int hash;
int i;
- apr_uint32_t checksum;
+ apr_table_entry_t *end_elt;
- COMPUTE_KEY_CHECKSUM(key, checksum);
- for (i = 0; i < t->a.nelts; ++i) {
- if ((checksum == elts[i].key_checksum) && !strcasecmp(elts[i].key,
key)) {
- elts[i].val = apr_pstrcat(t->a.pool, elts[i].val, ", ", val, NULL);
- return;
- }
+ hash = TABLE_HASH(key);
+ i = t->index_first[hash];
+ if (i < 0) {
+ goto add_new_elt;
+ }
+ next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+ end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
+
+ for (; next_elt <= end_elt; next_elt++) {
+ if (!strcasecmp(next_elt->key, key)) {
+ next_elt->val = apr_pstrcat(t->a.pool, next_elt->val, ", ",
+ val, NULL);
+ return;
+ }
}
- elts = (apr_table_entry_t *) table_push(t);
- elts->key = apr_pstrdup(t->a.pool, key);
- elts->val = apr_pstrdup(t->a.pool, val);
- elts->key_checksum = checksum;
+add_new_elt:
+ t->index_last[hash] = t->a.nelts;
+ if (t->index_first[hash] < 0) {
+ t->index_first[hash] = t->a.nelts;
+ }
+ next_elt = (apr_table_entry_t *) table_push(t);
+ next_elt->key = apr_pstrdup(t->a.pool, key);
+ next_elt->val = apr_pstrdup(t->a.pool, val);
}
APR_DECLARE(void) apr_table_mergen(apr_table_t *t, const char *key,
const char *val)
{
- apr_table_entry_t *elts = (apr_table_entry_t *) t->a.elts;
+ apr_table_entry_t *next_elt;
+ int hash;
int i;
- apr_uint32_t checksum;
+ apr_table_entry_t *end_elt;
#ifdef POOL_DEBUG
{
if (!apr_pool_is_ancestor(apr_pool_find(key), t->a.pool)) {
- fprintf(stderr, "table_set: key not in ancestor pool of t\n");
+ fprintf(stderr,
+ "apr_table_mergen: key not in ancestor pool of t\n");
abort();
}
if (!apr_pool_is_ancestor(apr_pool_find(val), t->a.pool)) {
- fprintf(stderr, "table_set: key not in ancestor pool of t\n");
+ fprintf(stderr,
+ "apr_table_mergen: key not in ancestor pool of t\n");
abort();
}
}
#endif
- COMPUTE_KEY_CHECKSUM(key, checksum);
- for (i = 0; i < t->a.nelts; ++i) {
- if ((checksum == elts[i].key_checksum) && !strcasecmp(elts[i].key,
key)) {
- elts[i].val = apr_pstrcat(t->a.pool, elts[i].val, ", ", val, NULL);
- return;
- }
+ hash = TABLE_HASH(key);
+ i = t->index_first[hash];
+ if (i < 0) {
+ goto add_new_elt;
+ }
+ next_elt = ((apr_table_entry_t *) t->a.elts) + i;
+ end_elt = ((apr_table_entry_t *) t->a.elts) + t->index_last[hash];
+
+ for (; next_elt <= end_elt; next_elt++) {
+ if (!strcasecmp(next_elt->key, key)) {
+ next_elt->val = apr_pstrcat(t->a.pool, next_elt->val, ", ",
+ val, NULL);
+ return;
+ }
}
- elts = (apr_table_entry_t *) table_push(t);
- elts->key = (char *)key;
- elts->val = (char *)val;
- elts->key_checksum = checksum;
+add_new_elt:
+ next_elt = (apr_table_entry_t *) table_push(t);
+ next_elt->key = (char *)key;
+ next_elt->val = (char *)val;
+ t->index_last[hash] = t->a.nelts - 1;
+ if (t->index_first[hash] < 0) {
+ t->index_first[hash] = t->a.nelts - 1;
+ }
}
APR_DECLARE(void) apr_table_add(apr_table_t *t, const char *key,
const char *val)
{
- apr_table_entry_t *elts;
- apr_uint32_t checksum;
+ apr_table_entry_t *next_elt;
+ int hash;
- COMPUTE_KEY_CHECKSUM(key, checksum);
- elts = (apr_table_entry_t *) table_push(t);
- elts->key = apr_pstrdup(t->a.pool, key);
- elts->val = apr_pstrdup(t->a.pool, val);
- elts->key_checksum = checksum;
+ hash = TABLE_HASH(key);
+ t->index_last[hash] = t->a.nelts;
+ if (t->index_first[hash] < 0) {
+ t->index_first[hash] = t->a.nelts;
+ }
+ next_elt = (apr_table_entry_t *) table_push(t);
+ next_elt->key = apr_pstrdup(t->a.pool, key);
+ next_elt->val = apr_pstrdup(t->a.pool, val);
}
APR_DECLARE(void) apr_table_addn(apr_table_t *t, const char *key,
const char *val)
{
- apr_table_entry_t *elts;
- apr_uint32_t checksum;
+ apr_table_entry_t *next_elt;
+ int hash;
#ifdef POOL_DEBUG
{
if (!apr_pool_is_ancestor(apr_pool_find(key), t->a.pool)) {
- fprintf(stderr, "table_set: key not in ancestor pool of t\n");
+ fprintf(stderr, "apr_table_addn: key not in ancestor pool of t\n");
abort();
}
if (!apr_pool_is_ancestor(apr_pool_find(val), t->a.pool)) {
- fprintf(stderr, "table_set: key not in ancestor pool of t\n");
+ fprintf(stderr, "apr_table_addn: key not in ancestor pool of t\n");
abort();
}
}
#endif
- COMPUTE_KEY_CHECKSUM(key, checksum);
- elts = (apr_table_entry_t *) table_push(t);
- elts->key = (char *)key;
- elts->val = (char *)val;
- elts->key_checksum = checksum;
+ hash = TABLE_HASH(key);
+ t->index_last[hash] = t->a.nelts;
+ if (t->index_first[hash] < 0) {
+ t->index_first[hash] = t->a.nelts;
+ }
+ next_elt = (apr_table_entry_t *) table_push(t);
+ next_elt->key = (char *)key;
+ next_elt->val = (char *)val;
}
APR_DECLARE(apr_table_t *) apr_table_overlay(apr_pool_t *p,
@@ -664,7 +781,7 @@
res->a.pool = p;
copy_array_hdr_core(&res->a, &overlay->a);
apr_array_cat(&res->a, &base->a);
-
+ table_reindex(res);
return res;
}
@@ -763,14 +880,9 @@
argp = va_arg(vp, char *);
do {
- apr_uint32_t checksum = 0;
- if (argp) {
- COMPUTE_KEY_CHECKSUM(argp, checksum);
- }
for (rv = 1, i = 0; rv && (i < t->a.nelts); ++i) {
if (elts[i].key && (!argp ||
- ((checksum == elts[i].key_checksum) &&
- !strcasecmp(elts[i].key, argp)))) {
+ !strcasecmp(elts[i].key, argp))) {
rv = (*comp) (rec, elts[i].key, elts[i].val);
}
}
@@ -865,7 +977,7 @@
/* Each bucket in the hash table holds a red-black tree
* containing the overlap_keys that hash into that bucket
*/
- overlap_key **child = &(hash_table[elt->elt->key_checksum & (nhash - 1)]);
+ overlap_key **child = &(hash_table[TABLE_HASH(elt->elt->key)]);
overlap_key **root = child;
overlap_key *parent = NULL;
overlap_key *next;
@@ -993,9 +1105,6 @@
(*root)->color = BLACK;
}
-/* Must be a power of 2 */
-#define DEFAULT_HASH_SIZE 16
-
APR_DECLARE(void) apr_table_overlap(apr_table_t *a, const apr_table_t *b,
unsigned flags)
{
@@ -1021,10 +1130,7 @@
return;
}
cat_keys = apr_palloc(b->a.pool, sizeof(overlap_key) * max_keys);
- nhash = DEFAULT_HASH_SIZE;
- while (nhash < max_keys) {
- nhash <<= 1;
- }
+ nhash = TABLE_HASH_SIZE;
hash_table = (overlap_key **)apr_pcalloc(b->a.pool,
sizeof(overlap_key *) * nhash);
@@ -1099,14 +1205,13 @@
elt = (apr_table_entry_t *)table_push(a);
elt->key = cat_keys[i].elt->key;
elt->val = new_val;
- elt->key_checksum = cat_keys[i].elt->key_checksum;
}
else {
apr_table_entry_t *elt = (apr_table_entry_t *)table_push(a);
elt->key = cat_keys[i].elt->key;
elt->val = cat_keys[i].elt->val;
- elt->key_checksum = cat_keys[i].elt->key_checksum;
}
}
+ table_reindex(a);
}