On second thought, let's switch objcache from list to hash. This should
help the lookup times if there's a large number of outstanding I/Os.

This also adds the objcache_count for self-testing purpoes.
And a few comments.

Signed-off-by: Pete Zaitcev <[email protected]>

---
 include/objcache.h   |    9 +++++---
 server/objcache.c    |   42 +++++++++++++++++++++++++----------------
 test/objcache-unit.c |    8 ++++++-
 3 files changed, 39 insertions(+), 20 deletions(-)

I think you mentioned that your scripts dealt with 5/4 patch numbering.
Just in case, setting references by replying to a previous patch mail.

commit 6238aa7321335b0a3aba9736ee1467777bdd6861
Author: Master <[email protected]>
Date:   Wed Jan 6 22:04:20 2010 -0700

    Switch objcache from list to hash to make lookups faster.

diff --git a/include/objcache.h b/include/objcache.h
index fce1485..6e36e86 100644
--- a/include/objcache.h
+++ b/include/objcache.h
@@ -20,16 +20,14 @@
 #define _CHUNKD_OBJCACHE_H_
 
 #include <glib.h>
-#include <elist.h>
 #include <stdbool.h>
 
 struct objcache {
-       struct list_head head;
        GMutex *lock;
+       GHashTable *table;
 };
 
 struct objcache_entry {
-       struct list_head link;
        unsigned int hash;
        unsigned int flags;
        int ref;
@@ -60,6 +58,11 @@ extern bool objcache_test_dirty(struct objcache *cache,
 extern void objcache_put(struct objcache *cache, struct objcache_entry *entry);
 
 /*
+ * Count objects in the cache. Can be slow, and used only for debugging.
+ */
+extern int objcache_count(struct objcache *cache);
+
+/*
  * Init a cache. Call once. May fail since it allocates a mutex.
  */
 extern int objcache_init(struct objcache *cache);
diff --git a/server/objcache.c b/server/objcache.c
index 475ac23..e969f89 100644
--- a/server/objcache.c
+++ b/server/objcache.c
@@ -40,18 +40,6 @@ static unsigned int objcache_hash(const char *key, int klen)
        return hash;
 }
 
-static struct objcache_entry *objcache_lookup(struct objcache *cache,
-                                             unsigned int hash)
-{
-       struct objcache_entry *cep;
-
-       list_for_each_entry(cep, &cache->head, link) {
-               if (cep->hash == hash)
-                       return cep;
-       }
-       return NULL;
-}
-
 static struct objcache_entry *objcache_insert(struct objcache *cache,
                                              unsigned int hash)
 {
@@ -63,10 +51,18 @@ static struct objcache_entry *objcache_insert(struct 
objcache *cache,
        cep->hash = hash;
        cep->flags = 0;
        cep->ref = 1;
-       list_add(&cep->link, &cache->head);
+       g_hash_table_insert(cache->table, &cep->hash, cep);
        return cep;
 }
 
+/*
+ * Observe the way we handle conflicts in the computed hash: we treat the
+ * keys with the same hash as same. It's acceptable in our application.
+ * At worst, an unrelated activity main in chunkd may spook self-check.
+ * This policy remains the same for list, tree, hash or any other implementing
+ * structure. If we use Glib's hash, it can have its own conflicts over
+ * a shared bucket indexed with our hash. We don't know anything about those.
+ */
 struct objcache_entry *__objcache_get(struct objcache *cache,
                                      const char *key, int klen,
                                      unsigned int flag)
@@ -76,7 +72,7 @@ struct objcache_entry *__objcache_get(struct objcache *cache,
 
        hash = objcache_hash(key, klen);
        g_mutex_lock(cache->lock);
-       cep = objcache_lookup(cache, hash);
+       cep = g_hash_table_lookup(cache->table, &hash);
        if (cep) {
                cep->ref++;
        } else {
@@ -107,22 +103,36 @@ void objcache_put(struct objcache *cache, struct 
objcache_entry *cep)
        }
        --cep->ref;
        if (!cep->ref) {
-               list_del(&cep->link);
+               gboolean rcb;
+               rcb = g_hash_table_remove(cache->table, &cep->hash);
+               /*
+                * We are so super sure that this cannot happen that
+                * we use abort(), which is not welcome in daemons.
+                */
+               if (!rcb)
+                       abort();
                free(cep);
        }
        g_mutex_unlock(cache->lock);
 }
 
+int objcache_count(struct objcache *cache)
+{
+       return g_hash_table_size(cache->table);
+}
+
 int objcache_init(struct objcache *cache)
 {
        cache->lock = g_mutex_new();
        if (!cache->lock)
                return -1;
-       INIT_LIST_HEAD(&cache->head);
+       /* We do not use g_str_hash becuse our keys may have nul bytes. */
+       cache->table = g_hash_table_new(g_int_hash, g_int_equal);
        return 0;
 }
 
 void objcache_fini(struct objcache *cache)
 {
        g_mutex_free(cache->lock);
+       g_hash_table_destroy(cache->table);
 }
diff --git a/test/objcache-unit.c b/test/objcache-unit.c
index f101445..874b57d 100644
--- a/test/objcache-unit.c
+++ b/test/objcache-unit.c
@@ -42,7 +42,10 @@ int main(int argc, char *argv[])
        ep3 = objcache_get(&cache, k3, sizeof(k3));
        OK(ep3 != NULL);
 
-       OK(ep1->ref == 1);      /* no collisions */
+       rc = objcache_count(&cache);
+       OK(rc == 3);
+
+       OK(ep1->ref == 1);      /* no collisions, else improve hash */
 
        objcache_put(&cache, ep1);
        objcache_put(&cache, ep2);
@@ -53,6 +56,9 @@ int main(int argc, char *argv[])
        OK(ep2->ref == 1);      /* new */
        objcache_put(&cache, ep2);
 
+       rc = objcache_count(&cache);
+       OK(rc == 0);
+
        objcache_fini(&cache);
        return 0;
 }
--
To unsubscribe from this list: send the line "unsubscribe hail-devel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to