Repository: lucy-clownfish Updated Branches: refs/heads/0.4 6fced3c95 -> c0e6f245f
Fix for duplicate hash entries This fixes a rather serious bug in the hash table implementation. Hash_do_store would create a new entry as soon as it found a tombstone, ignoring possibly existing entries after the tombstone. This could lead to entries being created twice in a hash table. Fetching the entry would at first return the new entry. But a rehash would overwrite the new entry with the old entry. Iterating a hash would also return both entries. Fixes CLOWNFISH-33. Project: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/commit/c0e6f245 Tree: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/tree/c0e6f245 Diff: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/diff/c0e6f245 Branch: refs/heads/0.4 Commit: c0e6f245fdb14150d24feabde0aab30d5069e74a Parents: 6fced3c Author: Nick Wellnhofer <[email protected]> Authored: Tue Apr 14 17:23:58 2015 +0200 Committer: Nick Wellnhofer <[email protected]> Committed: Wed Apr 15 15:09:36 2015 +0200 ---------------------------------------------------------------------- runtime/core/Clownfish/Hash.c | 14 ++++++------ runtime/core/Clownfish/Test/TestHash.c | 34 ++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/c0e6f245/runtime/core/Clownfish/Hash.c ---------------------------------------------------------------------- diff --git a/runtime/core/Clownfish/Hash.c b/runtime/core/Clownfish/Hash.c index 987631c..5c90e9b 100644 --- a/runtime/core/Clownfish/Hash.c +++ b/runtime/core/Clownfish/Hash.c @@ -117,6 +117,13 @@ Hash_Clear_IMP(Hash *self) { void Hash_do_store(Hash *self, Obj *key, Obj *value, int32_t hash_sum, bool use_this_key) { + HashEntry *entry = SI_fetch_entry(self, key, hash_sum); + if (entry) { + DECREF(entry->value); + entry->value = value; + return; + } + HashEntry *entries = self->size >= self->threshold ? SI_rebuild_hash(self) : (HashEntry*)self->entries; @@ -139,13 +146,6 @@ Hash_do_store(Hash *self, Obj *key, Obj *value, self->size++; break; } - else if (entry->hash_sum == hash_sum - && Obj_Equals(key, entry->key) - ) { - DECREF(entry->value); - entry->value = value; - break; - } tick++; // linear scan } } http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/c0e6f245/runtime/core/Clownfish/Test/TestHash.c ---------------------------------------------------------------------- diff --git a/runtime/core/Clownfish/Test/TestHash.c b/runtime/core/Clownfish/Test/TestHash.c index 54ec399..9d14e1d 100644 --- a/runtime/core/Clownfish/Test/TestHash.c +++ b/runtime/core/Clownfish/Test/TestHash.c @@ -221,14 +221,46 @@ test_stress(TestBatchRunner *runner) { DECREF(hash); } +static void +test_store_skips_tombstone(TestBatchRunner *runner) { + Hash *hash = Hash_new(0); + uint32_t mask = Hash_Get_Capacity(hash) - 1; + + String *one = Str_newf("one"); + uint32_t slot = Str_Hash_Sum(one) & mask; + + // Find a colliding key. + String *two = NULL; + for (int i = 0; i < 100000; i++) { + two = Str_newf("%i32", i); + if (slot == (Str_Hash_Sum(two) & mask)) { + break; + } + DECREF(two); + two = NULL; + } + + Hash_Store(hash, (Obj*)one, (Obj*)CFISH_TRUE); + Hash_Store(hash, (Obj*)two, (Obj*)CFISH_TRUE); + Hash_Delete(hash, (Obj*)one); + Hash_Store(hash, (Obj*)two, (Obj*)CFISH_TRUE); + + TEST_INT_EQ(runner, Hash_Get_Size(hash), 1, "Store skips tombstone"); + + DECREF(one); + DECREF(two); + DECREF(hash); +} + void TestHash_Run_IMP(TestHash *self, TestBatchRunner *runner) { - TestBatchRunner_Plan(runner, (TestBatch*)self, 27); + TestBatchRunner_Plan(runner, (TestBatch*)self, 28); srand((unsigned int)time((time_t*)NULL)); test_Equals(runner); test_Store_and_Fetch(runner); test_Keys_Values_Iter(runner); test_stress(runner); + test_store_skips_tombstone(runner); }
