Repository: lucy-clownfish Updated Branches: refs/heads/master f5139fff5 -> fa009460e
Implement pointer-to-pointer hash table Project: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/commit/6c8dbb36 Tree: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/tree/6c8dbb36 Diff: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/diff/6c8dbb36 Branch: refs/heads/master Commit: 6c8dbb36de4171950264c54ea813f1bba275bc61 Parents: 20eef8f Author: Nick Wellnhofer <[email protected]> Authored: Tue Mar 8 11:54:30 2016 +0100 Committer: Nick Wellnhofer <[email protected]> Committed: Thu Mar 10 14:28:49 2016 +0100 ---------------------------------------------------------------------- runtime/core/Clownfish/PtrHash.c | 226 +++++++++++++++++++++++ runtime/core/Clownfish/PtrHash.h | 53 ++++++ runtime/core/Clownfish/Test.c | 2 + runtime/core/Clownfish/Test/TestPtrHash.c | 108 +++++++++++ runtime/core/Clownfish/Test/TestPtrHash.cfh | 29 +++ runtime/perl/t/core/050-ptrhash.t | 23 +++ 6 files changed, 441 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/core/Clownfish/PtrHash.c ---------------------------------------------------------------------- diff --git a/runtime/core/Clownfish/PtrHash.c b/runtime/core/Clownfish/PtrHash.c new file mode 100644 index 0000000..9d42622 --- /dev/null +++ b/runtime/core/Clownfish/PtrHash.c @@ -0,0 +1,226 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <limits.h> +#include <stddef.h> + +#include "charmony.h" + +#define CFISH_USE_SHORT_NAMES +#include "Clownfish/PtrHash.h" +#include "Clownfish/Err.h" +#include "Clownfish/Util/Memory.h" + +#undef PTRHASH_STATS + +#ifdef PTRHASH_STATS + #include <stdio.h> +#endif + +#if CHAR_BIT * CHY_SIZEOF_PTR <= 32 + #define PTR_BITS 32 +#else + #define PTR_BITS 64 +#endif + +#define MAX_FILL_FACTOR 0.625 + +typedef struct PtrHashEntry { + void *key; + void *value; +} PtrHashEntry; + +struct PtrHash { + size_t num_items; + size_t cap; + int shift; + PtrHashEntry *entries; + PtrHashEntry *end; +}; + +static CFISH_INLINE size_t +SI_find_index(void *key, int shift); + +static CFISH_INLINE size_t +SI_get_cap(size_t size); + +static void +S_resize(PtrHash *self); + +PtrHash* +PtrHash_new(size_t min_cap) { + PtrHash *self = (PtrHash*)MALLOCATE(sizeof(PtrHash)); + + // Use minimum size of 8. + // size == 2 ** (PTR_BITS - shift) + size_t size = 8; + int shift = PTR_BITS - 3; + size_t cap = SI_get_cap(size); + + while (cap < min_cap) { + if (size > SIZE_MAX / 2 || shift == 0) { + THROW(ERR, "PtrHash size overflow"); + } + size *= 2; + shift -= 1; + cap = SI_get_cap(size); + } + + self->num_items = 0; + self->cap = cap; + self->shift = shift; + self->entries = (PtrHashEntry*)CALLOCATE(size, sizeof(PtrHashEntry)); + self->end = &self->entries[size]; + + return self; +} + +void +PtrHash_Destroy(PtrHash *self) { + FREEMEM(self->entries); + FREEMEM(self); +} + +// Multiplicative hash function using the prime nearest to the golden ratio. +// Reasonably good and very fast. +static CFISH_INLINE size_t +SI_find_index(void *key, int shift) { +#if PTR_BITS == 32 + uint32_t value = (uint32_t)key * 0x9E3779B1u; +#else + uint64_t value = (uint64_t)key * UINT64_C(0x9E3779B97F4A7C55); +#endif + return (size_t)(value >> shift); +} + +static CFISH_INLINE size_t +SI_get_cap(size_t size) { + return (size_t)(MAX_FILL_FACTOR * size); +} + +void +PtrHash_Store(PtrHash *self, void *key, void *value) { + if (key == NULL) { + THROW(ERR, "Can't store NULL key"); + } + + size_t index = SI_find_index(key, self->shift); + PtrHashEntry *entry = &self->entries[index]; + + while (entry->key != NULL) { + if (entry->key == key) { + entry->value = value; + return; + } + + entry += 1; + if (entry >= self->end) { entry = self->entries; } + } + + if (self->num_items >= self->cap) { + S_resize(self); + index = SI_find_index(key, self->shift); + entry = &self->entries[index]; + + while (entry->key != NULL) { + entry += 1; + if (entry >= self->end) { entry = self->entries; } + } + } + + entry->key = key; + entry->value = value; + self->num_items += 1; +} + +void* +PtrHash_Fetch(PtrHash *self, void *key) { + if (key == NULL) { + THROW(ERR, "Can't fetch NULL key"); + } + + size_t index = SI_find_index(key, self->shift); + PtrHashEntry *entry = &self->entries[index]; + + while (entry->key != NULL) { + if (entry->key == key) { + return entry->value; + } + + entry += 1; + if (entry >= self->end) { entry = self->entries; } + } + + return NULL; +} + +static void +S_resize(PtrHash *self) { + size_t old_size = self->end - self->entries; + if (old_size > SIZE_MAX / 2 || self->shift == 0) { + THROW(ERR, "PtrHash size overflow"); + } + size_t size = old_size * 2; + int shift = self->shift - 1; + + PtrHashEntry *entries + = (PtrHashEntry*)CALLOCATE(size, sizeof(PtrHashEntry)); + PtrHashEntry *end = &entries[size]; + +#ifdef PTRHASH_STATS + size_t extra_probes = 0; +#endif + + for (PtrHashEntry *old_entry = self->entries; + old_entry < self->end; + ++old_entry + ) { + void *key = old_entry->key; + if (key == NULL) { continue; } + +#ifdef PTRHASH_STATS + size_t i = old_entry - self->entries; + size_t old_index = SI_find_index(key, self->shift); + extra_probes += (i - old_index) & (old_size - 1); +#endif + + size_t index = SI_find_index(key, shift); + PtrHashEntry *entry = &entries[index]; + + while (entry->key != NULL) { + entry += 1; + if (entry >= end) { entry = entries; } + } + + entry->key = key; + entry->value = old_entry->value; + } + +#ifdef PTRHASH_STATS + fprintf(stderr, "size: %u, avg probes: %.2f\n", + (unsigned)old_size, + (double)extra_probes / self->num_items + 1.0); +#endif + + FREEMEM(self->entries); + + self->cap = SI_get_cap(size); + self->shift = shift; + self->entries = entries; + self->end = end; +} + + http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/core/Clownfish/PtrHash.h ---------------------------------------------------------------------- diff --git a/runtime/core/Clownfish/PtrHash.h b/runtime/core/Clownfish/PtrHash.h new file mode 100644 index 0000000..4a88bc4 --- /dev/null +++ b/runtime/core/Clownfish/PtrHash.h @@ -0,0 +1,53 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef H_CLOWNFISH_PTRHASH +#define H_CLOWNFISH_PTRHASH 1 + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct cfish_PtrHash cfish_PtrHash; + +cfish_PtrHash* +cfish_PtrHash_new(size_t min_cap); + +void +CFISH_PtrHash_Destroy(cfish_PtrHash *self); + +void +CFISH_PtrHash_Store(cfish_PtrHash *self, void *key, void *value); + +void* +CFISH_PtrHash_Fetch(cfish_PtrHash *self, void *key); + +#ifdef CFISH_USE_SHORT_NAMES + #define PtrHash cfish_PtrHash + #define PtrHash_new cfish_PtrHash_new + #define PtrHash_Destroy CFISH_PtrHash_Destroy + #define PtrHash_Store CFISH_PtrHash_Store + #define PtrHash_Fetch CFISH_PtrHash_Fetch +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* H_CLOWNFISH_PTRHASH */ + http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/core/Clownfish/Test.c ---------------------------------------------------------------------- diff --git a/runtime/core/Clownfish/Test.c b/runtime/core/Clownfish/Test.c index 9f03448..4453853 100644 --- a/runtime/core/Clownfish/Test.c +++ b/runtime/core/Clownfish/Test.c @@ -32,6 +32,7 @@ #include "Clownfish/Test/TestLockFreeRegistry.h" #include "Clownfish/Test/TestNum.h" #include "Clownfish/Test/TestObj.h" +#include "Clownfish/Test/TestPtrHash.h" #include "Clownfish/Test/TestVector.h" #include "Clownfish/Test/Util/TestAtomic.h" #include "Clownfish/Test/Util/TestMemory.h" @@ -55,6 +56,7 @@ Test_create_test_suite() { TestSuite_Add_Batch(suite, (TestBatch*)TestAtomic_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestLFReg_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestMemory_new()); + TestSuite_Add_Batch(suite, (TestBatch*)TestPtrHash_new()); return suite; } http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/core/Clownfish/Test/TestPtrHash.c ---------------------------------------------------------------------- diff --git a/runtime/core/Clownfish/Test/TestPtrHash.c b/runtime/core/Clownfish/Test/TestPtrHash.c new file mode 100644 index 0000000..3bf7003 --- /dev/null +++ b/runtime/core/Clownfish/Test/TestPtrHash.c @@ -0,0 +1,108 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define CFISH_USE_SHORT_NAMES +#define TESTCFISH_USE_SHORT_NAMES + +#include <stdlib.h> +#include <time.h> + +#include "Clownfish/Test/TestPtrHash.h" +#include "Clownfish/Class.h" +#include "Clownfish/PtrHash.h" +#include "Clownfish/TestHarness/TestBatchRunner.h" +#include "Clownfish/TestHarness/TestUtils.h" +#include "Clownfish/Util/Memory.h" + +TestPtrHash* +TestPtrHash_new() { + return (TestPtrHash*)Class_Make_Obj(TESTPTRHASH); +} + +static void +test_Store_and_Fetch(TestBatchRunner *runner) { + PtrHash *hash = PtrHash_new(100); + char dummy[100]; + + for (int i = 0; i < 100; i++) { + void *key = &dummy[i]; + PtrHash_Store(hash, key, key); + } + + bool all_equal = true; + for (int i = 0; i < 100; i++) { + void *key = &dummy[i]; + void *value = PtrHash_Fetch(hash, key); + if (value != key) { + all_equal = false; + break; + } + } + TEST_TRUE(runner, all_equal, "basic Store and Fetch"); + + TEST_TRUE(runner, PtrHash_Fetch(hash, &dummy[100]) == NULL, + "Fetch against non-existent key returns NULL"); + + PtrHash_Store(hash, &dummy[50], dummy); + TEST_TRUE(runner, PtrHash_Fetch(hash, &dummy[50]) == dummy, + "Store replaces existing value"); + + PtrHash_Destroy(hash); +} + +static void +test_stress(TestBatchRunner *runner) { + PtrHash *hash = PtrHash_new(0); // trigger multiple rebuilds. + size_t num_elems = 200000; + void **keys = (void**)MALLOCATE(num_elems * sizeof(void*)); + + for (size_t i = 0; i < num_elems; i++) { + size_t index = (size_t)(TestUtils_random_u64() % num_elems); + void *key = &keys[index]; + PtrHash_Store(hash, key, key); + keys[i] = key; + } + + // Overwrite for good measure. + for (size_t i = 0; i < num_elems; i++) { + void *key = keys[i]; + PtrHash_Store(hash, key, key); + } + + bool all_equal = true; + for (size_t i = 0; i < num_elems; i++) { + void *key = keys[i]; + void *got = PtrHash_Fetch(hash, key); + if (got != key) { + all_equal = false; + break; + } + } + TEST_TRUE(runner, all_equal, "stress test"); + + FREEMEM(keys); + PtrHash_Destroy(hash); +} + +void +TestPtrHash_Run_IMP(TestPtrHash *self, TestBatchRunner *runner) { + TestBatchRunner_Plan(runner, (TestBatch*)self, 4); + srand((unsigned int)time(NULL)); + test_Store_and_Fetch(runner); + test_stress(runner); +} + + http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/core/Clownfish/Test/TestPtrHash.cfh ---------------------------------------------------------------------- diff --git a/runtime/core/Clownfish/Test/TestPtrHash.cfh b/runtime/core/Clownfish/Test/TestPtrHash.cfh new file mode 100644 index 0000000..83589cb --- /dev/null +++ b/runtime/core/Clownfish/Test/TestPtrHash.cfh @@ -0,0 +1,29 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +parcel TestClownfish; + +class Clownfish::Test::TestPtrHash + inherits Clownfish::TestHarness::TestBatch { + + inert incremented TestPtrHash* + new(); + + void + Run(TestPtrHash *self, TestBatchRunner *runner); +} + + http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/perl/t/core/050-ptrhash.t ---------------------------------------------------------------------- diff --git a/runtime/perl/t/core/050-ptrhash.t b/runtime/perl/t/core/050-ptrhash.t new file mode 100644 index 0000000..5f03627 --- /dev/null +++ b/runtime/perl/t/core/050-ptrhash.t @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +use strict; +use warnings; + +use Clownfish::Test; +my $success = Clownfish::Test::run_tests("Clownfish::Test::TestPtrHash"); + +exit($success ? 0 : 1); +
