Commit: 1181dda62f2961d1a6dab47aea672f1591dec00b
Author: Bastien Montagne
Date: Fri Feb 20 20:00:52 2015 +0100
Branches: temp-ghash-experiments
https://developer.blender.org/rB1181dda62f2961d1a6dab47aea672f1591dec00b
GHash: fix/hugely enhance GHash tests.
Also, fix some stupid mistakes in some new murmur hashing helpers,
and rework quality function to simply compute variance in buckets occupation.
Note latest change has some quite funny effects, looks like variance is always
very close to load of the ghash...
As for performances:
* Globally, ghash and murmur have very similar results, both in speed and
quality.
* For integers, ghash usually quicker.
* For strings, oddly murmur is much quicker when adding to ghash, but quite
slower
when lookingup in the ghash...
Anyway, still much to do in this area!
===================================================================
M source/blender/blenlib/BLI_ghash.h
M source/blender/blenlib/intern/BLI_ghash.c
M tests/gtests/blenlib/BLI_ghash_test.cc
===================================================================
diff --git a/source/blender/blenlib/BLI_ghash.h
b/source/blender/blenlib/BLI_ghash.h
index 15aae5f..0af320d 100644
--- a/source/blender/blenlib/BLI_ghash.h
+++ b/source/blender/blenlib/BLI_ghash.h
@@ -64,6 +64,7 @@ GHash *BLI_ghash_new_ex(GHashHashFP hashfp, GHashCmpFP cmpfp,
const char *info,
GHash *BLI_ghash_new(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info)
ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
void BLI_ghash_free(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP
valfreefp);
void BLI_ghash_insert(GHash *gh, void *key, void *val);
+bool BLI_ghash_add(GHash *gh, void *key, void *val);
bool BLI_ghash_reinsert(GHash *gh, void *key, void *val, GHashKeyFreeFP
keyfreefp, GHashValFreeFP valfreefp);
void *BLI_ghash_lookup(GHash *gh, const void *key) ATTR_WARN_UNUSED_RESULT;
void *BLI_ghash_lookup_default(GHash *gh, const void *key, void *val_default)
ATTR_WARN_UNUSED_RESULT;
@@ -134,21 +135,30 @@ bool BLI_ghashutil_strcmp(const void *a, const
void *b);
CHECK_TYPE_ANY(&(key), int *, const int *), \
BLI_ghashutil_uinthash((unsigned int)key))
unsigned int BLI_ghashutil_uinthash(unsigned int key);
+unsigned int BLI_ghashutil_inthash_p(const void *ptr);
+unsigned int BLI_ghashutil_inthash_p_murmur(const void *ptr);
+bool BLI_ghashutil_intcmp(const void *a, const void *b);
+
+
+unsigned int BLI_ghashutil_uinthash_v4(const unsigned int key[4]);
#define BLI_ghashutil_inthash_v4(key) ( \
CHECK_TYPE_ANY(key, int *, const int *), \
BLI_ghashutil_uinthash_v4((const unsigned int *)key))
-unsigned int BLI_ghashutil_uinthash_v4(const unsigned int key[4]);
#define BLI_ghashutil_inthash_v4_p \
((GSetHashFP)BLI_ghashutil_uinthash_v4)
#define BLI_ghashutil_uinthash_v4_p \
((GSetHashFP)BLI_ghashutil_uinthash_v4)
-unsigned int BLI_ghashutil_uinthash_v4_p_murmur(const void *ptr);
+unsigned int BLI_ghashutil_uinthash_v4_murmur(const unsigned int key[4]);
+#define BLI_ghashutil_inthash_v4_murmur(key) ( \
+ CHECK_TYPE_ANY(key, int *, const int *), \
+ BLI_ghashutil_uinthash_v4_murmur((const unsigned int *)key))
+#define BLI_ghashutil_inthash_v4_p_murmur \
+ ((GSetHashFP)BLI_ghashutil_uinthash_v4_murmur)
+#define BLI_ghashutil_uinthash_v4_p_murmur \
+ ((GSetHashFP)BLI_ghashutil_uinthash_v4_murmur)
bool BLI_ghashutil_uinthash_v4_cmp(const void *a, const void *b);
#define BLI_ghashutil_inthash_v4_cmp \
BLI_ghashutil_uinthash_v4_cmp
-unsigned int BLI_ghashutil_inthash_p(const void *ptr);
-unsigned int BLI_ghashutil_inthash_p_murmur(const void *ptr);
-bool BLI_ghashutil_intcmp(const void *a, const void *b);
/** \} */
@@ -235,8 +245,8 @@ BLI_INLINE bool BLI_gsetIterator_done(GSetIterator *gsi) {
return BLI_ghashItera
BLI_gsetIterator_step(&gs_iter_), i_++)
//~ #ifdef DEBUG
-double BLI_ghash_calc_quality(GHash *gh);
-double BLI_gset_calc_quality(GSet *gs);
+double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket,
int *r_max_bucket);
+double BLI_gset_calc_quality(GSet *gs, double *r_load, int *r_min_bucket, int
*r_max_bucket);
//~ #endif
#ifdef __cplusplus
diff --git a/source/blender/blenlib/intern/BLI_ghash.c
b/source/blender/blenlib/intern/BLI_ghash.c
index a8aacbe..e0c2e4b 100644
--- a/source/blender/blenlib/intern/BLI_ghash.c
+++ b/source/blender/blenlib/intern/BLI_ghash.c
@@ -361,6 +361,26 @@ void BLI_ghash_insert(GHash *gh, void *key, void *val)
}
/**
+ * Like #BLI_ghash_insert, but does nothing in case \a key is already in the
\a gh.
+ *
+ * Avoids #BLI_ghash_haskey, #BLI_ghash_insert calls (double lookups)
+ *
+ * \returns true if a new key has been added.
+ */
+bool BLI_ghash_add(GHash *gh, void *key, void *val)
+{
+ const unsigned int hash = ghash_keyhash(gh, key);
+ Entry *e = ghash_lookup_entry_ex(gh, key, hash);
+ if (e) {
+ return false;
+ }
+ else {
+ ghash_insert_ex(gh, key, val, hash);
+ return true;
+ }
+}
+
+/**
* Inserts a new value to a key that may already be in ghash.
*
* Avoids #BLI_ghash_remove, #BLI_ghash_insert calls (double lookups)
@@ -717,11 +737,9 @@ unsigned int BLI_ghashutil_uinthash_v4(const unsigned int
key[4])
hash += key[3];
return hash;
}
-unsigned int BLI_ghashutil_uinthash_v4_p_murmur(const void *ptr)
+unsigned int BLI_ghashutil_uinthash_v4_murmur(const unsigned int key[4])
{
- const unsigned int *key = ptr;
-
- return BLI_hash_mm2((const unsigned char *)key, sizeof(*key) * 4, 0);
+ return BLI_hash_mm2((const unsigned char *)key, sizeof(key), 0);
}
bool BLI_ghashutil_uinthash_v4_cmp(const void *a, const void *b)
@@ -757,9 +775,9 @@ unsigned int BLI_ghashutil_inthash_p(const void *ptr)
unsigned int BLI_ghashutil_inthash_p_murmur(const void *ptr)
{
- const unsigned int *key = ptr;
+ uintptr_t key = (uintptr_t)ptr;
- return BLI_hash_mm2((const unsigned char *)key, sizeof(*key), 0);
+ return BLI_hash_mm2((const unsigned char *)&key, sizeof(key), 0);
}
bool BLI_ghashutil_intcmp(const void *a, const void *b)
@@ -802,7 +820,7 @@ unsigned int BLI_ghashutil_strhash_p_murmur(const void *ptr)
{
const unsigned char *key = ptr;
- return BLI_hash_mm2(key, strlen((const char *)key), 0);
+ return BLI_hash_mm2(key, strlen((const char *)key) + 1, 0);
}
bool BLI_ghashutil_strcmp(const void *a, const void *b)
{
@@ -1044,37 +1062,68 @@ GSet *BLI_gset_pair_new(const char *info)
/** \name Debugging & Introspection
* \{ */
-//~ #ifdef DEBUG
+
+#include "BLI_math.h"
/**
- * Measure how well the hash function performs
- * (1.0 is approx as good as random distribution).
+ * Measure how well the hash function performs, i.e. variance of the
distribution of the entries in the buckets.
+ * (0.0 is approx as good as even distribution).
*
* Smaller is better!
*/
-double BLI_ghash_calc_quality(GHash *gh)
+double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket,
int *r_max_bucket)
{
- uint64_t sum = 0;
+ double sum = 0.0;
+ double mean;
unsigned int i;
- if (gh->nentries == 0)
- return -1.0;
+ if (gh->nentries == 0) {
+ if (r_load) {
+ *r_load = 0.0;
+ }
+ if (r_min_bucket) {
+ *r_min_bucket = 0;
+ }
+ if (r_max_bucket) {
+ *r_max_bucket = 0;
+ }
+ return 0.0;
+ }
+
+ mean = (double)gh->nentries / (double)gh->nbuckets;
+ if (r_load) {
+ *r_load = mean;
+ }
+ if (r_min_bucket) {
+ *r_min_bucket = INT_MAX;
+ }
+ if (r_max_bucket) {
+ *r_max_bucket = 0;
+ }
+
+ /* We already know our mean (i.e. load factor), easy to compute
variance.
+ * See
http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
+ */
for (i = 0; i < gh->nbuckets; i++) {
- uint64_t count = 0;
+ int count = 0;
Entry *e;
for (e = gh->buckets[i]; e; e = e->next) {
- count += 1;
+ count++;
+ }
+ if (r_min_bucket) {
+ *r_min_bucket = min_ii(*r_min_bucket, (int)count);
+ }
+ if (r_max_bucket) {
+ *r_max_bucket = max_ii(*r_max_bucket, (int)count);
}
- sum += count * (count + 1);
+ sum += ((double)count - mean) * ((double)count - mean);
}
- return ((double)sum * (double)gh->nbuckets /
- ((double)gh->nentries * (gh->nentries + 2 * gh->nbuckets - 1)));
+ return sum / (double)(gh->nbuckets - 1);
}
-double BLI_gset_calc_quality(GSet *gs)
+double BLI_gset_calc_quality(GSet *gs, double *r_load, int *r_min_bucket, int
*r_max_bucket)
{
- return BLI_ghash_calc_quality((GHash *)gs);
+ return BLI_ghash_calc_quality((GHash *)gs, r_load, r_min_bucket,
r_max_bucket);
}
-//~ #endif
/** \} */
diff --git a/tests/gtests/blenlib/BLI_ghash_test.cc
b/tests/gtests/blenlib/BLI_ghash_test.cc
index 0a27443..b45de8f 100644
--- a/tests/gtests/blenlib/BLI_ghash_test.cc
+++ b/tests/gtests/blenlib/BLI_ghash_test.cc
@@ -2,11 +2,11 @@
#include "testing/testing.h"
-#define BLI_INLINE
-
extern "C" {
#include "MEM_guardedalloc.h"
+#include "BLI_utildefines.h"
#include "BLI_ghash.h"
+#include "BLI_rand.h"
#include "BLI_string.h"
#include "PIL_time_utildefines.h"
}
@@ -128,164 +128,250 @@ Vivamus eu ligula blandit, imperdiet arcu at, rutrum
sem. Aliquam erat volutpat.
Nullam eget malesuada justo, at molestie quam. Sed consequat massa eu faucibus
maximus. Curabitur placerat orci sapien, sit amet semper magna sodales non. Ut
fermentum accumsan odio in consectetur. Morbi neque mi, vulputate nec mi ut,
cursus scelerisque lectus. Nulla sapien enim, finibus id ipsum luctus,
consequat ullamcorper lectus. Sed volutpat sed massa in sodales. Morbi lacinia
diam eu commodo vulputate. Fusce aliquet pulvinar dolor in egestas. Fusce
molestie commodo leo eu ultricies [...]
Praesent luctus vitae nunc vitae pellentesque. Praesent faucibus sed urna ut
lacinia. Vivamus id justo quis dolor porta rutrum nec nec odio. Cras euismod
tortor quis diam ultrices, eu mattis nisi consectetur. Fusce mattis nisi vel
condimentum molestie. Fusce fringilla ut nibh volutpat elementum. Mauris
posuere consectetur leo a aliquet. Donec quis sodales sapien. Maecenas ut felis
tempus, eleifend mauris et, faucibus mi. Quisque fringilla orci arcu, sit amet
porta risus hendrerit non. Ae [...]
+/* Using
http://corpora.uni-leipzig.de/downloads/eng_wikipedia_2010_1M-text.tar.gz (1
million of words, about 122MB of text)
+ * from http://corpora.informatik.uni-leipzig.de/download.html */
+#define TEXT_CORPUS_PATH
"/home/i74700deb64/Téléchargements/eng_wikipedia_2010_1M-text/eng_wikipedia_2010_1M-sentences.txt"
-TEST(ghash, TextGHash)
+#define PRINTF_GHASH_ST
@@ Diff output truncated at 10240 characters. @@
_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs