Commit: 1a3b3fa61490ea5efc467e7a9d51444f02883d03
Author: Bastien Montagne
Date:   Sat Feb 21 15:58:02 2015 +0100
Branches: temp-ghash-experiments
https://developer.blender.org/rB1a3b3fa61490ea5efc467e7a9d51444f02883d03

Some minor cleanup, much better statistics...

===================================================================

M       source/blender/blenlib/BLI_ghash.h
M       source/blender/blenlib/intern/BLI_ghash.c
M       tests/gtests/blenlib/BLI_ghash_test.cc

===================================================================

diff --git a/source/blender/blenlib/BLI_ghash.h 
b/source/blender/blenlib/BLI_ghash.h
index 6da92f1..5692213 100644
--- a/source/blender/blenlib/BLI_ghash.h
+++ b/source/blender/blenlib/BLI_ghash.h
@@ -245,10 +245,12 @@ BLI_INLINE bool BLI_gsetIterator_done(GSetIterator *gsi) 
{ return BLI_ghashItera
             BLI_gsetIterator_done(&gs_iter_) == false;                         
  \
             BLI_gsetIterator_step(&gs_iter_), i_++)
 
-//~ #ifdef DEBUG
-double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket, 
int *r_max_bucket);
-double BLI_gset_calc_quality(GSet *gs, double *r_load, int *r_min_bucket, int 
*r_max_bucket);
-//~ #endif
+double BLI_ghash_calc_quality(
+        GHash *gh, double *r_load, double *r_variance,
+        double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int 
*r_biggest_bucket);
+double BLI_gset_calc_quality(
+        GSet *gs, double *r_load, double *r_variance,
+        double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int 
*r_biggest_bucket);
 
 #ifdef __cplusplus
 }
diff --git a/source/blender/blenlib/intern/BLI_ghash.c 
b/source/blender/blenlib/intern/BLI_ghash.c
index e13526d..4a2a234 100644
--- a/source/blender/blenlib/intern/BLI_ghash.c
+++ b/source/blender/blenlib/intern/BLI_ghash.c
@@ -1160,14 +1160,15 @@ GSet *BLI_gset_pair_new(const char *info)
 #include "BLI_math.h"
 
 /**
- * Measure how well the hash function performs, i.e. variance of the 
distribution of the entries in the buckets.
- * (0.0 is approx as good as even distribution).
+ * Measure how well the hash function performs (a quality of about 1.0 means 
approx as good as a random distribution),
+ * and return a few other stats like load, variance of the distribution of the 
entries in the buckets, etc.
  *
- * Smaller is better!
+ * Smaller is better (1.0 is the random-distribution baseline)!
  */
-double BLI_ghash_calc_quality(GHash *gh, double *r_load, int *r_min_bucket, 
int *r_max_bucket)
+double BLI_ghash_calc_quality(
+        GHash *gh, double *r_load, double *r_variance,
+        double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int 
*r_biggest_bucket)
 {
-       double sum = 0.0;
        double mean;
        unsigned int i;
 
@@ -1175,11 +1176,17 @@ double BLI_ghash_calc_quality(GHash *gh, double 
*r_load, int *r_min_bucket, int
                if (r_load) {
                        *r_load = 0.0;
                }
-               if (r_min_bucket) {
-                       *r_min_bucket = 0;
+               if (r_variance) {
+                       *r_variance = 0.0;
                }
-               if (r_max_bucket) {
-                       *r_max_bucket = 0;
+               if (r_prop_empty_buckets) {
+                       *r_prop_empty_buckets = 1.0;
+               }
+               if (r_prop_overloaded_buckets) {
+                       *r_prop_overloaded_buckets = 0.0;
+               }
+               if (r_biggest_bucket) {
+                       *r_biggest_bucket = 0;
                }
 
                return 0.0;
@@ -1189,35 +1196,62 @@ double BLI_ghash_calc_quality(GHash *gh, double 
*r_load, int *r_min_bucket, int
        if (r_load) {
                *r_load = mean;
        }
-       if (r_min_bucket) {
-               *r_min_bucket = INT_MAX;
-       }
-       if (r_max_bucket) {
-               *r_max_bucket = 0;
+       if (r_biggest_bucket) {
+               *r_biggest_bucket = 0;
        }
 
-       /* We already know our mean (i.e. load factor), easy to compute 
variance.
-        * See 
http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
-        */
-       for (i = 0; i < gh->nbuckets; i++) {
-               int count = 0;
-               Entry *e;
-               for (e = gh->buckets[i]; e; e = e->next) {
-                       count++;
-               }
-               if (r_min_bucket) {
-                       *r_min_bucket = min_ii(*r_min_bucket, (int)count);
-               }
-               if (r_max_bucket) {
-                       *r_max_bucket = max_ii(*r_max_bucket, (int)count);
+       if (r_variance) {
+               /* We already know our mean (i.e. load factor), easy to compute 
variance.
+                * See 
http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
+                */
+               double sum = 0.0;
+               for (i = 0; i < gh->nbuckets; i++) {
+                       int count = 0;
+                       Entry *e;
+                       for (e = gh->buckets[i]; e; e = e->next) {
+                               count++;
+                       }
+                       sum += ((double)count - mean) * ((double)count - mean);
                }
-               sum += ((double)count - mean) * ((double)count - mean);
+               *r_variance = sum / (double)(gh->nbuckets - 1);
        }
-       return sum / (double)(gh->nbuckets - 1);
-}
-double BLI_gset_calc_quality(GSet *gs, double *r_load, int *r_min_bucket, int 
*r_max_bucket)
-{
-       return BLI_ghash_calc_quality((GHash *)gs, r_load, r_min_bucket, 
r_max_bucket);
+
+       {
+          uint64_t sum = 0;
+          uint64_t overloaded_buckets_threshold = 
(uint64_t)max_ii(GHASH_LIMIT_GROW(1), 1);
+          uint64_t sum_overloaded = 0;
+          uint64_t sum_empty = 0;
+
+          for (i = 0; i < gh->nbuckets; i++) {
+                  uint64_t count = 0;
+                  Entry *e;
+                  for (e = gh->buckets[i]; e; e = e->next) {
+                          count++;
+                  }
+                  if (r_biggest_bucket) {
+                          *r_biggest_bucket = max_ii(*r_biggest_bucket, 
(int)count);
+                  }
+                  if (r_prop_overloaded_buckets && (count > 
overloaded_buckets_threshold)) {
+                          sum_overloaded++;
+                  }
+                  if (r_prop_empty_buckets && !count) {
+                          sum_empty++;
+                  }
+                  sum += count * (count + 1);
+          }
+          if (r_prop_overloaded_buckets) {
+                  *r_prop_overloaded_buckets = (double)sum_overloaded / 
(double)gh->nbuckets;
+          }
+          if (r_prop_empty_buckets) {
+                  *r_prop_empty_buckets = (double)sum_empty / 
(double)gh->nbuckets;
+          }
+          return ((double)sum * (double)gh->nbuckets /
+                          ((double)gh->nentries * (gh->nentries + 2 * 
gh->nbuckets - 1)));
+   }
+}
+double BLI_gset_calc_quality(GSet *gs, double *r_load, double *r_variance, 
double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int 
*r_biggest_bucket)
+{
+       return BLI_ghash_calc_quality((GHash *)gs, r_load, r_variance, 
r_prop_empty_buckets, r_prop_overloaded_buckets, r_biggest_bucket);
 }
 
 /** \} */
diff --git a/tests/gtests/blenlib/BLI_ghash_test.cc 
b/tests/gtests/blenlib/BLI_ghash_test.cc
index 495e9a1..fb78140 100644
--- a/tests/gtests/blenlib/BLI_ghash_test.cc
+++ b/tests/gtests/blenlib/BLI_ghash_test.cc
@@ -137,11 +137,13 @@ Praesent luctus vitae nunc vitae pellentesque. Praesent 
faucibus sed urna ut lac
 
 #define PRINTF_GHASH_STATS(_gh) \
 { \
-       double q, lf; \
-       int minb, maxb; \
-       q = BLI_ghash_calc_quality((_gh), &lf, &minb, &maxb); \
-       printf("GHash stats:\n\tQuality (the lower the better): %f\n\tLoad: 
%f\n\tSmallest bucket: %d\n\tBiggest bucket: %d\n", \
-              q, lf, minb, maxb); \
+       double q, lf, var, pempty, poverloaded; \
+       int bigb; \
+       q = BLI_ghash_calc_quality((_gh), &lf, &var, &pempty, &poverloaded, 
&bigb); \
+       printf("GHash stats (%d entries):\n\t" \
+              "Quality (the lower the better): %f\n\tVariance (the lower the 
better): %f\n\tLoad: %f\n\t" \
+              "Empty buckets: %.2f%%\n\tOverloaded buckets: %.2f%% (biggest 
bucket: %d)\n", \
+              BLI_ghash_size(_gh), q, var, lf, pempty * 100.0, poverloaded * 
100.0, bigb); \
 } void (0)
 
 
@@ -267,12 +269,10 @@ TEST(ghash, TextMurmur2a)
 
 /* Int: uniform 50M first integers. */
 
-static void int_ghash_tests(GHash *ghash, const char *id)
+static void int_ghash_tests(GHash *ghash, const char *id, const unsigned int 
nbr)
 {
        printf("\n========== STARTING %s ==========\n", id);
 
-       const unsigned int nbr = 100000000;
-
        {
                unsigned int i = nbr;
 
@@ -313,24 +313,31 @@ TEST(ghash, IntGHash)
 {
        GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, 
BLI_ghashutil_intcmp, __func__);
 
-       int_ghash_tests(ghash, "IntGHash - GHash");
+       int_ghash_tests(ghash, "IntGHash - GHash - 12000", 12000);
+
+       ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, 
__func__);
+
+       int_ghash_tests(ghash, "IntGHash - GHash - 100000000", 100000000);
 }
 
 TEST(ghash, IntMurmur2a)
 {
        GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, 
BLI_ghashutil_intcmp, __func__);
 
-       int_ghash_tests(ghash, "IntGHash - Murmur");
+       int_ghash_tests(ghash, "IntGHash - Murmur - 12000", 12000);
+
+       ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, 
BLI_ghashutil_intcmp, __func__);
+
+       int_ghash_tests(ghash, "IntGHash - Murmur - 100000000", 100000000);
 }
 
 
 /* Int_v4: 10M of randomly-generated integer vectors. */
 
-static void int4_ghash_tests(GHash *ghash, const char *id)
+static void int4_ghash_tests(GHash *ghash, const char *id, const unsigned int 
nbr)
 {
        printf("\n========== STARTING %s ==========\n", id);
 
-       const unsigned int nbr = 20000000;
        unsigned int (*data)[4] = (unsigned int 
(*)[4])MEM_mallocN(sizeof(*data) * (size_t)nbr, __func__);
        unsigned int (*dt)[4];
        unsigned int i, j;
@@ -382,12 +389,20 @@ TEST(ghash, Int4GHash)
 {
        GHash *ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p, 
BLI_ghashutil_uinthash_v4_cmp, __func__);
 
-       int4_ghash_tests(ghash, "Int4GHash - GHash");
+       int4_ghash_tests(ghash, "Int4GHash - GHash - 2000", 2000);
+
+       ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p, 
BLI_ghashutil_uinthash_v4_cmp, __func__);
+
+       int4_ghash_tests(ghash, "Int4GHash - GHash - 20000000", 20000000);
 }
 
 TEST(ghash, Int4Murmur2a)
 {
        GHash *ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p_murmur, 
BLI_ghashutil_uinthash_v4_cmp, __func__);
 
-       int4_ghash_tests(ghash, "Int4GHash - Murmur");
+       int4_ghash_tests(ghash, "Int4GHash - Murmur - 2000", 2000);
+
+       ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p_murmur, 
BLI_ghashutil_uinthash_v4_cmp, __func__);
+
+       int4_ghash_tests(ghash, "Int4GHash - Murmur - 20000000", 20000000);
 }

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to