Changeset: cdf01e261bc6 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cdf01e261bc6
Modified Files:
        gdk/gdk_group.c
Branch: Jul2017
Log Message:

Better hash algorithm for on-the-fly hash creation.
This improves the time of query 16 of TPC-H scale factor 100 by a
factor of 65.  That's a factor, not a percentage.  (debug build, data
on SSD)


diffs (47 lines):

diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -396,7 +396,7 @@
                                assert(p < end);                        \
                                INIT_1;                                 \
                                prb = HASH;                             \
-                               prb = (prb ^ (BUN) grps[r] << bits) & hs->mask; \
+                               prb = (prb ^ hash_oid(hs, &grps[r])) & hs->mask; \
                                for (hb = HASHget(hs, prb);             \
                                     hb != HASHnil(hs) && hb >= start;  \
                                     hb = HASHgetlink(hs, hb)) {        \
@@ -1009,8 +1009,6 @@ BATgroup_internal(BAT **groups, BAT **ex
                size_t nmelen;
                Heap *hp = NULL;
                BUN prb;
-               BUN mask;
-               int bits;
 
                GDKclrerr();    /* not interested in BAThash errors */
 
@@ -1032,25 +1030,6 @@ BATgroup_internal(BAT **groups, BAT **ex
                                  subsorted, gc ? " (g clustered)" : "");
                nme = BBP_physical(b->batCacheid);
                nmelen = strlen(nme);
-               if (ATOMsize(t) == 1) {
-                       mask = 1 << 16;
-                       bits = 8;
-               } else if (ATOMsize(t) == 2) {
-                       mask = 1 << 16;
-                       bits = 8;
-               } else {
-                       /* when combining value and group-id hashes,
-                        * we left-shift one of them by half the
-                        * hash-mask width to better spread bits and
-                        * use the entire hash-mask, and thus reduce
-                        * collisions */
-                       mask = HASHmask(cnt) >> 3;
-                       bits = 3;
-                       while (mask >>= 1)
-                               bits++;
-                       bits /= 2;
-                       mask = HASHmask(cnt);
-               }
                if ((hp = GDKzalloc(sizeof(Heap))) == NULL ||
                    (hp->farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0 ||
                    (hp->filename = GDKmalloc(nmelen + 30)) == NULL ||
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to