Changeset: cdf01e261bc6 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cdf01e261bc6
Modified Files:
gdk/gdk_group.c
Branch: Jul2017
Log Message:
Better hash algorithm for on-the-fly hash creation.
This reduces the run time of query 16 of TPC-H at scale factor 100 by
a factor of 65. That's a factor, not a percentage. (Measured with a
debug build, data on SSD.)
diffs (47 lines):
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -396,7 +396,7 @@
assert(p < end); \
INIT_1; \
prb = HASH; \
- prb = (prb ^ (BUN) grps[r] << bits) & hs->mask; \
+ prb = (prb ^ hash_oid(hs, &grps[r])) & hs->mask; \
for (hb = HASHget(hs, prb); \
hb != HASHnil(hs) && hb >= start; \
hb = HASHgetlink(hs, hb)) { \
@@ -1009,8 +1009,6 @@ BATgroup_internal(BAT **groups, BAT **ex
size_t nmelen;
Heap *hp = NULL;
BUN prb;
- BUN mask;
- int bits;
GDKclrerr(); /* not interested in BAThash errors */
@@ -1032,25 +1030,6 @@ BATgroup_internal(BAT **groups, BAT **ex
subsorted, gc ? " (g clustered)" : "");
nme = BBP_physical(b->batCacheid);
nmelen = strlen(nme);
- if (ATOMsize(t) == 1) {
- mask = 1 << 16;
- bits = 8;
- } else if (ATOMsize(t) == 2) {
- mask = 1 << 16;
- bits = 8;
- } else {
- /* when combining value and group-id hashes,
- * we left-shift one of them by half the
- * hash-mask width to better spread bits and
- * use the entire hash-mask, and thus reduce
- * collisions */
- mask = HASHmask(cnt) >> 3;
- bits = 3;
- while (mask >>= 1)
- bits++;
- bits /= 2;
- mask = HASHmask(cnt);
- }
if ((hp = GDKzalloc(sizeof(Heap))) == NULL ||
(hp->farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0 ||
(hp->filename = GDKmalloc(nmelen + 30)) == NULL ||
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list