Changeset: efbaa733bbd0 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/efbaa733bbd0
Modified Files:
        gdk/gdk_hash.c
Branch: Mar2025
Log Message:

if the tunique_est property is too small, resize hash table on creation.


diffs (59 lines):

diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -662,7 +662,8 @@ BAThashsave(BAT *b, bool dosync)
 #define starthash(TYPE)                                                        \
        do {                                                            \
                const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
-               TIMEOUT_LOOP(p, qry_ctx) {                              \
+               TIMEOUT_LOOP(cnt1, qry_ctx) {                   \
+                       c = hash_##TYPE(h, v + o - b->hseqbase);        \
                        hget = HASHget(h, c);                           \
                        if (hget == BUN_NONE) {                         \
                                if (h->nheads == maxslots)              \
@@ -681,6 +682,7 @@ BAThashsave(BAT *b, bool dosync)
                        HASHputlink(h, p, hget);                        \
                        HASHput(h, c, p);                               \
                        o = canditer_next(ci);                          \
+                       p++;                                            \
                }                                                       \
                TIMEOUT_CHECK(qry_ctx,                                  \
                              GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
@@ -799,7 +801,10 @@ BAThash_impl(BAT *restrict b, struct can
                 * adjusting the hash mask */
                mask = HASHmask(ci->ncand);
        } else if (!hascand && bi.unique_est != 0) {
-               mask = (BUN) (bi.unique_est * 1.15); /* about 8/7 */
+               maxmask = HASHmask(ci->ncand);
+               mask = HASHmask(bi.unique_est);
+               /* it's only an estimate: try out on first 25% of b */
+               cnt1 = ci->ncand >> 2;
        } else {
                /* dynamic hash: we start with HASHmask(ci->ncand)/64, or,
                 * if ci->ncand large enough, HASHmask(ci->ncand)/256; if there
@@ -811,6 +816,8 @@ BAThash_impl(BAT *restrict b, struct can
                mask = maxmask >> 6;
                while (mask > 4096)
                        mask >>= 2;
+               if (mask < BATTINY)
+                       mask = BATTINY;
                /* try out on first 25% of b */
                cnt1 = ci->ncand >> 2;
        }
@@ -863,7 +870,7 @@ BAThash_impl(BAT *restrict b, struct can
                        break;
                default: {
                        int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
-                       TIMEOUT_LOOP(p, qry_ctx) {
+                       TIMEOUT_LOOP(cnt1, qry_ctx) {
                                const void *restrict v = BUNtail(bi, o - b->hseqbase);
                                c = hash_any(h, v);
                                hget = HASHget(h, c);
@@ -885,6 +892,7 @@ BAThash_impl(BAT *restrict b, struct can
                                HASHputlink(h, p, hget);
                                HASHput(h, c, p);
                                o = canditer_next(ci);
+                               p++;
                        }
                        TIMEOUT_CHECK(qry_ctx,
                                      GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to