Changeset: 3258e25e5b3e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/3258e25e5b3e
Modified Files:
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_hash.c
        gdk/gdk_hash.h
        gdk/gdk_join.c
        gdk/gdk_private.h
        gdk/gdk_select.c
Branch: ustr
Log Message:

A hash created on a ustr BAT (i.e. a BAT with the ustr property set) is now built on the heap offsets, not on the string values.


diffs (truncated from 728 to 300 lines):

diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1155,10 +1155,18 @@ BUNappendmulti(BAT *b, const void *value
                                VALclear(&maxprop);
                        if (b->thash) {
                                p -= count;
-                               for (BUN i = 0; i < count; i++) {
-                                       t = ((void **) values)[i];
-                                       HASHappend_locked(b, p, t);
-                                       p++;
+                               if (b->ustr) {
+                                       for (BUN i = 0; i < count; i++) {
+                                               var_t o = VarHeapVal(b->theap->base, p, b->twidth);
+                                               HASHappend_locked(b, p, &o);
+                                               p++;
+                                       }
+                               } else {
+                                       for (BUN i = 0; i < count; i++) {
+                                               t = ((void **) values)[i];
+                                               HASHappend_locked(b, p, t);
+                                               p++;
+                                       }
                                }
                                nunique = b->thash ? b->thash->nunique : 0;
                        }
@@ -1218,7 +1226,7 @@ BUNappendmulti(BAT *b, const void *value
                                return rc;
                        }
                        if (b->thash) {
-                               HASHappend_locked(b, p, t);
+                               HASHappend_locked(b, p, b->ustr ? &(var_t){0} : t);
                        }
                        p++;
                }
@@ -1415,7 +1423,7 @@ BUNdelete(BAT *b, oid o)
        /* load hash so that we can maintain it */
        (void) BATcheckhash(b);
 
-       BUN nunique = HASHdelete(&bi, p, BUNtail(&bi, p));
+       BUN nunique = HASHdelete(&bi, p, bi.ustr ? &(var_t){VarHeapVal(bi.base, p, bi.width)} : BUNtail(&bi, p));
        ATOMdel(b->ttype, b->tvheap, (var_t *) BUNtloc(&bi, p));
        bat_iterator_end(&bi);
 
@@ -1507,6 +1515,7 @@ BUNinplacemulti(BAT *b, const oid *posit
        /* load hash so that we can maintain it */
        (void) BATcheckhash(b);
        MT_rwlock_wrlock(&b->thashlock);
+       var_t off = 0;
        for (BUN i = 0; i < count; i++) {
                BUN p = autoincr ? positions[0] - b->hseqbase + i : positions[i] - b->hseqbase;
                const void *t = b->ttype && b->tvheap ?
@@ -1537,13 +1546,14 @@ BUNinplacemulti(BAT *b, const oid *posit
                } else if (bi.type == TYPE_msk) {
                        val = BUNtmsk(&bi, p);
                } else if (b->tvheap) {
-                       var_t off = VarHeapVal(bi.base, p, bi.width);
+                       off = VarHeapVal(bi.base, p, bi.width);
                        if (off == 0)
                                val = ATOMnilptr(bi.type);
                        else if (off < bi.vhfree)
                                val = bi.vh->base + off;
                        else
                                val = NULL; /* bad offset */
+
                } else {
                        val = BUNtloc(&bi, p);
                }
@@ -1596,7 +1606,7 @@ BUNinplacemulti(BAT *b, const oid *posit
                                        }
                                }
                        }
-                       HASHdelete_locked(&bi, p, val); /* first delete old value from hash */
+                       HASHdelete_locked(&bi, p, b->ustr ? &off : val);        /* first delete old value from hash */
                } else {
                        /* out of range old value, so the properties and
                         * hash cannot be trusted */
@@ -1648,6 +1658,7 @@ BUNinplacemulti(BAT *b, const oid *posit
                                MT_rwlock_wrunlock(&b->thashlock);
                                goto bailout;
                        }
+                       off = _d;
                        if (b->twidth < SIZEOF_VAR_T &&
                            (b->twidth <= 2 && _d != 0 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 << b->tshift))) {
                                /* doesn't fit in current heap, upgrade it */
@@ -1723,7 +1734,7 @@ BUNinplacemulti(BAT *b, const oid *posit
                        }
                }
 
-               HASHinsert_locked(&bi, p, t);   /* insert new value into hash */
+               HASHinsert_locked(&bi, p, b->ustr ? &off : t);  /* insert new value into hash */
 
                prv = p > 0 ? p - 1 : BUN_NONE;
                nxt = p < last ? p + 1 : BUN_NONE;
@@ -1917,6 +1928,16 @@ BUNfnd(BAT *b, const void *v)
                if (BATordered(b) || BATordered_rev(b))
                        return SORTfnd(b, v);
        }
+       var_t off = 0;
+       if (b->ustr) {
+               BAT *u = getUstrBat();
+               if (u == NULL || (r = BUNfnd(u, v)) == BUN_NONE)
+                       return r;
+               bi = bat_iterator(u);
+               off = VarHeapVal(bi.base, r, bi.width);
+               bat_iterator_end(&bi);
+               v = &off;
+       }
        bi = bat_iterator(b);   /* outside of hashlock */
        if (BAThash(b) == GDK_SUCCEED) {
                MT_rwlock_rdlock(&b->thashlock);
@@ -1968,8 +1989,13 @@ BUNfnd(BAT *b, const void *v)
                                break;
                        break;
                case TYPE_str:
-                       HASHloop_str(&bi, b->thash, r, v)
-                               break;
+                       if (bi.ustr) {
+                               HASHloop_var_t(&bi, b->thash, r, v)
+                                       break;
+                       } else {
+                               HASHloop_str(&bi, b->thash, r, v)
+                                       break;
+                       }
                        break;
                default:
                        HASHloop(&bi, b->thash, r, v)
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -418,9 +418,16 @@ insert_string_bat(BAT *b, BATiter *ni, s
        assert(b->batCapacity >= b->batCount);
        MT_lock_unset(&b->theaplock);
        /* maintain hash */
-       for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
-               off = VarHeapVal(Tloc(b, 0), r, b->twidth);
-               HASHappend_locked(b, r, off == 0 ? str_nil : b->tvheap->base + off);
+       if (b->ustr) {
+               for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
+                       off = VarHeapVal(Tloc(b, 0), r, b->twidth);
+                       HASHappend_locked(b, r, &off);
+               }
+       } else {
+               for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
+                       off = VarHeapVal(Tloc(b, 0), r, b->twidth);
+                       HASHappend_locked(b, r, off == 0 ? str_nil : b->tvheap->base + off);
+               }
        }
        BUN nunique = b->thash ? b->thash->nunique : 0;
        MT_rwlock_wrunlock(&b->thashlock);
@@ -1512,7 +1519,7 @@ BATappend_or_update(BAT *b, BAT *p, cons
                                locked = true;
                        }
                        if (old)
-                               HASHdelete_locked(&bi, updid, old);
+                               HASHdelete_locked(&bi, updid, bi.ustr ? &off : old);
                        else if (b->thash) {
                                doHASHdestroy(b, b->thash);
                                b->thash = NULL;
@@ -1599,7 +1606,7 @@ BATappend_or_update(BAT *b, BAT *p, cons
                        default:
                                MT_UNREACHABLE();
                        }
-                       HASHinsert_locked(&bi, updid, new);
+                       HASHinsert_locked(&bi, updid, bi.ustr ? &prevoff : new);
 
                }
                if (locked) {
@@ -3282,7 +3289,7 @@ BATcount_no_nil(BAT *b, BAT *s)
        }
        if (BATcheckhash(b)) {
                BUN p = 0;
-               const void *nil = ATOMnilptr(b->ttype);
+               const void *nil = b->ustr ? &(var_t){0} : ATOMnilptr(b->ttype);
                cnt = ci.ncand;
                HASHloop(&bi, b->thash, p, nil)
                        if (canditer_contains(&ci, p + b->hseqbase))
@@ -3342,11 +3349,6 @@ BATcount_no_nil(BAT *b, BAT *s)
                        cnt += !is_inet6_nil(((const inet6 *) p)[canditer_next(&ci) - hseq]);
                break;
        case TYPE_str:
-               if (bi.ustr) {
-                       /* TODO: check whether nil occurs in ustrbat; if
-                        * not, return BATcount(b), else count offsets
-                        * != nil offset */
-               }
                if (bi.vkey) {
                        if (GDK_ELIMDOUBLES(bi.vh)) {
                                off = strLocate(bi.vh, str_nil);
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -738,31 +738,11 @@ BAThash_impl(BAT *restrict b, struct can
        const char *nme = GDKinmemory(b->theap->farmid) ? ":memory:" : BBP_physical(b->batCacheid);
        BATiter bi = bat_iterator(b);
        unsigned int tpe = ATOMbasetype(bi.type);
-       if (offsets) {
-               assert(b->tvheap);
-               switch (bi.width) {
-               case 1:
-                       tpe = TYPE_bte;
-                       break;
-               case 2:
-                       tpe = TYPE_sht;
-                       break;
-               case 4:
-                       tpe = TYPE_int;
-                       break;
-#if SIZEOF_VAR_T == 8
-               case 8:
-                       tpe = TYPE_lng;
-                       break;
-#endif
-               default:
-                       MT_UNREACHABLE();
-               }
-       }
        bool hascand = ci->tpe != cand_dense || ci->ncand != bi.count;
 
        QryCtx *qry_ctx = MT_thread_get_qry_ctx();
 
+       assert(!offsets || ATOMvarsized(b->ttype));
        assert(strcmp(ext, "thash") != 0 || !hascand);
        assert(bi.type != TYPE_msk);
        assert(bi.type != TYPE_void);
@@ -851,8 +831,9 @@ BAThash_impl(BAT *restrict b, struct can
                p = 0;
                HEAPfree(&h->heapbckt, true);
                /* create the hash structures */
-               if (HASHnew(h, ATOMtype(tpe), BATcapacity(b),
-                           mask, ci->ncand, true) != GDK_SUCCEED) {
+               if (HASHnew(h, offsets ? TYPE_oid : ATOMtype(tpe),
+                           BATcapacity(b), mask, ci->ncand,
+                           true) != GDK_SUCCEED) {
                        HEAPfree(&h->heaplink, true);
                        GDKfree(h);
                        bat_iterator_end(&bi);
@@ -893,6 +874,34 @@ BAThash_impl(BAT *restrict b, struct can
                        starthash(inet6);
                        break;
                default: {
+                       if (offsets) {
+                               TIMEOUT_LOOP_IDX(p, cnt1, qry_ctx) {
+                                       var_t off = VarHeapVal(bi.base, o - b->hseqbase, bi.width);
+                                       c = hash_oid(h, &off);
+                                       hget = HASHget(h, c);
+                                       if (hget == BUN_NONE) {
+                                               if (h->nheads == maxslots)
+                                                       TIMEOUT_LOOP_BREAK; /* mask too full */
+                                               h->nheads++;
+                                               h->nunique++;
+                                       } else {
+                                               for (hb = hget;
+                                                    hb != BUN_NONE;
+                                                    hb = HASHgetlink(h, hb)) {
+                                                       if (off == VarHeapVal(bi.base, hb, bi.width))
+                                                               break;
+                                               }
+                                               h->nunique += hb == BUN_NONE;
+                                       }
+                                       HASHputlink(h, p, hget);
+                                       HASHput(h, c, p);
+                                       o = canditer_next(ci);
+                               }
+                               TIMEOUT_CHECK(qry_ctx,
+                                             GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
+                               break;
+                       }
+
                        bool (*atomeq)(const void *, const void *) = ATOMequal(tpe);
                        TIMEOUT_LOOP_IDX(p, cnt1, qry_ctx) {
                                const void *restrict v = BUNtail(&bi, o - b->hseqbase);
@@ -976,6 +985,32 @@ BAThash_impl(BAT *restrict b, struct can
                finishhash(inet6);
                break;
        default: {
+               if (offsets) {
+                       TIMEOUT_LOOP(ci->ncand - p, qry_ctx) {
+                               var_t off = VarHeapVal(bi.base, o - b->hseqbase, bi.width);
+                               c = hash_oid(h, &off);
+                               hget = HASHget(h, c);
+                               h->nheads += hget == BUN_NONE;
+                               if (!hascand) {
+                                       for (hb = hget;
+                                            hb != BUN_NONE;
+                                            hb = HASHgetlink(h, hb)) {
+                                               if (off == VarHeapVal(bi.base, hb, bi.width))
+                                                       break;
+                                       }
+                                       h->nunique += hb == BUN_NONE;
+                                       o = canditer_next_dense(ci);
+                               } else {
+                                       o = canditer_next(ci);
+                               }
+                               HASHputlink(h, p, hget);
+                               HASHput(h, c, p);
+                               p++;
+                       }
+                       TIMEOUT_CHECK(qry_ctx,
+                                     GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to