Changeset: 3258e25e5b3e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/3258e25e5b3e
Modified Files:
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_hash.c
gdk/gdk_hash.h
gdk/gdk_join.c
gdk/gdk_private.h
gdk/gdk_select.c
Branch: ustr
Log Message:
A hash created on a ustr BAT (i.e. one with the ustr property set) is built on offsets, not on the strings themselves.
diffs (truncated from 728 to 300 lines):
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1155,10 +1155,18 @@ BUNappendmulti(BAT *b, const void *value
VALclear(&maxprop);
if (b->thash) {
p -= count;
- for (BUN i = 0; i < count; i++) {
- t = ((void **) values)[i];
- HASHappend_locked(b, p, t);
- p++;
+ if (b->ustr) {
+ for (BUN i = 0; i < count; i++) {
+ var_t o =
VarHeapVal(b->theap->base, p, b->twidth);
+ HASHappend_locked(b, p, &o);
+ p++;
+ }
+ } else {
+ for (BUN i = 0; i < count; i++) {
+ t = ((void **) values)[i];
+ HASHappend_locked(b, p, t);
+ p++;
+ }
}
nunique = b->thash ? b->thash->nunique : 0;
}
@@ -1218,7 +1226,7 @@ BUNappendmulti(BAT *b, const void *value
return rc;
}
if (b->thash) {
- HASHappend_locked(b, p, t);
+ HASHappend_locked(b, p, b->ustr ? &(var_t){0} :
t);
}
p++;
}
@@ -1415,7 +1423,7 @@ BUNdelete(BAT *b, oid o)
/* load hash so that we can maintain it */
(void) BATcheckhash(b);
- BUN nunique = HASHdelete(&bi, p, BUNtail(&bi, p));
+ BUN nunique = HASHdelete(&bi, p, bi.ustr ? &(var_t){VarHeapVal(bi.base,
p, bi.width)} : BUNtail(&bi, p));
ATOMdel(b->ttype, b->tvheap, (var_t *) BUNtloc(&bi, p));
bat_iterator_end(&bi);
@@ -1507,6 +1515,7 @@ BUNinplacemulti(BAT *b, const oid *posit
/* load hash so that we can maintain it */
(void) BATcheckhash(b);
MT_rwlock_wrlock(&b->thashlock);
+ var_t off = 0;
for (BUN i = 0; i < count; i++) {
BUN p = autoincr ? positions[0] - b->hseqbase + i :
positions[i] - b->hseqbase;
const void *t = b->ttype && b->tvheap ?
@@ -1537,13 +1546,14 @@ BUNinplacemulti(BAT *b, const oid *posit
} else if (bi.type == TYPE_msk) {
val = BUNtmsk(&bi, p);
} else if (b->tvheap) {
- var_t off = VarHeapVal(bi.base, p, bi.width);
+ off = VarHeapVal(bi.base, p, bi.width);
if (off == 0)
val = ATOMnilptr(bi.type);
else if (off < bi.vhfree)
val = bi.vh->base + off;
else
val = NULL; /* bad offset */
+
} else {
val = BUNtloc(&bi, p);
}
@@ -1596,7 +1606,7 @@ BUNinplacemulti(BAT *b, const oid *posit
}
}
}
- HASHdelete_locked(&bi, p, val); /* first delete old
value from hash */
+ HASHdelete_locked(&bi, p, b->ustr ? &off : val);
/* first delete old value from hash */
} else {
/* out of range old value, so the properties and
* hash cannot be trusted */
@@ -1648,6 +1658,7 @@ BUNinplacemulti(BAT *b, const oid *posit
MT_rwlock_wrunlock(&b->thashlock);
goto bailout;
}
+ off = _d;
if (b->twidth < SIZEOF_VAR_T &&
(b->twidth <= 2 && _d != 0 ? _d - GDK_VAROFFSET :
_d) >= ((size_t) 1 << (8 << b->tshift))) {
/* doesn't fit in current heap, upgrade it */
@@ -1723,7 +1734,7 @@ BUNinplacemulti(BAT *b, const oid *posit
}
}
- HASHinsert_locked(&bi, p, t); /* insert new value into hash */
+ HASHinsert_locked(&bi, p, b->ustr ? &off : t); /* insert new
value into hash */
prv = p > 0 ? p - 1 : BUN_NONE;
nxt = p < last ? p + 1 : BUN_NONE;
@@ -1917,6 +1928,16 @@ BUNfnd(BAT *b, const void *v)
if (BATordered(b) || BATordered_rev(b))
return SORTfnd(b, v);
}
+ var_t off = 0;
+ if (b->ustr) {
+ BAT *u = getUstrBat();
+ if (u == NULL || (r = BUNfnd(u, v)) == BUN_NONE)
+ return r;
+ bi = bat_iterator(u);
+ off = VarHeapVal(bi.base, r, bi.width);
+ bat_iterator_end(&bi);
+ v = &off;
+ }
bi = bat_iterator(b); /* outside of hashlock */
if (BAThash(b) == GDK_SUCCEED) {
MT_rwlock_rdlock(&b->thashlock);
@@ -1968,8 +1989,13 @@ BUNfnd(BAT *b, const void *v)
break;
break;
case TYPE_str:
- HASHloop_str(&bi, b->thash, r, v)
- break;
+ if (bi.ustr) {
+ HASHloop_var_t(&bi, b->thash, r, v)
+ break;
+ } else {
+ HASHloop_str(&bi, b->thash, r, v)
+ break;
+ }
break;
default:
HASHloop(&bi, b->thash, r, v)
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -418,9 +418,16 @@ insert_string_bat(BAT *b, BATiter *ni, s
assert(b->batCapacity >= b->batCount);
MT_lock_unset(&b->theaplock);
/* maintain hash */
- for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
- off = VarHeapVal(Tloc(b, 0), r, b->twidth);
- HASHappend_locked(b, r, off == 0 ? str_nil : b->tvheap->base +
off);
+ if (b->ustr) {
+ for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
+ off = VarHeapVal(Tloc(b, 0), r, b->twidth);
+ HASHappend_locked(b, r, &off);
+ }
+ } else {
+ for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
+ off = VarHeapVal(Tloc(b, 0), r, b->twidth);
+ HASHappend_locked(b, r, off == 0 ? str_nil :
b->tvheap->base + off);
+ }
}
BUN nunique = b->thash ? b->thash->nunique : 0;
MT_rwlock_wrunlock(&b->thashlock);
@@ -1512,7 +1519,7 @@ BATappend_or_update(BAT *b, BAT *p, cons
locked = true;
}
if (old)
- HASHdelete_locked(&bi, updid, old);
+ HASHdelete_locked(&bi, updid, bi.ustr ? &off :
old);
else if (b->thash) {
doHASHdestroy(b, b->thash);
b->thash = NULL;
@@ -1599,7 +1606,7 @@ BATappend_or_update(BAT *b, BAT *p, cons
default:
MT_UNREACHABLE();
}
- HASHinsert_locked(&bi, updid, new);
+ HASHinsert_locked(&bi, updid, bi.ustr ? &prevoff : new);
}
if (locked) {
@@ -3282,7 +3289,7 @@ BATcount_no_nil(BAT *b, BAT *s)
}
if (BATcheckhash(b)) {
BUN p = 0;
- const void *nil = ATOMnilptr(b->ttype);
+ const void *nil = b->ustr ? &(var_t){0} : ATOMnilptr(b->ttype);
cnt = ci.ncand;
HASHloop(&bi, b->thash, p, nil)
if (canditer_contains(&ci, p + b->hseqbase))
@@ -3342,11 +3349,6 @@ BATcount_no_nil(BAT *b, BAT *s)
cnt += !is_inet6_nil(((const inet6 *)
p)[canditer_next(&ci) - hseq]);
break;
case TYPE_str:
- if (bi.ustr) {
- /* TODO: check whether nil occurs in ustrbat; if
- * not, return BATcount(b), else count offsets
- * != nil offset */
- }
if (bi.vkey) {
if (GDK_ELIMDOUBLES(bi.vh)) {
off = strLocate(bi.vh, str_nil);
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -738,31 +738,11 @@ BAThash_impl(BAT *restrict b, struct can
const char *nme = GDKinmemory(b->theap->farmid) ? ":memory:" :
BBP_physical(b->batCacheid);
BATiter bi = bat_iterator(b);
unsigned int tpe = ATOMbasetype(bi.type);
- if (offsets) {
- assert(b->tvheap);
- switch (bi.width) {
- case 1:
- tpe = TYPE_bte;
- break;
- case 2:
- tpe = TYPE_sht;
- break;
- case 4:
- tpe = TYPE_int;
- break;
-#if SIZEOF_VAR_T == 8
- case 8:
- tpe = TYPE_lng;
- break;
-#endif
- default:
- MT_UNREACHABLE();
- }
- }
bool hascand = ci->tpe != cand_dense || ci->ncand != bi.count;
QryCtx *qry_ctx = MT_thread_get_qry_ctx();
+ assert(!offsets || ATOMvarsized(b->ttype));
assert(strcmp(ext, "thash") != 0 || !hascand);
assert(bi.type != TYPE_msk);
assert(bi.type != TYPE_void);
@@ -851,8 +831,9 @@ BAThash_impl(BAT *restrict b, struct can
p = 0;
HEAPfree(&h->heapbckt, true);
/* create the hash structures */
- if (HASHnew(h, ATOMtype(tpe), BATcapacity(b),
- mask, ci->ncand, true) != GDK_SUCCEED) {
+ if (HASHnew(h, offsets ? TYPE_oid : ATOMtype(tpe),
+ BATcapacity(b), mask, ci->ncand,
+ true) != GDK_SUCCEED) {
HEAPfree(&h->heaplink, true);
GDKfree(h);
bat_iterator_end(&bi);
@@ -893,6 +874,34 @@ BAThash_impl(BAT *restrict b, struct can
starthash(inet6);
break;
default: {
+ if (offsets) {
+ TIMEOUT_LOOP_IDX(p, cnt1, qry_ctx) {
+ var_t off = VarHeapVal(bi.base, o -
b->hseqbase, bi.width);
+ c = hash_oid(h, &off);
+ hget = HASHget(h, c);
+ if (hget == BUN_NONE) {
+ if (h->nheads == maxslots)
+ TIMEOUT_LOOP_BREAK; /*
mask too full */
+ h->nheads++;
+ h->nunique++;
+ } else {
+ for (hb = hget;
+ hb != BUN_NONE;
+ hb = HASHgetlink(h, hb)) {
+ if (off ==
VarHeapVal(bi.base, hb, bi.width))
+ break;
+ }
+ h->nunique += hb == BUN_NONE;
+ }
+ HASHputlink(h, p, hget);
+ HASHput(h, c, p);
+ o = canditer_next(ci);
+ }
+ TIMEOUT_CHECK(qry_ctx,
+
GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
+ break;
+ }
+
bool (*atomeq)(const void *, const void *) =
ATOMequal(tpe);
TIMEOUT_LOOP_IDX(p, cnt1, qry_ctx) {
const void *restrict v = BUNtail(&bi, o -
b->hseqbase);
@@ -976,6 +985,32 @@ BAThash_impl(BAT *restrict b, struct can
finishhash(inet6);
break;
default: {
+ if (offsets) {
+ TIMEOUT_LOOP(ci->ncand - p, qry_ctx) {
+ var_t off = VarHeapVal(bi.base, o -
b->hseqbase, bi.width);
+ c = hash_oid(h, &off);
+ hget = HASHget(h, c);
+ h->nheads += hget == BUN_NONE;
+ if (!hascand) {
+ for (hb = hget;
+ hb != BUN_NONE;
+ hb = HASHgetlink(h, hb)) {
+ if (off == VarHeapVal(bi.base,
hb, bi.width))
+ break;
+ }
+ h->nunique += hb == BUN_NONE;
+ o = canditer_next_dense(ci);
+ } else {
+ o = canditer_next(ci);
+ }
+ HASHputlink(h, p, hget);
+ HASHput(h, c, p);
+ p++;
+ }
+ TIMEOUT_CHECK(qry_ctx,
+ GOTO_LABEL_TIMEOUT_HANDLER(bailout,
qry_ctx));
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]