Changeset: 20349cf8332d for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/20349cf8332d
Modified Files:
gdk/gdk_bat.c
gdk/gdk_hash.c
gdk/gdk_hash.h
gdk/gdk_select.c
Branch: ustr
Log Message:
Fix hash maintenance and some other stuff for ustr.
diffs (275 lines):
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1870,11 +1870,19 @@ static inline BUN
slowfnd(BATiter *bi, const void *v)
{
BUN p, q;
- bool (*atomeq)(const void *, const void *) = ATOMequal(bi->type);
- BATloop(bi, p, q) {
- if ((*atomeq)(v, BUNtail(bi, p))) {
- return p;
+ if (bi->ustr) {
+ var_t off = *(var_t *) v;
+ BATloop(bi, p, q) {
+ if (off == VarHeapVal(bi->base, p, bi->width))
+ return p;
+ }
+ } else {
+ bool (*atomeq)(const void *, const void *) =
ATOMequal(bi->type);
+ BATloop(bi, p, q) {
+ if ((*atomeq)(v, BUNtail(bi, p))) {
+ return p;
+ }
}
}
return BUN_NONE;
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -375,8 +375,14 @@ HASHgrowbucket(BAT *b)
BATiter bi = bat_iterator(b);
if ((hb = HASHget(h, old)) != BUN_NONE) {
h->nheads--;
+ var_t off = 0;
+ const void *v = &off;
do {
- const void *v = BUNtail(&bi, hb);
+ if (h->offsets) {
+ off = VarHeapVal(bi.base, hb, bi.width);
+ } else {
+ v = BUNtail(&bi, hb);
+ }
BUN hsh = ATOMhash(h->type, v);
assert((hsh & (mask - 1)) == old);
if (hsh & mask) {
@@ -760,6 +766,7 @@ BAThash_impl(BAT *restrict b, struct can
return NULL;
}
h->width = HASHwidth(BATcapacity(b));
+ h->offsets = offsets;
h->heaplink.dirty = true;
h->heapbckt.dirty = true;
strtconcat(h->heaplink.filename, sizeof(h->heaplink.filename),
@@ -1199,12 +1206,21 @@ HASHappend_locked(BAT *b, BUN i, const v
BUN hb = HASHget(h, c);
BUN hb2;
BATiter bi = bat_iterator_nolock(b);
- bool (*atomeq)(const void *, const void *) = ATOMequal(h->type);
- for (hb2 = hb;
- hb2 != BUN_NONE;
- hb2 = HASHgetlink(h, hb2)) {
- if (atomeq(v, BUNtail(&bi, hb2)))
- break;
+ if (h->offsets) {
+ for (hb2 = hb;
+ hb2 != BUN_NONE;
+ hb2 = HASHgetlink(h, hb2)) {
+ if (*(var_t *) v == VarHeapVal(bi.base, hb2, bi.width))
+ break;
+ }
+ } else {
+ bool (*atomeq)(const void *, const void *) = ATOMequal(h->type);
+ for (hb2 = hb;
+ hb2 != BUN_NONE;
+ hb2 = HASHgetlink(h, hb2)) {
+ if (atomeq(v, BUNtail(&bi, hb2)))
+ break;
+ }
}
h->nheads += hb == BUN_NONE;
h->nunique += hb2 == BUN_NONE;
@@ -1265,6 +1281,16 @@ HASHinsert_locked(BATiter *bi, BUN p, co
h->heapbckt.dirty = true;
if (hb == BUN_NONE) {
h->nheads++;
+ } else if (h->offsets) {
+ do {
+ if (*(var_t *) v == VarHeapVal(bi->base, hb,
bi->width)) {
+ /* found another row with the
+ * same value, so don't
+ * increment nunique */
+ return;
+ }
+ hb = HASHgetlink(h, hb);
+ } while (hb != BUN_NONE);
} else {
do {
if (atomeq(v, BUNtail(bi, hb))) {
@@ -1281,23 +1307,44 @@ HASHinsert_locked(BATiter *bi, BUN p, co
return;
}
bool seen = false;
- for (;;) {
- if (!seen)
- seen = atomeq(v, BUNtail(bi, hb));
- BUN hb2 = HASHgetlink(h, hb);
- if (hb2 == BUN_NONE || hb2 < p) {
- HASHputlink(h, p, hb2);
- HASHputlink(h, hb, p);
- h->heaplink.dirty = true;
- while (!seen && hb2 != BUN_NONE) {
- seen = atomeq(v, BUNtail(bi, hb2));
- hb2 = HASHgetlink(h, hb2);
+ if (h->offsets) {
+ for (;;) {
+ if (!seen)
+ seen = *(var_t*)v == VarHeapVal(bi->base, hb,
bi->width);
+ BUN hb2 = HASHgetlink(h, hb);
+ if (hb2 == BUN_NONE || hb2 < p) {
+ HASHputlink(h, p, hb2);
+ HASHputlink(h, hb, p);
+ h->heaplink.dirty = true;
+ while (!seen && hb2 != BUN_NONE) {
+ seen = *(var_t*)v ==
VarHeapVal(bi->base, hb2, bi->width);
+ hb2 = HASHgetlink(h, hb2);
+ }
+ if (!seen)
+ h->nunique++;
+ return;
}
+ hb = hb2;
+ }
+ } else {
+ for (;;) {
if (!seen)
- h->nunique++;
- return;
+ seen = atomeq(v, BUNtail(bi, hb));
+ BUN hb2 = HASHgetlink(h, hb);
+ if (hb2 == BUN_NONE || hb2 < p) {
+ HASHputlink(h, p, hb2);
+ HASHputlink(h, hb, p);
+ h->heaplink.dirty = true;
+ while (!seen && hb2 != BUN_NONE) {
+ seen = atomeq(v, BUNtail(bi, hb2));
+ hb2 = HASHgetlink(h, hb2);
+ }
+ if (!seen)
+ h->nunique++;
+ return;
+ }
+ hb = hb2;
}
- hb = hb2;
}
}
@@ -1351,6 +1398,16 @@ HASHdelete_locked(BATiter *bi, BUN p, co
h->heapbckt.dirty = true;
if (hb2 == BUN_NONE) {
h->nheads--;
+ } else if (h->offsets) {
+ do {
+ if (*(var_t*)v == VarHeapVal(bi->base, hb2,
bi->width)) {
+ /* found another row with the
+ * same value, so don't
+ * decrement nunique below */
+ return;
+ }
+ hb2 = HASHgetlink(h, hb2);
+ } while (hb2 != BUN_NONE);
} else {
do {
if (atomeq(v, BUNtail(bi, hb2))) {
@@ -1369,25 +1426,49 @@ HASHdelete_locked(BATiter *bi, BUN p, co
}
bool seen = false;
BUN links = 0;
- for (;;) {
- if (!seen)
- seen = atomeq(v, BUNtail(bi, hb));
- BUN hb2 = HASHgetlink(h, hb);
- assert(hb2 != BUN_NONE );
- assert(hb2 < hb);
- if (hb2 == p) {
- for (hb2 = HASHgetlink(h, hb2);
- !seen && hb2 != BUN_NONE;
- hb2 = HASHgetlink(h, hb2))
- seen = atomeq(v, BUNtail(bi, hb2));
- break;
+ if (h->offsets) {
+ for (;;) {
+ if (!seen)
+ seen = *(var_t*)v == VarHeapVal(bi->base, hb,
bi->width);
+ BUN hb2 = HASHgetlink(h, hb);
+ assert(hb2 != BUN_NONE );
+ assert(hb2 < hb);
+ if (hb2 == p) {
+ for (hb2 = HASHgetlink(h, hb2);
+ !seen && hb2 != BUN_NONE;
+ hb2 = HASHgetlink(h, hb2))
+ seen = *(var_t*)v ==
VarHeapVal(bi->base, hb2, bi->width);
+ break;
+ }
+ hb = hb2;
+ if (++links > hash_destroy_chain_length) {
+ b->thash = NULL;
+ doHASHdestroy(b, h);
+ GDKclrerr();
+ return;
+ }
}
- hb = hb2;
- if (++links > hash_destroy_chain_length) {
- b->thash = NULL;
- doHASHdestroy(b, h);
- GDKclrerr();
- return;
+ } else {
+ for (;;) {
+ if (!seen)
+ seen = atomeq(v, BUNtail(bi, hb));
+ BUN hb2 = HASHgetlink(h, hb);
+ assert(hb2 != BUN_NONE );
+ assert(hb2 < hb);
+ if (hb2 == p) {
+ for (hb2 = HASHgetlink(h, hb2);
+ !seen && hb2 != BUN_NONE;
+ hb2 = HASHgetlink(h, hb2))
+ seen = atomeq(v, BUNtail(bi, hb2));
+ break;
+ }
+ hb = hb2;
+ if (++links > hash_destroy_chain_length) {
+ b->thash = NULL;
+ doHASHdestroy(b, h);
+ GDKclrerr();
+ return;
+ }
}
}
HASHputlink(h, hb, HASHgetlink(h, p));
diff --git a/gdk/gdk_hash.h b/gdk/gdk_hash.h
--- a/gdk/gdk_hash.h
+++ b/gdk/gdk_hash.h
@@ -14,6 +14,7 @@
struct Hash {
int type; /* type of index entity */
uint8_t width; /* width of hash entries */
+ bool offsets; /* hash on offsets */
BUN mask1; /* .mask1 < .nbucket <= .mask2 */
BUN mask2; /* ... both are power-of-two minus one */
BUN nbucket; /* number of valid hash buckets */
@@ -264,7 +265,7 @@ mix_inet6(const inet6 *u)
for (hb = HASHget(h, HASHprobe(h, v)); \
hb != BUN_NONE; \
hb = HASHgetlink(h, hb)) \
- if ((bi)->ustr ? \
+ if ((h)->offsets ? \
*(var_t*)(v) == VarHeapVal((bi)->base, hb, (bi)->width) : \
ATOMeq(h->type, v, BUNtail(bi, hb)))
#define HASHloop_str(bi, h, hb, v) \
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -92,7 +92,7 @@ virtualize(BAT *bn)
hb = HASHgetlink(h,hb)) \
if (hb >= (lo) && hb < (hi) && \
(eq == NULL || \
- (bi->ustr ? \
+ (h->offsets ? \
*(var_t *) v == VarHeapVal(bi->base, hb, bi->width) : \
(*eq)(v, BUNtail(bi, hb)))))
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]