Changeset: 23c5d54380c9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=23c5d54380c9
Modified Files:
gdk/gdk.h
gdk/gdk_bat.c
gdk/gdk_group.c
gdk/gdk_search.c
gdk/gdk_search.h
monetdb5/modules/kernel/bat5.c
Branch: default
Log Message:
Use variable-sized hash structures
The hash table and linked list width are derived from their storage size.
This leads to 2-, 4-, and 8-byte wide BUNS and a corresponding BUN_NONE value.
The width is used in the inner loop to access the hash structures.
The code is checked in for cross-platform compilation checks and
eagle eyes (it added 3 reports, most likely a result of assumptions
about BUN_NONE in the context of a hash structure access.)
diffs (truncated from 803 to 300 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -538,6 +538,15 @@ typedef oid var_t; /* type used for hea
#endif
typedef oid BUN; /* BUN position */
+#define BUN1 1
+#define BUN2 2
+#define BUN4 4
+#define BUN8 8
+typedef unsigned char BUN1type;
+typedef unsigned short BUN2type;
+typedef unsigned int BUN4type;
+typedef BUN BUN8type;
+
#define SIZEOF_BUN SIZEOF_OID
#define BUNFMT OIDFMT
/* alternatively:
@@ -634,6 +643,8 @@ typedef struct {
typedef struct {
int type; /* type of index entity */
+ int width; /* width of hash entries */
+ BUN nil; /* nil representation */
BUN lim; /* collision list size */
BUN mask; /* number of hash buckets-1 (power of 2) */
BUN *hash; /* hash table */
@@ -2904,19 +2915,19 @@ gdk_export int ALIGNsetH(BAT *b1, BAT *b
#define GDK_STREQ(l,r) (*(char*) (l) == *(char*) (r) && !strcmp(l,r))
#define HASHloop(bi, h, hb, v) \
- for (hb = (h)->hash[HASHprobe((h), v)]; \
- hb != BUN_NONE; \
- hb = (h)->link[hb]) \
+ for (hb = HASHget(h, HASHprobe((h), v)); \
+ hb != HASHnil(h); \
+ hb = HASHgetlink(h,hb)) \
if (ATOMcmp(h->type, v, BUNhead(bi, hb)) == 0)
#define HASHloop_str_hv(bi, h, hb, v) \
- for (hb = (h)->hash[((BUN *) (v))[-1]&(h)->mask]; \
- hb != BUN_NONE; \
- hb = (h)->link[hb]) \
+ for (hb = HASHget((h),((BUN *) (v))[-1]&(h)->mask); \
+ hb != HASHnil(h); \
+ hb = HASHgetlink(h,hb)) \
if (GDK_STREQ(v, BUNhvar(bi, hb)))
#define HASHloop_str(bi, h, hb, v) \
- for (hb = (h)->hash[strHash(v)&(h)->mask]; \
- hb != BUN_NONE; \
- hb = (h)->link[hb]) \
+ for (hb = HASHget((h),strHash(v)&(h)->mask); \
+ hb != HASHnil(h); \
+ hb = HASHgetlink(h,hb)) \
if (GDK_STREQ(v, BUNhvar(bi, hb)))
/*
@@ -2927,8 +2938,8 @@ gdk_export int ALIGNsetH(BAT *b1, BAT *b
* numbers instead of strings:
*/
#define HASHloop_fstr(bi, h, hb, idx, v) \
- for (hb = h->hash[strHash(v)&h->mask], idx =
strLocate((bi.b)->H->vheap,v); \
- hb != BUN_NONE; hb = h->link[hb]) \
+ for (hb = HASHget(h, strHash(v)&h->mask), idx =
strLocate((bi.b)->H->vheap,v); \
+ hb != HASHnil(h); hb = HASHgetlink(h,hb))
\
if (VarHeapValRaw((bi).b->H->heap.base, hb, (bi).b->H->width)
== idx)
/*
* The following example shows how the hashloop is used:
@@ -2958,20 +2969,20 @@ gdk_export int ALIGNsetH(BAT *b1, BAT *b
* (HASHlooploc) or variable-sized (HASHloopvar).
*/
#define HASHlooploc(bi, h, hb, v) \
- for (hb = (h)->hash[HASHprobe(h, v)]; \
- hb != BUN_NONE; \
- hb = (h)->link[hb]) \
+ for (hb = HASHget(h, HASHprobe(h, v)); \
+ hb != HASHnil(h); \
+ hb = HASHgetlink(h,hb)) \
if (ATOMcmp(h->type, v, BUNhloc(bi, hb)) == 0)
#define HASHloopvar(bi, h, hb, v) \
- for (hb = (h)->hash[HASHprobe(h, v)]; \
- hb != BUN_NONE; \
- hb = (h)->link[hb]) \
+ for (hb = HASHget(h,HASHprobe(h, v)); \
+ hb != HASHnil(h); \
+ hb = HASHgetlink(h,hb)) \
if (ATOMcmp(h->type, v, BUNhvar(bi, hb)) == 0)
#define HASHloop_TYPE(bi, h, hb, v, TYPE) \
- for (hb = (h)->hash[hash_##TYPE(h, v)]; \
- hb != BUN_NONE; \
- hb = (h)->link[hb]) \
+ for (hb = HASHget(h, hash_##TYPE(h, v)); \
+ hb != HASHnil(h); \
+ hb = HASHgetlink(h,hb)) \
if (simple_EQ(v, BUNhloc(bi, hb), TYPE))
#define HASHloop_bit(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, bte)
@@ -2987,9 +2998,9 @@ gdk_export int ALIGNsetH(BAT *b1, BAT *b
#define HASHloop_ptr(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, ptr)
#define HASHloop_any(bi, h, hb, v) \
- for (hb = (h)->hash[hash_any(h, v)]; \
- hb != BUN_NONE; \
- hb = (h)->link[hb]) \
+ for (hb = HASHget(h, hash_any(h, v)); \
+ hb != HASHnil(h); \
+ hb = HASHgetlink(h,hb)) \
if (atom_EQ(v, BUNhead(bi, hb), (bi).b->htype))
/*
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1915,9 +1915,9 @@ BUNlocate(BAT *b, const void *x, const v
BUN i;
for (i = 0; i <= v->H->hash->mask; i++)
- hcnt += (v->H->hash->hash[i] !=
BUN_NONE);
+ hcnt += HASHget(v->H->hash,i) !=
HASHnil(v->H->hash);
for (i = 0; i <= v->T->hash->mask; i++)
- tcnt += (v->T->hash->hash[i] !=
BUN_NONE);
+ tcnt += HASHget(v->T->hash,i) !=
HASHnil(v->T->hash);
if (hcnt < tcnt) {
usemirror();
v = BATmirror(v);
@@ -2976,13 +2976,13 @@ BATassertHeadProps(BAT *b)
BUN prb;
valp = BUNhead(bi, p);
prb = HASHprobe(hs, valp);
- for (hb = hs->hash[prb];
- hb != BUN_NONE;
- hb = hs->link[hb])
+ for (hb = HASHget(hs,prb);
+ hb != HASHnil(hs);
+ hb = HASHgetlink(hs,hb))
if (cmpf(valp, BUNhead(bi, hb)) == 0)
assert(!b->hkey);
- hs->link[p] = hs->hash[prb];
- hs->hash[prb] = p;
+ HASHputlink(hs,p, HASHget(hs,prb));
+ HASHput(hs,prb,p);
cmp = cmpf(valp, nilp);
assert(!b->H->nonil || cmp != 0);
if (cmp == 0)
@@ -3242,9 +3242,9 @@ BATderiveHeadProps(BAT *b, int expensive
prev = valp;
if (key && hs) {
prb = HASHprobe(hs, valp);
- for (hb = hs->hash[prb];
- hb != BUN_NONE;
- hb = hs->link[hb]) {
+ for (hb = HASHget(hs,prb);
+ hb != HASHnil(hs);
+ hb = HASHgetlink(hs,hb)) {
if (cmpf(valp, BUNhead(bi, hb)) == 0) {
key = 0;
b->H->nokey[0] = hb;
@@ -3252,8 +3252,8 @@ BATderiveHeadProps(BAT *b, int expensive
break;
}
}
- hs->link[p] = hs->hash[prb];
- hs->hash[prb] = p;
+ HASHputlink(hs,p, HASHget(hs,prb));
+ HASHput(hs,prb,p);
}
}
if (hs) {
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -102,12 +102,12 @@
for (r = BUNfirst(b), p = r, q = r + BATcount(b); p < q; p++) {
\
prb = hash_##TYPE(hs, &w[p]); \
if (gc) { \
- for (hb = hs->hash[prb]; \
- hb != BUN_NONE && \
+ for (hb = HASHget(hs,prb); \
+ hb != HASHnil(hs) &&
\
grps[hb - r] == grps[p - r]; \
- hb = hs->link[hb]) { \
- assert(hs->link[hb] == BUN_NONE \
- || hs->link[hb] < hb); \
+ hb = HASHgetlink(hs,hb)) { \
+ assert( HASHgetlink(hs,hb) ==
HASHnil(hs) \
+ || HASHgetlink(hs,hb) < hb);
\
if (w[p] == w[hb]) { \
oid grp = ngrps[hb - r]; \
ngrps[p - r] = grp; \
@@ -119,17 +119,17 @@
break; \
} \
} \
- if (hb != BUN_NONE && \
+ if (hb != HASHnil(hs) &&
\
grps[hb - r] != grps[p - r]) { \
/* we didn't assign a group */ \
/* yet */ \
- hb = BUN_NONE; \
+ hb = HASHnil(hs);
\
} \
} else if (grps) { \
prb = ((prb << bits) ^ (BUN) grps[p-r]) &
hs->mask; \
- for (hb = hs->hash[prb]; \
- hb != BUN_NONE; \
- hb = hs->link[hb]) { \
+ for (hb = HASHget(hs,prb); \
+ hb != HASHnil(hs); \
+ hb = HASHgetlink(hs,hb)) { \
if (grps[hb - r] == grps[p - r] && \
w[p] == w[hb]) { \
oid grp = ngrps[hb - r]; \
@@ -143,9 +143,9 @@
} \
} \
} else { \
- for (hb = hs->hash[prb]; \
- hb != BUN_NONE; \
- hb = hs->link[hb]) { \
+ for (hb = HASHget(hs,prb); \
+ hb != HASHnil(hs); \
+ hb = HASHgetlink(hs,hb)) { \
if (w[p] == w[hb]) { \
oid grp = ngrps[hb - r]; \
ngrps[p - r] = grp; \
@@ -158,11 +158,11 @@
} \
} \
} \
- if (hb == BUN_NONE) { \
+ if (hb == HASHnil(hs)) {
\
GRPnotfound(); \
/* enter new group into hash table */ \
- hs->link[p] = hs->hash[prb]; \
- hs->hash[prb] = p; \
+ HASHputlink(hs,p, HASHget(hs,prb)); \
+ HASHput(hs,prb,p); \
} \
} \
} while (0)
@@ -478,19 +478,19 @@ BATgroup_internal(BAT **groups, BAT **ex
/* skip irrelevant BUNs after the current
* BUNs; exploit that hash-table links
* backwards through BAT */
- for (hb = hs->hash[HASHprobe(hs, v)];
- hb != BUN_NONE && hb >= p;
- hb = hs->link[hb]) {
- assert(hs->link[hb] == BUN_NONE
- || hs->link[hb] < hb);
+ for (hb = HASHget(hs,HASHprobe(hs, v));
+ hb != HASHnil(hs)&& hb >= p;
+ hb = HASHgetlink(hs,hb)) {
+ assert( HASHgetlink(hs,hb) == HASHnil(hs)
+ || HASHgetlink(hs,hb) < hb);
}
;
if (gc) {
for (;
- hb != BUN_NONE && grps[hb - r] == grps[p -
r];
- hb = hs->link[hb]) {
- assert(hs->link[hb] == BUN_NONE
- || hs->link[hb] < hb);
+ hb != HASHnil(hs) && grps[hb - r] ==
grps[p - r];
+ hb = HASHgetlink(hs,hb)) {
+ assert( HASHgetlink(hs,hb) ==
HASHnil(hs)
+ || HASHgetlink(hs,hb) < hb);
if (cmp(v, BUNtail(bi, hb)) == 0) {
oid grp = ngrps[hb - r];
ngrps[p - r] = grp;
@@ -502,16 +502,16 @@ BATgroup_internal(BAT **groups, BAT **ex
break;
}
}
- if (hb != BUN_NONE &&
+ if (hb != HASHnil(hs) &&
grps[hb - r] != grps[p - r]) {
/* we didn't assign a group
* yet */
- hb = BUN_NONE;
+ hb = HASHnil(hs);
}
} else if (grps) {
for (;
- hb != BUN_NONE;
- hb = hs->link[hb]) {
+ hb != HASHnil(hs);
+ hb = HASHgetlink(hs,hb)) {
if (grps[hb - r] == grps[p - r] &&
cmp(v, BUNtail(bi, hb)) == 0) {
oid grp = ngrps[hb - r];
@@ -526,8 +526,8 @@ BATgroup_internal(BAT **groups, BAT **ex
}
} else {
for (;
- hb != BUN_NONE;
- hb = hs->link[hb]) {
+ hb != HASHnil(hs);
+ hb = HASHgetlink(hs,hb)) {
if (cmp(v, BUNtail(bi, hb)) == 0) {
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list