Changeset: 23c5d54380c9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=23c5d54380c9
Modified Files:
        gdk/gdk.h
        gdk/gdk_bat.c
        gdk/gdk_group.c
        gdk/gdk_search.c
        gdk/gdk_search.h
        monetdb5/modules/kernel/bat5.c
Branch: default
Log Message:

Use variable-sized hash structures
The hash table and linked list width are derived from their storage size.
This leads to 2-, 4-, and 8-byte wide BUNS and a corresponding BUN_NONE value.
The width is used in the inner loop to access the hash structures.

The code is checked in for cross-platform compilation checks and
eagle eyes (it added 3 reports,  most likely a result of assumptions
about BUN_NONE in the context of a hash structure access.)


diffs (truncated from 803 to 300 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -538,6 +538,15 @@ typedef oid var_t;         /* type used for hea
 #endif
 
 typedef oid BUN;               /* BUN position */
+#define BUN1 1
+#define BUN2 2
+#define BUN4 4
+#define BUN8 8
+typedef unsigned char BUN1type;
+typedef unsigned short BUN2type;
+typedef unsigned int BUN4type;
+typedef BUN BUN8type;
+
 #define SIZEOF_BUN     SIZEOF_OID
 #define BUNFMT         OIDFMT
 /* alternatively:
@@ -634,6 +643,8 @@ typedef struct {
 
 typedef struct {
        int type;               /* type of index entity */
+       int width;              /* width of hash entries */
+       BUN nil;                /* nil representation */
        BUN lim;                /* collision list size */
        BUN mask;               /* number of hash buckets-1 (power of 2) */
        BUN *hash;              /* hash table */
@@ -2904,19 +2915,19 @@ gdk_export int ALIGNsetH(BAT *b1, BAT *b
 #define GDK_STREQ(l,r) (*(char*) (l) == *(char*) (r) && !strcmp(l,r))
 
 #define HASHloop(bi, h, hb, v)                                 \
-       for (hb = (h)->hash[HASHprobe((h), v)];                 \
-            hb != BUN_NONE;                                    \
-            hb = (h)->link[hb])                                \
+       for (hb = HASHget(h, HASHprobe((h), v));                        \
+            hb != HASHnil(h);                                  \
+            hb = HASHgetlink(h,hb))                            \
                if (ATOMcmp(h->type, v, BUNhead(bi, hb)) == 0)
 #define HASHloop_str_hv(bi, h, hb, v)                          \
-       for (hb = (h)->hash[((BUN *) (v))[-1]&(h)->mask];       \
-            hb != BUN_NONE;                                    \
-            hb = (h)->link[hb])                                \
+       for (hb = HASHget((h),((BUN *) (v))[-1]&(h)->mask);     \
+            hb != HASHnil(h);                                  \
+            hb = HASHgetlink(h,hb))                            \
                if (GDK_STREQ(v, BUNhvar(bi, hb)))
 #define HASHloop_str(bi, h, hb, v)                     \
-       for (hb = (h)->hash[strHash(v)&(h)->mask];      \
-            hb != BUN_NONE;                            \
-            hb = (h)->link[hb])                        \
+       for (hb = HASHget((h),strHash(v)&(h)->mask);    \
+            hb != HASHnil(h);                          \
+            hb = HASHgetlink(h,hb))                    \
                if (GDK_STREQ(v, BUNhvar(bi, hb)))
 
 /*
@@ -2927,8 +2938,8 @@ gdk_export int ALIGNsetH(BAT *b1, BAT *b
  * numbers instead of strings:
  */
 #define HASHloop_fstr(bi, h, hb, idx, v)                               \
-       for (hb = h->hash[strHash(v)&h->mask], idx = 
strLocate((bi.b)->H->vheap,v); \
-            hb != BUN_NONE; hb = h->link[hb])                          \
+       for (hb = HASHget(h, strHash(v)&h->mask), idx = 
strLocate((bi.b)->H->vheap,v); \
+            hb != HASHnil(h); hb = HASHgetlink(h,hb))                          
\
                if (VarHeapValRaw((bi).b->H->heap.base, hb, (bi).b->H->width) 
== idx)
 /*
  * The following example shows how the hashloop is used:
@@ -2958,20 +2969,20 @@ gdk_export int ALIGNsetH(BAT *b1, BAT *b
  * (HASHlooploc) or variable-sized (HASHloopvar).
  */
 #define HASHlooploc(bi, h, hb, v)                              \
-       for (hb = (h)->hash[HASHprobe(h, v)];                   \
-            hb != BUN_NONE;                                    \
-            hb = (h)->link[hb])                                \
+       for (hb = HASHget(h, HASHprobe(h, v));                  \
+            hb != HASHnil(h);                                  \
+            hb = HASHgetlink(h,hb))                            \
                if (ATOMcmp(h->type, v, BUNhloc(bi, hb)) == 0)
 #define HASHloopvar(bi, h, hb, v)                              \
-       for (hb = (h)->hash[HASHprobe(h, v)];                   \
-            hb != BUN_NONE;                                    \
-            hb = (h)->link[hb])                                \
+       for (hb = HASHget(h,HASHprobe(h, v));                   \
+            hb != HASHnil(h);                                  \
+            hb = HASHgetlink(h,hb))                            \
                if (ATOMcmp(h->type, v, BUNhvar(bi, hb)) == 0)
 
 #define HASHloop_TYPE(bi, h, hb, v, TYPE)                      \
-       for (hb = (h)->hash[hash_##TYPE(h, v)];                 \
-            hb != BUN_NONE;                                    \
-            hb = (h)->link[hb])                                \
+       for (hb = HASHget(h, hash_##TYPE(h, v));                        \
+            hb != HASHnil(h);                                  \
+            hb = HASHgetlink(h,hb))                            \
                if (simple_EQ(v, BUNhloc(bi, hb), TYPE))
 
 #define HASHloop_bit(bi, h, hb, v)     HASHloop_TYPE(bi, h, hb, v, bte)
@@ -2987,9 +2998,9 @@ gdk_export int ALIGNsetH(BAT *b1, BAT *b
 #define HASHloop_ptr(bi, h, hb, v)     HASHloop_TYPE(bi, h, hb, v, ptr)
 
 #define HASHloop_any(bi, h, hb, v)                             \
-       for (hb = (h)->hash[hash_any(h, v)];                    \
-            hb != BUN_NONE;                                    \
-            hb = (h)->link[hb])                                \
+       for (hb = HASHget(h, hash_any(h, v));                   \
+            hb != HASHnil(h);                                  \
+            hb = HASHgetlink(h,hb))                            \
                if (atom_EQ(v, BUNhead(bi, hb), (bi).b->htype))
 
 /*
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1915,9 +1915,9 @@ BUNlocate(BAT *b, const void *x, const v
                                BUN i;
 
                                for (i = 0; i <= v->H->hash->mask; i++)
-                                       hcnt += (v->H->hash->hash[i] != 
BUN_NONE);
+                                       hcnt += HASHget(v->H->hash,i) != 
HASHnil(v->H->hash);
                                for (i = 0; i <= v->T->hash->mask; i++)
-                                       tcnt += (v->T->hash->hash[i] != 
BUN_NONE);
+                                       tcnt += HASHget(v->T->hash,i) != 
HASHnil(v->T->hash);
                                if (hcnt < tcnt) {
                                        usemirror();
                                        v = BATmirror(v);
@@ -2976,13 +2976,13 @@ BATassertHeadProps(BAT *b)
                                BUN prb;
                                valp = BUNhead(bi, p);
                                prb = HASHprobe(hs, valp);
-                               for (hb = hs->hash[prb];
-                                    hb != BUN_NONE;
-                                    hb = hs->link[hb])
+                               for (hb = HASHget(hs,prb);
+                                    hb != HASHnil(hs);
+                                    hb = HASHgetlink(hs,hb))
                                        if (cmpf(valp, BUNhead(bi, hb)) == 0)
                                                assert(!b->hkey);
-                               hs->link[p] = hs->hash[prb];
-                               hs->hash[prb] = p;
+                               HASHputlink(hs,p, HASHget(hs,prb));
+                               HASHput(hs,prb,p);
                                cmp = cmpf(valp, nilp);
                                assert(!b->H->nonil || cmp != 0);
                                if (cmp == 0)
@@ -3242,9 +3242,9 @@ BATderiveHeadProps(BAT *b, int expensive
                prev = valp;
                if (key && hs) {
                        prb = HASHprobe(hs, valp);
-                       for (hb = hs->hash[prb];
-                            hb != BUN_NONE;
-                            hb = hs->link[hb]) {
+                       for (hb = HASHget(hs,prb);
+                            hb != HASHnil(hs);
+                            hb = HASHgetlink(hs,hb)) {
                                if (cmpf(valp, BUNhead(bi, hb)) == 0) {
                                        key = 0;
                                        b->H->nokey[0] = hb;
@@ -3252,8 +3252,8 @@ BATderiveHeadProps(BAT *b, int expensive
                                        break;
                                }
                        }
-                       hs->link[p] = hs->hash[prb];
-                       hs->hash[prb] = p;
+                       HASHputlink(hs,p, HASHget(hs,prb));
+                       HASHput(hs,prb,p);
                }
        }
        if (hs) {
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -102,12 +102,12 @@
                for (r = BUNfirst(b), p = r, q = r + BATcount(b); p < q; p++) { 
\
                        prb = hash_##TYPE(hs, &w[p]);                   \
                        if (gc) {                                       \
-                               for (hb = hs->hash[prb];                \
-                                    hb != BUN_NONE &&                  \
+                               for (hb = HASHget(hs,prb);              \
+                                    hb != HASHnil(hs) &&                       
\
                                      grps[hb - r] == grps[p - r];      \
-                                    hb = hs->link[hb]) {               \
-                                       assert(hs->link[hb] == BUN_NONE \
-                                              || hs->link[hb] < hb);   \
+                                    hb = HASHgetlink(hs,hb)) {         \
+                                       assert( HASHgetlink(hs,hb) == 
HASHnil(hs) \
+                                              || HASHgetlink(hs,hb) < hb);     
\
                                        if (w[p] == w[hb]) {            \
                                                oid grp = ngrps[hb - r]; \
                                                ngrps[p - r] = grp;     \
@@ -119,17 +119,17 @@
                                                break;                  \
                                        }                               \
                                }                                       \
-                               if (hb != BUN_NONE &&                   \
+                               if (hb != HASHnil(hs) &&                        
\
                                    grps[hb - r] != grps[p - r]) {      \
                                        /* we didn't assign a group */  \
                                        /* yet */                       \
-                                       hb = BUN_NONE;                  \
+                                       hb = HASHnil(hs);                       
\
                                }                                       \
                        } else if (grps) {                              \
                                prb = ((prb << bits) ^ (BUN) grps[p-r]) & 
hs->mask; \
-                               for (hb = hs->hash[prb];                \
-                                    hb != BUN_NONE;                    \
-                                    hb = hs->link[hb]) {               \
+                               for (hb = HASHget(hs,prb);              \
+                                    hb != HASHnil(hs);                 \
+                                    hb = HASHgetlink(hs,hb)) {         \
                                        if (grps[hb - r] == grps[p - r] && \
                                            w[p] == w[hb]) {            \
                                                oid grp = ngrps[hb - r]; \
@@ -143,9 +143,9 @@
                                        }                               \
                                }                                       \
                        } else {                                        \
-                               for (hb = hs->hash[prb];                \
-                                    hb != BUN_NONE;                    \
-                                    hb = hs->link[hb]) {               \
+                               for (hb = HASHget(hs,prb);              \
+                                    hb != HASHnil(hs);                 \
+                                    hb = HASHgetlink(hs,hb)) {         \
                                        if (w[p] == w[hb]) {            \
                                                oid grp = ngrps[hb - r]; \
                                                ngrps[p - r] = grp;     \
@@ -158,11 +158,11 @@
                                        }                               \
                                }                                       \
                        }                                               \
-                       if (hb == BUN_NONE) {                           \
+                       if (hb == HASHnil(hs)) {                                
\
                                GRPnotfound();                          \
                                /* enter new group into hash table */   \
-                               hs->link[p] = hs->hash[prb];            \
-                               hs->hash[prb] = p;                      \
+                               HASHputlink(hs,p, HASHget(hs,prb));     \
+                               HASHput(hs,prb,p);                      \
                        }                                               \
                }                                                       \
        } while (0)
@@ -478,19 +478,19 @@ BATgroup_internal(BAT **groups, BAT **ex
                        /* skip irrelevant BUNs after the current
                         * BUNs; exploit that hash-table links
                         * backwards through BAT */
-                       for (hb = hs->hash[HASHprobe(hs, v)];
-                            hb != BUN_NONE && hb >= p;
-                            hb = hs->link[hb]) {
-                               assert(hs->link[hb] == BUN_NONE
-                                      || hs->link[hb] < hb);
+                       for (hb = HASHget(hs,HASHprobe(hs, v));
+                            hb != HASHnil(hs)&& hb >= p;
+                            hb = HASHgetlink(hs,hb)) {
+                               assert( HASHgetlink(hs,hb) == HASHnil(hs)
+                                      || HASHgetlink(hs,hb) < hb);
                        }
                                ;
                        if (gc) {
                                for (;
-                                    hb != BUN_NONE && grps[hb - r] == grps[p - 
r];
-                                    hb = hs->link[hb]) {
-                                       assert(hs->link[hb] == BUN_NONE
-                                              || hs->link[hb] < hb);
+                                    hb != HASHnil(hs) && grps[hb - r] == 
grps[p - r];
+                                    hb = HASHgetlink(hs,hb)) {
+                                       assert( HASHgetlink(hs,hb) == 
HASHnil(hs)
+                                              || HASHgetlink(hs,hb) < hb);
                                        if (cmp(v, BUNtail(bi, hb)) == 0) {
                                                oid grp = ngrps[hb - r];
                                                ngrps[p - r] = grp;
@@ -502,16 +502,16 @@ BATgroup_internal(BAT **groups, BAT **ex
                                                break;
                                        }
                                }
-                               if (hb != BUN_NONE &&
+                               if (hb != HASHnil(hs) &&
                                    grps[hb - r] != grps[p - r]) {
                                        /* we didn't assign a group
                                         * yet */
-                                       hb = BUN_NONE;
+                                       hb = HASHnil(hs);
                                }
                        } else if (grps) {
                                for (;
-                                    hb != BUN_NONE;
-                                    hb = hs->link[hb]) {
+                                    hb != HASHnil(hs);
+                                    hb = HASHgetlink(hs,hb)) {
                                        if (grps[hb - r] == grps[p - r] &&
                                            cmp(v, BUNtail(bi, hb)) == 0) {
                                                oid grp = ngrps[hb - r];
@@ -526,8 +526,8 @@ BATgroup_internal(BAT **groups, BAT **ex
                                }
                        } else {
                                for (;
-                                    hb != BUN_NONE;
-                                    hb = hs->link[hb]) {
+                                    hb != HASHnil(hs);
+                                    hb = HASHgetlink(hs,hb)) {
                                        if (cmp(v, BUNtail(bi, hb)) == 0) {
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to