Changeset: a24f92121547 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a24f92121547
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_bbp.c
        gdk/gdk_calc.c
        gdk/gdk_group.c
        gdk/gdk_hash.c
        gdk/gdk_hash.h
        gdk/gdk_heap.c
        gdk/gdk_imprints.c
        gdk/gdk_join.c
        gdk/gdk_orderidx.c
        gdk/gdk_private.h
        gdk/gdk_select.c
        gdk/gdk_storage.c
        gdk/gdk_unique.c
        monetdb5/mal/mal_resource.h
        monetdb5/modules/kernel/bat5.c
        monetdb5/modules/kernel/status.c
        monetdb5/modules/mal/tokenizer.c
        sql/backends/monet5/sql.c
        sql/storage/bat/bat_storage.c
        sql/test/mergetables/Tests/sqlsmith-exist-lateral.reqtests
Branch: default
Log Message:

merged with linear-hashing


diffs (truncated from 2500 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -294,8 +294,10 @@ const char *GDKversion(void);
 size_t GDKvm_cursize(void);
 void *GDKzalloc(size_t size) __attribute__((__malloc__)) 
__attribute__((__alloc_size__(1))) __attribute__((__warn_unused_result__));
 void HASHdestroy(BAT *b);
+gdk_return HASHgrowbucket(BAT *b);
 BUN HASHlist(Hash *h, BUN i);
 BUN HASHprobe(const Hash *h, const void *v);
+gdk_return HASHupgradehashheap(BAT *b, BUN cap);
 void HEAP_free(Heap *heap, var_t block);
 void HEAP_initialize(Heap *heap, size_t nbytes, size_t nprivate, int 
alignment);
 var_t HEAP_malloc(Heap *heap, size_t nbytes);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -537,22 +537,6 @@ typedef size_t BUN;
 #endif
 #define BUN_MAX (BUN_NONE - 1) /* maximum allowed size of a BAT */
 
-#define BUN2 2
-#define BUN4 4
-#if SIZEOF_BUN > 4
-#define BUN8 8
-#endif
-typedef uint16_t BUN2type;
-typedef uint32_t BUN4type;
-#if SIZEOF_BUN > 4
-typedef uint64_t BUN8type;
-#endif
-#define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF))
-#define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF))
-#if SIZEOF_BUN > 4
-#define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
-#endif
-
 /*
  * @- Checking and Error definitions:
  */
@@ -593,17 +577,7 @@ typedef struct {
        bat parentid;           /* cache id of VIEW parent bat */
 } Heap;
 
-typedef struct {
-       int type;               /* type of index entity */
-       int width;              /* width of hash entries */
-       BUN nil;                /* nil representation */
-       BUN lim;                /* collision list size */
-       BUN mask;               /* number of hash buckets-1 (power of 2) */
-       void *Hash;             /* hash table */
-       void *Link;             /* collision list */
-       Heap heap;              /* heap where the hash is stored */
-} Hash;
-
+typedef struct Hash Hash;
 typedef struct Imprints Imprints;
 
 /*
@@ -859,7 +833,7 @@ typedef struct BATiter {
  *  HEAPload (Heap *h, str nme,ext, bool trunc);
  * @item int
  * @tab
- *  HEAPsave (Heap *h, str nme,ext);
+ *  HEAPsave (Heap *h, str nme,ext, bool dosync);
  * @item int
  * @tab
  *  HEAPcopy (Heap *dst,*src);
@@ -1895,24 +1869,6 @@ bunfastappVAR(BAT *b, const void *v)
 }
 
 /*
- * @- Built-in Accelerator Functions
- *
- * @multitable @columnfractions 0.08 0.7
- * @item BAT*
- * @tab
- *  BAThash (BAT *b)
- * @end multitable
- *
- * The current BAT implementation supports three search accelerators:
- * hashing, imprints, and ordered index.
- *
- * The routine BAThash makes sure that a hash accelerator on the tail of the
- * BAT exists. GDK_FAIL is returned upon failure to create the supportive
- * structures.
- */
-gdk_export gdk_return BAThash(BAT *b);
-
-/*
  * @- Column Imprints Functions
  *
  * @multitable @columnfractions 0.08 0.7
@@ -2627,62 +2583,6 @@ gdk_export void VIEWbounds(BAT *b, BAT *
        for (q = BUNlast(r), p = 0; p < q; p++)
 
 /*
- * @- hash-table supported loop over BUNs
- * The first parameter `b' is a BAT, the second (`h') should point to
- * `b->thash', and `v' a pointer to an atomic value (corresponding
- * to the head column of `b'). The 'hb' is an integer index, pointing
- * out the `hb'-th BUN.
- */
-#define HASHloop(bi, h, hb, v)                                 \
-       for (hb = HASHget(h, HASHprobe((h), v));                \
-            hb != HASHnil(h);                                  \
-            hb = HASHgetlink(h,hb))                            \
-               if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
-#define HASHloop_str_hv(bi, h, hb, v)                          \
-       for (hb = HASHget((h),((BUN *) (v))[-1]&(h)->mask);     \
-            hb != HASHnil(h);                                  \
-            hb = HASHgetlink(h,hb))                            \
-               if (GDK_STREQ(v, BUNtvar(bi, hb)))
-#define HASHloop_str(bi, h, hb, v)                     \
-       for (hb = HASHget((h),GDK_STRHASH(v)&(h)->mask);        \
-            hb != HASHnil(h);                          \
-            hb = HASHgetlink(h,hb))                    \
-               if (GDK_STREQ(v, BUNtvar(bi, hb)))
-
-/*
- * HASHloops come in various flavors, from the general HASHloop, as
- * above, to specialized versions (for speed) where the type is known
- * (e.g. HASHloop_int), or the fact that the atom is fixed-sized
- * (HASHlooploc) or variable-sized (HASHloopvar).
- */
-#define HASHlooploc(bi, h, hb, v)                              \
-       for (hb = HASHget(h, HASHprobe(h, v));                  \
-            hb != HASHnil(h);                                  \
-            hb = HASHgetlink(h,hb))                            \
-               if (ATOMcmp(h->type, v, BUNtloc(bi, hb)) == 0)
-#define HASHloopvar(bi, h, hb, v)                              \
-       for (hb = HASHget(h,HASHprobe(h, v));                   \
-            hb != HASHnil(h);                                  \
-            hb = HASHgetlink(h,hb))                            \
-               if (ATOMcmp(h->type, v, BUNtvar(bi, hb)) == 0)
-
-#define HASHloop_TYPE(bi, h, hb, v, TYPE)                      \
-       for (hb = HASHget(h, hash_##TYPE(h, v));                \
-            hb != HASHnil(h);                                  \
-            hb = HASHgetlink(h,hb))                            \
-               if (* (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
-
-#define HASHloop_bte(bi, h, hb, v)     HASHloop_TYPE(bi, h, hb, v, bte)
-#define HASHloop_sht(bi, h, hb, v)     HASHloop_TYPE(bi, h, hb, v, sht)
-#define HASHloop_int(bi, h, hb, v)     HASHloop_TYPE(bi, h, hb, v, int)
-#define HASHloop_lng(bi, h, hb, v)     HASHloop_TYPE(bi, h, hb, v, lng)
-#ifdef HAVE_HGE
-#define HASHloop_hge(bi, h, hb, v)     HASHloop_TYPE(bi, h, hb, v, hge)
-#endif
-#define HASHloop_flt(bi, h, hb, v)     HASHloop_TYPE(bi, h, hb, v, flt)
-#define HASHloop_dbl(bi, h, hb, v)     HASHloop_TYPE(bi, h, hb, v, dbl)
-
-/*
  * @+ Common BAT Operations
  * Much used, but not necessarily kernel-operations on BATs.
  *
@@ -2693,7 +2593,8 @@ gdk_export void VIEWbounds(BAT *b, BAT *
 enum prop_t {
        GDK_MIN_VALUE = 3,      /* smallest non-nil value in BAT */
        GDK_MAX_VALUE,          /* largest non-nil value in BAT */
-       GDK_HASH_MASK,          /* last used hash mask */
+       GDK_HASH_BUCKETS,       /* last used hash bucket size */
+       GDK_NUNIQUE,            /* number of unique values */
 };
 
 /*
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -452,9 +452,6 @@ BATextend(BAT *b, BUN newcap)
        if (b->theap.base &&
            HEAPextend(&b->theap, theap_size, b->batRestricted == BAT_READ) != 
GDK_SUCCEED)
                return GDK_FAIL;
-       HASHdestroy(b);
-       IMPSdestroy(b);
-       OIDXdestroy(b);
        return GDK_SUCCEED;
 }
 
@@ -1078,13 +1075,14 @@ BUNappend(BAT *b, const void *t, bool fo
 
        IMPSdestroy(b); /* no support for inserts in imprints yet */
        OIDXdestroy(b);
+       BATrmprop(b, GDK_NUNIQUE);
 #if 0          /* enable if we have more properties than just min/max */
        PROPrec *prop;
        do {
                for (prop = b->tprops; prop; prop = prop->next)
                        if (prop->id != GDK_MAX_VALUE &&
                            prop->id != GDK_MIN_VALUE &&
-                           prop->id != GDK_HASH_MASK) {
+                           prop->id != GDK_HASH_BUCKETS) {
                                BATrmprop(b, prop->id);
                                break;
                        }
@@ -1092,6 +1090,9 @@ BUNappend(BAT *b, const void *t, bool fo
 #endif
        if (b->thash) {
                HASHins(b, p, t);
+               if (b->thash)
+                       BATsetprop(b, GDK_NUNIQUE,
+                                  TYPE_oid, &(oid){b->thash->nunique});
                if (tsize && tsize != b->tvheap->size)
                        HEAPwarm(b->tvheap);
        }
@@ -1159,12 +1160,13 @@ BUNdelete(BAT *b, oid o)
        IMPSdestroy(b);
        OIDXdestroy(b);
        HASHdestroy(b);
+       BATrmprop(b, GDK_NUNIQUE);
 #if 0          /* enable if we have more properties than just min/max */
        do {
                for (prop = b->tprops; prop; prop = prop->next)
                        if (prop->id != GDK_MAX_VALUE &&
                            prop->id != GDK_MIN_VALUE &&
-                           prop->id != GDK_HASH_MASK) {
+                           prop->id != GDK_HASH_BUCKETS) {
                                BATrmprop(b, prop->id);
                                break;
                        }
@@ -1247,12 +1249,13 @@ BUNinplace(BAT *b, BUN p, const void *t,
                                BATrmprop(b, GDK_MIN_VALUE);
                        }
                }
+               BATrmprop(b, GDK_NUNIQUE);
 #if 0          /* enable if we have more properties than just min/max */
                do {
                        for (prop = b->tprops; prop; prop = prop->next)
                                if (prop->id != GDK_MAX_VALUE &&
                                    prop->id != GDK_MIN_VALUE &&
-                                   prop->id != GDK_HASH_MASK) {
+                                   prop->id != GDK_HASH_BUCKETS) {
                                        BATrmprop(b, prop->id);
                                        break;
                                }
@@ -2354,7 +2357,6 @@ BATassertProps(BAT *b)
                        const char *nme = BBP_physical(b->batCacheid);
                        Hash *hs = NULL;
                        BUN mask;
-                       int len;
 
                        if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
                                fprintf(stderr,
@@ -2362,8 +2364,8 @@ BATassertProps(BAT *b)
                                        "hash table\n", MT_thread_getname());
                                goto abort_check;
                        }
-                       len = snprintf(hs->heap.filename, 
sizeof(hs->heap.filename), "%s.hash%d", nme, THRgettid());
-                       if (len == -1 || len > (int) sizeof(hs->heap.filename)) 
{
+                       if (snprintf(hs->heaplink.filename, 
sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, THRgettid()) >= (int) 
sizeof(hs->heaplink.filename) ||
+                           snprintf(hs->heapbckt.filename, 
sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, THRgettid()) >= (int) 
sizeof(hs->heapbckt.filename)) {
                                GDKfree(hs);
                                fprintf(stderr,
                                        "#%s: BATassertProps: heap filename "
@@ -2376,10 +2378,12 @@ BATassertProps(BAT *b)
                                mask = (BUN) 1 << 16;
                        else
                                mask = HASHmask(b->batCount);
-                       if ((hs->heap.farmid = BBPselectfarm(TRANSIENT, 
b->ttype,
-                                                       hashheap)) < 0 ||
+                       if ((hs->heaplink.farmid = BBPselectfarm(
+                                    TRANSIENT, b->ttype, hashheap)) < 0 ||
+                           (hs->heapbckt.farmid = BBPselectfarm(
+                                   TRANSIENT, b->ttype, hashheap)) < 0 ||
                            HASHnew(hs, b->ttype, BUNlast(b),
-                                   mask, BUN_NONE) != GDK_SUCCEED) {
+                                   mask, BUN_NONE, false) != GDK_SUCCEED) {
                                GDKfree(hs);
                                fprintf(stderr,
                                        "#%s: BATassertProps: cannot allocate "
@@ -2402,17 +2406,18 @@ BATassertProps(BAT *b)
                                        seenmin |= cmp == 0;
                                }
                                prb = HASHprobe(hs, valp);
-                               for (hb = HASHget(hs,prb);
+                               for (hb = HASHget(hs, prb);
                                     hb != HASHnil(hs);
-                                    hb = HASHgetlink(hs,hb))
+                                    hb = HASHgetlink(hs, hb))
                                        if (cmpf(valp, BUNtail(bi, hb)) == 0)
                                                assert(!b->tkey);
-                               HASHputlink(hs,p, HASHget(hs,prb));
-                               HASHput(hs,prb,p);
+                               HASHputlink(hs, p, HASHget(hs, prb));
+                               HASHput(hs, prb, p);
                                assert(!b->tnonil || !isnil);
                                seennil |= isnil;
                        }
-                       HEAPfree(&hs->heap, true);
+                       HEAPfree(&hs->heaplink, true);
+                       HEAPfree(&hs->heapbckt, true);
                        GDKfree(hs);
                }
          abort_check:
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -68,6 +68,7 @@ insert_string_bat(BAT *b, BAT *n, BAT *s
        size_t off;             /* offset within n's string heap */
        struct canditer ci;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to