Changeset: a24f92121547 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a24f92121547
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_bbp.c
gdk/gdk_calc.c
gdk/gdk_group.c
gdk/gdk_hash.c
gdk/gdk_hash.h
gdk/gdk_heap.c
gdk/gdk_imprints.c
gdk/gdk_join.c
gdk/gdk_orderidx.c
gdk/gdk_private.h
gdk/gdk_select.c
gdk/gdk_storage.c
gdk/gdk_unique.c
monetdb5/mal/mal_resource.h
monetdb5/modules/kernel/bat5.c
monetdb5/modules/kernel/status.c
monetdb5/modules/mal/tokenizer.c
sql/backends/monet5/sql.c
sql/storage/bat/bat_storage.c
sql/test/mergetables/Tests/sqlsmith-exist-lateral.reqtests
Branch: default
Log Message:
merged with linear-hashing
diffs (truncated from 2500 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -294,8 +294,10 @@ const char *GDKversion(void);
size_t GDKvm_cursize(void);
void *GDKzalloc(size_t size) __attribute__((__malloc__))
__attribute__((__alloc_size__(1))) __attribute__((__warn_unused_result__));
void HASHdestroy(BAT *b);
+gdk_return HASHgrowbucket(BAT *b);
BUN HASHlist(Hash *h, BUN i);
BUN HASHprobe(const Hash *h, const void *v);
+gdk_return HASHupgradehashheap(BAT *b, BUN cap);
void HEAP_free(Heap *heap, var_t block);
void HEAP_initialize(Heap *heap, size_t nbytes, size_t nprivate, int
alignment);
var_t HEAP_malloc(Heap *heap, size_t nbytes);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -537,22 +537,6 @@ typedef size_t BUN;
#endif
#define BUN_MAX (BUN_NONE - 1) /* maximum allowed size of a BAT */
-#define BUN2 2
-#define BUN4 4
-#if SIZEOF_BUN > 4
-#define BUN8 8
-#endif
-typedef uint16_t BUN2type;
-typedef uint32_t BUN4type;
-#if SIZEOF_BUN > 4
-typedef uint64_t BUN8type;
-#endif
-#define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF))
-#define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF))
-#if SIZEOF_BUN > 4
-#define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
-#endif
-
/*
* @- Checking and Error definitions:
*/
@@ -593,17 +577,7 @@ typedef struct {
bat parentid; /* cache id of VIEW parent bat */
} Heap;
-typedef struct {
- int type; /* type of index entity */
- int width; /* width of hash entries */
- BUN nil; /* nil representation */
- BUN lim; /* collision list size */
- BUN mask; /* number of hash buckets-1 (power of 2) */
- void *Hash; /* hash table */
- void *Link; /* collision list */
- Heap heap; /* heap where the hash is stored */
-} Hash;
-
+typedef struct Hash Hash;
typedef struct Imprints Imprints;
/*
@@ -859,7 +833,7 @@ typedef struct BATiter {
* HEAPload (Heap *h, str nme,ext, bool trunc);
* @item int
* @tab
- * HEAPsave (Heap *h, str nme,ext);
+ * HEAPsave (Heap *h, str nme,ext, bool dosync);
* @item int
* @tab
* HEAPcopy (Heap *dst,*src);
@@ -1895,24 +1869,6 @@ bunfastappVAR(BAT *b, const void *v)
}
/*
- * @- Built-in Accelerator Functions
- *
- * @multitable @columnfractions 0.08 0.7
- * @item BAT*
- * @tab
- * BAThash (BAT *b)
- * @end multitable
- *
- * The current BAT implementation supports three search accelerators:
- * hashing, imprints, and ordered index.
- *
- * The routine BAThash makes sure that a hash accelerator on the tail of the
- * BAT exists. GDK_FAIL is returned upon failure to create the supportive
- * structures.
- */
-gdk_export gdk_return BAThash(BAT *b);
-
-/*
* @- Column Imprints Functions
*
* @multitable @columnfractions 0.08 0.7
@@ -2627,62 +2583,6 @@ gdk_export void VIEWbounds(BAT *b, BAT *
for (q = BUNlast(r), p = 0; p < q; p++)
/*
- * @- hash-table supported loop over BUNs
- * The first parameter `b' is a BAT, the second (`h') should point to
- * `b->thash', and `v' a pointer to an atomic value (corresponding
- * to the head column of `b'). The 'hb' is an integer index, pointing
- * out the `hb'-th BUN.
- */
-#define HASHloop(bi, h, hb, v) \
- for (hb = HASHget(h, HASHprobe((h), v)); \
- hb != HASHnil(h); \
- hb = HASHgetlink(h,hb)) \
- if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
-#define HASHloop_str_hv(bi, h, hb, v) \
- for (hb = HASHget((h),((BUN *) (v))[-1]&(h)->mask); \
- hb != HASHnil(h); \
- hb = HASHgetlink(h,hb)) \
- if (GDK_STREQ(v, BUNtvar(bi, hb)))
-#define HASHloop_str(bi, h, hb, v) \
- for (hb = HASHget((h),GDK_STRHASH(v)&(h)->mask); \
- hb != HASHnil(h); \
- hb = HASHgetlink(h,hb)) \
- if (GDK_STREQ(v, BUNtvar(bi, hb)))
-
-/*
- * HASHloops come in various flavors, from the general HASHloop, as
- * above, to specialized versions (for speed) where the type is known
- * (e.g. HASHloop_int), or the fact that the atom is fixed-sized
- * (HASHlooploc) or variable-sized (HASHloopvar).
- */
-#define HASHlooploc(bi, h, hb, v) \
- for (hb = HASHget(h, HASHprobe(h, v)); \
- hb != HASHnil(h); \
- hb = HASHgetlink(h,hb)) \
- if (ATOMcmp(h->type, v, BUNtloc(bi, hb)) == 0)
-#define HASHloopvar(bi, h, hb, v) \
- for (hb = HASHget(h,HASHprobe(h, v)); \
- hb != HASHnil(h); \
- hb = HASHgetlink(h,hb)) \
- if (ATOMcmp(h->type, v, BUNtvar(bi, hb)) == 0)
-
-#define HASHloop_TYPE(bi, h, hb, v, TYPE) \
- for (hb = HASHget(h, hash_##TYPE(h, v)); \
- hb != HASHnil(h); \
- hb = HASHgetlink(h,hb)) \
- if (* (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
-
-#define HASHloop_bte(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, bte)
-#define HASHloop_sht(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, sht)
-#define HASHloop_int(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, int)
-#define HASHloop_lng(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, lng)
-#ifdef HAVE_HGE
-#define HASHloop_hge(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, hge)
-#endif
-#define HASHloop_flt(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, flt)
-#define HASHloop_dbl(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, dbl)
-
-/*
* @+ Common BAT Operations
* Much used, but not necessarily kernel-operations on BATs.
*
@@ -2693,7 +2593,8 @@ gdk_export void VIEWbounds(BAT *b, BAT *
enum prop_t {
GDK_MIN_VALUE = 3, /* smallest non-nil value in BAT */
GDK_MAX_VALUE, /* largest non-nil value in BAT */
- GDK_HASH_MASK, /* last used hash mask */
+ GDK_HASH_BUCKETS, /* last used hash bucket size */
+ GDK_NUNIQUE, /* number of unique values */
};
/*
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -452,9 +452,6 @@ BATextend(BAT *b, BUN newcap)
if (b->theap.base &&
HEAPextend(&b->theap, theap_size, b->batRestricted == BAT_READ) !=
GDK_SUCCEED)
return GDK_FAIL;
- HASHdestroy(b);
- IMPSdestroy(b);
- OIDXdestroy(b);
return GDK_SUCCEED;
}
@@ -1078,13 +1075,14 @@ BUNappend(BAT *b, const void *t, bool fo
IMPSdestroy(b); /* no support for inserts in imprints yet */
OIDXdestroy(b);
+ BATrmprop(b, GDK_NUNIQUE);
#if 0 /* enable if we have more properties than just min/max */
PROPrec *prop;
do {
for (prop = b->tprops; prop; prop = prop->next)
if (prop->id != GDK_MAX_VALUE &&
prop->id != GDK_MIN_VALUE &&
- prop->id != GDK_HASH_MASK) {
+ prop->id != GDK_HASH_BUCKETS) {
BATrmprop(b, prop->id);
break;
}
@@ -1092,6 +1090,9 @@ BUNappend(BAT *b, const void *t, bool fo
#endif
if (b->thash) {
HASHins(b, p, t);
+ if (b->thash)
+ BATsetprop(b, GDK_NUNIQUE,
+ TYPE_oid, &(oid){b->thash->nunique});
if (tsize && tsize != b->tvheap->size)
HEAPwarm(b->tvheap);
}
@@ -1159,12 +1160,13 @@ BUNdelete(BAT *b, oid o)
IMPSdestroy(b);
OIDXdestroy(b);
HASHdestroy(b);
+ BATrmprop(b, GDK_NUNIQUE);
#if 0 /* enable if we have more properties than just min/max */
do {
for (prop = b->tprops; prop; prop = prop->next)
if (prop->id != GDK_MAX_VALUE &&
prop->id != GDK_MIN_VALUE &&
- prop->id != GDK_HASH_MASK) {
+ prop->id != GDK_HASH_BUCKETS) {
BATrmprop(b, prop->id);
break;
}
@@ -1247,12 +1249,13 @@ BUNinplace(BAT *b, BUN p, const void *t,
BATrmprop(b, GDK_MIN_VALUE);
}
}
+ BATrmprop(b, GDK_NUNIQUE);
#if 0 /* enable if we have more properties than just min/max */
do {
for (prop = b->tprops; prop; prop = prop->next)
if (prop->id != GDK_MAX_VALUE &&
prop->id != GDK_MIN_VALUE &&
- prop->id != GDK_HASH_MASK) {
+ prop->id != GDK_HASH_BUCKETS) {
BATrmprop(b, prop->id);
break;
}
@@ -2354,7 +2357,6 @@ BATassertProps(BAT *b)
const char *nme = BBP_physical(b->batCacheid);
Hash *hs = NULL;
BUN mask;
- int len;
if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
fprintf(stderr,
@@ -2362,8 +2364,8 @@ BATassertProps(BAT *b)
"hash table\n", MT_thread_getname());
goto abort_check;
}
- len = snprintf(hs->heap.filename,
sizeof(hs->heap.filename), "%s.hash%d", nme, THRgettid());
- if (len == -1 || len > (int) sizeof(hs->heap.filename))
{
+ if (snprintf(hs->heaplink.filename,
sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, THRgettid()) >= (int)
sizeof(hs->heaplink.filename) ||
+ snprintf(hs->heapbckt.filename,
sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, THRgettid()) >= (int)
sizeof(hs->heapbckt.filename)) {
GDKfree(hs);
fprintf(stderr,
"#%s: BATassertProps: heap filename "
@@ -2376,10 +2378,12 @@ BATassertProps(BAT *b)
mask = (BUN) 1 << 16;
else
mask = HASHmask(b->batCount);
- if ((hs->heap.farmid = BBPselectfarm(TRANSIENT,
b->ttype,
- hashheap)) < 0 ||
+ if ((hs->heaplink.farmid = BBPselectfarm(
+ TRANSIENT, b->ttype, hashheap)) < 0 ||
+ (hs->heapbckt.farmid = BBPselectfarm(
+ TRANSIENT, b->ttype, hashheap)) < 0 ||
HASHnew(hs, b->ttype, BUNlast(b),
- mask, BUN_NONE) != GDK_SUCCEED) {
+ mask, BUN_NONE, false) != GDK_SUCCEED) {
GDKfree(hs);
fprintf(stderr,
"#%s: BATassertProps: cannot allocate "
@@ -2402,17 +2406,18 @@ BATassertProps(BAT *b)
seenmin |= cmp == 0;
}
prb = HASHprobe(hs, valp);
- for (hb = HASHget(hs,prb);
+ for (hb = HASHget(hs, prb);
hb != HASHnil(hs);
- hb = HASHgetlink(hs,hb))
+ hb = HASHgetlink(hs, hb))
if (cmpf(valp, BUNtail(bi, hb)) == 0)
assert(!b->tkey);
- HASHputlink(hs,p, HASHget(hs,prb));
- HASHput(hs,prb,p);
+ HASHputlink(hs, p, HASHget(hs, prb));
+ HASHput(hs, prb, p);
assert(!b->tnonil || !isnil);
seennil |= isnil;
}
- HEAPfree(&hs->heap, true);
+ HEAPfree(&hs->heaplink, true);
+ HEAPfree(&hs->heapbckt, true);
GDKfree(hs);
}
abort_check:
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -68,6 +68,7 @@ insert_string_bat(BAT *b, BAT *n, BAT *s
size_t off; /* offset within n's string heap */
struct canditer ci;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list