Changeset: 0f951a98884e for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0f951a98884e Modified Files: gdk/gdk_bat.c gdk/gdk_batop.c gdk/gdk_group.c gdk/gdk_hash.c gdk/gdk_hash.h gdk/gdk_join.c gdk/gdk_private.h gdk/gdk_select.c gdk/gdk_unique.c sql/backends/monet5/rel_bin.c sql/backends/monet5/rel_bin.h sql/backends/monet5/sql.c sql/backends/monet5/sql_cat.c sql/backends/monet5/sql_statement.c sql/server/rel_dump.c sql/test/BugTracker-2018/Tests/dependency_column_on_sequence.Bug-6618.stable.err sql/test/BugTracker-2018/Tests/dependency_column_on_sequence.Bug-6618.stable.out sql/test/BugTracker/Tests/case_in_aggr_bug.SF-1506545.sql sql/test/seq-default.sql Branch: default Log Message:
Merge with Mar2018 branch. diffs (truncated from 999 to 300 lines): diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -1050,8 +1050,10 @@ BUNappend(BAT *b, const void *t, bool fo OIDXdestroy(b); PROPdestroy(b->tprops); b->tprops = NULL; - if (b->thash == (Hash *) 1) { - /* don't bother first loading the hash to then change it */ + if (b->thash == (Hash *) 1 || + (b->thash && ((size_t *) b->thash->heap.base)[0] & (1 << 24))) { + /* don't bother first loading the hash to then change + * it, also, cannot maintain persistent hashes */ HASHdestroy(b); } if (b->thash) { diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -597,9 +597,11 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f OIDXdestroy(b); PROPdestroy(b->tprops); b->tprops = NULL; - if (b->thash == (Hash *) 1 || BATcount(b) == 0) { + if (b->thash == (Hash *) 1 || BATcount(b) == 0 || + (b->thash && ((size_t *) b->thash->heap.base)[0] & (1 << 24))) { /* don't bother first loading the hash to then change - * it, or updating the hash if we replace the heap */ + * it, or updating the hash if we replace the heap, + * also, we cannot maintain persistent hashes */ HASHdestroy(b); } @@ -1058,12 +1060,9 @@ BATkeyed(BAT *b) b->tkey = true; } else if (BATcheckhash(b) || (b->batPersistence == PERSISTENT && - BAThash(b, 0) == GDK_SUCCEED) -#ifndef DISABLE_PARENT_HASH - || (VIEWtparent(b) != 0 && - BATcheckhash(BBPdescriptor(VIEWtparent(b)))) -#endif - ) { + BAThash(b, 0) == GDK_SUCCEED) || + (VIEWtparent(b) != 0 && + BATcheckhash(BBPdescriptor(VIEWtparent(b))))) { /* we already have a hash table on b, or b is * persistent and we could create a hash * table, or b is a view on a bat that already @@ -1071,13 +1070,11 @@ BATkeyed(BAT *b) BUN lo = 0; hs = b->thash; -#ifndef DISABLE_PARENT_HASH - if (b->thash == NULL && VIEWtparent(b) != 0) { + if (hs == NULL && VIEWtparent(b) != 0) { BAT *b2 = BBPdescriptor(VIEWtparent(b)); lo = (BUN) ((b->theap.base - b2->theap.base) >> b->tshift); hs = b2->thash; } -#endif for (q = BUNlast(b), p = 0; p < q; p++) { const void *v = BUNtail(bi, p); for (hb = HASHgetlink(hs, p + lo); diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -506,9 +506,7 @@ BATgroup_internal(BAT **groups, BAT **ex Hash *hs = NULL; BUN hb; BUN maxgrps; -#ifndef DISABLE_PARENT_HASH bat parent; -#endif BUN start, end, cnt; BUN lo = 0; const oid *restrict cand, *candend; @@ -968,12 +966,9 @@ BATgroup_internal(BAT **groups, BAT **ex } else if (g == NULL && (BATcheckhash(b) || (b->batPersistence == PERSISTENT && - BAThash(b, 0) == GDK_SUCCEED) -#ifndef DISABLE_PARENT_HASH - || ((parent = VIEWtparent(b)) != 0 && - BATcheckhash(BBPdescriptor(parent))) -#endif - )) { + BAThash(b, 0) == GDK_SUCCEED) || + ((parent = VIEWtparent(b)) != 0 && + BATcheckhash(BBPdescriptor(parent))))) { /* we already have a hash table on b, or b is * persistent and we could create a hash table, or b * is a view on a bat that already has a hash table; @@ -993,7 +988,6 @@ BATgroup_internal(BAT **groups, BAT **ex e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0, h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0, subsorted); -#ifndef DISABLE_PARENT_HASH if (b->thash == NULL && (parent = VIEWtparent(b)) != 0) { /* b is a view on another bat (b2 for now). * calculate the bounds [lo, lo+BATcount(b)) @@ -1005,7 +999,6 @@ BATgroup_internal(BAT **groups, BAT **ex start += lo; end += lo; } -#endif hs = b->thash; gn->tsorted = 1; /* be optimistic */ diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c --- a/gdk/gdk_hash.c +++ b/gdk/gdk_hash.c @@ -396,18 +396,21 @@ BAThash_impl(BAT *b, BAT *s, BUN masksiz * adjusting the hash mask */ mask = HASHmask(cnt); } else { - /* dynamic hash: we start with - * HASHmask(cnt)/64; if there are too many - * collisions we try HASHmask(cnt)/16, then - * HASHmask(cnt)/4, and finally - * HASHmask(cnt). */ + /* dynamic hash: we start with HASHmask(cnt)/64, or, + * if cnt large enough, HASHmask(cnt)/256; if there + * are too many collisions we try HASHmask(cnt)/64, + * HASHmask(cnt)/16, HASHmask(cnt)/4, and finally + * HASHmask(cnt), but we might skip some of these if + * there are many distinct values. */ maxmask = HASHmask(cnt); mask = maxmask >> 6; + while (mask > 4096) + mask >>= 2; /* try out on first 25% of b */ cnt1 = cnt >> 2; } - do { + for (;;) { BUN maxslots = (mask >> 3) - 1; /* 1/8 full is too full */ nslots = 0; @@ -477,7 +480,18 @@ BAThash_impl(BAT *b, BAT *s, BUN masksiz } break; } - } while (p < cnt1 && mask < maxmask && (mask <<= 2)); + ALGODEBUG if (p < cnt1) + fprintf(stderr, "#BAThash(%s): abort starthash with " + "mask " BUNFMT " at " BUNFMT "\n", BATgetId(b), + mask, p); + if (p == cnt1 || mask == maxmask) + break; + mask <<= 2; + /* if we fill up the slots fast (p <= maxslots * 1.2) + * increase mask size a bit more quickly */ + if (mask < maxmask && p <= maxslots * 1.2) + mask <<= 2; + } /* finish the hashtable with the current mask */ switch (tpe) { diff --git a/gdk/gdk_hash.h b/gdk/gdk_hash.h --- a/gdk/gdk_hash.h +++ b/gdk/gdk_hash.h @@ -232,12 +232,13 @@ gdk_export BUN HASHlist(Hash *h, BUN i); * a pointer to the value to be stored. * * HASHins receives a BAT* param and is adaptive, killing wrongly - * configured hash tables. - * Use HASHins_any or HASHins_<tpe> instead if you know what you're - * doing or want to keep the hash. */ + * configured hash tables. Also, persistent hashes cannot be + * maintained, so must be destroyed before this macro is called. */ #define HASHins(b,i,v) \ do { \ if ((b)->thash) { \ + assert((b)->thash != (Hash *) 1); \ + assert((((size_t *) (b)->thash->heap.base)[0] & (1 << 24)) == 0); \ if (((i) & 1023) == 1023 && HASHgonebad((b), (v))) { \ HASHdestroy(b); \ } else { \ diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -3820,6 +3820,7 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA bool lhash = false, rhash = false; bool plhash = false, prhash = false; bool swap; + bat parent; size_t mem_size; lng t0 = 0; const char *reason = ""; @@ -3875,10 +3876,8 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA if (sl == NULL) { lhash = BATcheckhash(l); -#ifndef DISABLE_PARENT_HASH - bat lparent; - if (!lhash && (lparent = VIEWtparent(l)) != 0) { - BAT *b = BBPdescriptor(lparent); + if (!lhash && (parent = VIEWtparent(l)) != 0) { + BAT *b = BBPdescriptor(parent); /* use hash on parent if the average chain * length times the number of required probes * is less than the cost for creating and @@ -3888,16 +3887,13 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA BATcount(b) / ((size_t *) b->thash->heap.base)[5] * rcount < lcount + rcount); plhash = lhash; } -#endif } else if (BATtdense(sl) && BATcheckhash(l)) { lhash = BATcount(l) / ((size_t *) l->thash->heap.base)[5] * rcount < lcount + rcount; } if (sr == NULL) { rhash = BATcheckhash(r); -#ifndef DISABLE_PARENT_HASH - bat rparent; - if (!rhash && (rparent = VIEWtparent(r)) != 0) { - BAT *b = BBPdescriptor(rparent); + if (!rhash && (parent = VIEWtparent(r)) != 0) { + BAT *b = BBPdescriptor(parent); /* use hash on parent if the average chain * length times the number of required probes * is less than the cost for creating and @@ -3907,7 +3903,6 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA BATcount(b) / ((size_t *) b->thash->heap.base)[5] * lcount < lcount + rcount); prhash = rhash; } -#endif } else if (BATtdense(sr) && BATcheckhash(r)) { rhash = BATcount(r) / ((size_t *) r->thash->heap.base)[5] * lcount < lcount + rcount; } diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h --- a/gdk/gdk_private.h +++ b/gdk/gdk_private.h @@ -12,9 +12,6 @@ #error this file should not be included outside its source directory #endif -/* if the parent BAT of a view has a hash, don't use it */ -/* #define DISABLE_PARENT_HASH 1 */ - /* persist hash heaps for persistent BATs */ /* #define PERSISTENTHASH 1 */ diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -136,7 +136,7 @@ doubleslice(BAT *b, BUN l1, BUN h1, BUN (*cmp)(v, BUNtail(bi, hb)) == 0)) static BAT * -BAT_hashselect(BAT *b, BAT *s, BAT *bn, const void *tl, BUN maximum, bool phash) +hashselect(BAT *b, BAT *s, BAT *bn, const void *tl, BUN maximum, bool phash) { BATiter bi; BUN i, cnt; @@ -838,9 +838,9 @@ scan_sel(fullscan, o = (oid) (p+off), w static BAT * -BAT_scanselect(BAT *b, BAT *s, BAT *bn, const void *tl, const void *th, - bool li, bool hi, bool equi, bool anti, bool lval, bool hval, - bool lnil, BUN maximum, bool use_imprints) +scanselect(BAT *b, BAT *s, BAT *bn, const void *tl, const void *th, + bool li, bool hi, bool equi, bool anti, bool lval, bool hval, + bool lnil, BUN maximum, bool use_imprints) { #ifndef NDEBUG int (*cmp)(const void *, const void *); @@ -985,78 +985,44 @@ BAT_scanselect(BAT *b, BAT *s, BAT *bn, return bn; } -/* generic range select - * - * Return a BAT with the OID values of b for qualifying tuples. The - * return BAT is sorted (i.e. in the same order as the input BAT). - * - * If s is non-NULL, it is a list of candidates. s must be sorted. - * - * tl may not be NULL, li, hi, and anti must be either 0 or 1. - * - * If th is NULL, hi is ignored. - * - * If anti is 0, qualifying tuples are those whose value is between tl - * and th (as in x >[=] tl && x <[=] th, where equality depends on li - * and hi--so if tl > th, nothing will be returned). If li or hi is - * 1, the respective boundary is inclusive, otherwise exclusive. If - * th is NULL it is taken to be equal to tl, turning this into an - * equi- or point-select. Note that for a point select to return - * anything, li (and hi if th was not NULL) must be 1. There is a - * special case if tl is nil and th is NULL. This is the only way to - * select for nil values. - * - * If anti is 1, the result is the complement of what the result would - * be if anti were 0, except that nils are filtered out. - * - * In brief: - * - if tl==nil and th==NULL and anti==0, return all nils (only way to - * get nils); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list