Changeset: 0f951a98884e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0f951a98884e
Modified Files:
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_group.c
        gdk/gdk_hash.c
        gdk/gdk_hash.h
        gdk/gdk_join.c
        gdk/gdk_private.h
        gdk/gdk_select.c
        gdk/gdk_unique.c
        sql/backends/monet5/rel_bin.c
        sql/backends/monet5/rel_bin.h
        sql/backends/monet5/sql.c
        sql/backends/monet5/sql_cat.c
        sql/backends/monet5/sql_statement.c
        sql/server/rel_dump.c
        
sql/test/BugTracker-2018/Tests/dependency_column_on_sequence.Bug-6618.stable.err
        
sql/test/BugTracker-2018/Tests/dependency_column_on_sequence.Bug-6618.stable.out
        sql/test/BugTracker/Tests/case_in_aggr_bug.SF-1506545.sql
        sql/test/seq-default.sql
Branch: default
Log Message:

Merge with Mar2018 branch.


diffs (truncated from 999 to 300 lines):

diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1050,8 +1050,10 @@ BUNappend(BAT *b, const void *t, bool fo
        OIDXdestroy(b);
        PROPdestroy(b->tprops);
        b->tprops = NULL;
-       if (b->thash == (Hash *) 1) {
-               /* don't bother first loading the hash to then change it */
+       if (b->thash == (Hash *) 1 ||
+           (b->thash && ((size_t *) b->thash->heap.base)[0] & (1 << 24))) {
+               /* don't bother first loading the hash to then change
+                * it, also, cannot maintain persistent hashes */
                HASHdestroy(b);
        }
        if (b->thash) {
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -597,9 +597,11 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
        OIDXdestroy(b);
        PROPdestroy(b->tprops);
        b->tprops = NULL;
-       if (b->thash == (Hash *) 1 || BATcount(b) == 0) {
+       if (b->thash == (Hash *) 1 || BATcount(b) == 0 ||
+           (b->thash && ((size_t *) b->thash->heap.base)[0] & (1 << 24))) {
                /* don't bother first loading the hash to then change
-                * it, or updating the hash if we replace the heap */
+                * it, or updating the hash if we replace the heap,
+                * also, we cannot maintain persistent hashes */
                HASHdestroy(b);
        }
 
@@ -1058,12 +1060,9 @@ BATkeyed(BAT *b)
                        b->tkey = true;
                } else if (BATcheckhash(b) ||
                           (b->batPersistence == PERSISTENT &&
-                           BAThash(b, 0) == GDK_SUCCEED)
-#ifndef DISABLE_PARENT_HASH
-                          || (VIEWtparent(b) != 0 &&
-                              BATcheckhash(BBPdescriptor(VIEWtparent(b))))
-#endif
-                       ) {
+                           BAThash(b, 0) == GDK_SUCCEED) ||
+                          (VIEWtparent(b) != 0 &&
+                           BATcheckhash(BBPdescriptor(VIEWtparent(b))))) {
                        /* we already have a hash table on b, or b is
                         * persistent and we could create a hash
                         * table, or b is a view on a bat that already
@@ -1071,13 +1070,11 @@ BATkeyed(BAT *b)
                        BUN lo = 0;
 
                        hs = b->thash;
-#ifndef DISABLE_PARENT_HASH
-                       if (b->thash == NULL && VIEWtparent(b) != 0) {
+                       if (hs == NULL && VIEWtparent(b) != 0) {
                                BAT *b2 = BBPdescriptor(VIEWtparent(b));
                                lo = (BUN) ((b->theap.base - b2->theap.base) >> 
b->tshift);
                                hs = b2->thash;
                        }
-#endif
                        for (q = BUNlast(b), p = 0; p < q; p++) {
                                const void *v = BUNtail(bi, p);
                                for (hb = HASHgetlink(hs, p + lo);
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -506,9 +506,7 @@ BATgroup_internal(BAT **groups, BAT **ex
        Hash *hs = NULL;
        BUN hb;
        BUN maxgrps;
-#ifndef DISABLE_PARENT_HASH
        bat parent;
-#endif
        BUN start, end, cnt;
        BUN lo = 0;
        const oid *restrict cand, *candend;
@@ -968,12 +966,9 @@ BATgroup_internal(BAT **groups, BAT **ex
        } else if (g == NULL &&
                   (BATcheckhash(b) ||
                    (b->batPersistence == PERSISTENT &&
-                    BAThash(b, 0) == GDK_SUCCEED)
-#ifndef DISABLE_PARENT_HASH
-                   || ((parent = VIEWtparent(b)) != 0 &&
-                       BATcheckhash(BBPdescriptor(parent)))
-#endif
-                          )) {
+                    BAThash(b, 0) == GDK_SUCCEED) ||
+                   ((parent = VIEWtparent(b)) != 0 &&
+                    BATcheckhash(BBPdescriptor(parent))))) {
                /* we already have a hash table on b, or b is
                 * persistent and we could create a hash table, or b
                 * is a view on a bat that already has a hash table;
@@ -993,7 +988,6 @@ BATgroup_internal(BAT **groups, BAT **ex
                                  e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
                                  h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0,
                                  subsorted);
-#ifndef DISABLE_PARENT_HASH
                if (b->thash == NULL && (parent = VIEWtparent(b)) != 0) {
                        /* b is a view on another bat (b2 for now).
                         * calculate the bounds [lo, lo+BATcount(b))
@@ -1005,7 +999,6 @@ BATgroup_internal(BAT **groups, BAT **ex
                        start += lo;
                        end += lo;
                }
-#endif
                hs = b->thash;
                gn->tsorted = 1; /* be optimistic */
 
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -396,18 +396,21 @@ BAThash_impl(BAT *b, BAT *s, BUN masksiz
                 * adjusting the hash mask */
                mask = HASHmask(cnt);
        } else {
-               /* dynamic hash: we start with
-                * HASHmask(cnt)/64; if there are too many
-                * collisions we try HASHmask(cnt)/16, then
-                * HASHmask(cnt)/4, and finally
-                * HASHmask(cnt).  */
+               /* dynamic hash: we start with HASHmask(cnt)/64, or,
+                * if cnt large enough, HASHmask(cnt)/256; if there
+                * are too many collisions we try HASHmask(cnt)/64,
+                * HASHmask(cnt)/16, HASHmask(cnt)/4, and finally
+                * HASHmask(cnt), but we might skip some of these if
+                * there are many distinct values.  */
                maxmask = HASHmask(cnt);
                mask = maxmask >> 6;
+               while (mask > 4096)
+                       mask >>= 2;
                /* try out on first 25% of b */
                cnt1 = cnt >> 2;
        }
 
-       do {
+       for (;;) {
                BUN maxslots = (mask >> 3) - 1; /* 1/8 full is too full */
 
                nslots = 0;
@@ -477,7 +480,18 @@ BAThash_impl(BAT *b, BAT *s, BUN masksiz
                        }
                        break;
                }
-       } while (p < cnt1 && mask < maxmask && (mask <<= 2));
+               ALGODEBUG if (p < cnt1)
+                       fprintf(stderr, "#BAThash(%s): abort starthash with "
+                               "mask " BUNFMT " at " BUNFMT "\n", BATgetId(b),
+                               mask, p);
+               if (p == cnt1 || mask == maxmask)
+                       break;
+               mask <<= 2;
+               /* if we fill up the slots fast (p <= maxslots * 1.2)
+                * increase mask size a bit more quickly */
+               if (mask < maxmask && p <= maxslots * 1.2)
+                       mask <<= 2;
+       }
 
        /* finish the hashtable with the current mask */
        switch (tpe) {
diff --git a/gdk/gdk_hash.h b/gdk/gdk_hash.h
--- a/gdk/gdk_hash.h
+++ b/gdk/gdk_hash.h
@@ -232,12 +232,13 @@ gdk_export BUN HASHlist(Hash *h, BUN i);
  * a pointer to the value to be stored.
  *
  * HASHins receives a BAT* param and is adaptive, killing wrongly
- * configured hash tables.
- * Use HASHins_any or HASHins_<tpe> instead if you know what you're
- * doing or want to keep the hash. */
+ * configured hash tables.  Also, persistent hashes cannot be
+ * maintained, so must be destroyed before this macro is called. */
 #define HASHins(b,i,v)                                                 \
        do {                                                            \
                if ((b)->thash) {                                       \
+                       assert((b)->thash != (Hash *) 1);               \
+                       assert((((size_t *) (b)->thash->heap.base)[0] & (1 << 
24)) == 0); \
                        if (((i) & 1023) == 1023 && HASHgonebad((b), (v))) { \
                                HASHdestroy(b);                         \
                        } else {                                        \
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3820,6 +3820,7 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
        bool lhash = false, rhash = false;
        bool plhash = false, prhash = false;
        bool swap;
+       bat parent;
        size_t mem_size;
        lng t0 = 0;
        const char *reason = "";
@@ -3875,10 +3876,8 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
 
        if (sl == NULL) {
                lhash = BATcheckhash(l);
-#ifndef DISABLE_PARENT_HASH
-               bat lparent;
-               if (!lhash && (lparent = VIEWtparent(l)) != 0) {
-                       BAT *b = BBPdescriptor(lparent);
+               if (!lhash && (parent = VIEWtparent(l)) != 0) {
+                       BAT *b = BBPdescriptor(parent);
                        /* use hash on parent if the average chain
                         * length times the number of required probes
                         * is less than the cost for creating and
@@ -3888,16 +3887,13 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
                                 BATcount(b) / ((size_t *) 
b->thash->heap.base)[5] * rcount < lcount + rcount);
                        plhash = lhash;
                }
-#endif
        } else if (BATtdense(sl) && BATcheckhash(l)) {
                lhash = BATcount(l) / ((size_t *) l->thash->heap.base)[5] * 
rcount < lcount + rcount;
        }
        if (sr == NULL) {
                rhash = BATcheckhash(r);
-#ifndef DISABLE_PARENT_HASH
-               bat rparent;
-               if (!rhash && (rparent = VIEWtparent(r)) != 0) {
-                       BAT *b = BBPdescriptor(rparent);
+               if (!rhash && (parent = VIEWtparent(r)) != 0) {
+                       BAT *b = BBPdescriptor(parent);
                        /* use hash on parent if the average chain
                         * length times the number of required probes
                         * is less than the cost for creating and
@@ -3907,7 +3903,6 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
                                 BATcount(b) / ((size_t *) 
b->thash->heap.base)[5] * lcount < lcount + rcount);
                        prhash = rhash;
                }
-#endif
        } else if (BATtdense(sr) && BATcheckhash(r)) {
                rhash = BATcount(r) / ((size_t *) r->thash->heap.base)[5] * 
lcount < lcount + rcount;
        }
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -12,9 +12,6 @@
 #error this file should not be included outside its source directory
 #endif
 
-/* if the parent BAT of a view has a hash, don't use it */
-/* #define DISABLE_PARENT_HASH 1 */
-
 /* persist hash heaps for persistent BATs */
 /* #define PERSISTENTHASH 1 */
 
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -136,7 +136,7 @@ doubleslice(BAT *b, BUN l1, BUN h1, BUN 
                     (*cmp)(v, BUNtail(bi, hb)) == 0))
 
 static BAT *
-BAT_hashselect(BAT *b, BAT *s, BAT *bn, const void *tl, BUN maximum, bool 
phash)
+hashselect(BAT *b, BAT *s, BAT *bn, const void *tl, BUN maximum, bool phash)
 {
        BATiter bi;
        BUN i, cnt;
@@ -838,9 +838,9 @@ scan_sel(fullscan, o = (oid) (p+off), w 
 
 
 static BAT *
-BAT_scanselect(BAT *b, BAT *s, BAT *bn, const void *tl, const void *th,
-              bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
-              bool lnil, BUN maximum, bool use_imprints)
+scanselect(BAT *b, BAT *s, BAT *bn, const void *tl, const void *th,
+          bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
+          bool lnil, BUN maximum, bool use_imprints)
 {
 #ifndef NDEBUG
        int (*cmp)(const void *, const void *);
@@ -985,78 +985,44 @@ BAT_scanselect(BAT *b, BAT *s, BAT *bn, 
        return bn;
 }
 
-/* generic range select
- *
- * Return a BAT with the OID values of b for qualifying tuples.  The
- * return BAT is sorted (i.e. in the same order as the input BAT).
- *
- * If s is non-NULL, it is a list of candidates.  s must be sorted.
- *
- * tl may not be NULL, li, hi, and anti must be either 0 or 1.
- *
- * If th is NULL, hi is ignored.
- *
- * If anti is 0, qualifying tuples are those whose value is between tl
- * and th (as in x >[=] tl && x <[=] th, where equality depends on li
- * and hi--so if tl > th, nothing will be returned).  If li or hi is
- * 1, the respective boundary is inclusive, otherwise exclusive.  If
- * th is NULL it is taken to be equal to tl, turning this into an
- * equi- or point-select.  Note that for a point select to return
- * anything, li (and hi if th was not NULL) must be 1.  There is a
- * special case if tl is nil and th is NULL.  This is the only way to
- * select for nil values.
- *
- * If anti is 1, the result is the complement of what the result would
- * be if anti were 0, except that nils are filtered out.
- *
- * In brief:
- * - if tl==nil and th==NULL and anti==0, return all nils (only way to
- *   get nils);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to