Changeset: 46f9a0d2a263 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=46f9a0d2a263
Modified Files:
        gdk/gdk_align.c
        gdk/gdk_batop.c
        gdk/gdk_hash.c
        gdk/gdk_imprints.c
        gdk/gdk_join.c
        gdk/gdk_orderidx.c
        gdk/gdk_private.h
        gdk/gdk_project.c
        gdk/gdk_sample.c
        gdk/gdk_select.c
        gdk/gdk_unique.c
Branch: default
Log Message:

Merge with Mar2018 branch.


diffs (truncated from 1841 to 300 lines):

diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -167,7 +167,8 @@ BATmaterialize(BAT *b)
        p = 0;
        q = BUNlast(b);
        assert(cnt >= q - p);
-       ALGODEBUG fprintf(stderr, "#BATmaterialize(%d);\n", (int) 
b->batCacheid);
+       ALGODEBUG fprintf(stderr, "#BATmaterialize(" ALGOBATFMT ")\n",
+                         ALGOBATPAR(b));
 
        if (tt != TYPE_void) {
                /* no voids */
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -1341,9 +1341,12 @@ gdk_return
 BATsort(BAT **sorted, BAT **order, BAT **groups,
           BAT *b, BAT *o, BAT *g, bool reverse, bool stable)
 {
-       BAT *bn = NULL, *on = NULL, *gn, *pb = NULL;
+       BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
        oid *restrict grps, *restrict ords, prev;
        BUN p, q, r;
+       lng t0 = 0;
+
+       ALGODEBUG t0 = GDKusec();
 
        if (b == NULL) {
                GDKerror("BATsort: b must exist\n");
@@ -1420,6 +1423,15 @@ BATsort(BAT **sorted, BAT **order, BAT *
                        }
                        *groups = gn;
                }
+               ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o="
+                                 ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
+                                 ",reverse=%d,stable=%d) = (" ALGOOPTBATFMT
+                                 "," ALGOOPTBATFMT "," ALGOOPTBATFMT
+                                 ") -- trivial (" LLFMT " usec)\n",
+                                 ALGOBATPAR(b), ALGOOPTBATPAR(o),
+                                 ALGOOPTBATPAR(g), reverse, stable,
+                                 ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
+                                 ALGOOPTBATPAR(on), GDKusec() - t0);
                return GDK_SUCCEED;
        }
        if (VIEWtparent(b)) {
@@ -1468,13 +1480,26 @@ BATsort(BAT **sorted, BAT **order, BAT *
                        }
                        if (sorted)
                                *sorted = bn;
-                       else
+                       else {
                                BBPunfix(bn->batCacheid);
+                               bn = NULL;
+                       }
                }
                if (order)
                        *order = on;
-               else
+               else {
                        BBPunfix(on->batCacheid);
+                       on = NULL;
+               }
+               ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o="
+                                 ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
+                                 ",reverse=%d,stable=%d) = (" ALGOOPTBATFMT
+                                 "," ALGOOPTBATFMT "," ALGOOPTBATFMT
+                                 ") -- orderidx (" LLFMT " usec)\n",
+                                 ALGOBATPAR(b), ALGOOPTBATPAR(o),
+                                 ALGOOPTBATPAR(g), reverse, stable,
+                                 ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
+                                 ALGOOPTBATPAR(on), GDKusec() - t0);
                return GDK_SUCCEED;
        }
        if (o) {
@@ -1482,9 +1507,9 @@ BATsort(BAT **sorted, BAT **order, BAT *
                if (bn == NULL)
                        goto error;
                if (bn->ttype == TYPE_void || isVIEW(bn)) {
-                       b = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
+                       BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, 
TRANSIENT);
                        BBPunfix(bn->batCacheid);
-                       bn = b;
+                       bn = b2;
                }
                pb = NULL;
        } else {
@@ -1537,6 +1562,7 @@ BATsort(BAT **sorted, BAT **order, BAT *
                                *sorted = bn;
                        } else {
                                BBPunfix(bn->batCacheid);
+                               bn = NULL;
                        }
                        if (order) {
                                *order = on;
@@ -1566,6 +1592,16 @@ BATsort(BAT **sorted, BAT **order, BAT *
                                        goto error;
                                *groups = gn;
                        }
+                       ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT
+                                         ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
+                                         ",reverse=%d,stable=%d) = ("
+                                         ALGOOPTBATFMT "," ALGOOPTBATFMT ","
+                                         ALGOOPTBATFMT ") -- key group (" LLFMT
+                                         " usec)\n", ALGOBATPAR(b),
+                                         ALGOOPTBATPAR(o), ALGOBATPAR(g),
+                                         reverse, stable, ALGOOPTBATPAR(bn),
+                                         ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
+                                         GDKusec() - t0);
                        return GDK_SUCCEED;
                }
                assert(g->ttype == TYPE_oid);
@@ -1674,9 +1710,19 @@ BATsort(BAT **sorted, BAT **order, BAT *
 
        if (sorted)
                *sorted = bn;
-       else
+       else {
                BBPunfix(bn->batCacheid);
+               bn = NULL;
+       }
 
+       ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
+                         ",g=" ALGOOPTBATFMT ",reverse=%d,stable=%d) = ("
+                         ALGOOPTBATFMT "," ALGOOPTBATFMT "," ALGOOPTBATFMT
+                         ") -- %ssort (" LLFMT " usec)\n", ALGOBATPAR(b),
+                         ALGOOPTBATPAR(o), ALGOOPTBATPAR(g), reverse, stable,
+                         ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
+                         ALGOOPTBATPAR(on), g ? "grouped " : "",
+                         GDKusec() - t0);
        return GDK_SUCCEED;
 
   error:
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -34,6 +34,7 @@
 
 #include "monetdb_config.h"
 #include "gdk.h"
+#include "gdk_cand.h"
 #include "gdk_private.h"
 
 static int
@@ -87,8 +88,8 @@ HASHclear(Hash *h)
        memset(h->Hash, 0xFF, (h->mask + 1) * h->width);
 }
 
-#define HASH_VERSION           1
-#define HASH_HEADER_SIZE       5 /* nr of size_t fields in header */
+#define HASH_VERSION           2
+#define HASH_HEADER_SIZE       6 /* nr of size_t fields in header */
 
 gdk_return
 HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count)
@@ -126,33 +127,11 @@ HASHnew(Hash *h, int tpe, BUN size, BUN 
        ((size_t *) h->heap.base)[2] = mask;
        ((size_t *) h->heap.base)[3] = width;
        ((size_t *) h->heap.base)[4] = count;
+       ((size_t *) h->heap.base)[5] = 0; /* # filled slots (chain heads) */
        ALGODEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask 
" BUNFMT ", width %d, nil " BUNFMT ", total " BUNFMT " bytes);\n", size, mask, 
width, h->nil, (size + mask) * width);
        return GDK_SUCCEED;
 }
 
-#define starthash(TYPE)                                                        
\
-       do {                                                            \
-               TYPE *v = (TYPE *) BUNtloc(bi, 0);                      \
-               for (; r < p; r++) {                                    \
-                       BUN c = (BUN) hash_##TYPE(h, v+r);              \
-                                                                       \
-                       if (HASHget(h, c) == HASHnil(h) && nslots-- == 0) \
-                               break; /* mask too full */              \
-                       HASHputlink(h, r, HASHget(h, c));               \
-                       HASHput(h, c, r);                               \
-               }                                                       \
-       } while (0)
-#define finishhash(TYPE)                                       \
-       do {                                                    \
-               TYPE *v = (TYPE *) BUNtloc(bi, 0);              \
-               for (; p < q; p++) {                            \
-                       BUN c = (BUN) hash_##TYPE(h, v + p);    \
-                                                               \
-                       HASHputlink(h, p, HASHget(h, c));       \
-                       HASHput(h, c, p);                       \
-               }                                               \
-       } while (0)
-
 /* collect HASH statistics for analysis */
 static void
 HASHcollisions(BAT *b, Hash *h)
@@ -303,176 +282,282 @@ BAThashsync(void *arg)
 }
 #endif
 
+#define starthash(TYPE)                                                        
\
+       do {                                                            \
+               const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
+               if (cand) {                                             \
+                       for (; p < cnt1; p++) {                         \
+                               c = hash_##TYPE(h, v + cand[p] - b->hseqbase); \
+                               hget = HASHget(h, c);                   \
+                               if (hget == hnil) {                     \
+                                       if (nslots == maxslots)         \
+                                               break; /* mask too full */ \
+                                       nslots++;                       \
+                               }                                       \
+                               HASHputlink(h, p, hget);                \
+                               HASHput(h, c, p);                       \
+                       }                                               \
+               } else {                                                \
+                       v += start;                                     \
+                       for (; p < cnt1; p++) {                         \
+                               c = hash_##TYPE(h, v + p);              \
+                               hget = HASHget(h, c);                   \
+                               if (hget == hnil) {                     \
+                                       if (nslots == maxslots)         \
+                                               break; /* mask too full */ \
+                                       nslots++;                       \
+                               }                                       \
+                               HASHputlink(h, p, hget);                \
+                               HASHput(h, c, p);                       \
+                       }                                               \
+               }                                                       \
+       } while (0)
+#define finishhash(TYPE)                                               \
+       do {                                                            \
+               const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
+               if (cand) {                                             \
+                       for (; p < cnt; p++) {                          \
+                               c = hash_##TYPE(h, v + cand[p] - b->hseqbase); \
+                               hget = HASHget(h, c);                   \
+                               nslots += hget == hnil;                 \
+                               HASHputlink(h, p, hget);                \
+                               HASHput(h, c, p);                       \
+                       }                                               \
+               } else {                                                \
+                       v += start;                                     \
+                       for (; p < cnt; p++) {                          \
+                               c = hash_##TYPE(h, v + p);              \
+                               hget = HASHget(h, c);                   \
+                               nslots += hget == hnil;                 \
+                               HASHputlink(h, p, hget);                \
+                               HASHput(h, c, p);                       \
+                       }                                               \
+               }                                                       \
+       } while (0)
+
 /*
  * The prime routine for the BAT layer is to create a new hash index.
  * Its argument is the element type and the maximum number of BUNs be
  * stored under the hash function.
  */
+Hash *
+BAThash_impl(BAT *b, BAT *s, BUN masksize, const char *ext)
+{
+       lng t0 = 0;
+       unsigned int tpe = ATOMbasetype(b->ttype);
+       BUN cnt, start, end, cnt1;
+       const oid *cand, *candend;
+       BUN mask, maxmask = 0;
+       BUN p, c;
+       BUN nslots;
+       BUN hnil, hget;
+       Hash *h = NULL;
+       const char *nme = BBP_physical(b->batCacheid);
+       BATiter bi = bat_iterator(b);
+
+       ALGODEBUG t0 = GDKusec();
+       ALGODEBUG fprintf(stderr, "#BAThash: create hash(" ALGOBATFMT ");\n",
+                         ALGOBATPAR(b));
+       if (b->ttype == TYPE_void) {
+               if (is_oid_nil(b->tseqbase)) {
+                       ALGODEBUG fprintf(stderr, "#BAThash: cannot create 
hash-table on void-NIL column.\n");
+                       GDKerror("BAThash: no hash on void/nil column\n");
+                       return NULL;
+               }
+               ALGODEBUG fprintf(stderr, "#BAThash: creating hash-table on 
void column..\n");
+
+               tpe = TYPE_void;
+       }
+
+       CANDINIT(b, s, start, end, cnt, cand, candend);
+       cnt = cand ? (BUN) (candend - cand) : end - start;
+
+       if ((h = GDKzalloc(sizeof(*h))) == NULL ||
+           (h->heap.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) < 
0) {
+               GDKfree(h);
+               return NULL;
+       }
+       h->heap.dirty = TRUE;
+       snprintf(h->heap.filename, sizeof(h->heap.filename), "%s.%s", nme, ext);
+
+       /* determine hash mask size */
+       cnt1 = 0;
+       if (masksize > 0) {
+               /* specified by caller */
+               mask = HASHmask(masksize);
+       } else if (ATOMsize(tpe) == 1) {
+               /* perfect hash for one-byte sized atoms */
+               mask = (1 << 8);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to