Changeset: 46f9a0d2a263 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=46f9a0d2a263 Modified Files: gdk/gdk_align.c gdk/gdk_batop.c gdk/gdk_hash.c gdk/gdk_imprints.c gdk/gdk_join.c gdk/gdk_orderidx.c gdk/gdk_private.h gdk/gdk_project.c gdk/gdk_sample.c gdk/gdk_select.c gdk/gdk_unique.c Branch: default Log Message:
Merge with Mar2018 branch. diffs (truncated from 1841 to 300 lines): diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c --- a/gdk/gdk_align.c +++ b/gdk/gdk_align.c @@ -167,7 +167,8 @@ BATmaterialize(BAT *b) p = 0; q = BUNlast(b); assert(cnt >= q - p); - ALGODEBUG fprintf(stderr, "#BATmaterialize(%d);\n", (int) b->batCacheid); + ALGODEBUG fprintf(stderr, "#BATmaterialize(" ALGOBATFMT ")\n", + ALGOBATPAR(b)); if (tt != TYPE_void) { /* no voids */ diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -1341,9 +1341,12 @@ gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, BAT *g, bool reverse, bool stable) { - BAT *bn = NULL, *on = NULL, *gn, *pb = NULL; + BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL; oid *restrict grps, *restrict ords, prev; BUN p, q, r; + lng t0 = 0; + + ALGODEBUG t0 = GDKusec(); if (b == NULL) { GDKerror("BATsort: b must exist\n"); @@ -1420,6 +1423,15 @@ BATsort(BAT **sorted, BAT **order, BAT * } *groups = gn; } + ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o=" + ALGOOPTBATFMT ",g=" ALGOOPTBATFMT + ",reverse=%d,stable=%d) = (" ALGOOPTBATFMT + "," ALGOOPTBATFMT "," ALGOOPTBATFMT + ") -- trivial (" LLFMT " usec)\n", + ALGOBATPAR(b), ALGOOPTBATPAR(o), + ALGOOPTBATPAR(g), reverse, stable, + ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn), + ALGOOPTBATPAR(on), GDKusec() - t0); return GDK_SUCCEED; } if (VIEWtparent(b)) { @@ -1468,13 +1480,26 @@ BATsort(BAT **sorted, BAT **order, BAT * } if (sorted) *sorted = bn; - else + else { BBPunfix(bn->batCacheid); + bn = NULL; + } } if (order) *order = on; - else + else { BBPunfix(on->batCacheid); + on = NULL; + } + ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o=" + ALGOOPTBATFMT ",g=" ALGOOPTBATFMT + ",reverse=%d,stable=%d) = (" ALGOOPTBATFMT + "," ALGOOPTBATFMT "," ALGOOPTBATFMT + ") -- orderidx (" LLFMT " usec)\n", + ALGOBATPAR(b), ALGOOPTBATPAR(o), + ALGOOPTBATPAR(g), reverse, stable, + ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn), + ALGOOPTBATPAR(on), GDKusec() - t0); return GDK_SUCCEED; } if (o) { @@ -1482,9 +1507,9 @@ BATsort(BAT **sorted, BAT **order, BAT * if (bn == NULL) goto error; if (bn->ttype == TYPE_void || isVIEW(bn)) { - b = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT); + BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT); BBPunfix(bn->batCacheid); - bn = b; + bn = b2; } pb = NULL; } else { @@ -1537,6 +1562,7 @@ BATsort(BAT **sorted, BAT **order, BAT * *sorted = bn; } else { BBPunfix(bn->batCacheid); + bn = NULL; } if (order) { *order = on; @@ -1566,6 +1592,16 @@ BATsort(BAT **sorted, BAT **order, BAT * goto error; *groups = gn; } + ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT + ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT + ",reverse=%d,stable=%d) = (" + ALGOOPTBATFMT "," ALGOOPTBATFMT "," + ALGOOPTBATFMT ") -- key group (" LLFMT + " usec)\n", ALGOBATPAR(b), + ALGOOPTBATPAR(o), ALGOBATPAR(g), + reverse, stable, ALGOOPTBATPAR(bn), + ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on), + GDKusec() - t0); return GDK_SUCCEED; } assert(g->ttype == TYPE_oid); @@ -1674,9 +1710,19 @@ BATsort(BAT **sorted, BAT **order, BAT * if (sorted) *sorted = bn; - else + else { BBPunfix(bn->batCacheid); + bn = NULL; + } + ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o=" ALGOOPTBATFMT + ",g=" ALGOOPTBATFMT ",reverse=%d,stable=%d) = (" + ALGOOPTBATFMT "," ALGOOPTBATFMT "," ALGOOPTBATFMT + ") -- %ssort (" LLFMT " usec)\n", ALGOBATPAR(b), + ALGOOPTBATPAR(o), ALGOOPTBATPAR(g), reverse, stable, + ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn), + ALGOOPTBATPAR(on), g ? "grouped " : "", + GDKusec() - t0); return GDK_SUCCEED; error: diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c --- a/gdk/gdk_hash.c +++ b/gdk/gdk_hash.c @@ -34,6 +34,7 @@ #include "monetdb_config.h" #include "gdk.h" +#include "gdk_cand.h" #include "gdk_private.h" static int @@ -87,8 +88,8 @@ HASHclear(Hash *h) memset(h->Hash, 0xFF, (h->mask + 1) * h->width); } -#define HASH_VERSION 1 -#define HASH_HEADER_SIZE 5 /* nr of size_t fields in header */ +#define HASH_VERSION 2 +#define HASH_HEADER_SIZE 6 /* nr of size_t fields in header */ gdk_return HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count) @@ -126,33 +127,11 @@ HASHnew(Hash *h, int tpe, BUN size, BUN ((size_t *) h->heap.base)[2] = mask; ((size_t *) h->heap.base)[3] = width; ((size_t *) h->heap.base)[4] = count; + ((size_t *) h->heap.base)[5] = 0; /* # filled slots (chain heads) */ ALGODEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask " BUNFMT ", width %d, nil " BUNFMT ", total " BUNFMT " bytes);\n", size, mask, width, h->nil, (size + mask) * width); return GDK_SUCCEED; } -#define starthash(TYPE) \ - do { \ - TYPE *v = (TYPE *) BUNtloc(bi, 0); \ - for (; r < p; r++) { \ - BUN c = (BUN) hash_##TYPE(h, v+r); \ - \ - if (HASHget(h, c) == HASHnil(h) && nslots-- == 0) \ - break; /* mask too full */ \ - HASHputlink(h, r, HASHget(h, c)); \ - HASHput(h, c, r); \ - } \ - } while (0) -#define finishhash(TYPE) \ - do { \ - TYPE *v = (TYPE *) BUNtloc(bi, 0); \ - for (; p < q; p++) { \ - BUN c = (BUN) hash_##TYPE(h, v + p); \ - \ - HASHputlink(h, p, HASHget(h, c)); \ - HASHput(h, c, p); \ - } \ - } while (0) - /* collect HASH statistics for analysis */ static void HASHcollisions(BAT *b, Hash *h) @@ -303,176 +282,282 @@ BAThashsync(void *arg) } #endif +#define starthash(TYPE) \ + do { \ + const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \ + if (cand) { \ + for (; p < cnt1; p++) { \ + c = hash_##TYPE(h, v + cand[p] - b->hseqbase); \ + hget = HASHget(h, c); \ + if (hget == hnil) { \ + if (nslots == maxslots) \ + break; /* mask too full */ \ + nslots++; \ + } \ + HASHputlink(h, p, hget); \ + HASHput(h, c, p); \ + } \ + } else { \ + v += start; \ + for (; p < cnt1; p++) { \ + c = hash_##TYPE(h, v + p); \ + hget = HASHget(h, c); \ + if (hget == hnil) { \ + if (nslots == maxslots) \ + break; /* mask too full */ \ + nslots++; \ + } \ + HASHputlink(h, p, hget); \ + HASHput(h, c, p); \ + } \ + } \ + } while (0) +#define finishhash(TYPE) \ + do { \ + const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \ + if (cand) { \ + for (; p < cnt; p++) { \ + c = hash_##TYPE(h, v + cand[p] - b->hseqbase); \ + hget = HASHget(h, c); \ + nslots += hget == hnil; \ + HASHputlink(h, p, hget); \ + HASHput(h, c, p); \ + } \ + } else { \ + v += start; \ + for (; p < cnt; p++) { \ + c = hash_##TYPE(h, v + p); \ + hget = HASHget(h, c); \ + nslots += hget == hnil; \ + HASHputlink(h, p, hget); \ + HASHput(h, c, p); \ + } \ + } \ + } while (0) + /* * The prime routine for the BAT layer is to create a new hash index. * Its argument is the element type and the maximum number of BUNs be * stored under the hash function. */ +Hash * +BAThash_impl(BAT *b, BAT *s, BUN masksize, const char *ext) +{ + lng t0 = 0; + unsigned int tpe = ATOMbasetype(b->ttype); + BUN cnt, start, end, cnt1; + const oid *cand, *candend; + BUN mask, maxmask = 0; + BUN p, c; + BUN nslots; + BUN hnil, hget; + Hash *h = NULL; + const char *nme = BBP_physical(b->batCacheid); + BATiter bi = bat_iterator(b); + + ALGODEBUG t0 = GDKusec(); + ALGODEBUG fprintf(stderr, "#BAThash: create hash(" ALGOBATFMT ");\n", + ALGOBATPAR(b)); + if (b->ttype == TYPE_void) { + if (is_oid_nil(b->tseqbase)) { + ALGODEBUG fprintf(stderr, "#BAThash: cannot create hash-table on void-NIL column.\n"); + GDKerror("BAThash: no hash on void/nil column\n"); + return NULL; + } + ALGODEBUG fprintf(stderr, "#BAThash: creating hash-table on void column..\n"); + + tpe = TYPE_void; + } + + CANDINIT(b, s, start, end, cnt, cand, candend); + cnt = cand ? (BUN) (candend - cand) : end - start; + + if ((h = GDKzalloc(sizeof(*h))) == NULL || + (h->heap.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) < 0) { + GDKfree(h); + return NULL; + } + h->heap.dirty = TRUE; + snprintf(h->heap.filename, sizeof(h->heap.filename), "%s.%s", nme, ext); + + /* determine hash mask size */ + cnt1 = 0; + if (masksize > 0) { + /* specified by caller */ + mask = HASHmask(masksize); + } else if (ATOMsize(tpe) == 1) { + /* perfect hash for one-byte sized atoms */ + mask = (1 << 8); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list