Changeset: 34ebe73ec2b1 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=34ebe73ec2b1
Modified Files:
        gdk/gdk_imprints.c
        gdk/gdk_private.h
        gdk/gdk_select.c
Branch: Oct2014
Log Message:

Use a single heap for the imprints information.


diffs (truncated from 675 to 300 lines):

diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -410,39 +410,40 @@
 do {                                                                   \
        uint##B##_t mask, prvmask;                                      \
        uint##B##_t *im = (uint##B##_t *) imps;                         \
-       TYPE *col = (TYPE *) Tloc(b, 0);                                \
+       TYPE *col = (TYPE *) Tloc(b, b->batFirst);                      \
        TYPE *bins = (TYPE *) inbins;                                   \
        prvmask = mask = 0;                                             \
        new = (IMPS_PAGE/sizeof(TYPE))-1;                               \
-       for (i = 0; i < b->batFirst+b->batCount; i++) {                 \
+       for (i = 0; i < b->batCount; i++) {                             \
                if (!(i&new) && i>0) {                                  \
                        /* same mask as previous and enough count to add */ \
                        if ((prvmask == mask) &&                        \
-                           (d[dcnt-1].cnt < (IMPS_MAX_CNT-1))) {       \
+                           (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) {    \
                                /* not a repeat header */               \
-                               if (!d[dcnt-1].repeat) {                \
+                               if (!dict[dcnt-1].repeat) {             \
                                        /* if compressed */             \
-                                       if (d[dcnt-1].cnt > 1) {        \
+                                       if (dict[dcnt-1].cnt > 1) {     \
                                                /* uncompress last */   \
-                                               d[dcnt-1].cnt--;        \
+                                               dict[dcnt-1].cnt--;     \
                                                dcnt++; /* new header */ \
-                                               d[dcnt-1].cnt = 1;      \
+                                               dict[dcnt-1].cnt = 1;   \
                                        }                               \
                                        /* set repeat */                \
-                                       d[dcnt-1].repeat = 1;           \
+                                       dict[dcnt-1].repeat = 1;        \
                                }                                       \
                                /* increase cnt */                      \
-                               d[dcnt-1].cnt++;                        \
+                               dict[dcnt-1].cnt++;                     \
                        } else { /* new mask (or run out of header count) */ \
                                prvmask=mask;                           \
                                im[icnt] = mask;                        \
                                icnt++;                                 \
-                               if ((dcnt > 0) && !(d[dcnt-1].repeat) && \
-                                   (d[dcnt-1].cnt < (IMPS_MAX_CNT-1))) { \
-                                       d[dcnt-1].cnt++;                \
+                               if ((dcnt > 0) && !(dict[dcnt-1].repeat) && \
+                                   (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) { \
+                                       dict[dcnt-1].cnt++;             \
                                } else {                                \
-                                       d[dcnt].cnt = 1;                \
-                                       d[dcnt].repeat = 0;             \
+                                       dict[dcnt].cnt = 1;             \
+                                       dict[dcnt].repeat = 0;          \
+                                       dict[dcnt].flags = 0;           \
                                        dcnt++;                         \
                                }                                       \
                        }                                               \
@@ -454,38 +455,39 @@ do {                                                      
                \
        }                                                               \
        /* one last left */                                             \
        if (prvmask == mask && dcnt > 0 &&                              \
-           (d[dcnt-1].cnt < (IMPS_MAX_CNT-1))) {                       \
-               if (!d[dcnt-1].repeat) {                                \
-                       if (d[dcnt-1].cnt > 1) {                        \
-                               d[dcnt-1].cnt--;                        \
+           (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) {                    \
+               if (!dict[dcnt-1].repeat) {                             \
+                       if (dict[dcnt-1].cnt > 1) {                     \
+                               dict[dcnt-1].cnt--;                     \
+                               dict[dcnt].cnt = 1;                     \
+                               dict[dcnt].flags = 0;                   \
                                dcnt++;                                 \
-                               d[dcnt-1].cnt = 1;                      \
                        }                                               \
-                       d[dcnt-1].repeat = 1;                           \
+                       dict[dcnt-1].repeat = 1;                        \
                }                                                       \
-               d[dcnt-1].cnt ++;                                       \
+               dict[dcnt-1].cnt ++;                                    \
        } else {                                                        \
                im[icnt] = mask;                                        \
                icnt++;                                                 \
-               if ((dcnt > 0) && !(d[dcnt-1].repeat) &&                \
-                   (d[dcnt-1].cnt < (IMPS_MAX_CNT-1))) {               \
-                       d[dcnt-1].cnt++;                                \
+               if ((dcnt > 0) && !(dict[dcnt-1].repeat) &&             \
+                   (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) {            \
+                       dict[dcnt-1].cnt++;                             \
                } else {                                                \
-                       d[dcnt].cnt = 1;                                \
-                       d[dcnt].repeat = 0;                             \
+                       dict[dcnt].cnt = 1;                             \
+                       dict[dcnt].repeat = 0;                          \
+                       dict[dcnt].flags = 0;                           \
                        dcnt++;                                         \
                }                                                       \
        }                                                               \
 } while (0)
 
 static int
-imprints_create(BAT *b, char *inbins, bte bits,
-               char *imps, BUN *impcnt, char *dict, BUN *dictcnt)
+imprints_create(BAT *b, void *inbins, bte bits,
+               void *imps, BUN *impcnt, cchdc_t *dict, BUN *dictcnt)
 {
        BUN i;
        BUN dcnt, icnt, new;
        bte bin = 0;
-       cchdc_t *d = (cchdc_t *) dict;
        dcnt = icnt = 0;
 
        switch (ATOMstorage(b->T->type)) {
@@ -522,19 +524,11 @@ imprints_create(BAT *b, char *inbins, bt
 do {                                                                   \
        BUN k;                                                          \
        TYPE *s = (TYPE *) Tloc(smp, smp->batFirst);                    \
-       TYPE *h = (TYPE *) imprints->bins->base;                        \
+       TYPE *h = imprints->bins;                                       \
        if (cnt < 64-1) {                                               \
                TYPE max = GDK_##TYPE##_max;                            \
                for (k = 0; k < cnt; k++)                               \
                        h[k] = s[k];                                    \
-               if (k < 8)                                              \
-                       imprints->bits = 8;                             \
-               if (8 <= k && k < 16)                                   \
-                       imprints->bits = 16;                            \
-               if (16 <= k && k < 32)                                  \
-                       imprints->bits = 32;                            \
-               if (32 <= k && k < 64)                                  \
-                       imprints->bits = 64;                            \
                while (k < (BUN) imprints->bits)                        \
                        h[k++] = max;                                   \
        } else {                                                        \
@@ -543,7 +537,6 @@ do {                                                        
                \
                        h[k] = s[(BUN) y];                              \
                if (k == 64 - 1) /* there is one left */                \
                        h[k] = s[cnt - 1];                              \
-               imprints->bits = 64;                                    \
        }                                                               \
 } while (0)
 
@@ -566,7 +559,7 @@ BATimprints(BAT *b)
        default:                /* type not supported */
                GDKerror("#BATimprints: col type not "
                         "suitable for imprints index.\n");
-               return b;       /* do nothing */
+               return NULL;    /* do nothing */
        }
 
        BATcheck(b, "BATimprints");
@@ -576,12 +569,20 @@ BATimprints(BAT *b)
                o = b;
                b = BATmirror(BATdescriptor(p));
        }
+       if (b->batFirst > 0) {
+               /* no imprints if batFirst is not 0
+                * this shouldn't really happen */
+               if (o)
+                       BBPunfix(b->batCacheid);
+               return NULL;
+       }
 
        MT_lock_set(&GDKimprintsLock(abs(b->batCacheid)), "BATimprints");
        if (b->T->imprints == NULL) {
                BAT *smp, *s;
                BUN cnt;
                str nme = BBP_physical(b->batCacheid);
+               size_t pages;
 
                ALGODEBUG fprintf(stderr, "#BATimprints(b=%s#" BUNFMT ") %s: "
                                  "created imprints\n", BATgetId(b),
@@ -630,31 +631,50 @@ BATimprints(BAT *b)
                /* smp now is ordered and unique on tail */
                assert(smp->tkey && smp->tsorted);
                cnt = BATcount(smp);
+               imprints->bits = 64;
+               if (cnt < 32)
+                       imprints->bits = 32;
+               if (cnt < 16)
+                       imprints->bits = 16;
+               if (cnt < 8)
+                       imprints->bits = 8;
 
                /* bins of histogram */
-               imprints->bins = (Heap *) GDKzalloc(sizeof(Heap));
-               if (imprints->bins == NULL ||
-                   (imprints->bins->filename =
+               imprints->imprints = GDKzalloc(sizeof(Heap));
+               if (imprints->imprints == NULL ||
+                   (imprints->imprints->filename =
                     GDKmalloc(strlen(nme) + 12)) == NULL) {
-                       if (imprints->bins != NULL) {
-                               GDKfree(imprints->bins);
-                       }
+                       GDKfree(imprints->imprints);
+                       GDKfree(imprints);
                        GDKerror("#BATimprints: memory allocation error.\n");
-                       GDKfree(imprints);
                        BBPunfix(smp->batCacheid);
                        MT_lock_unset(&GDKimprintsLock(abs(b->batCacheid)),
                                      "BATimprints");
                        return NULL;
                }
-               sprintf(imprints->bins->filename, "%s.bins", nme);
-               if (HEAPalloc(imprints->bins, 64, b->T->width) < 0) {
+               sprintf(imprints->imprints->filename, "%s.imprints", nme);
+               pages = (((size_t) BATcount(b) * b->T->width) + IMPS_PAGE - 1) 
/ IMPS_PAGE;
+               /* The heap we create here consists of three parts:
+                * bins, max 64 entries with bin boundaries;
+                * imps;
+                * dict. */
+               if (HEAPalloc(imprints->imprints,
+                             64 * b->T->width +
+                             pages * (imprints->bits / 8) +
+                             pages * sizeof(cchdc_t) +
+                             sizeof(uint64_t) /* padding for alignment */
+                             + 4 * SIZEOF_SIZE_T, /* extra info */
+                             1) < 0) {
+                       GDKfree(imprints->imprints);
+                       GDKfree(imprints);
                        GDKerror("#BATimprints: memory allocation error");
-                       GDKfree(imprints->bins);
-                       GDKfree(imprints);
                        MT_lock_unset(&GDKimprintsLock(abs(b->batCacheid)),
                                      "BATimprints");
                        return NULL;
                }
+               imprints->bins = imprints->imprints->base + 4 * SIZEOF_SIZE_T;
+               imprints->imps = (char *) imprints->bins + 64 * b->T->width;
+               imprints->dict = (void *) ((size_t) ((char *) imprints->imps + 
pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1));
 
                switch (ATOMstorage(b->T->type)) {
                case TYPE_bte:
@@ -682,59 +702,28 @@ BATimprints(BAT *b)
 
                BBPunfix(smp->batCacheid);
 
-               /* alloc heaps for imprints vectors and cache dictionary */
-               imprints->imps = (Heap *) GDKzalloc(sizeof(Heap));
-               imprints->dict = (Heap *) GDKzalloc(sizeof(Heap));
-               if (imprints->imps == NULL ||
-                   imprints->dict == NULL ||
-                   (imprints->imps->filename =
-                    GDKmalloc(strlen(nme) + 12)) == NULL ||
-                   (imprints->dict->filename =
-                    GDKmalloc(strlen(nme) + 12)) == NULL) {
-                       GDKerror("#BATimprints: memory allocation error");
-                       HEAPfree(imprints->bins);
-                       GDKfree(imprints->bins);
-                       if (imprints->imps != NULL) {
-                               if (imprints->imps->filename != NULL) {
-                                       GDKfree(imprints->imps->filename);
-                               }
-                               GDKfree(imprints->imps);
-                       }
-                       if (imprints->dict != NULL) {
-                               if (imprints->dict->filename != NULL) {
-                                       GDKfree(imprints->dict->filename);
-                               }
-                               GDKfree(imprints->dict);
-                       }
+               if (!imprints_create(b,
+                                    imprints->bins,
+                                    imprints->bits,
+                                    imprints->imps,
+                                    &imprints->impcnt,
+                                    imprints->dict,
+                                    &imprints->dictcnt)) {
+                       GDKerror("#BATimprints: failed to create imprints");
+                       HEAPfree(imprints->imprints);
+                       GDKfree(imprints->imprints);
                        GDKfree(imprints);
                        MT_lock_unset(&GDKimprintsLock(abs(b->batCacheid)),
                                      "BATimprints");
                        return NULL;
                }
-               sprintf(imprints->imps->filename, "%s.imps", nme);
-               sprintf(imprints->dict->filename, "%s.dict", nme);
-
-               /* TODO: better estimation for the size to alloc */
-               if (HEAPalloc(imprints->imps,
-                             (b->T->heap.size + IMPS_PAGE - 1) / IMPS_PAGE,
-                             imprints->bits / 8) < 0 ||
-                   HEAPalloc(imprints->dict,
-                             (b->T->heap.size + IMPS_PAGE - 1) / IMPS_PAGE,
-                             sizeof(cchdc_t)) < 0) {
-                       GDKerror("#BATimprints: memory allocation error");
-                       goto bailout;
-               }
-
-               if (!imprints_create(b,
-                                    imprints->bins->base,
-                                    imprints->bits,
-                                    imprints->imps->base,
-                                    &imprints->impcnt,
-                                    imprints->dict->base,
-                                    &imprints->dictcnt)) {
-                       GDKerror("#BATimprints: failed to create imprints");
-                       goto bailout;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to