Changeset: 15e0b01de705 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=15e0b01de705
Modified Files:
        gdk/gdk_imprints.c
        gdk/gdk_private.h
Branch: Oct2014
Log Message:

Keep statistics (min,max,count) per bin for the imprints index.


diffs (145 lines):

diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -452,6 +452,12 @@ do {                                                                       \
                }                                                       \
                GETBIN##B(bin,col[i]);                                  \
                mask = IMPSsetBit(B,mask,bin);                          \
+               if (!cnt_bins[bin]++) {                                         \
+                       min_bins[bin] = max_bins[bin] = i;              \
+               } else {                                                                        \
+                       if (col[i] < col[min_bins[bin]]) min_bins[bin] = i;     \
+                       if (col[i] > col[max_bins[bin]]) max_bins[bin] = i;     \
+               }                                                                                                               \
        }                                                               \
        /* one last left */                                             \
        if (prvmask == mask && dcnt > 0 &&                              \
@@ -482,13 +488,18 @@ do {                                                                      \
 } while (0)
 
 static int
-imprints_create(BAT *b, void *inbins, bte bits,
+imprints_create(BAT *b, void *inbins, void *stats, bte bits,
                void *imps, BUN *impcnt, cchdc_t *dict, BUN *dictcnt)
 {
        BUN i;
        BUN dcnt, icnt, new;
+       oid *min_bins = (oid *) stats;
+       oid *max_bins = min_bins + 64;
+       BUN *cnt_bins = (BUN *)(max_bins + 64);
        bte bin = 0;
        dcnt = icnt = 0;
+       for (i = 0; i < 64; i++)
+               cnt_bins[i] = 0;
 
        switch (ATOMstorage(b->T->type)) {
        case TYPE_bte:
@@ -615,7 +626,8 @@ BATimprints(BAT *b)
                }
                sprintf(imprints->imprints->filename, "%s.%cimprints", nme,
                        b->batCacheid > 0 ? 't' : 'h');
-               pages = (((size_t) BATcount(b) * b->T->width) + IMPS_PAGE - 1) / IMPS_PAGE;
+               pages = (((size_t) BATcount(b) * b->T->width) + IMPS_PAGE - 1)
+                                                                             / IMPS_PAGE;
                imprints->imprints->farmid = BBPselectfarm(PERSISTENT, b->ttype,
                                                           imprintsheap);
                if ((fd = GDKfdlocate(imprints->imprints->farmid, nme, "rb",
@@ -624,6 +636,7 @@ BATimprints(BAT *b)
                        struct stat st;
                        if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
                            hdata[0] & ((size_t) 1 << 16) &&
+                           ((hdata[0] & 0xFF00) >> 8) == 1 && /* version 1 */
                            hdata[3] == (size_t) BATcount(b) &&
                            fstat(fd, &st) == 0 &&
                            st.st_size >= (off_t) (imprints->imprints->size = imprints->imprints->free = 64 * b->T->width +
@@ -637,7 +650,9 @@ BATimprints(BAT *b)
                                imprints->impcnt = (BUN) hdata[1];
                                imprints->dictcnt = (BUN) hdata[2];
                                imprints->bins = imprints->imprints->base + 4 * SIZEOF_SIZE_T;
-                               imprints->imps = (char *) imprints->bins + 64 * b->T->width;
+                               imprints->stats = (char *) imprints->bins + 64 * b->T->width;
+                               imprints->imps = (char *) imprints->stats + 64 * 2 * SIZEOF_OID
+                                                                             + 64 * SIZEOF_BUN;
                                imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1));
                                b->T->imprints = imprints;
                                close(fd);
@@ -693,15 +708,19 @@ BATimprints(BAT *b)
                if (cnt < 8)
                        imprints->bits = 8;
 
-               /* The heap we create here consists of three parts:
+               /* The heap we create here consists of four parts:
                 * bins, max 64 entries with bin boundaries, domain of b;
+                * stats, min/max/count for each bin, min/max are oid, and count BUN;
                 * imps, max one entry per "page", entry is "bits" wide;
                 * dict, max two entries per three "pages".
                 * In addition, we add some housekeeping entries at
                 * the start so that we can determine whether we can
-                * trust the imprints when encountered on startup. */
+                * trust the imprints when encountered on startup (including
+                * a version number -- CURRENT VERSION is 1 ). */
                if (HEAPalloc(imprints->imprints,
                              64 * b->T->width +
+                             64 * 2 * SIZEOF_OID +
+                             64 * SIZEOF_BUN +
                              pages * (imprints->bits / 8) +
                              pages * sizeof(cchdc_t) +
                              sizeof(uint64_t) /* padding for alignment */
@@ -715,7 +734,9 @@ BATimprints(BAT *b)
                        return NULL;
                }
                imprints->bins = imprints->imprints->base + 4 * SIZEOF_SIZE_T;
-               imprints->imps = (char *) imprints->bins + 64 * b->T->width;
+               imprints->stats = (char *) imprints->bins + 64 * b->T->width;
+               imprints->imps = (char *) imprints->stats + 64 * 2 * SIZEOF_OID +
+                                                               64 * SIZEOF_BUN;
                imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1));
 
                switch (ATOMstorage(b->T->type)) {
@@ -746,6 +767,7 @@ BATimprints(BAT *b)
 
                if (!imprints_create(b,
                                     imprints->bins,
+                                    imprints->stats,
                                     imprints->bits,
                                     imprints->imps,
                                     &imprints->impcnt,
@@ -763,13 +785,16 @@ BATimprints(BAT *b)
                assert(imprints->dictcnt <= pages);
                imprints->imprints->free = (size_t) ((char *) ((cchdc_t *) imprints->dict + imprints->dictcnt) - imprints->imprints->base);
                /* add info to heap for when they become persistent */
-               ((size_t *) imprints->imprints->base)[0] = (size_t) imprints->bits;
+               ((size_t *) imprints->imprints->base)[0] = (size_t) (imprints->bits);
                ((size_t *) imprints->imprints->base)[1] = (size_t) imprints->impcnt;
                ((size_t *) imprints->imprints->base)[2] = (size_t) imprints->dictcnt;
                ((size_t *) imprints->imprints->base)[3] = (size_t) BATcount(b);
                if (HEAPsave(imprints->imprints, nme, b->batCacheid > 0 ? "timprints" : "himprints") == 0 &&
                    (fd = GDKfdlocate(imprints->imprints->farmid, nme, "rb+",
                                      b->batCacheid > 0 ? "timprints" : "himprints")) >= 0) {
+                       /* add version number */
+                       ((size_t *) imprints->imprints->base)[0] |= (size_t) 1 << 8;
+                       /* sync-on-disk checked bit */
                        ((size_t *) imprints->imprints->base)[0] |= (size_t) 1 << 16;
                        if (write(fd, imprints->imprints->base, sizeof(size_t)) < 0)
                                perror("write imprints");
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -214,11 +214,12 @@ struct PROPrec {
 struct Imprints {
        bte bits;               /* how many bits in imprints */
        Heap *imprints;
-       void *bins;             /* pointer into imprints heap */
-       void *imps;             /* pointer into imprints heap */
-       void *dict;             /* pointer into imprints heap */
-       BUN impcnt;             /* counter for imprints*/
-       BUN dictcnt;            /* counter for cache dictionary */
+       void *bins;             /* pointer into imprints heap (bins borders)  */
+       void *stats;    /* pointer into imprints heap (stats per bin) */
+       void *imps;             /* pointer into imprints heap (bit vectors)   */
+       void *dict;             /* pointer into imprints heap (dictionary)    */
+       BUN impcnt;             /* counter for imprints                       */
+       BUN dictcnt;    /* counter for cache dictionary               */
 };
 
 typedef struct {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to