Changeset: 15e0b01de705 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=15e0b01de705
Modified Files:
gdk/gdk_imprints.c
gdk/gdk_private.h
Branch: Oct2014
Log Message:
Keep statistics (min,max,count) per bin for the imprints index.
diffs (145 lines):
diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -452,6 +452,12 @@ do { \
} \
GETBIN##B(bin,col[i]); \
mask = IMPSsetBit(B,mask,bin); \
+ if (!cnt_bins[bin]++) { \
+ min_bins[bin] = max_bins[bin] = i; \
+ } else { \
+ if (col[i] < col[min_bins[bin]]) min_bins[bin] = i; \
+ if (col[i] > col[max_bins[bin]]) max_bins[bin] = i; \
+ } \
} \
/* one last left */ \
if (prvmask == mask && dcnt > 0 && \
@@ -482,13 +488,18 @@ do { \
} while (0)
static int
-imprints_create(BAT *b, void *inbins, bte bits,
+imprints_create(BAT *b, void *inbins, void *stats, bte bits,
void *imps, BUN *impcnt, cchdc_t *dict, BUN *dictcnt)
{
BUN i;
BUN dcnt, icnt, new;
+ oid *min_bins = (oid *) stats;
+ oid *max_bins = min_bins + 64;
+ BUN *cnt_bins = (BUN *)(max_bins + 64);
bte bin = 0;
dcnt = icnt = 0;
+ for (i = 0; i < 64; i++)
+ cnt_bins[i] = 0;
switch (ATOMstorage(b->T->type)) {
case TYPE_bte:
@@ -615,7 +626,8 @@ BATimprints(BAT *b)
}
sprintf(imprints->imprints->filename, "%s.%cimprints", nme,
b->batCacheid > 0 ? 't' : 'h');
- pages = (((size_t) BATcount(b) * b->T->width) + IMPS_PAGE - 1) / IMPS_PAGE;
+ pages = (((size_t) BATcount(b) * b->T->width) + IMPS_PAGE - 1)
+ / IMPS_PAGE;
imprints->imprints->farmid = BBPselectfarm(PERSISTENT, b->ttype,
imprintsheap);
if ((fd = GDKfdlocate(imprints->imprints->farmid, nme, "rb",
@@ -624,6 +636,7 @@ BATimprints(BAT *b)
struct stat st;
if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
hdata[0] & ((size_t) 1 << 16) &&
+ ((hdata[0] & 0xFF00) >> 8) == 1 && /* version 1 */
hdata[3] == (size_t) BATcount(b) &&
fstat(fd, &st) == 0 &&
st.st_size >= (off_t) (imprints->imprints->size =
imprints->imprints->free = 64 * b->T->width +
@@ -637,7 +650,9 @@ BATimprints(BAT *b)
imprints->impcnt = (BUN) hdata[1];
imprints->dictcnt = (BUN) hdata[2];
imprints->bins = imprints->imprints->base + 4 * SIZEOF_SIZE_T;
- imprints->imps = (char *) imprints->bins + 64 * b->T->width;
+ imprints->stats = (char *) imprints->bins + 64 * b->T->width;
+ imprints->imps = (char *) imprints->stats + 64 * 2 * SIZEOF_OID
+ + 64 * SIZEOF_BUN;
imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1));
b->T->imprints = imprints;
close(fd);
@@ -693,15 +708,19 @@ BATimprints(BAT *b)
if (cnt < 8)
imprints->bits = 8;
- /* The heap we create here consists of three parts:
+ /* The heap we create here consists of four parts:
* bins, max 64 entries with bin boundaries, domain of b;
+ * stats, min/max/count for each bin, min/max are oid, and count BUN;
* imps, max one entry per "page", entry is "bits" wide;
* dict, max two entries per three "pages".
* In addition, we add some housekeeping entries at
* the start so that we can determine whether we can
- * trust the imprints when encountered on startup. */
+ * trust the imprints when encountered on startup (including
+ * a version number -- CURRENT VERSION is 1 ). */
if (HEAPalloc(imprints->imprints,
64 * b->T->width +
+ 64 * 2 * SIZEOF_OID +
+ 64 * SIZEOF_BUN +
pages * (imprints->bits / 8) +
pages * sizeof(cchdc_t) +
sizeof(uint64_t) /* padding for alignment */
@@ -715,7 +734,9 @@ BATimprints(BAT *b)
return NULL;
}
imprints->bins = imprints->imprints->base + 4 * SIZEOF_SIZE_T;
- imprints->imps = (char *) imprints->bins + 64 * b->T->width;
+ imprints->stats = (char *) imprints->bins + 64 * b->T->width;
+ imprints->imps = (char *) imprints->stats + 64 * 2 * SIZEOF_OID
+ + 64 * SIZEOF_BUN;
imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1));
switch (ATOMstorage(b->T->type)) {
@@ -746,6 +767,7 @@ BATimprints(BAT *b)
if (!imprints_create(b,
imprints->bins,
+ imprints->stats,
imprints->bits,
imprints->imps,
&imprints->impcnt,
@@ -763,13 +785,16 @@ BATimprints(BAT *b)
assert(imprints->dictcnt <= pages);
imprints->imprints->free = (size_t) ((char *) ((cchdc_t *) imprints->dict + imprints->dictcnt) - imprints->imprints->base);
/* add info to heap for when they become persistent */
- ((size_t *) imprints->imprints->base)[0] = (size_t) imprints->bits;
+ ((size_t *) imprints->imprints->base)[0] = (size_t) (imprints->bits);
((size_t *) imprints->imprints->base)[1] = (size_t) imprints->impcnt;
((size_t *) imprints->imprints->base)[2] = (size_t) imprints->dictcnt;
((size_t *) imprints->imprints->base)[3] = (size_t) BATcount(b);
if (HEAPsave(imprints->imprints, nme, b->batCacheid > 0 ?
"timprints" : "himprints") == 0 &&
(fd = GDKfdlocate(imprints->imprints->farmid, nme, "rb+",
b->batCacheid > 0 ? "timprints" :
"himprints")) >= 0) {
+ /* add version number */
+ ((size_t *) imprints->imprints->base)[0] |= (size_t) 1 << 8;
+ /* sync-on-disk checked bit */
((size_t *) imprints->imprints->base)[0] |= (size_t) 1 << 16;
if (write(fd, imprints->imprints->base, sizeof(size_t))
< 0)
perror("write imprints");
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -214,11 +214,12 @@ struct PROPrec {
struct Imprints {
bte bits; /* how many bits in imprints */
Heap *imprints;
- void *bins; /* pointer into imprints heap */
- void *imps; /* pointer into imprints heap */
- void *dict; /* pointer into imprints heap */
- BUN impcnt; /* counter for imprints*/
- BUN dictcnt; /* counter for cache dictionary */
+ void *bins; /* pointer into imprints heap (bins borders) */
+ void *stats; /* pointer into imprints heap (stats per bin) */
+ void *imps; /* pointer into imprints heap (bit vectors) */
+ void *dict; /* pointer into imprints heap (dictionary) */
+ BUN impcnt; /* counter for imprints */
+ BUN dictcnt; /* counter for cache dictionary */
};
typedef struct {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list