Changeset: 32828a94d868 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=32828a94d868
Modified Files:
gdk/Makefile.ag
gdk/gdk.h
gdk/gdk_align.c
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_private.h
gdk/gdk_select.c
gdk/gdk_storage.c
Branch: default
Log Message:
The Column Imprints secondary index infrastructure.
Implementation to create and destroy an imprint for a column,
plus the extension for the select operator to use imprints.
diffs (truncated from 683 to 300 lines):
diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -37,6 +37,7 @@ lib_gdk = {
gdk_system.h gdk_tm.h gdk_storage.h \
gdk_calc.c gdk_calc.h gdk_calc_compare.h gdk_calc_private.h \
gdk_aggr.c gdk_group.c gdk_mapreduce.c gdk_mapreduce.h \
+ gdk_imprints.c gdk_imprints.h \
bat.feps bat1.feps bat2.feps \
libbat.rc
LIBS = ../common/options/libmoptions \
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -641,6 +641,16 @@ typedef struct {
Heap *heap; /* heap where the hash is stored */
} Hash;
+typedef struct {
+ bte bits; /* how many bits in imprints */
+ bat histogram; /* id for histogram bat */
+ Heap *imps; /* heap of imprints */
+ BUN impcnt; /* counter for imprints*/
+ Heap *dict; /* cache dictionary for compressing imprints */
+ BUN dictcnt; /* counter for cache dictionary */
+} Imprints;
+
+
/*
* @+ Binary Association Tables
* Having gone to the previous preliminary definitions, we will now
@@ -762,6 +772,7 @@ gdk_export int VALisnil(const ValRecord
* int hloc; // byte-offset in BUN for head elements
* Heap *hheap; // heap for varsized head values
* Hash *hhash; // linear chained hash table on head
+ * Imprints *himprints; // column imprints index on head
* // Tail properties
* int ttype; // Tail type number
* str tident; // name for tail column
@@ -774,6 +785,7 @@ gdk_export int VALisnil(const ValRecord
* int tloc; // byte-offset in BUN for tail elements
* Heap *theap; // heap for varsized tail values
* Hash *thash; // linear chained hash table on tail
+ * Imprints *timprints; // column imprints index on tail
* } BAT;
* @end verbatim
*
@@ -834,29 +846,30 @@ typedef struct PROPrec {
/* see also comment near BATassertProps() for more information about
* the properties */
typedef struct {
- str id; /* label for head/tail column */
+ str id; /* label for head/tail column */
unsigned short width; /* byte-width of the atom array */
- bte type; /* type id. */
- bte shift; /* log2 of bunwidth */
+ bte type; /* type id. */
+ bte shift; /* log2 of bunwidth */
unsigned int
varsized:1, /* varsized (1) or fixedsized (0) */
- key:2, /* duplicates allowed? */
- dense:1, /* OID only: only consecutive values */
- nonil:1, /* nonil isn't propchecked yet */
- nil:1, /* there is a nil in the column */
- sorted:1, /* column is sorted in ascending order */
+ key:2, /* duplicates allowed? */
+ dense:1, /* OID only: only consecutive values */
+ nonil:1, /* nonil isn't propchecked yet */
+ nil:1, /* there is a nil in the column */
+ sorted:1, /* column is sorted in ascending order
*/
revsorted:1; /* column is sorted in descending order */
- oid align; /* OID for sync alignment */
+ oid align; /* OID for sync alignment */
BUN nokey[2]; /* positions that prove key ==FALSE */
BUN nosorted; /* position that proves sorted==FALSE */
BUN norevsorted; /* position that proves revsorted==FALSE */
BUN nodense; /* position that proves dense==FALSE */
- oid seq; /* start of dense head sequence */
+ oid seq; /* start of dense head sequence */
- Heap heap; /* space for the column. */
+ Heap heap; /* space for the column. */
Heap *vheap; /* space for the varsized data. */
- Hash *hash; /* hash table */
+ Hash *hash; /* hash table */
+ Imprints *imprints; /* column imprints index */
PROPrec *props; /* list of dynamic properties stored in the bat
descriptor */
} COLrec;
@@ -2150,6 +2163,24 @@ gdk_export BAT *BAThashjoin(BAT *l, BAT
/* low level functions */
#define BATprepareHash(X) (((X)->H->hash == NULL) && !BAThash(X, 0))
+
+/*
+ * @- Column Imprints Functions
+ *
+ * @multitable @columnfractions 0.08 0.7
+ * @item BAT*
+ * @tab
+ * BATimprints (BAT *b)
+ * @end multitable
+ *
+ * The column imprints index structure.
+ *
+ */
+
+#define BATprepareImprints(X) (((X)->T->imprints == NULL) && !BATimprints(X))
+gdk_export void IMPSdestroy(BAT *b);
+gdk_export BAT *BATimprints(BAT *b);
+
/*
* @- Multilevel Storage Modes
*
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -343,6 +343,9 @@ VIEWcreate_(BAT *h, BAT *t, int slice_vi
bn->T->hash = NULL;
else
bn->T->hash = t->T->hash;
+ /* imprints can and must be shared */
+ bn->H->imprints = h->H->imprints;
+ bn->T->imprints = t->T->imprints;
BBPcacheit(bs, 1); /* enter in BBP */
/* View of VIEW combine, ie we need to fix the head of the mirror */
if (vc) {
@@ -462,6 +465,7 @@ BATmaterializeh(BAT *b)
/* cleanup possible ACC's */
HASHdestroy(b);
+ IMPSdestroy(b);
b->H->heap.filename = NULL;
if (HEAPalloc(&b->H->heap, cnt, sizeof(oid)) < 0) {
@@ -790,6 +794,7 @@ VIEWdestroy(BAT *b)
HASHremove(b);
if (b->T->hash)
HASHremove(BATmirror(b));
+ IMPSdestroy(b);
VIEWunlink(b);
if (b->htype && !b->H->heap.parentid) {
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -487,6 +487,7 @@ BATextend(BAT *b, BUN newcap)
if (b->T->heap.base && HEAPextend(&b->T->heap, theap_size) < 0)
return NULL;
HASHdestroy(b);
+ IMPSdestroy(b);
return b;
}
@@ -536,6 +537,7 @@ BATclear(BAT *b, int force)
if (b->T->hash) {
HASHremove(bm);
}
+ IMPSdestroy(b);
/* we must dispose of all inserted atoms */
if (b->batDeleted == b->batInserted &&
@@ -628,6 +630,7 @@ BATfree(BAT *b)
PROPdestroy(b->T->props);
b->T->props = NULL;
HASHdestroy(b);
+ IMPSdestroy(b);
if (b->htype)
HEAPfree(&b->H->heap);
else
@@ -1242,6 +1245,7 @@ BUNins(BAT *b, const void *h, const void
HEAPwarm(b->T->vheap);
}
}
+ IMPSdestroy(b); /* no support for inserts in imprints yet */
return b;
bunins_failed:
return NULL;
@@ -1327,6 +1331,9 @@ BUNappend(BAT *b, const void *t, bit for
BATsetcount(b, b->batCount + 1);
}
+
+ IMPSdestroy(b); /* no support for inserts in imprints yet */
+
/* first adapt the hashes; then the user-defined accelerators.
* REASON: some accelerator updates (qsignature) use the hashes!
*/
@@ -1514,6 +1521,7 @@ BUNdelete_(BAT *b, BUN p, bit force)
}
b->batCount--;
b->batDirty = 1; /* bat is dirty */
+ IMPSdestroy(b); /* no support for inserts in imprints yet */
return p;
}
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -496,6 +496,7 @@ BATappend(BAT *b, BAT *n, bit force)
return NULL;
}
+ IMPSdestroy(b); /* imprints do not support updates yet */
/* a hash is useless for void bats */
if (b->H->hash)
HASHremove(b);
@@ -862,6 +863,7 @@ BATtopN(BAT *b, BUN topN)
HASHremove(b);
BATsetcount(b, topN);
}
+ IMPSdestroy(b);
/* we no longer know if there are NILs */
b->H->nil = b->htype == TYPE_void && b->hseqbase == oid_nil && topN >=
1;
b->T->nil = b->ttype == TYPE_void && b->tseqbase == oid_nil && topN >=
1;
@@ -1170,6 +1172,7 @@ BATorder_internal(BAT *b, int stable, in
}
b->tsorted = b->trevsorted = 0;
HASHdestroy(b);
+ IMPSdestroy(b);
ALIGNdel(b, func, FALSE);
b->hdense = 0;
b->tdense = 0;
@@ -1504,6 +1507,7 @@ BATrevert(BAT *b)
GDKfree(t);
}
HASHdestroy(b);
+ IMPSdestroy(b);
/* interchange sorted and revsorted */
x = b->hrevsorted;
b->hrevsorted = b->hsorted;
@@ -1976,6 +1980,7 @@ BATpropagate(BAT *dst, BAT *src, int idx
(* (int *) BUNtloc(bni, r))++; \
} \
HASHdestroy(bn); \
+ IMPSdestroy(bn); \
} while (0)
BAT *
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -111,6 +111,8 @@ int VALprint(stream *fd, const ValRecord
void VIEWdestroy(BAT *b);
BAT *VIEWreset(BAT *b);
void VIEWunlink(BAT *b);
+int IMPSgetbin(int tpe, bte bits, void *bins, const void *v);
+void IMPSremove(BAT *b);
#define BBP_BATMASK 511
#define BBP_THREADMASK 63
@@ -118,6 +120,7 @@ void VIEWunlink(BAT *b);
typedef struct {
MT_Lock swap;
MT_Lock hash;
+ MT_Lock imprints;
} batlock_t;
typedef struct {
@@ -145,6 +148,7 @@ extern MT_Lock MT_system_lock;
#define GDKswapLock(x) GDKbatLock[(x)&BBP_BATMASK].swap
#define GDKhashLock(x) GDKbatLock[(x)&BBP_BATMASK].hash
+#define GDKimprintsLock(x) GDKbatLock[(x)&BBP_BATMASK].imprints
#define GDKtrimLock(y) GDKbbpLock[(y)&BBP_THREADMASK].trim
#define GDKcacheLock(y) GDKbbpLock[(y)&BBP_THREADMASK].alloc
#define BBP_free(y) GDKbbpLock[(y)&BBP_THREADMASK].free
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -27,6 +27,9 @@
float nextafterf(float x, float y);
#endif
+/* auxilary functions and structs for imprints */
+#include "gdk_imprints.h"
+
#define buninsfix(B,C,A,I,T,V,G,M,R) \
do { \
if ((I) == BATcapacity(B)) { \
@@ -175,9 +178,137 @@ BAT_hashselect(BAT *b, BAT *s, BAT *bn,
return bn;
}
+/* Imprints select code */
+
+/* inner check */
+#define impscheck(CAND,TEST,ADD) \
+do { \
+ e = i+limit-pr_off; \
+ if (im[icnt] & mask) { \
+ if ((im[icnt] & ~innermask) == 0) { \
+ while (o < e && p <= q) { \
+ v = src[o]; \
+ ADD; \
+ cnt++; \
+ CAND; \
+ p++; \
+ } \
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list