Changeset: 32828a94d868 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=32828a94d868
Modified Files:
        gdk/Makefile.ag
        gdk/gdk.h
        gdk/gdk_align.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_private.h
        gdk/gdk_select.c
        gdk/gdk_storage.c
Branch: default
Log Message:

The Column Imprints secondary index infrastructure.

Implementation to create and destroy an imprint for a column,
plus the extension for the select operator to use imprints.


diffs (truncated from 683 to 300 lines):

diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -37,6 +37,7 @@ lib_gdk = {
                gdk_system.h gdk_tm.h gdk_storage.h \
                gdk_calc.c gdk_calc.h gdk_calc_compare.h gdk_calc_private.h \
                gdk_aggr.c gdk_group.c gdk_mapreduce.c gdk_mapreduce.h \
+               gdk_imprints.c gdk_imprints.h \
                bat.feps bat1.feps bat2.feps \
                libbat.rc
        LIBS = ../common/options/libmoptions \
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -641,6 +641,16 @@ typedef struct {
        Heap *heap;             /* heap where the hash is stored */
 } Hash;
 
+typedef struct {
+       bte bits;       /* how many bits in imprints */
+       bat histogram;  /* id for histogram bat */
+       Heap *imps;     /* heap of imprints */
+       BUN impcnt;     /* counter for imprints*/
+       Heap *dict;     /* cache dictionary for compressing imprints */
+       BUN dictcnt;    /* counter for cache dictionary */
+} Imprints;
+
+
 /*
  * @+ Binary Association Tables
  * Having gone to the previous preliminary definitions, we will now
@@ -762,6 +772,7 @@ gdk_export int VALisnil(const ValRecord 
  *           int    hloc;             // byte-offset in BUN for head elements
  *           Heap   *hheap;           // heap for varsized head values
  *           Hash   *hhash;           // linear chained hash table on head
+ *           Imprints *himprints;     // column imprints index on head
  *           // Tail properties
  *           int    ttype;            // Tail type number
  *           str    tident;           // name for tail column
@@ -774,6 +785,7 @@ gdk_export int VALisnil(const ValRecord 
  *           int    tloc;             // byte-offset in BUN for tail elements
  *           Heap   *theap;           // heap for varsized tail values
  *           Hash   *thash;           // linear chained hash table on tail
+ *           Imprints *timprints;     // column imprints index on tail
  *  } BAT;
  * @end verbatim
  *
@@ -834,29 +846,30 @@ typedef struct PROPrec {
 /* see also comment near BATassertProps() for more information about
  * the properties */
 typedef struct {
-       str id;                 /* label for head/tail column */
+       str id;                         /* label for head/tail column */
 
        unsigned short width;   /* byte-width of the atom array */
-       bte type;               /* type id. */
-       bte shift;              /* log2 of bunwidth */
+       bte type;                       /* type id. */
+       bte shift;                      /* log2 of bunwidth */
        unsigned int
         varsized:1,            /* varsized (1) or fixedsized (0) */
-        key:2,                 /* duplicates allowed? */
-        dense:1,               /* OID only: only consecutive values */
-        nonil:1,               /* nonil isn't propchecked yet */
-        nil:1,                 /* there is a nil in the column */
-        sorted:1,              /* column is sorted in ascending order */
+        key:2,                         /* duplicates allowed? */
+        dense:1,                       /* OID only: only consecutive values */
+        nonil:1,                       /* nonil isn't propchecked yet */
+        nil:1,                         /* there is a nil in the column */
+        sorted:1,                      /* column is sorted in ascending order 
*/
         revsorted:1;           /* column is sorted in descending order */
-       oid align;              /* OID for sync alignment */
+       oid align;                      /* OID for sync alignment */
        BUN nokey[2];           /* positions that prove key ==FALSE */
        BUN nosorted;           /* position that proves sorted==FALSE */
        BUN norevsorted;        /* position that proves revsorted==FALSE */
        BUN nodense;            /* position that proves dense==FALSE */
-       oid seq;                /* start of dense head sequence */
+       oid seq;                        /* start of dense head sequence */
 
-       Heap heap;              /* space for the column. */
+       Heap heap;                      /* space for the column. */
        Heap *vheap;            /* space for the varsized data. */
-       Hash *hash;             /* hash table */
+       Hash *hash;                     /* hash table */
+       Imprints *imprints;     /* column imprints index */
 
        PROPrec *props;         /* list of dynamic properties stored in the bat 
descriptor */
 } COLrec;
@@ -2150,6 +2163,24 @@ gdk_export BAT *BAThashjoin(BAT *l, BAT 
 /* low level functions */
 
 #define BATprepareHash(X) (((X)->H->hash == NULL) && !BAThash(X, 0))
+
+/*
+ * @- Column Imprints Functions
+ *
+ * @multitable @columnfractions 0.08 0.7
+ * @item BAT*
+ * @tab
+ *  BATimprints (BAT *b)
+ * @end multitable
+ *
+ * The column imprints index structure.
+ *
+ */
+
+#define BATprepareImprints(X) (((X)->T->imprints == NULL) && !BATimprints(X))
+gdk_export void IMPSdestroy(BAT *b);
+gdk_export BAT *BATimprints(BAT *b);
+
 /*
  * @- Multilevel Storage Modes
  *
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -343,6 +343,9 @@ VIEWcreate_(BAT *h, BAT *t, int slice_vi
                bn->T->hash = NULL;
        else
                bn->T->hash = t->T->hash;
+       /* imprints can and must be shared */
+       bn->H->imprints = h->H->imprints;
+       bn->T->imprints = t->T->imprints;
        BBPcacheit(bs, 1);      /* enter in BBP */
        /* View of VIEW combine, ie we need to fix the head of the mirror */
        if (vc) {
@@ -462,6 +465,7 @@ BATmaterializeh(BAT *b)
 
        /* cleanup possible ACC's */
        HASHdestroy(b);
+       IMPSdestroy(b);
 
        b->H->heap.filename = NULL;
        if (HEAPalloc(&b->H->heap, cnt, sizeof(oid)) < 0) {
@@ -790,6 +794,7 @@ VIEWdestroy(BAT *b)
                HASHremove(b);
        if (b->T->hash)
                HASHremove(BATmirror(b));
+       IMPSdestroy(b);
        VIEWunlink(b);
 
        if (b->htype && !b->H->heap.parentid) {
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -487,6 +487,7 @@ BATextend(BAT *b, BUN newcap)
        if (b->T->heap.base && HEAPextend(&b->T->heap, theap_size) < 0)
                return NULL;
        HASHdestroy(b);
+       IMPSdestroy(b);
        return b;
 }
 
@@ -536,6 +537,7 @@ BATclear(BAT *b, int force)
        if (b->T->hash) {
                HASHremove(bm);
        }
+       IMPSdestroy(b);
 
        /* we must dispose of all inserted atoms */
        if (b->batDeleted == b->batInserted &&
@@ -628,6 +630,7 @@ BATfree(BAT *b)
                PROPdestroy(b->T->props);
        b->T->props = NULL;
        HASHdestroy(b);
+       IMPSdestroy(b);
        if (b->htype)
                HEAPfree(&b->H->heap);
        else
@@ -1242,6 +1245,7 @@ BUNins(BAT *b, const void *h, const void
                                HEAPwarm(b->T->vheap);
                }
        }
+       IMPSdestroy(b); /* no support for inserts in imprints yet */
        return b;
       bunins_failed:
        return NULL;
@@ -1327,6 +1331,9 @@ BUNappend(BAT *b, const void *t, bit for
                BATsetcount(b, b->batCount + 1);
        }
 
+
+       IMPSdestroy(b); /* no support for inserts in imprints yet */
+
        /* first adapt the hashes; then the user-defined accelerators.
         * REASON: some accelerator updates (qsignature) use the hashes!
         */
@@ -1514,6 +1521,7 @@ BUNdelete_(BAT *b, BUN p, bit force)
        }
        b->batCount--;
        b->batDirty = 1;        /* bat is dirty */
+       IMPSdestroy(b); /* no support for inserts in imprints yet */
        return p;
 }
 
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -496,6 +496,7 @@ BATappend(BAT *b, BAT *n, bit force)
                        return NULL;
        }
 
+       IMPSdestroy(b); /* imprints do not support updates yet */
        /* a hash is useless for void bats */
        if (b->H->hash)
                HASHremove(b);
@@ -862,6 +863,7 @@ BATtopN(BAT *b, BUN topN)
                HASHremove(b);
                BATsetcount(b, topN);
        }
+       IMPSdestroy(b);
        /* we no longer know if there are NILs */
        b->H->nil = b->htype == TYPE_void && b->hseqbase == oid_nil && topN >= 
1;
        b->T->nil = b->ttype == TYPE_void && b->tseqbase == oid_nil && topN >= 
1;
@@ -1170,6 +1172,7 @@ BATorder_internal(BAT *b, int stable, in
        }
        b->tsorted = b->trevsorted = 0;
        HASHdestroy(b);
+       IMPSdestroy(b);
        ALIGNdel(b, func, FALSE);
        b->hdense = 0;
        b->tdense = 0;
@@ -1504,6 +1507,7 @@ BATrevert(BAT *b)
                GDKfree(t);
        }
        HASHdestroy(b);
+       IMPSdestroy(b);
        /* interchange sorted and revsorted */
        x = b->hrevsorted;
        b->hrevsorted = b->hsorted;
@@ -1976,6 +1980,7 @@ BATpropagate(BAT *dst, BAT *src, int idx
                        (* (int *) BUNtloc(bni, r))++;                  \
                }                                                       \
                HASHdestroy(bn);                                        \
+               IMPSdestroy(bn);                                        \
        } while (0)
 
 BAT *
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -111,6 +111,8 @@ int VALprint(stream *fd, const ValRecord
 void VIEWdestroy(BAT *b);
 BAT *VIEWreset(BAT *b);
 void VIEWunlink(BAT *b);
+int IMPSgetbin(int tpe, bte bits, void *bins, const void *v);
+void IMPSremove(BAT *b);
 
 #define BBP_BATMASK    511
 #define BBP_THREADMASK 63
@@ -118,6 +120,7 @@ void VIEWunlink(BAT *b);
 typedef struct {
        MT_Lock swap;
        MT_Lock hash;
+       MT_Lock imprints;
 } batlock_t;
 
 typedef struct {
@@ -145,6 +148,7 @@ extern MT_Lock MT_system_lock;
 
 #define GDKswapLock(x)  GDKbatLock[(x)&BBP_BATMASK].swap
 #define GDKhashLock(x)  GDKbatLock[(x)&BBP_BATMASK].hash
+#define GDKimprintsLock(x)  GDKbatLock[(x)&BBP_BATMASK].imprints
 #define GDKtrimLock(y)  GDKbbpLock[(y)&BBP_THREADMASK].trim
 #define GDKcacheLock(y) GDKbbpLock[(y)&BBP_THREADMASK].alloc
 #define BBP_free(y)    GDKbbpLock[(y)&BBP_THREADMASK].free
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -27,6 +27,9 @@
 float nextafterf(float x, float y);
 #endif
 
+/* auxilary functions and structs for imprints */
+#include "gdk_imprints.h"
+
 #define buninsfix(B,C,A,I,T,V,G,M,R)                           \
        do {                                                    \
                if ((I) == BATcapacity(B)) {                    \
@@ -175,9 +178,137 @@ BAT_hashselect(BAT *b, BAT *s, BAT *bn, 
        return bn;
 }
 
+/* Imprints select code */
+
+/* inner check */
+#define impscheck(CAND,TEST,ADD)                                           \
+do {                                                                       \
+       e = i+limit-pr_off;                                                 \
+       if (im[icnt] & mask) {                                              \
+               if ((im[icnt] & ~innermask) == 0) {                         \
+                       while (o < e && p <= q) {                           \
+                               v = src[o];                                 \
+                               ADD;                                        \
+                               cnt++;                                      \
+                               CAND;                                       \
+                               p++;                                        \
+                       }                                                   \
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to