Changeset: 6ab7ac7f1321 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6ab7ac7f1321
Modified Files:
gdk/gdk.h
gdk/gdk_private.h
gdk/gdk_strimps.c
gdk/gdk_strimps.h
Branch: string_imprints
Log Message:
Functions to construct the string imprint for a given BAT
diffs (153 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -701,6 +701,7 @@ typedef struct {
Hash *hash; /* hash table */
Imprints *imprints; /* column imprints index */
Heap *orderidx; /* order oid index */
+ Heap *strimps; /* string imprint index */
PROPrec *props; /* list of dynamic properties stored in the bat
descriptor */
} COLrec;
@@ -772,6 +773,7 @@ typedef struct BATiter {
#define thash T.hash
#define timprints T.imprints
#define tprops T.props
+#define tstrimps T.strimps
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -25,7 +25,8 @@ enum heaptype {
varheap,
hashheap,
imprintsheap,
- orderidxheap
+ orderidxheap,
+ strimpheap
};
#ifdef GDKLIBRARY_OLDDATE
diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
--- a/gdk/gdk_strimps.c
+++ b/gdk/gdk_strimps.c
@@ -267,3 +267,85 @@ GDKstrimp_make_bitstring(const str s, St
return ret;
}
+
+/* Create the heap that stores the string imprint of BAT b.
+ *
+ * The heap is an array of uint64_t words laid out as:
+ *   word 0                     : STRIMP_VERSION
+ *   words 1..STRIMP_OFFSET-1   : the byte pairs of header h, packed
+ *                                several to a word, first pair in the
+ *                                most significant bits
+ *   following BATcount(b) words: room for one imprint per value; these
+ *                                are left for the caller to fill in
+ *
+ * On return the heap's free mark is positioned right after the packed
+ * header. Returns NULL on failure; the caller owns the returned heap.
+ */
+static Heap *
+createStrimpheap(BAT *b, StrimpHeader *h)
+{
+	Heap *r = NULL;
+	uint64_t *d;
+	size_t i, j;
+	/* Evaluate the macro once in its own initializer so that its
+	 * expansion cannot interact with surrounding operators. */
+	const size_t offset = STRIMP_OFFSET;
+	const size_t pairs_per_word = sizeof(uint64_t) / sizeof(h->bytepairs[0]);
+	const size_t bits_per_pair = 8 * sizeof(h->bytepairs[0]);
+	const char *nme;
+
+	nme = GDKinmemory(b->theap.farmid) ? ":memory:" : BBP_physical(b->batCacheid);
+	if ((r = GDKzalloc(sizeof(Heap))) == NULL ||
+	    (r->farmid = BBPselectfarm(b->batRole, b->ttype, strimpheap)) < 0 ||
+	    strconcat_len(r->filename, sizeof(r->filename),
+			  nme, ".strimp", NULL) >= sizeof(r->filename) ||
+	    HEAPalloc(r, BATcount(b) + offset, sizeof(uint64_t)) != GDK_SUCCEED) {
+		GDKfree(r);
+		return NULL;
+	}
+	/* free is a byte offset: version word plus packed header */
+	r->free = offset * sizeof(uint64_t);
+
+	d = (uint64_t *)r->base;
+	*d++ = STRIMP_VERSION;
+	/* This loop assumes that a header entry is narrower than 64 bits
+	 * and that STRIMP_HEADER_SIZE is a multiple of the number of
+	 * entries per word (true for uint16_t byte pairs).
+	 * TODO: generalize.
+	 */
+	for (i = 0; i < STRIMP_HEADER_SIZE; i += pairs_per_word) {
+		uint64_t word = 0;
+
+		for (j = 0; j < pairs_per_word; j++) {
+			word <<= bits_per_pair;
+			word |= h->bytepairs[i + j];
+		}
+		/* advance to the next word so earlier pairs are kept */
+		*d++ = word;
+	}
+	return r;
+}
+
+/* Build the string imprint for the string BAT b and attach it to the
+ * BAT as b->tstrimps.
+ *
+ * A header is computed for b, a heap is created for it, and one 64-bit
+ * imprint per value is written after the version word and packed
+ * header; nil values get an all-zero imprint.
+ *
+ * Returns GDK_FAIL when the header or the heap cannot be created and
+ * GDK_SUCCEED otherwise.
+ *
+ * NOTE(review): an already present b->tstrimps is overwritten without
+ * being released -- confirm callers only invoke this on BATs without
+ * a string imprint.
+ */
+gdk_return
+GDKstrimp_create_strimp(BAT *b)
+{
+	lng t0 = 0;
+	BATiter bi;
+	BUN i;
+	str s;
+	StrimpHeader *head;
+	Heap *h;
+	uint64_t *dh;
+	/* number of uint64_t words (version + header) before the imprints */
+	const size_t offset = STRIMP_OFFSET;
+
+	assert(b->ttype == TYPE_str);
+	TRC_DEBUG_IF(ALGO) t0 = GDKusec();
+
+	if ((head = create_header(b)) == NULL) {
+		return GDK_FAIL;
+	}
+
+	if ((h = createStrimpheap(b, head)) == NULL) {
+		GDKfree(head);
+		return GDK_FAIL;
+	}
+	/* Index in words, not bytes: h->free is a byte count and must not
+	 * be added to a uint64_t pointer. */
+	dh = (uint64_t *)h->base + offset;
+
+	bi = bat_iterator(b);
+	for (i = 0; i < b->batCount; i++) {
+		s = (str)BUNtvar(bi, i);
+		if (!strNil(s))
+			*dh++ = GDKstrimp_make_bitstring(s, head);
+		else
+			*dh++ = 0; /* no pairs in nil values */
+	}
+	/* account for the imprints that were just written */
+	h->free = (offset + b->batCount) * sizeof(uint64_t);
+
+	/* The header is not retained anywhere; release it like the error
+	 * path above does. */
+	GDKfree(head);
+
+	/* After we have computed the strimp, attempt to write it back
+	 * to the BAT.
+	 */
+	MT_lock_set(&b->batIdxLock);
+	b->tstrimps = h;
+	b->batDirtydesc = true;
+	/* persistStrimp(b) */
+	MT_lock_unset(&b->batIdxLock);
+
+	TRC_DEBUG(ALGO, "strimp creation took " LLFMT " usec\n", GDKusec()-t0);
+	return GDK_SUCCEED;
+}
diff --git a/gdk/gdk_strimps.h b/gdk/gdk_strimps.h
--- a/gdk/gdk_strimps.h
+++ b/gdk/gdk_strimps.h
@@ -11,19 +11,25 @@
#include <stdint.h>
+
+#define STRIMP_VERSION (uint64_t)1
/* Count the occurences of pairs of bytes. This is a compromise between
* just handling ASCII and full UTF-8 support.
*/
#define STRIMP_HISTSIZE 256*256
-#define STRIMP_SIZE 64
+#define STRIMP_HEADER_SIZE 64
+/* Number of uint64_t words that precede the per-value imprints: one
+ * word for the version plus the header's byte pairs packed into 64-bit
+ * words. The expansion is parenthesized so that the macro can be used
+ * inside larger expressions such as STRIMP_OFFSET * sizeof(uint64_t).
+ */
+#define STRIMP_OFFSET (1 + STRIMP_HEADER_SIZE*sizeof(DataPair)/sizeof(uint64_t))
+
+typedef uint16_t DataPair;
typedef struct {
// TODO: find a better name for this
- uint16_t bytepairs[STRIMP_SIZE];
+ DataPair bytepairs[STRIMP_HEADER_SIZE];
} StrimpHeader;
gdk_export gdk_return GDKstrimp_ndigrams(BAT *b, size_t *n); // Remove?
gdk_export gdk_return GDKstrimp_make_histogram(BAT *b, uint64_t *hist, size_t
hist_size, size_t *nbins); // make static
// gdk_export gdk_return GDKstrimp_make_header(StrimpHeader *h, uint64_t
*hist, size_t hist_size); // make static
-gdk_export gdk_return GDKstrimp_make_header(BAT *b);
+//gdk_export gdk_return GDKstrimp_make_header(BAT *b);
+gdk_export gdk_return GDKstrimp_create_strimp(BAT *b);
#endif /* _GDK_STRIMPS_H_ */
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list