Changeset: 932837855b0a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=932837855b0a
Modified Files:
gdk/gdk.h
gdk/gdk_imprints.c
Branch: default
Log Message:
First take on implementing a bloom filter for a column.
diffs (134 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2196,6 +2196,8 @@ gdk_export void IMPSdestroy(BAT *b);
gdk_export BAT *BATimprints(BAT *b);
gdk_export lng IMPSimprintsize(BAT *b);
+gdk_export BAT *BATbloom(BAT *b);
+
/*
* @- Multilevel Storage Modes
*
diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -902,3 +902,118 @@ do {
\
free(s);
}
#endif
+
+/* round hashing */
+
+#define smpl_xor_rng(R,X) {\
+R = X; \
+R ^= (R<<13); \
+R ^= (R>>17); \
+R ^= (R<<5); \
+}
+
+#define hash_init(S,X,Y,Z) {\
+smpl_xor_rng(X,S); \
+smpl_xor_rng(Y,X); \
+smpl_xor_rng(Z,Y); \
+}
+
+#define next_hash(N,X,Y,Z) {\
+N = (X^(X<<3))^(Y^(Y>>19))^(Z^(Z<<6)); \
+X = Y; \
+Y = Z; \
+Z = N; \
+}
+
+#define hash_mod(V,MOD) ((V) % (MOD))
+
+BAT *
+BATbloom(BAT *b) {
+ BAT *bn;
+ BUN cnt;
+ BUN mn;
+ BUN p;
+ bit *o;
+
+ assert(BAThdense(b)); /* assert void head */
+
+ switch (ATOMstorage(b->T->type)) {
+ case TYPE_bte:
+ case TYPE_sht:
+ case TYPE_int:
+ case TYPE_lng:
+ case TYPE_flt:
+ case TYPE_dbl:
+ break;
+ default: /* type not supported */
+ GDKerror("#BATbloom: col type not "
+ "suitable for bloom filters.\n");
+ return b; /* do nothing */
+ }
+
+ BATcheck(b, "BATblooms");
+
+ cnt = BATcount(b);
+ mn = 4 * cnt; /* make it power of 2 for faster modulo */
+
+ bn = BATnew(TYPE_void, TYPE_bit, mn);
+ if (bn == NULL) {
+ GDKerror("#BATbloom: memory allocation error");
+ return NULL;
+ }
+
+ o = (bit *) Tloc(bn, BUNfirst(bn));
+ for (p = 0; (o[p] = 0) && (p < mn); p++);
+
+#define BLOOM_BUILD(TYPE) \
+do {
\
+ oid key,hv,x,y,z; /* for hashing */ \
+ TYPE *ob = (TYPE *)Tloc(b, BUNfirst(b)); \
+ for (p = 0; p < cnt; p++) { \
+ key = (oid) ob[p];
\
+ hash_init(key, x,y,z); \
+ next_hash(hv, x,y,z); \
+ o[hash_mod(hv,mn)] = 1; \
+ next_hash(hv, x,y,z); \
+ o[hash_mod(hv,mn)] = 1; \
+ next_hash(hv, x,y,z); \
+ o[hash_mod(hv,mn)] = 1; \
+ }
\
+} while (0)
+ switch (ATOMstorage(b->T->type)) {
+ case TYPE_bte:
+ BLOOM_BUILD(bte);
+ break;
+ case TYPE_sht:
+ BLOOM_BUILD(sht);
+ break;
+ case TYPE_int:
+ BLOOM_BUILD(int);
+ break;
+ case TYPE_lng:
+ BLOOM_BUILD(lng);
+ break;
+ case TYPE_flt:
+ BLOOM_BUILD(flt);
+ break;
+ case TYPE_dbl:
+ BLOOM_BUILD(dbl);
+ break;
+ default:
+ /* should never reach here */
+ assert(0);
+ }
+
+ /* property management */
+ BATsetcount(bn, mn);
+ bn->trevsorted = 0;
+ bn->tsorted = 0;
+ bn->tkey = 0;
+ bn->tdense = 0;
+ bn->hdense = 1;
+ bn->hseqbase = 0;
+ bn->hkey = 1;
+ bn->hrevsorted = bn->batCount <= 1;
+
+ return bn;
+}
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list