Changeset: a53f3b4db482 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a53f3b4db482
Modified Files:
gdk/gdk.h
gdk/gdk_imprints.c
monetdb5/modules/mal/batExtensions.c
monetdb5/modules/mal/batExtensions.h
monetdb5/modules/mal/batExtensions.mal
Branch: transaction-replication
Log Message:
Merge with default branch
diffs (250 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2197,6 +2197,7 @@ gdk_export BAT *BATimprints(BAT *b);
gdk_export lng IMPSimprintsize(BAT *b);
gdk_export BAT *BATbloom(BAT *b);
+/* Return the oids of b (restricted by candidate list s) whose tail
+ * values pass all probes of the Bloom filter bf; NULL on error. */
+gdk_export BAT *BLOOMselect(BAT *b, BAT *s, BAT *bf);
/*
* @- Multilevel Storage Modes
diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -28,6 +28,7 @@
#include "gdk.h"
#include "gdk_private.h"
#include "gdk_imprints.h"
+#include "gdk_calc_private.h"
#define BINSIZE(B, FUNC, T) do { \
switch (B) { \
@@ -1019,3 +1020,123 @@ do {
\
return bn;
}
+
+/*
+ * BLOOMselect -- keep the candidates of b that pass a Bloom filter.
+ *
+ * b   column to probe.  Its head must be dense (asserted) and the
+ *     storage type of its tail must be bte, sht, int, lng, flt or dbl.
+ * s   candidate list; it is handed to CANDINIT together with b to
+ *     derive the scan range and the candidate pointers.
+ * bf  the Bloom filter: a dense-headed BAT with a tail of type bit.
+ *     Its BATcount is used as the modulus for the probe positions.
+ *
+ * Every candidate value is hashed three times; its oid is appended to
+ * the result only if all three probed positions of bf are set.
+ *
+ * Returns a new BAT with a void head and the qualifying oids in the
+ * tail, flagged as sorted, key and nil-free.  When there is nothing to
+ * scan, or the filter is empty, a void/void BAT of count 0 is returned.
+ * Returns NULL if a column type is unsupported (after GDKerror), if a
+ * BAT cannot be allocated, or if control reaches bunins_failed
+ * (presumably the failure exit of bunfastapp).
+ *
+ * NOTE(review): values are converted to oid with a plain assignment
+ * before hashing, also for flt and dbl.  This has to match the way the
+ * filter was built -- confirm against BATbloom.
+ */
+BAT *
+BLOOMselect(BAT *b, BAT *s, BAT *bf)
+{
+	BAT *bn;
+	BUN start, end, cnt, mn;
+	const oid *cand = NULL, *candend = NULL;
+	const bit *bloom;
+
+	assert(BAThdense(b));	/* assert void head */
+	assert(BAThdense(bf));	/* assert void head */
+
+	switch (ATOMstorage(b->T->type)) {
+	case TYPE_bte:
+	case TYPE_sht:
+	case TYPE_int:
+	case TYPE_lng:
+	case TYPE_flt:
+	case TYPE_dbl:
+		break;
+	default:		/* type not supported */
+		GDKerror("#BLOOMselect: b col type not "
+			 "suitable for bloom filters.\n");
+		return NULL;	/* do nothing */
+	}
+
+	if (BATttype(bf) != TYPE_bit) {
+		GDKerror("#BLOOMselect: bf col type not "
+			 "a bloom filter.\n");
+		return NULL;	/* do nothing */
+	}
+
+	bloom = (const bit *) Tloc(bf, BUNfirst(bf));
+	mn = BATcount(bf);
+
+	CANDINIT(b, s, start, end, cnt, cand, candend);
+
+	if (start == end || mn == 0) {
+		/* trivial: empty result.  An empty filter (mn == 0)
+		 * cannot contain any value, and probing it would hand
+		 * a zero modulus to hash_mod (presumably a division
+		 * by zero). */
+		bn = BATnew(TYPE_void, TYPE_void, 0);
+		if (bn == NULL) {
+			return NULL;
+		}
+		BATsetcount(bn, 0);
+		BATseqbase(bn, 0);
+		BATseqbase(BATmirror(bn), b->hseqbase);
+		return bn;
+	}
+
+	bn = BATnew(TYPE_void, TYPE_oid, 1024);
+	if (bn == NULL) {
+		return NULL;
+	}
+
+/* Scan the candidates of b, reading the tail as an array of TYPE, and
+ * append the oid of every value whose three probes all hit a set
+ * position in bloom. */
+#define TEST_BLOOM(TYPE)						\
+do {									\
+	oid key, hv, x, y, z;	/* for hashing */			\
+	oid i, o;							\
+	const TYPE *ob = (const TYPE *) Tloc(b, BUNfirst(b));		\
+	for (;;) {							\
+		if (cand) {						\
+			/* explicit candidate list */			\
+			if (cand == candend)				\
+				break;					\
+			i = *cand++ - b->hseqbase;			\
+			if (i >= end)					\
+				break;					\
+		} else {						\
+			/* contiguous range [start, end) */		\
+			i = start++;					\
+			if (i == end)					\
+				break;					\
+		}							\
+		key = ob[i];						\
+		hash_init(key, x, y, z);				\
+		next_hash(hv, x, y, z);					\
+		if (bloom[hash_mod(hv, mn)]) {				\
+			next_hash(hv, x, y, z);				\
+			if (bloom[hash_mod(hv, mn)]) {			\
+				next_hash(hv, x, y, z);			\
+				if (bloom[hash_mod(hv, mn)]) {		\
+					o = i + b->hseqbase;		\
+					bunfastapp(bn, &o);		\
+				}					\
+			}						\
+		}							\
+	}								\
+} while (0)
+
+	switch (ATOMstorage(b->T->type)) {
+	case TYPE_bte:
+		TEST_BLOOM(bte);
+		break;
+	case TYPE_sht:
+		TEST_BLOOM(sht);
+		break;
+	case TYPE_int:
+		TEST_BLOOM(int);
+		break;
+	case TYPE_lng:
+		TEST_BLOOM(lng);
+		break;
+	case TYPE_flt:
+		TEST_BLOOM(flt);
+		break;
+	case TYPE_dbl:
+		TEST_BLOOM(dbl);
+		break;
+	default:
+		/* should never reach here: filtered by the switch above */
+		assert(0);
+	}
+#undef TEST_BLOOM
+
+	/* give the result the same head seqbase as the empty-result
+	 * path, so both kinds of result look alike to the caller */
+	BATseqbase(bn, 0);
+	/* candidates are visited in order and each at most once */
+	bn->tsorted = 1;
+	bn->trevsorted = BATcount(bn) <= 1;
+	bn->tkey = 1;
+	bn->T->nil = 0;
+	bn->T->nonil = 1;
+	return bn;
+
+bunins_failed:
+	BBPreclaim(bn);
+	return NULL;
+}
diff --git a/monetdb5/modules/mal/batExtensions.c b/monetdb5/modules/mal/batExtensions.c
--- a/monetdb5/modules/mal/batExtensions.c
+++ b/monetdb5/modules/mal/batExtensions.c
@@ -283,6 +283,7 @@ CMDBATimprints(int *ret, int *bid)
BBPkeepref(*ret = b->batCacheid);
return MAL_SUCCEED;
}
+
str
CMDBATimprintsize(lng *ret, int *bid)
{
@@ -295,3 +296,45 @@ CMDBATimprintsize(lng *ret, int *bid)
BBPreleaseref(b->batCacheid);
return MAL_SUCCEED;
}
+
+/*
+ * MAL wrapper for bat.bloom.
+ *
+ * Looks up the BAT identified by *bid, lets BATbloom build a Bloom
+ * filter for it and hands the id of the resulting BAT back through
+ * *ret.  Throws a MAL exception if the input BAT cannot be accessed or
+ * if BATbloom yields no result.
+ */
+str
+CMDBATbloom(int *ret, int *bid)
+{
+	BAT *b, *bn;
+
+	if ((b = BATdescriptor(*bid)) == NULL)
+		throw(MAL, "bat.bloom", INTERNAL_BAT_ACCESS);
+
+	bn = BATbloom(b);
+	if (bn == NULL) {
+		/* no filter was produced: drop our reference to the
+		 * input before reporting, instead of dereferencing NULL */
+		BBPunfix(b->batCacheid);
+		throw(MAL, "bat.bloom", INTERNAL_OBJ_CREATE);
+	}
+	BBPkeepref(*ret = bn->batCacheid);
+	BBPreleaseref(b->batCacheid);
+	return MAL_SUCCEED;
+}
+
+/*
+ * MAL wrapper for bat.bloomselect.
+ *
+ * Looks up the column (*bid), the candidate list (*sid) and the Bloom
+ * filter (*bfid), runs BLOOMselect on them and hands the id of the
+ * resulting BAT back through *ret.  Throws a MAL exception if any of
+ * the inputs cannot be accessed or if BLOOMselect returns NULL; every
+ * descriptor obtained so far is released again on those paths.
+ */
+str
+CMDBLOOMselect(int *ret, int *bid, int *sid, int *bfid)
+{
+	BAT *b, *s, *bf, *bn;
+
+	if ((b = BATdescriptor(*bid)) == NULL)
+		throw(MAL, "bat.bloomselect", INTERNAL_BAT_ACCESS);
+
+	if ((s = BATdescriptor(*sid)) == NULL) {
+		/* do not leak the reference already taken on b */
+		BBPunfix(b->batCacheid);
+		throw(MAL, "bat.bloomselect", INTERNAL_BAT_ACCESS);
+	}
+
+	if ((bf = BATdescriptor(*bfid)) == NULL) {
+		/* do not leak the references already taken on b and s */
+		BBPunfix(b->batCacheid);
+		BBPunfix(s->batCacheid);
+		throw(MAL, "bat.bloomselect", INTERNAL_BAT_ACCESS);
+	}
+
+	bn = BLOOMselect(b, s, bf);
+	if (bn == NULL) {
+		BBPunfix(b->batCacheid);
+		BBPunfix(s->batCacheid);
+		BBPunfix(bf->batCacheid);
+		throw(MAL, "bat.bloomselect", INTERNAL_OBJ_CREATE);
+	}
+	BBPkeepref(*ret = bn->batCacheid);
+	BBPreleaseref(b->batCacheid);
+	BBPreleaseref(s->batCacheid);
+	BBPreleaseref(bf->batCacheid);
+	return MAL_SUCCEED;
+}
diff --git a/monetdb5/modules/mal/batExtensions.h b/monetdb5/modules/mal/batExtensions.h
--- a/monetdb5/modules/mal/batExtensions.h
+++ b/monetdb5/modules/mal/batExtensions.h
@@ -44,5 +44,8 @@ be_export str CMDBATpartition(Client cnt
be_export str CMDBATpartition2(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
be_export str CMDBATimprints(int *ret, int *bid);
be_export str CMDBATimprintsize(lng *ret, int *bid);
+/* implementation of the MAL command bat.bloom */
+be_export str CMDBATbloom(int *ret, int *bid);
+/* implementation of the MAL command bat.bloomselect */
+be_export str CMDBLOOMselect(int *ret, int *bid, int *sid, int *bfid);
+
#endif /* _BAT_EXTENSIONS_ */
diff --git a/monetdb5/modules/mal/batExtensions.mal b/monetdb5/modules/mal/batExtensions.mal
--- a/monetdb5/modules/mal/batExtensions.mal
+++ b/monetdb5/modules/mal/batExtensions.mal
@@ -72,3 +72,31 @@ command bat.imprintsize(b:bat[:oid,:dbl]
address CMDBATimprintsize
comment "Return the size of the imprints";
+# bat.bloom: one signature per supported tail type, all implemented by
+# CMDBATbloom.
+command bat.bloom(b:bat[:oid,:bte]):bat[:oid,:bit]
+address CMDBATbloom;
+command bat.bloom(b:bat[:oid,:sht]):bat[:oid,:bit]
+address CMDBATbloom;
+command bat.bloom(b:bat[:oid,:int]):bat[:oid,:bit]
+address CMDBATbloom;
+command bat.bloom(b:bat[:oid,:lng]):bat[:oid,:bit]
+address CMDBATbloom;
+command bat.bloom(b:bat[:oid,:flt]):bat[:oid,:bit]
+address CMDBATbloom;
+command bat.bloom(b:bat[:oid,:dbl]):bat[:oid,:bit]
+address CMDBATbloom
+comment "Create a Bloom filter on the BAT";
+
+# bat.bloomselect: one signature per supported tail type, all
+# implemented by CMDBLOOMselect.
+command bat.bloomselect(b:bat[:oid,:bte], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid]
+address CMDBLOOMselect;
+command bat.bloomselect(b:bat[:oid,:sht], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid]
+address CMDBLOOMselect;
+command bat.bloomselect(b:bat[:oid,:int], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid]
+address CMDBLOOMselect;
+command bat.bloomselect(b:bat[:oid,:lng], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid]
+address CMDBLOOMselect;
+command bat.bloomselect(b:bat[:oid,:flt], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid]
+address CMDBLOOMselect;
+command bat.bloomselect(b:bat[:oid,:dbl], cand:bat[:oid,:oid], bloom:bat[:oid,:bit]):bat[:oid,:oid]
+address CMDBLOOMselect
+comment "Select using a Bloom filter";
+
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list