Changeset: f1151375a58f for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f1151375a58f
Removed Files:
gdk/gdk_rangejoin.mx
Modified Files:
gdk/Makefile.ag
gdk/gdk.h
gdk/gdk_join.c
monetdb5/extras/rdf/rdf_shredder.mx
monetdb5/modules/kernel/algebra.c
Branch: default
Log Message:
Implemented BATsubrangejoin. One more .mx file bites the dust.
diffs (truncated from 875 to 300 lines):
diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -31,7 +31,6 @@ lib_gdk = {
gdk_qsort.c gdk_qsort_impl.h gdk_ssort.c gdk_ssort_impl.h \
gdk_storage.c gdk_bat.c \
gdk_delta.c gdk_cross.c gdk_system.c gdk_value.c \
- gdk_rangejoin.mx \
gdk_posix.c gdk_logger.c gdk_sample.c \
gdk_private.h gdk_delta.h gdk_logger.h gdk_posix.h \
gdk_system.h gdk_tm.h gdk_storage.h \
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3201,6 +3201,7 @@ gdk_export gdk_return BATcross1(BAT **r1
gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr);
gdk_export BAT *BATcross(BAT *l, BAT *r);
gdk_export BAT *BATbandjoin(BAT *l, BAT *r, const void *mnus, const void
*plus, bit li, bit hi);
+gdk_export BAT *BATrangejoin(BAT *l, BAT *rl, BAT *rh, bit li, bit hi);
gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, int nil_matches, BUN estimate);
@@ -3209,6 +3210,7 @@ gdk_export gdk_return BATsubsemijoin(BAT
gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT
*r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
+gdk_export gdk_return BATsubrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl,
BAT *rh, BAT *sl, BAT *sr, int li, int hi, BUN estimate);
gdk_export BAT *BATproject(BAT *l, BAT *r);
gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -71,20 +71,31 @@
* ranges are inclusive or not; values in the left and right
* inputs match if right - c1 <[=] left <[=] right + c2; if c1 or
* c2 is NIL, there are no matches
+ * BATsubrangejoin
+ * range-join: the right input consists of two aligned BATs,
+ * values match if the left value is between two corresponding
+ * right values; two extra Boolean parameters, li and hi,
+ * indicate whether equal values match
*/
/* Perform a bunch of sanity checks on the inputs to a join. */
static gdk_return
-joinparamcheck(BAT *l, BAT *r, BAT *sl, BAT *sr, const char *func)
+joinparamcheck(BAT *l, BAT *r1, BAT *r2, BAT *sl, BAT *sr, const char *func)
{
- if (!BAThdense(l) || !BAThdense(r)) {
+ if (!BAThdense(l) || !BAThdense(r1) || (r2 && !BAThdense(r2))) {
GDKerror("%s: inputs must have dense head.\n", func);
return GDK_FAIL;
}
- if (ATOMtype(l->ttype) != ATOMtype(r->ttype)) {
+ if (ATOMtype(l->ttype) != ATOMtype(r1->ttype) ||
+ (r2 && ATOMtype(l->ttype) != ATOMtype(r2->ttype))) {
GDKerror("%s: inputs not compatible.\n", func);
return GDK_FAIL;
}
+ if (r2 &&
+ (BATcount(r1) != BATcount(r2) || r1->hseqbase != r2->hseqbase)) {
+ GDKerror("%s: right inputs not aligned.\n", func);
+ return GDK_FAIL;
+ }
if ((sl && !BAThdense(sl)) || (sr && !BAThdense(sr))) {
GDKerror("%s: candidate lists must have dense head.\n", func);
return GDK_FAIL;
@@ -2148,6 +2159,306 @@ bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *
return GDK_FAIL;
}
+static gdk_return
+rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, int
li, int hi)
+{
+ BUN lstart, lend, lcnt;
+ const oid *lcand = NULL, *lcandend = NULL;
+ BUN rstart, rend, rcnt;
+ const oid *rcand = NULL, *rcandend = NULL;
+ const char *lvals, *rlvals, *rhvals;
+ const char *lvars, *rlvars, *rhvars;
+ int lwidth, rlwidth, rhwidth;
+ const void *nil = ATOMnilptr(l->ttype);
+ int (*cmp)(const void *, const void *) = BATatoms[l->ttype].atomCmp;
+ const char *vl, *vrl, *vrh;
+ const oid *p;
+ oid lastr = 0;
+ BUN n, nr, newcap;
+ oid lo, ro;
+ int c;
+ int lskipped = 0;
+ wrd loff = 0;
+ oid lval = oid_nil, rlval = oid_nil, rhval = oid_nil;
+
+ assert(BAThdense(l));
+ assert(BAThdense(rl));
+ assert(BAThdense(rh));
+ assert(ATOMtype(l->ttype) == ATOMtype(rl->ttype));
+ assert(ATOMtype(l->ttype) == ATOMtype(rh->ttype));
+ assert(sl == NULL || sl->tsorted);
+ assert(sr == NULL || sr->tsorted);
+
+ ALGODEBUG fprintf(stderr, "#rangejoin(l=%s#" BUNFMT "[%s]%s%s,"
+ "rl=%s#" BUNFMT "[%s]%s%s,rh=%s#" BUNFMT "[%s]%s%s,"
+ "sl=%s#" BUNFMT "%s%s,sr=%s#" BUNFMT "%s%s)\n",
+ BATgetId(l), BATcount(l), ATOMname(l->ttype),
+ l->tsorted ? "-sorted" : "",
+ l->trevsorted ? "-revsorted" : "",
+ BATgetId(rl), BATcount(rl), ATOMname(rl->ttype),
+ rl->tsorted ? "-sorted" : "",
+ rl->trevsorted ? "-revsorted" : "",
+ BATgetId(rh), BATcount(rh), ATOMname(rh->ttype),
+ rh->tsorted ? "-sorted" : "",
+ rh->trevsorted ? "-revsorted" : "",
+ sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+ sl && sl->tsorted ? "-sorted" : "",
+ sl && sl->trevsorted ? "-revsorted" : "",
+ sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+ sr && sr->tsorted ? "-sorted" : "",
+ sr && sr->trevsorted ? "-revsorted" : "");
+
+ CANDINIT(l, sl, lstart, lend, lcnt, lcand, lcandend);
+ CANDINIT(rl, sr, rstart, rend, rcnt, rcand, rcandend);
+
+ lvals = l->ttype == TYPE_void ? NULL : (const char *) Tloc(l,
BUNfirst(l));
+ rlvals = rl->ttype == TYPE_void ? NULL : (const char *) Tloc(rl,
BUNfirst(rl));
+ rhvals = rh->ttype == TYPE_void ? NULL : (const char *) Tloc(rh,
BUNfirst(rh));
+ if (l->tvarsized && l->ttype) {
+ assert(rl->tvarsized && rl->ttype);
+ assert(rh->tvarsized && rh->ttype);
+ lvars = l->T->vheap->base;
+ rlvars = rl->T->vheap->base;
+ rhvars = rh->T->vheap->base;
+ } else {
+ assert(!rl->tvarsized || !rl->ttype);
+ assert(!rh->tvarsized || !rh->ttype);
+ lvars = rlvars = rhvars = NULL;
+ }
+ lwidth = l->T->width;
+ rlwidth = rl->T->width;
+ rhwidth = rh->T->width;
+
+ if (l->ttype == TYPE_void) {
+ if (l->tseqbase == oid_nil) {
+ /* trivial: nils don't match anything */
+ return GDK_SUCCEED;
+ }
+ if (lcand) {
+ lstart = 0;
+ lend = (BUN) (lcandend - lcand);
+ lvals = (const char *) lcand;
+ lcand = NULL;
+ lwidth = SIZEOF_OID;
+ }
+ loff = (wrd) l->tseqbase - (wrd) l->hseqbase;
+ }
+
+ /* nested loop implementation for range join */
+ for (;;) {
+ if (lcand) {
+ if (lcand == lcandend)
+ break;
+ lo = *lcand++;
+ vl = VALUE(l, lstart);
+ } else {
+ if (lstart == lend)
+ break;
+ if (lvals) {
+ vl = VALUE(l, lstart);
+ if (loff != 0) {
+ lval = (oid) (*(const oid *)vl + loff);
+ vl = (const char *) &lval;
+ }
+ } else {
+ lval = lstart + l->tseqbase;
+ vl = (const char *) &lval;
+ }
+ lo = lstart++ + l->hseqbase;
+ }
+ if (cmp(vl, nil) == 0)
+ continue;
+ nr = 0;
+ p = rcand;
+ n = rstart;
+ for (;;) {
+ if (rcand) {
+ if (p == rcandend)
+ break;
+ ro = *p++;
+ if (rlvals)
+ vrl = VALUE(rl, ro - rl->hseqbase);
+ else {
+ /* TYPE_void */
+ rlval = ro;
+ vrl = (const char *) &rlval;
+ }
+ if (rhvals)
+ vrh = VALUE(rh, ro - rh->hseqbase);
+ else {
+ /* TYPE_void */
+ rhval = ro;
+ vrh = (const char *) &rhval;
+ }
+ } else {
+ if (n == rend)
+ break;
+ if (rlvals) {
+ vrl = VALUE(rl, n);
+ } else {
+ /* TYPE_void */
+ rlval = n + rl->tseqbase;
+ vrl = (const char *) &rlval;
+ }
+ if (rhvals) {
+ vrh = VALUE(rh, n);
+ } else {
+ /* TYPE_void */
+ rhval = n + rh->tseqbase;
+ vrh = (const char *) &rhval;
+ }
+ ro = n++ + rl->hseqbase;
+ }
+ switch (ATOMtype(l->ttype)) {
+ case TYPE_bte:
+ if (*(const bte*)vrl == bte_nil ||
+ *(const bte*)vrh == bte_nil ||
+ *(const bte*)vl < *(const bte*)vrl ||
+ *(const bte*)vl > *(const bte*)vrh ||
+ (!li && *(const bte*)vl == *(const
bte*)vrl) ||
+ (!hi && *(const bte*)vl == *(const
bte*)vrh))
+ continue;
+ break;
+ case TYPE_sht:
+ if (*(const sht*)vrl == sht_nil ||
+ *(const sht*)vrh == sht_nil ||
+ *(const sht*)vl < *(const sht*)vrl ||
+ *(const sht*)vl > *(const sht*)vrh ||
+ (!li && *(const sht*)vl == *(const
sht*)vrl) ||
+ (!hi && *(const sht*)vl == *(const
sht*)vrh))
+ continue;
+ break;
+ case TYPE_int:
+#if SIZEOF_WRD == SIZEOF_INT
+ case TYPE_wrd:
+#endif
+ if (*(const int*)vrl == int_nil ||
+ *(const int*)vrh == int_nil ||
+ *(const int*)vl < *(const int*)vrl ||
+ *(const int*)vl > *(const int*)vrh ||
+ (!li && *(const int*)vl == *(const
int*)vrl) ||
+ (!hi && *(const int*)vl == *(const
int*)vrh))
+ continue;
+ break;
+ case TYPE_lng:
+#if SIZEOF_WRD == SIZEOF_LNG
+ case TYPE_wrd:
+#endif
+ if (*(const lng*)vrl == lng_nil ||
+ *(const lng*)vrh == lng_nil ||
+ *(const lng*)vl < *(const lng*)vrl ||
+ *(const lng*)vl > *(const lng*)vrh ||
+ (!li && *(const lng*)vl == *(const
lng*)vrl) ||
+ (!hi && *(const lng*)vl == *(const
lng*)vrh))
+ continue;
+ break;
+ case TYPE_oid:
+ if (*(const oid*)vrl == oid_nil ||
+ *(const oid*)vrh == oid_nil ||
+ *(const oid*)vl < *(const oid*)vrl ||
+ *(const oid*)vl > *(const oid*)vrh ||
+ (!li && *(const oid*)vl == *(const
oid*)vrl) ||
+ (!hi && *(const oid*)vl == *(const
oid*)vrh))
+ continue;
+ break;
+ case TYPE_flt:
+ if (*(const flt*)vrl == flt_nil ||
+ *(const flt*)vrh == flt_nil ||
+ *(const flt*)vl < *(const flt*)vrl ||
+ *(const flt*)vl > *(const flt*)vrh ||
+ (!li && *(const flt*)vl == *(const
flt*)vrl) ||
+ (!hi && *(const flt*)vl == *(const
flt*)vrh))
+ continue;
+ break;
+ case TYPE_dbl:
+ if (*(const dbl*)vrl == dbl_nil ||
+ *(const dbl*)vrh == dbl_nil ||
+ *(const dbl*)vl < *(const dbl*)vrl ||
+ *(const dbl*)vl > *(const dbl*)vrh ||
+ (!li && *(const dbl*)vl == *(const
dbl*)vrl) ||
+ (!hi && *(const dbl*)vl == *(const
dbl*)vrh))
+ continue;
+ break;
+ default:
+ if (cmp(vrl, nil) == 0 || cmp(vrh, nil) == 0)
+ continue;
+ c = cmp(vl, vrl);
+ if (c < 0 || (c == 0 && !li))
+ continue;
+ c = cmp(vl, vrh);
+ if (c > 0 || (c == 0 && !hi))
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list