Changeset: 6a0689b35f63 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6a0689b35f63
Removed Files:
gdk/gdk_rangejoin.mx
Modified Files:
gdk/Makefile.ag
gdk/gdk.h
gdk/gdk_join.c
monetdb5/extras/rdf/rdf_shredder.mx
monetdb5/modules/kernel/algebra.c
Branch: int128
Log Message:
Merge with default branch (changeset f1151375a58f).
diffs (truncated from 1057 to 300 lines):
diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -31,7 +31,6 @@ lib_gdk = {
gdk_qsort.c gdk_qsort_impl.h gdk_ssort.c gdk_ssort_impl.h \
gdk_storage.c gdk_bat.c \
gdk_delta.c gdk_cross.c gdk_system.c gdk_value.c \
- gdk_rangejoin.mx \
gdk_posix.c gdk_logger.c gdk_sample.c \
gdk_private.h gdk_delta.h gdk_logger.h gdk_posix.h \
gdk_system.h gdk_tm.h gdk_storage.h \
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3220,6 +3220,7 @@ gdk_export gdk_return BATcross1(BAT **r1
gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr);
gdk_export BAT *BATcross(BAT *l, BAT *r);
gdk_export BAT *BATbandjoin(BAT *l, BAT *r, const void *mnus, const void
*plus, bit li, bit hi);
+gdk_export BAT *BATrangejoin(BAT *l, BAT *rl, BAT *rh, bit li, bit hi);
gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, int nil_matches, BUN estimate);
@@ -3228,6 +3229,7 @@ gdk_export gdk_return BATsubsemijoin(BAT
gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT
*r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
+gdk_export gdk_return BATsubrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl,
BAT *rh, BAT *sl, BAT *sr, int li, int hi, BUN estimate);
gdk_export BAT *BATproject(BAT *l, BAT *r);
gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -71,20 +71,31 @@
* ranges are inclusive or not; values in the left and right
* inputs match if right - c1 <[=] left <[=] right + c2; if c1 or
* c2 is NIL, there are no matches
+ * BATsubrangejoin
+ * range-join: the right input consists of two aligned BATs,
+ * values match if the left value is between two corresponding
+ * right values; two extra Boolean parameters, li and hi,
+ * indicate whether equal values match
*/
/* Perform a bunch of sanity checks on the inputs to a join. */
static gdk_return
-joinparamcheck(BAT *l, BAT *r, BAT *sl, BAT *sr, const char *func)
+joinparamcheck(BAT *l, BAT *r1, BAT *r2, BAT *sl, BAT *sr, const char *func)
{
- if (!BAThdense(l) || !BAThdense(r)) {
+ if (!BAThdense(l) || !BAThdense(r1) || (r2 && !BAThdense(r2))) {
GDKerror("%s: inputs must have dense head.\n", func);
return GDK_FAIL;
}
- if (ATOMtype(l->ttype) != ATOMtype(r->ttype)) {
+ if (ATOMtype(l->ttype) != ATOMtype(r1->ttype) ||
+ (r2 && ATOMtype(l->ttype) != ATOMtype(r2->ttype))) {
GDKerror("%s: inputs not compatible.\n", func);
return GDK_FAIL;
}
+ if (r2 &&
+ (BATcount(r1) != BATcount(r2) || r1->hseqbase != r2->hseqbase)) {
+ GDKerror("%s: right inputs not aligned.\n", func);
+ return GDK_FAIL;
+ }
if ((sl && !BAThdense(sl)) || (sr && !BAThdense(sr))) {
GDKerror("%s: candidate lists must have dense head.\n", func);
return GDK_FAIL;
@@ -232,6 +243,9 @@ BINSEARCHFUNC(sht)
BINSEARCHFUNC(int)
BINSEARCHFUNC(oid)
BINSEARCHFUNC(lng)
+#ifdef HAVE_HGE
+BINSEARCHFUNC(hge)
+#endif
BINSEARCHFUNC(flt)
BINSEARCHFUNC(dbl)
@@ -271,6 +285,11 @@ binsearch(const oid *rcand, oid offset,
#endif
return binsearch_lng(rcand, offset, (const lng *) rvals,
lo, hi, (const lng *) v, ordering, last);
+#ifdef HAVE_HGE
+ case TYPE_hge:
+ return binsearch_hge(rcand, offset, (const hge *) rvals,
+ lo, hi, (const hge *) v, ordering, last);
+#endif
case TYPE_flt:
return binsearch_flt(rcand, offset, (const flt *) rvals,
lo, hi, (const flt *) v, ordering, last);
@@ -1475,6 +1494,12 @@ hashjoin(BAT *r1, BAT *r2, BAT *l, BAT *
break;
}
break;
+#ifdef HAVE_HGE
+ case TYPE_hge:
+ /* do we need to handle TYPE_hge, here?
*/
+ assert(0);
+ break;
+#endif
default:
if (!nil_matches && cmp(v, nil) == 0) {
lskipped = BATcount(r1) > 0;
@@ -1876,6 +1901,15 @@ bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *
((!hi || !li) && -*(const lng *)c1 == *(const lng *)c2))
return GDK_SUCCEED;
break;
+#ifdef HAVE_HGE
+ case TYPE_hge:
+ if (*(const hge *)c1 == hge_nil ||
+ *(const hge *)c2 == hge_nil ||
+ -*(const hge *)c1 > *(const hge *)c2 ||
+ ((!hi || !li) && -*(const hge *)c1 == *(const hge *)c2))
+ return GDK_SUCCEED;
+ break;
+#endif
case TYPE_flt:
if (*(const flt *)c1 == flt_nil ||
*(const flt *)c2 == flt_nil ||
@@ -1992,6 +2026,24 @@ bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *
continue;
break;
}
+#ifdef HAVE_HGE
+ case TYPE_lng: {
+ hge v1 = (hge) *(const lng *) vr, v2;
+
+ if (v1 == lng_nil)
+ continue;
+ v2 = v1;
+ v1 -= *(const lng *)c1;
+ if (*(const lng *)vl <= v1 &&
+ (!li || *(const lng *)vl != v1))
+ continue;
+ v2 += *(const lng *)c2;
+ if (*(const lng *)vl >= v2 &&
+ (!hi || *(const lng *)vl != v2))
+ continue;
+ break;
+ }
+#else
#ifdef HAVE___INT128
case TYPE_lng: {
__int128 v1 = (__int128) *(const lng *) vr, v2;
@@ -2037,6 +2089,35 @@ bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *
continue;
}
#endif
+#endif
+#ifdef HAVE_HAVE
+ case TYPE_hge: {
+ hge v1, v2;
+ int abort_on_error = 1;
+
+ if (*(const hge *)vr == hge_nil)
+ continue;
+ SUB_WITH_CHECK(hge, *(const hge *)vr,
+ hge, *(const hge *)c1,
+ hge, v1,
+ do{if(*(const hge*)c1<0)goto
nohmatch;else goto hmatch1;}while(0));
+ if (*(const hge *)vl <= v1 &&
+ (!li || *(const hge *)vl != v1))
+ continue;
+ hmatch1:
+ ADD_WITH_CHECK(hge, *(const hge *)vr,
+ hge, *(const hge *)c2,
+ hge, v2,
+ do{if(*(const hge*)c2>0)goto
nohmatch;else goto hmatch2;}while(0));
+ if (*(const hge *)vl >= v2 &&
+ (!hi || *(const hge *)vl != v2))
+ continue;
+ hmatch2:
+ break;
+ nohmatch:
+ continue;
+ }
+#endif
case TYPE_flt: {
dbl v1 = (dbl) *(const flt *) vr, v2;
@@ -2148,6 +2229,317 @@ bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *
return GDK_FAIL;
}
+static gdk_return
+rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, int
li, int hi)
+{
+ BUN lstart, lend, lcnt;
+ const oid *lcand = NULL, *lcandend = NULL;
+ BUN rstart, rend, rcnt;
+ const oid *rcand = NULL, *rcandend = NULL;
+ const char *lvals, *rlvals, *rhvals;
+ const char *lvars, *rlvars, *rhvars;
+ int lwidth, rlwidth, rhwidth;
+ const void *nil = ATOMnilptr(l->ttype);
+ int (*cmp)(const void *, const void *) = BATatoms[l->ttype].atomCmp;
+ const char *vl, *vrl, *vrh;
+ const oid *p;
+ oid lastr = 0;
+ BUN n, nr, newcap;
+ oid lo, ro;
+ int c;
+ int lskipped = 0;
+ wrd loff = 0;
+ oid lval = oid_nil, rlval = oid_nil, rhval = oid_nil;
+
+ assert(BAThdense(l));
+ assert(BAThdense(rl));
+ assert(BAThdense(rh));
+ assert(ATOMtype(l->ttype) == ATOMtype(rl->ttype));
+ assert(ATOMtype(l->ttype) == ATOMtype(rh->ttype));
+ assert(sl == NULL || sl->tsorted);
+ assert(sr == NULL || sr->tsorted);
+
+ ALGODEBUG fprintf(stderr, "#rangejoin(l=%s#" BUNFMT "[%s]%s%s,"
+ "rl=%s#" BUNFMT "[%s]%s%s,rh=%s#" BUNFMT "[%s]%s%s,"
+ "sl=%s#" BUNFMT "%s%s,sr=%s#" BUNFMT "%s%s)\n",
+ BATgetId(l), BATcount(l), ATOMname(l->ttype),
+ l->tsorted ? "-sorted" : "",
+ l->trevsorted ? "-revsorted" : "",
+ BATgetId(rl), BATcount(rl), ATOMname(rl->ttype),
+ rl->tsorted ? "-sorted" : "",
+ rl->trevsorted ? "-revsorted" : "",
+ BATgetId(rh), BATcount(rh), ATOMname(rh->ttype),
+ rh->tsorted ? "-sorted" : "",
+ rh->trevsorted ? "-revsorted" : "",
+ sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+ sl && sl->tsorted ? "-sorted" : "",
+ sl && sl->trevsorted ? "-revsorted" : "",
+ sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+ sr && sr->tsorted ? "-sorted" : "",
+ sr && sr->trevsorted ? "-revsorted" : "");
+
+ CANDINIT(l, sl, lstart, lend, lcnt, lcand, lcandend);
+ CANDINIT(rl, sr, rstart, rend, rcnt, rcand, rcandend);
+
+ lvals = l->ttype == TYPE_void ? NULL : (const char *) Tloc(l,
BUNfirst(l));
+ rlvals = rl->ttype == TYPE_void ? NULL : (const char *) Tloc(rl,
BUNfirst(rl));
+ rhvals = rh->ttype == TYPE_void ? NULL : (const char *) Tloc(rh,
BUNfirst(rh));
+ if (l->tvarsized && l->ttype) {
+ assert(rl->tvarsized && rl->ttype);
+ assert(rh->tvarsized && rh->ttype);
+ lvars = l->T->vheap->base;
+ rlvars = rl->T->vheap->base;
+ rhvars = rh->T->vheap->base;
+ } else {
+ assert(!rl->tvarsized || !rl->ttype);
+ assert(!rh->tvarsized || !rh->ttype);
+ lvars = rlvars = rhvars = NULL;
+ }
+ lwidth = l->T->width;
+ rlwidth = rl->T->width;
+ rhwidth = rh->T->width;
+
+ if (l->ttype == TYPE_void) {
+ if (l->tseqbase == oid_nil) {
+ /* trivial: nils don't match anything */
+ return GDK_SUCCEED;
+ }
+ if (lcand) {
+ lstart = 0;
+ lend = (BUN) (lcandend - lcand);
+ lvals = (const char *) lcand;
+ lcand = NULL;
+ lwidth = SIZEOF_OID;
+ }
+ loff = (wrd) l->tseqbase - (wrd) l->hseqbase;
+ }
+
+ /* nested loop implementation for range join */
+ for (;;) {
+ if (lcand) {
+ if (lcand == lcandend)
+ break;
+ lo = *lcand++;
+ vl = VALUE(l, lstart);
+ } else {
+ if (lstart == lend)
+ break;
+ if (lvals) {
+ vl = VALUE(l, lstart);
+ if (loff != 0) {
+ lval = (oid) (*(const oid *)vl + loff);
+ vl = (const char *) &lval;
+ }
+ } else {
+ lval = lstart + l->tseqbase;
+ vl = (const char *) &lval;
+ }
+ lo = lstart++ + l->hseqbase;
+ }
+ if (cmp(vl, nil) == 0)
+ continue;
+ nr = 0;
+ p = rcand;
+ n = rstart;
+ for (;;) {
+ if (rcand) {
+ if (p == rcandend)
+ break;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list