Changeset: d7abc7279d26 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d7abc7279d26
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_join.c
gdk/gdk_rangejoin.mx
monetdb5/modules/kernel/algebra.c
monetdb5/modules/kernel/algebra.h
Branch: default
Log Message:
Implemented subbandjoin.
I'm not sure this is called in the test set, so it has not been
exhaustively tested.
diffs (truncated from 926 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -25,7 +25,7 @@ BAT *BATappend(BAT *b, BAT *c, bit force
void BATassertProps(BAT *b);
atomDesc BATatoms[];
BAT *BATattach(int tt, const char *heapfile);
-BAT *BATbandjoin(BAT *l, BAT *r, ptr mnus, ptr plus, bit li, bit hi);
+BAT *BATbandjoin(BAT *l, BAT *r, const void *mnus, const void *plus, bit li,
bit hi);
BAT *BATcalcabsolute(BAT *b, BAT *s);
BAT *BATcalcadd(BAT *b1, BAT *b2, BAT *s, int tp, int abort_on_error);
BAT *BATcalcaddcst(BAT *b, const ValRecord *v, BAT *s, int tp, int
abort_on_error);
@@ -195,6 +195,7 @@ BAT *BATsort(BAT *b);
BAT *BATsort_rev(BAT *b);
BAT *BATssort(BAT *b);
BAT *BATssort_rev(BAT *b);
+gdk_return BATsubbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr);
gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
int nil_matches, BUN estimate);
gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl,
BAT *sr, int nil_matches, BUN estimate);
@@ -734,9 +735,9 @@ str ALGantijoin2(int *l, int *r, int *li
str ALGantiuselect1(int *result, int *bid, ptr value);
str ALGantiuselectInclusive(int *result, int *bid, ptr low, ptr high, bit
*lin, bit *rin);
str ALGavg(dbl *res, int *bid);
-str ALGbandjoin(int *result, int *lid, int *rid, ptr *minus, ptr *plus, bit
*li, bit *hi);
-str ALGbandjoin2(int *l, int *r, int *lid, int *rid, ptr *minus, ptr *plus,
bit *li, bit *hi);
-str ALGbandjoin_default(int *result, int *lid, int *rid, ptr *minus, ptr
*plus);
+str ALGbandjoin(int *result, int *lid, int *rid, const void *minus, const void
*plus, bit *li, bit *hi);
+str ALGbandjoin2(int *l, int *r, int *lid, int *rid, const void *minus, const
void *plus, bit *li, bit *hi);
+str ALGbandjoin_default(int *result, int *lid, int *rid, const void *minus,
const void *plus);
str ALGcard(lng *result, int *bid);
str ALGcopy(int *result, int *bid);
str ALGcount_bat(wrd *result, int *bid);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3206,6 +3206,7 @@ gdk_export BAT *BATouterjoin(BAT *l, BAT
gdk_export gdk_return BATcross1(BAT **r1p, BAT **r2p, BAT *l, BAT *r);
gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr);
gdk_export BAT *BATcross(BAT *l, BAT *r);
+gdk_export BAT *BATbandjoin(BAT *l, BAT *r, const void *mnus, const void
*plus, bit li, bit hi);
gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, int nil_matches, BUN estimate);
@@ -3213,6 +3214,7 @@ gdk_export gdk_return BATsubthetajoin(BA
gdk_export gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT
*r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
gdk_export BAT *BATproject(BAT *l, BAT *r);
gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -95,6 +95,7 @@ joininitresults(BAT **r1p, BAT **r2p, BU
#define VALUE(s, x) (s##vars ? \
s##vars + VarHeapVal(s##vals, (x), s##width) : \
s##vals + ((x) * s##width))
+#define FVALUE(s, x) (s##vals + ((x) * s##width))
/* Do a binary search for the first/last occurrence of v between lo and hi
* (lo inclusive, hi not inclusive) in rvals/rvars.
@@ -1733,6 +1734,350 @@ thetajoin(BAT *r1, BAT *r2, BAT *l, BAT
return GDK_FAIL;
}
+static gdk_return
+bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr,
+ const void *c1, const void *c2, int li, int hi)
+{
+ BUN lstart, lend, lcnt;
+ const oid *lcand = NULL, *lcandend = NULL;
+ BUN rstart, rend, rcnt;
+ const oid *rcand = NULL, *rcandend = NULL;
+ const char *lvals, *rvals;
+ int lwidth, rwidth;
+ const void *nil = ATOMnilptr(l->ttype);
+ int (*cmp)(const void *, const void *) = BATatoms[l->ttype].atomCmp;
+ const char *vl, *vr;
+ const oid *p;
+ oid lastr = 0; /* last value inserted into r2 */
+ BUN n, nr;
+ BUN newcap;
+ oid lo, ro;
+ int lskipped = 0; /* whether we skipped values in l */
+ BUN nils = 0; /* needed for XXX_WITH_CHECK macros */
+
+ ALGODEBUG fprintf(stderr, "#bandjoin(l=%s#" BUNFMT "[%s]%s%s,"
+ "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
+ "sr=%s#" BUNFMT "%s%s)\n",
+ BATgetId(l), BATcount(l), ATOMname(l->ttype),
+ l->tsorted ? "-sorted" : "",
+ l->trevsorted ? "-revsorted" : "",
+ BATgetId(r), BATcount(r), ATOMname(r->ttype),
+ r->tsorted ? "-sorted" : "",
+ r->trevsorted ? "-revsorted" : "",
+ sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+ sl && sl->tsorted ? "-sorted" : "",
+ sl && sl->trevsorted ? "-revsorted" : "",
+ sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+ sr && sr->tsorted ? "-sorted" : "",
+ sr && sr->trevsorted ? "-revsorted" : "");
+
+ assert(BAThdense(l));
+ assert(BAThdense(r));
+ assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
+ assert(sl == NULL || sl->tsorted);
+ assert(sr == NULL || sr->tsorted);
+
+ switch (ATOMtype(l->ttype)) {
+ case TYPE_bte:
+ if (*(const bte *)c1 == bte_nil ||
+ *(const bte *)c2 == bte_nil ||
+ -*(const bte *)c1 > *(const bte *)c2 ||
+ ((!hi || !li) && -*(const bte *)c1 == *(const bte *)c2))
+ return GDK_SUCCEED;
+ break;
+ case TYPE_sht:
+ if (*(const sht *)c1 == sht_nil ||
+ *(const sht *)c2 == sht_nil ||
+ -*(const sht *)c1 > *(const sht *)c2 ||
+ ((!hi || !li) && -*(const sht *)c1 == *(const sht *)c2))
+ return GDK_SUCCEED;
+ break;
+ case TYPE_int:
+ if (*(const int *)c1 == int_nil ||
+ *(const int *)c2 == int_nil ||
+ -*(const int *)c1 > *(const int *)c2 ||
+ ((!hi || !li) && -*(const int *)c1 == *(const int *)c2))
+ return GDK_SUCCEED;
+ break;
+ case TYPE_lng:
+ if (*(const lng *)c1 == lng_nil ||
+ *(const lng *)c2 == lng_nil ||
+ -*(const lng *)c1 > *(const lng *)c2 ||
+ ((!hi || !li) && -*(const lng *)c1 == *(const lng *)c2))
+ return GDK_SUCCEED;
+ break;
+ case TYPE_flt:
+ if (*(const flt *)c1 == flt_nil ||
+ *(const flt *)c2 == flt_nil ||
+ -*(const flt *)c1 > *(const flt *)c2 ||
+ ((!hi || !li) && -*(const flt *)c1 == *(const flt *)c2))
+ return GDK_SUCCEED;
+ break;
+ case TYPE_dbl:
+ if (*(const dbl *)c1 == dbl_nil ||
+ *(const dbl *)c2 == dbl_nil ||
+ -*(const dbl *)c1 > *(const dbl *)c2 ||
+ ((!hi || !li) && -*(const dbl *)c1 == *(const dbl *)c2))
+ return GDK_SUCCEED;
+ break;
+ default:
+ goto bailout;
+ }
+
+ CANDINIT(l, sl, lstart, lend, lcnt, lcand, lcandend);
+ CANDINIT(r, sr, rstart, rend, rcnt, rcand, rcandend);
+
+ lvals = (const char *) Tloc(l, BUNfirst(l));
+ rvals = (const char *) Tloc(r, BUNfirst(r));
+ assert(!r->tvarsized);
+ lwidth = l->T->width;
+ rwidth = r->T->width;
+
+ assert(lvals != NULL);
+ assert(rvals != NULL);
+
+ r1->tkey = 1;
+ r1->tsorted = 1;
+ r1->trevsorted = 1;
+ r2->tkey = 1;
+ r2->tsorted = 1;
+ r2->trevsorted = 1;
+
+ /* nested loop implementation for band join */
+ for (;;) {
+ if (lcand) {
+ if (lcand == lcandend)
+ break;
+ lo = *lcand++;
+ vl = FVALUE(l, lo - l->hseqbase);
+ } else {
+ if (lstart == lend)
+ break;
+ vl = FVALUE(l, lstart);
+ lo = lstart++ + l->hseqbase;
+ }
+ if (cmp(vl, nil) == 0)
+ continue;
+ nr = 0;
+ p = rcand;
+ n = rstart;
+ for (;;) {
+ if (rcand) {
+ if (p == rcandend)
+ break;
+ ro = *p++;
+ vr = FVALUE(r, ro - r->hseqbase);
+ } else {
+ if (n == rend)
+ break;
+ vr = FVALUE(r, n);
+ ro = n++ + r->hseqbase;
+ }
+ switch (ATOMtype(l->ttype)) {
+ case TYPE_bte: {
+ sht v1 = (sht) *(const bte *) vr, v2;
+
+ if (v1 == bte_nil)
+ continue;
+ v2 = v1;
+ v1 -= *(const bte *)c1;
+ if (*(const bte *)vl <= v1 &&
+ (!li || *(const bte *)vl != v1))
+ continue;
+ v2 += *(const bte *)c2;
+ if (*(const bte *)vl >= v2 &&
+ (!hi || *(const bte *)vl != v2))
+ continue;
+ break;
+ }
+ case TYPE_sht: {
+ int v1 = (int) *(const sht *) vr, v2;
+
+ if (v1 == sht_nil)
+ continue;
+ v2 = v1;
+ v1 -= *(const sht *)c1;
+ if (*(const sht *)vl <= v1 &&
+ (!li || *(const sht *)vl != v1))
+ continue;
+ v2 += *(const sht *)c2;
+ if (*(const sht *)vl >= v2 &&
+ (!hi || *(const sht *)vl != v2))
+ continue;
+ break;
+ }
+ case TYPE_int: {
+ lng v1 = (lng) *(const int *) vr, v2;
+
+ if (v1 == int_nil)
+ continue;
+ v2 = v1;
+ v1 -= *(const int *)c1;
+ if (*(const int *)vl <= v1 &&
+ (!li || *(const int *)vl != v1))
+ continue;
+ v2 += *(const int *)c2;
+ if (*(const int *)vl >= v2 &&
+ (!hi || *(const int *)vl != v2))
+ continue;
+ break;
+ }
+#ifdef HAVE___INT128
+ case TYPE_lng: {
+ __int128 v1 = (__int128) *(const lng *) vr, v2;
+
+ if (v1 == lng_nil)
+ continue;
+ v2 = v1;
+ v1 -= *(const lng *)c1;
+ if (*(const lng *)vl <= v1 &&
+ (!li || *(const lng *)vl != v1))
+ continue;
+ v2 += *(const lng *)c2;
+ if (*(const lng *)vl >= v2 &&
+ (!hi || *(const lng *)vl != v2))
+ continue;
+ break;
+ }
+#else
+ case TYPE_lng: {
+ lng v1, v2;
+ int abort_on_error = 1;
+
+ if (*(const lng *)vr == lng_nil)
+ continue;
+ SUB_WITH_CHECK(lng, *(const lng *)vr,
+ lng, *(const lng *)c1,
+ lng, v1,
+ do{if(*(const lng*)c1<0)goto
nolmatch;else goto lmatch1;}while(0));
+ if (*(const lng *)vl <= v1 &&
+ (!li || *(const lng *)vl != v1))
+ continue;
+ lmatch1:
+ ADD_WITH_CHECK(lng, *(const lng *)vr,
+ lng, *(const lng *)c2,
+ lng, v2,
+ do{if(*(const lng*)c2>0)goto
nolmatch;else goto lmatch2;}while(0));
+ if (*(const lng *)vl >= v2 &&
+ (!hi || *(const lng *)vl != v2))
+ continue;
+ lmatch2:
+ break;
+ nolmatch:
+ continue;
+ }
+#endif
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list