Changeset: d7abc7279d26 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d7abc7279d26
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_join.c
        gdk/gdk_rangejoin.mx
        monetdb5/modules/kernel/algebra.c
        monetdb5/modules/kernel/algebra.h
Branch: default
Log Message:

Implemented subbandjoin.
I'm not sure this is called in the test set, so it has not been
exhaustively tested.


diffs (truncated from 926 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -25,7 +25,7 @@ BAT *BATappend(BAT *b, BAT *c, bit force
 void BATassertProps(BAT *b);
 atomDesc BATatoms[];
 BAT *BATattach(int tt, const char *heapfile);
-BAT *BATbandjoin(BAT *l, BAT *r, ptr mnus, ptr plus, bit li, bit hi);
+BAT *BATbandjoin(BAT *l, BAT *r, const void *mnus, const void *plus, bit li, 
bit hi);
 BAT *BATcalcabsolute(BAT *b, BAT *s);
 BAT *BATcalcadd(BAT *b1, BAT *b2, BAT *s, int tp, int abort_on_error);
 BAT *BATcalcaddcst(BAT *b, const ValRecord *v, BAT *s, int tp, int 
abort_on_error);
@@ -195,6 +195,7 @@ BAT *BATsort(BAT *b);
 BAT *BATsort_rev(BAT *b);
 BAT *BATssort(BAT *b);
 BAT *BATssort_rev(BAT *b);
+gdk_return BATsubbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
 gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr);
 gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
int nil_matches, BUN estimate);
 gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, 
BAT *sr, int nil_matches, BUN estimate);
@@ -734,9 +735,9 @@ str ALGantijoin2(int *l, int *r, int *li
 str ALGantiuselect1(int *result, int *bid, ptr value);
 str ALGantiuselectInclusive(int *result, int *bid, ptr low, ptr high, bit 
*lin, bit *rin);
 str ALGavg(dbl *res, int *bid);
-str ALGbandjoin(int *result, int *lid, int *rid, ptr *minus, ptr *plus, bit 
*li, bit *hi);
-str ALGbandjoin2(int *l, int *r, int *lid, int *rid, ptr *minus, ptr *plus, 
bit *li, bit *hi);
-str ALGbandjoin_default(int *result, int *lid, int *rid, ptr *minus, ptr 
*plus);
+str ALGbandjoin(int *result, int *lid, int *rid, const void *minus, const void 
*plus, bit *li, bit *hi);
+str ALGbandjoin2(int *l, int *r, int *lid, int *rid, const void *minus, const 
void *plus, bit *li, bit *hi);
+str ALGbandjoin_default(int *result, int *lid, int *rid, const void *minus, 
const void *plus);
 str ALGcard(lng *result, int *bid);
 str ALGcopy(int *result, int *bid);
 str ALGcount_bat(wrd *result, int *bid);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3206,6 +3206,7 @@ gdk_export BAT *BATouterjoin(BAT *l, BAT
 gdk_export gdk_return BATcross1(BAT **r1p, BAT **r2p, BAT *l, BAT *r);
 gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr);
 gdk_export BAT *BATcross(BAT *l, BAT *r);
+gdk_export BAT *BATbandjoin(BAT *l, BAT *r, const void *mnus, const void 
*plus, bit li, bit hi);
 
 gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, 
BAT *sl, BAT *sr, int nil_matches, BUN estimate);
@@ -3213,6 +3214,7 @@ gdk_export gdk_return BATsubthetajoin(BA
 gdk_export gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT 
*r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
 gdk_export BAT *BATproject(BAT *l, BAT *r);
 
 gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -95,6 +95,7 @@ joininitresults(BAT **r1p, BAT **r2p, BU
 #define VALUE(s, x)    (s##vars ? \
                         s##vars + VarHeapVal(s##vals, (x), s##width) : \
                         s##vals + ((x) * s##width))
+#define FVALUE(s, x)   (s##vals + ((x) * s##width))
 
 /* Do a binary search for the first/last occurrence of v between lo and hi
  * (lo inclusive, hi not inclusive) in rvals/rvars.
@@ -1733,6 +1734,350 @@ thetajoin(BAT *r1, BAT *r2, BAT *l, BAT 
        return GDK_FAIL;
 }
 
+static gdk_return
+bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr,
+        const void *c1, const void *c2, int li, int hi)
+{
+       BUN lstart, lend, lcnt;
+       const oid *lcand = NULL, *lcandend = NULL;
+       BUN rstart, rend, rcnt;
+       const oid *rcand = NULL, *rcandend = NULL;
+       const char *lvals, *rvals;
+       int lwidth, rwidth;
+       const void *nil = ATOMnilptr(l->ttype);
+       int (*cmp)(const void *, const void *) = BATatoms[l->ttype].atomCmp;
+       const char *vl, *vr;
+       const oid *p;
+       oid lastr = 0;          /* last value inserted into r2 */
+       BUN n, nr;
+       BUN newcap;
+       oid lo, ro;
+       int lskipped = 0;       /* whether we skipped values in l */
+       BUN nils = 0;           /* needed for XXX_WITH_CHECK macros */
+
+       ALGODEBUG fprintf(stderr, "#bandjoin(l=%s#" BUNFMT "[%s]%s%s,"
+                         "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
+                         "sr=%s#" BUNFMT "%s%s)\n",
+                         BATgetId(l), BATcount(l), ATOMname(l->ttype),
+                         l->tsorted ? "-sorted" : "",
+                         l->trevsorted ? "-revsorted" : "",
+                         BATgetId(r), BATcount(r), ATOMname(r->ttype),
+                         r->tsorted ? "-sorted" : "",
+                         r->trevsorted ? "-revsorted" : "",
+                         sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+                         sl && sl->tsorted ? "-sorted" : "",
+                         sl && sl->trevsorted ? "-revsorted" : "",
+                         sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+                         sr && sr->tsorted ? "-sorted" : "",
+                         sr && sr->trevsorted ? "-revsorted" : "");
+
+       assert(BAThdense(l));
+       assert(BAThdense(r));
+       assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
+       assert(sl == NULL || sl->tsorted);
+       assert(sr == NULL || sr->tsorted);
+
+       switch (ATOMtype(l->ttype)) {
+       case TYPE_bte:
+               if (*(const bte *)c1 == bte_nil ||
+                   *(const bte *)c2 == bte_nil ||
+                   -*(const bte *)c1 > *(const bte *)c2 ||
+                   ((!hi || !li) && -*(const bte *)c1 == *(const bte *)c2))
+                       return GDK_SUCCEED;
+               break;
+       case TYPE_sht:
+               if (*(const sht *)c1 == sht_nil ||
+                   *(const sht *)c2 == sht_nil ||
+                   -*(const sht *)c1 > *(const sht *)c2 ||
+                   ((!hi || !li) && -*(const sht *)c1 == *(const sht *)c2))
+                       return GDK_SUCCEED;
+               break;
+       case TYPE_int:
+               if (*(const int *)c1 == int_nil ||
+                   *(const int *)c2 == int_nil ||
+                   -*(const int *)c1 > *(const int *)c2 ||
+                   ((!hi || !li) && -*(const int *)c1 == *(const int *)c2))
+                       return GDK_SUCCEED;
+               break;
+       case TYPE_lng:
+               if (*(const lng *)c1 == lng_nil ||
+                   *(const lng *)c2 == lng_nil ||
+                   -*(const lng *)c1 > *(const lng *)c2 ||
+                   ((!hi || !li) && -*(const lng *)c1 == *(const lng *)c2))
+                       return GDK_SUCCEED;
+               break;
+       case TYPE_flt:
+               if (*(const flt *)c1 == flt_nil ||
+                   *(const flt *)c2 == flt_nil ||
+                   -*(const flt *)c1 > *(const flt *)c2 ||
+                   ((!hi || !li) && -*(const flt *)c1 == *(const flt *)c2))
+                       return GDK_SUCCEED;
+               break;
+       case TYPE_dbl:
+               if (*(const dbl *)c1 == dbl_nil ||
+                   *(const dbl *)c2 == dbl_nil ||
+                   -*(const dbl *)c1 > *(const dbl *)c2 ||
+                   ((!hi || !li) && -*(const dbl *)c1 == *(const dbl *)c2))
+                       return GDK_SUCCEED;
+               break;
+       default:
+               goto bailout;
+       }
+
+       CANDINIT(l, sl, lstart, lend, lcnt, lcand, lcandend);
+       CANDINIT(r, sr, rstart, rend, rcnt, rcand, rcandend);
+
+       lvals = (const char *) Tloc(l, BUNfirst(l));
+       rvals = (const char *) Tloc(r, BUNfirst(r));
+       assert(!r->tvarsized);
+       lwidth = l->T->width;
+       rwidth = r->T->width;
+
+       assert(lvals != NULL);
+       assert(rvals != NULL);
+
+       r1->tkey = 1;
+       r1->tsorted = 1;
+       r1->trevsorted = 1;
+       r2->tkey = 1;
+       r2->tsorted = 1;
+       r2->trevsorted = 1;
+
+       /* nested loop implementation for band join */
+       for (;;) {
+               if (lcand) {
+                       if (lcand == lcandend)
+                               break;
+                       lo = *lcand++;
+                       vl = FVALUE(l, lo - l->hseqbase);
+               } else {
+                       if (lstart == lend)
+                               break;
+                       vl = FVALUE(l, lstart);
+                       lo = lstart++ + l->hseqbase;
+               }
+               if (cmp(vl, nil) == 0)
+                       continue;
+               nr = 0;
+               p = rcand;
+               n = rstart;
+               for (;;) {
+                       if (rcand) {
+                               if (p == rcandend)
+                                       break;
+                               ro = *p++;
+                               vr = FVALUE(r, ro - r->hseqbase);
+                       } else {
+                               if (n == rend)
+                                       break;
+                               vr = FVALUE(r, n);
+                               ro = n++ + r->hseqbase;
+                       }
+                       switch (ATOMtype(l->ttype)) {
+                       case TYPE_bte: {
+                               sht v1 = (sht) *(const bte *) vr, v2;
+
+                               if (v1 == bte_nil)
+                                       continue;
+                               v2 = v1;
+                               v1 -= *(const bte *)c1;
+                               if (*(const bte *)vl <= v1 &&
+                                   (!li || *(const bte *)vl != v1))
+                                       continue;
+                               v2 += *(const bte *)c2;
+                               if (*(const bte *)vl >= v2 &&
+                                   (!hi || *(const bte *)vl != v2))
+                                       continue;
+                               break;
+                       }
+                       case TYPE_sht: {
+                               int v1 = (int) *(const sht *) vr, v2;
+
+                               if (v1 == sht_nil)
+                                       continue;
+                               v2 = v1;
+                               v1 -= *(const sht *)c1;
+                               if (*(const sht *)vl <= v1 &&
+                                   (!li || *(const sht *)vl != v1))
+                                       continue;
+                               v2 += *(const sht *)c2;
+                               if (*(const sht *)vl >= v2 &&
+                                   (!hi || *(const sht *)vl != v2))
+                                       continue;
+                               break;
+                       }
+                       case TYPE_int: {
+                               lng v1 = (lng) *(const int *) vr, v2;
+
+                               if (v1 == int_nil)
+                                       continue;
+                               v2 = v1;
+                               v1 -= *(const int *)c1;
+                               if (*(const int *)vl <= v1 &&
+                                   (!li || *(const int *)vl != v1))
+                                       continue;
+                               v2 += *(const int *)c2;
+                               if (*(const int *)vl >= v2 &&
+                                   (!hi || *(const int *)vl != v2))
+                                       continue;
+                               break;
+                       }
+#ifdef HAVE___INT128
+                       case TYPE_lng: {
+                               __int128 v1 = (__int128) *(const lng *) vr, v2;
+
+                               if (v1 == lng_nil)
+                                       continue;
+                               v2 = v1;
+                               v1 -= *(const lng *)c1;
+                               if (*(const lng *)vl <= v1 &&
+                                   (!li || *(const lng *)vl != v1))
+                                       continue;
+                               v2 += *(const lng *)c2;
+                               if (*(const lng *)vl >= v2 &&
+                                   (!hi || *(const lng *)vl != v2))
+                                       continue;
+                               break;
+                       }
+#else
+                       case TYPE_lng: {
+                               lng v1, v2;
+                               int abort_on_error = 1;
+
+                               if (*(const lng *)vr == lng_nil)
+                                       continue;
+                               SUB_WITH_CHECK(lng, *(const lng *)vr,
+                                              lng, *(const lng *)c1,
+                                              lng, v1,
+                                              do{if(*(const lng*)c1<0)goto 
nolmatch;else goto lmatch1;}while(0));
+                               if (*(const lng *)vl <= v1 &&
+                                   (!li || *(const lng *)vl != v1))
+                                       continue;
+                         lmatch1:
+                               ADD_WITH_CHECK(lng, *(const lng *)vr,
+                                              lng, *(const lng *)c2,
+                                              lng, v2,
+                                              do{if(*(const lng*)c2>0)goto 
nolmatch;else goto lmatch2;}while(0));
+                               if (*(const lng *)vl >= v2 &&
+                                   (!hi || *(const lng *)vl != v2))
+                                       continue;
+                         lmatch2:
+                               break;
+                         nolmatch:
+                               continue;
+                       }
+#endif
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to