Changeset: f1151375a58f for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f1151375a58f
Removed Files:
        gdk/gdk_rangejoin.mx
Modified Files:
        gdk/Makefile.ag
        gdk/gdk.h
        gdk/gdk_join.c
        monetdb5/extras/rdf/rdf_shredder.mx
        monetdb5/modules/kernel/algebra.c
Branch: default
Log Message:

Implemented BATsubrangejoin.  One more .mx file bites the dust.


diffs (truncated from 875 to 300 lines):

diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -31,7 +31,6 @@ lib_gdk = {
                gdk_qsort.c gdk_qsort_impl.h gdk_ssort.c gdk_ssort_impl.h \
                gdk_storage.c gdk_bat.c \
                gdk_delta.c gdk_cross.c gdk_system.c gdk_value.c \
-               gdk_rangejoin.mx \
                gdk_posix.c gdk_logger.c gdk_sample.c \
                gdk_private.h gdk_delta.h gdk_logger.h gdk_posix.h \
                gdk_system.h gdk_tm.h gdk_storage.h \
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3201,6 +3201,7 @@ gdk_export gdk_return BATcross1(BAT **r1
 gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr);
 gdk_export BAT *BATcross(BAT *l, BAT *r);
 gdk_export BAT *BATbandjoin(BAT *l, BAT *r, const void *mnus, const void 
*plus, bit li, bit hi);
+gdk_export BAT *BATrangejoin(BAT *l, BAT *rl, BAT *rh, bit li, bit hi);
 
 gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, 
BAT *sl, BAT *sr, int nil_matches, BUN estimate);
@@ -3209,6 +3210,7 @@ gdk_export gdk_return BATsubsemijoin(BAT
 gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT 
*r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
+gdk_export gdk_return BATsubrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, 
BAT *rh, BAT *sl, BAT *sr, int li, int hi, BUN estimate);
 gdk_export BAT *BATproject(BAT *l, BAT *r);
 
 gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -71,20 +71,31 @@
  *     ranges are inclusive or not; values in the left and right
  *     inputs match if right - c1 <[=] left <[=] right + c2; if c1 or
  *     c2 is NIL, there are no matches
+ * BATsubrangejoin
+ *     range-join: the right input consists of two aligned BATs,
+ *     values match if the left value is between two corresponding
+ *     right values; two extra Boolean parameters, li and hi,
+ *     indicate whether equal values match
  */
 
 /* Perform a bunch of sanity checks on the inputs to a join. */
 static gdk_return
-joinparamcheck(BAT *l, BAT *r, BAT *sl, BAT *sr, const char *func)
+joinparamcheck(BAT *l, BAT *r1, BAT *r2, BAT *sl, BAT *sr, const char *func)
 {
-       if (!BAThdense(l) || !BAThdense(r)) {
+       if (!BAThdense(l) || !BAThdense(r1) || (r2 && !BAThdense(r2))) {
                GDKerror("%s: inputs must have dense head.\n", func);
                return GDK_FAIL;
        }
-       if (ATOMtype(l->ttype) != ATOMtype(r->ttype)) {
+       if (ATOMtype(l->ttype) != ATOMtype(r1->ttype) ||
+           (r2 && ATOMtype(l->ttype) != ATOMtype(r2->ttype))) {
                GDKerror("%s: inputs not compatible.\n", func);
                return GDK_FAIL;
        }
+       if (r2 &&
+           (BATcount(r1) != BATcount(r2) || r1->hseqbase != r2->hseqbase)) {
+               GDKerror("%s: right inputs not aligned.\n", func);
+               return GDK_FAIL;
+       }
        if ((sl && !BAThdense(sl)) || (sr && !BAThdense(sr))) {
                GDKerror("%s: candidate lists must have dense head.\n", func);
                return GDK_FAIL;
@@ -2148,6 +2159,306 @@ bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *
        return GDK_FAIL;
 }
 
+static gdk_return
+rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, int 
li, int hi)
+{
+       BUN lstart, lend, lcnt;
+       const oid *lcand = NULL, *lcandend = NULL;
+       BUN rstart, rend, rcnt;
+       const oid *rcand = NULL, *rcandend = NULL;
+       const char *lvals, *rlvals, *rhvals;
+       const char *lvars, *rlvars, *rhvars;
+       int lwidth, rlwidth, rhwidth;
+       const void *nil = ATOMnilptr(l->ttype);
+       int (*cmp)(const void *, const void *) = BATatoms[l->ttype].atomCmp;
+       const char *vl, *vrl, *vrh;
+       const oid *p;
+       oid lastr = 0;
+       BUN n, nr, newcap;
+       oid lo, ro;
+       int c;
+       int lskipped = 0;
+       wrd loff = 0;
+       oid lval = oid_nil, rlval = oid_nil, rhval = oid_nil;
+
+       assert(BAThdense(l));
+       assert(BAThdense(rl));
+       assert(BAThdense(rh));
+       assert(ATOMtype(l->ttype) == ATOMtype(rl->ttype));
+       assert(ATOMtype(l->ttype) == ATOMtype(rh->ttype));
+       assert(sl == NULL || sl->tsorted);
+       assert(sr == NULL || sr->tsorted);
+
+       ALGODEBUG fprintf(stderr, "#rangejoin(l=%s#" BUNFMT "[%s]%s%s,"
+                         "rl=%s#" BUNFMT "[%s]%s%s,rh=%s#" BUNFMT "[%s]%s%s,"
+                         "sl=%s#" BUNFMT "%s%s,sr=%s#" BUNFMT "%s%s)\n",
+                         BATgetId(l), BATcount(l), ATOMname(l->ttype),
+                         l->tsorted ? "-sorted" : "",
+                         l->trevsorted ? "-revsorted" : "",
+                         BATgetId(rl), BATcount(rl), ATOMname(rl->ttype),
+                         rl->tsorted ? "-sorted" : "",
+                         rl->trevsorted ? "-revsorted" : "",
+                         BATgetId(rh), BATcount(rh), ATOMname(rh->ttype),
+                         rh->tsorted ? "-sorted" : "",
+                         rh->trevsorted ? "-revsorted" : "",
+                         sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+                         sl && sl->tsorted ? "-sorted" : "",
+                         sl && sl->trevsorted ? "-revsorted" : "",
+                         sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+                         sr && sr->tsorted ? "-sorted" : "",
+                         sr && sr->trevsorted ? "-revsorted" : "");
+
+       CANDINIT(l, sl, lstart, lend, lcnt, lcand, lcandend);
+       CANDINIT(rl, sr, rstart, rend, rcnt, rcand, rcandend);
+
+       lvals = l->ttype == TYPE_void ? NULL : (const char *) Tloc(l, 
BUNfirst(l));
+       rlvals = rl->ttype == TYPE_void ? NULL : (const char *) Tloc(rl, 
BUNfirst(rl));
+       rhvals = rh->ttype == TYPE_void ? NULL : (const char *) Tloc(rh, 
BUNfirst(rh));
+       if (l->tvarsized && l->ttype) {
+               assert(rl->tvarsized && rl->ttype);
+               assert(rh->tvarsized && rh->ttype);
+               lvars = l->T->vheap->base;
+               rlvars = rl->T->vheap->base;
+               rhvars = rh->T->vheap->base;
+       } else {
+               assert(!rl->tvarsized || !rl->ttype);
+               assert(!rh->tvarsized || !rh->ttype);
+               lvars = rlvars = rhvars = NULL;
+       }
+       lwidth = l->T->width;
+       rlwidth = rl->T->width;
+       rhwidth = rh->T->width;
+
+       if (l->ttype == TYPE_void) {
+               if (l->tseqbase == oid_nil) {
+                       /* trivial: nils don't match anything */
+                       return GDK_SUCCEED;
+               }
+               if (lcand) {
+                       lstart = 0;
+                       lend = (BUN) (lcandend - lcand);
+                       lvals = (const char *) lcand;
+                       lcand = NULL;
+                       lwidth = SIZEOF_OID;
+               }
+               loff = (wrd) l->tseqbase - (wrd) l->hseqbase;
+       }
+
+       /* nested loop implementation for range join */
+       for (;;) {
+               if (lcand) {
+                       if (lcand == lcandend)
+                               break;
+                       lo = *lcand++;
+                       vl = VALUE(l, lstart);
+               } else {
+                       if (lstart == lend)
+                               break;
+                       if (lvals) {
+                               vl = VALUE(l, lstart);
+                               if (loff != 0) {
+                                       lval = (oid) (*(const oid *)vl + loff);
+                                       vl = (const char *) &lval;
+                               }
+                       } else {
+                               lval = lstart + l->tseqbase;
+                               vl = (const char *) &lval;
+                       }
+                       lo = lstart++ + l->hseqbase;
+               }
+               if (cmp(vl, nil) == 0)
+                       continue;
+               nr = 0;
+               p = rcand;
+               n = rstart;
+               for (;;) {
+                       if (rcand) {
+                               if (p == rcandend)
+                                       break;
+                               ro = *p++;
+                               if (rlvals)
+                                       vrl = VALUE(rl, ro - rl->hseqbase);
+                               else {
+                                       /* TYPE_void */
+                                       rlval = ro;
+                                       vrl = (const char *) &rlval;
+                               }
+                               if (rhvals)
+                                       vrh = VALUE(rh, ro - rh->hseqbase);
+                               else {
+                                       /* TYPE_void */
+                                       rhval = ro;
+                                       vrh = (const char *) &rhval;
+                               }
+                       } else {
+                               if (n == rend)
+                                       break;
+                               if (rlvals) {
+                                       vrl = VALUE(rl, n);
+                               } else {
+                                       /* TYPE_void */
+                                       rlval = n + rl->tseqbase;
+                                       vrl = (const char *) &rlval;
+                               }
+                               if (rhvals) {
+                                       vrh = VALUE(rh, n);
+                               } else {
+                                       /* TYPE_void */
+                                       rhval = n + rh->tseqbase;
+                                       vrh = (const char *) &rhval;
+                               }
+                               ro = n++ + rl->hseqbase;
+                       }
+                       switch (ATOMtype(l->ttype)) {
+                       case TYPE_bte:
+                               if (*(const bte*)vrl == bte_nil ||
+                                   *(const bte*)vrh == bte_nil ||
+                                   *(const bte*)vl < *(const bte*)vrl ||
+                                   *(const bte*)vl > *(const bte*)vrh ||
+                                   (!li && *(const bte*)vl == *(const 
bte*)vrl) ||
+                                   (!hi && *(const bte*)vl == *(const 
bte*)vrh))
+                                       continue;
+                               break;
+                       case TYPE_sht:
+                               if (*(const sht*)vrl == sht_nil ||
+                                   *(const sht*)vrh == sht_nil ||
+                                   *(const sht*)vl < *(const sht*)vrl ||
+                                   *(const sht*)vl > *(const sht*)vrh ||
+                                   (!li && *(const sht*)vl == *(const 
sht*)vrl) ||
+                                   (!hi && *(const sht*)vl == *(const 
sht*)vrh))
+                                       continue;
+                               break;
+                       case TYPE_int:
+#if SIZEOF_WRD == SIZEOF_INT
+                       case TYPE_wrd:
+#endif
+                               if (*(const int*)vrl == int_nil ||
+                                   *(const int*)vrh == int_nil ||
+                                   *(const int*)vl < *(const int*)vrl ||
+                                   *(const int*)vl > *(const int*)vrh ||
+                                   (!li && *(const int*)vl == *(const 
int*)vrl) ||
+                                   (!hi && *(const int*)vl == *(const 
int*)vrh))
+                                       continue;
+                               break;
+                       case TYPE_lng:
+#if SIZEOF_WRD == SIZEOF_LNG
+                       case TYPE_wrd:
+#endif
+                               if (*(const lng*)vrl == lng_nil ||
+                                   *(const lng*)vrh == lng_nil ||
+                                   *(const lng*)vl < *(const lng*)vrl ||
+                                   *(const lng*)vl > *(const lng*)vrh ||
+                                   (!li && *(const lng*)vl == *(const 
lng*)vrl) ||
+                                   (!hi && *(const lng*)vl == *(const 
lng*)vrh))
+                                       continue;
+                               break;
+                       case TYPE_oid:
+                               if (*(const oid*)vrl == oid_nil ||
+                                   *(const oid*)vrh == oid_nil ||
+                                   *(const oid*)vl < *(const oid*)vrl ||
+                                   *(const oid*)vl > *(const oid*)vrh ||
+                                   (!li && *(const oid*)vl == *(const 
oid*)vrl) ||
+                                   (!hi && *(const oid*)vl == *(const 
oid*)vrh))
+                                       continue;
+                               break;
+                       case TYPE_flt:
+                               if (*(const flt*)vrl == flt_nil ||
+                                   *(const flt*)vrh == flt_nil ||
+                                   *(const flt*)vl < *(const flt*)vrl ||
+                                   *(const flt*)vl > *(const flt*)vrh ||
+                                   (!li && *(const flt*)vl == *(const 
flt*)vrl) ||
+                                   (!hi && *(const flt*)vl == *(const 
flt*)vrh))
+                                       continue;
+                               break;
+                       case TYPE_dbl:
+                               if (*(const dbl*)vrl == dbl_nil ||
+                                   *(const dbl*)vrh == dbl_nil ||
+                                   *(const dbl*)vl < *(const dbl*)vrl ||
+                                   *(const dbl*)vl > *(const dbl*)vrh ||
+                                   (!li && *(const dbl*)vl == *(const 
dbl*)vrl) ||
+                                   (!hi && *(const dbl*)vl == *(const 
dbl*)vrh))
+                                       continue;
+                               break;
+                       default:
+                               if (cmp(vrl, nil) == 0 || cmp(vrh, nil) == 0)
+                                       continue;
+                               c = cmp(vl, vrl);
+                               if (c < 0 || (c == 0 && !li))
+                                       continue;
+                               c = cmp(vl, vrh);
+                               if (c > 0 || (c == 0 && !hi))
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to