Changeset: 6a0689b35f63 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6a0689b35f63
Removed Files:
        gdk/gdk_rangejoin.mx
Modified Files:
        gdk/Makefile.ag
        gdk/gdk.h
        gdk/gdk_join.c
        monetdb5/extras/rdf/rdf_shredder.mx
        monetdb5/modules/kernel/algebra.c
Branch: int128
Log Message:

Merge with default branch (changeset f1151375a58f).


diffs (truncated from 1057 to 300 lines):

diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag
--- a/gdk/Makefile.ag
+++ b/gdk/Makefile.ag
@@ -31,7 +31,6 @@ lib_gdk = {
                gdk_qsort.c gdk_qsort_impl.h gdk_ssort.c gdk_ssort_impl.h \
                gdk_storage.c gdk_bat.c \
                gdk_delta.c gdk_cross.c gdk_system.c gdk_value.c \
-               gdk_rangejoin.mx \
                gdk_posix.c gdk_logger.c gdk_sample.c \
                gdk_private.h gdk_delta.h gdk_logger.h gdk_posix.h \
                gdk_system.h gdk_tm.h gdk_storage.h \
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3220,6 +3220,7 @@ gdk_export gdk_return BATcross1(BAT **r1
 gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr);
 gdk_export BAT *BATcross(BAT *l, BAT *r);
 gdk_export BAT *BATbandjoin(BAT *l, BAT *r, const void *mnus, const void 
*plus, bit li, bit hi);
+gdk_export BAT *BATrangejoin(BAT *l, BAT *rl, BAT *rh, bit li, bit hi);
 
 gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, 
BAT *sl, BAT *sr, int nil_matches, BUN estimate);
@@ -3228,6 +3229,7 @@ gdk_export gdk_return BATsubsemijoin(BAT
 gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT 
*r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
+gdk_export gdk_return BATsubrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, 
BAT *rh, BAT *sl, BAT *sr, int li, int hi, BUN estimate);
 gdk_export BAT *BATproject(BAT *l, BAT *r);
 
 gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -71,20 +71,31 @@
  *     ranges are inclusive or not; values in the left and right
  *     inputs match if right - c1 <[=] left <[=] right + c2; if c1 or
  *     c2 is NIL, there are no matches
+ * BATsubrangejoin
+ *     range-join: the right input consists of two aligned BATs,
+ *     values match if the left value is between two corresponding
+ *     right values; two extra Boolean parameters, li and hi,
+ *     indicate whether equal values match
  */
 
 /* Perform a bunch of sanity checks on the inputs to a join. */
 static gdk_return
-joinparamcheck(BAT *l, BAT *r, BAT *sl, BAT *sr, const char *func)
+joinparamcheck(BAT *l, BAT *r1, BAT *r2, BAT *sl, BAT *sr, const char *func)
 {
-       if (!BAThdense(l) || !BAThdense(r)) {
+       if (!BAThdense(l) || !BAThdense(r1) || (r2 && !BAThdense(r2))) {
                GDKerror("%s: inputs must have dense head.\n", func);
                return GDK_FAIL;
        }
-       if (ATOMtype(l->ttype) != ATOMtype(r->ttype)) {
+       if (ATOMtype(l->ttype) != ATOMtype(r1->ttype) ||
+           (r2 && ATOMtype(l->ttype) != ATOMtype(r2->ttype))) {
                GDKerror("%s: inputs not compatible.\n", func);
                return GDK_FAIL;
        }
+       if (r2 &&
+           (BATcount(r1) != BATcount(r2) || r1->hseqbase != r2->hseqbase)) {
+               GDKerror("%s: right inputs not aligned.\n", func);
+               return GDK_FAIL;
+       }
        if ((sl && !BAThdense(sl)) || (sr && !BAThdense(sr))) {
                GDKerror("%s: candidate lists must have dense head.\n", func);
                return GDK_FAIL;
@@ -232,6 +243,9 @@ BINSEARCHFUNC(sht)
 BINSEARCHFUNC(int)
 BINSEARCHFUNC(oid)
 BINSEARCHFUNC(lng)
+#ifdef HAVE_HGE
+BINSEARCHFUNC(hge)
+#endif
 BINSEARCHFUNC(flt)
 BINSEARCHFUNC(dbl)
 
@@ -271,6 +285,11 @@ binsearch(const oid *rcand, oid offset,
 #endif
                return binsearch_lng(rcand, offset, (const lng *) rvals,
                                     lo, hi, (const lng *) v, ordering, last);
+#ifdef HAVE_HGE
+       case TYPE_hge:
+               return binsearch_hge(rcand, offset, (const hge *) rvals,
+                                    lo, hi, (const hge *) v, ordering, last);
+#endif
        case TYPE_flt:
                return binsearch_flt(rcand, offset, (const flt *) rvals,
                                     lo, hi, (const flt *) v, ordering, last);
@@ -1475,6 +1494,12 @@ hashjoin(BAT *r1, BAT *r2, BAT *l, BAT *
                                                        break;
                                        }
                                        break;
+#ifdef HAVE_HGE
+                               case TYPE_hge:
+                                       /* do we need to handle TYPE_hge, here? 
*/
+                                       assert(0);
+                                       break;
+#endif
                                default:
                                        if (!nil_matches && cmp(v, nil) == 0) {
                                                lskipped = BATcount(r1) > 0;
@@ -1876,6 +1901,15 @@ bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *
                    ((!hi || !li) && -*(const lng *)c1 == *(const lng *)c2))
                        return GDK_SUCCEED;
                break;
+#ifdef HAVE_HGE
+       case TYPE_hge:
+               if (*(const hge *)c1 == hge_nil ||
+                   *(const hge *)c2 == hge_nil ||
+                   -*(const hge *)c1 > *(const hge *)c2 ||
+                   ((!hi || !li) && -*(const hge *)c1 == *(const hge *)c2))
+                       return GDK_SUCCEED;
+               break;
+#endif
        case TYPE_flt:
                if (*(const flt *)c1 == flt_nil ||
                    *(const flt *)c2 == flt_nil ||
@@ -1992,6 +2026,24 @@ bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *
                                        continue;
                                break;
                        }
+#ifdef HAVE_HGE
+                       case TYPE_lng: {
+                               hge v1 = (hge) *(const lng *) vr, v2;
+
+                               if (v1 == lng_nil)
+                                       continue;
+                               v2 = v1;
+                               v1 -= *(const lng *)c1;
+                               if (*(const lng *)vl <= v1 &&
+                                   (!li || *(const lng *)vl != v1))
+                                       continue;
+                               v2 += *(const lng *)c2;
+                               if (*(const lng *)vl >= v2 &&
+                                   (!hi || *(const lng *)vl != v2))
+                                       continue;
+                               break;
+                       }
+#else
 #ifdef HAVE___INT128
                        case TYPE_lng: {
                                __int128 v1 = (__int128) *(const lng *) vr, v2;
@@ -2037,6 +2089,35 @@ bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *
                                continue;
                        }
 #endif
+#endif
+#ifdef HAVE_HAVE
+                       case TYPE_hge: {
+                               hge v1, v2;
+                               int abort_on_error = 1;
+
+                               if (*(const hge *)vr == hge_nil)
+                                       continue;
+                               SUB_WITH_CHECK(hge, *(const hge *)vr,
+                                              hge, *(const hge *)c1,
+                                              hge, v1,
+                                              do{if(*(const hge*)c1<0)goto 
nohmatch;else goto hmatch1;}while(0));
+                               if (*(const hge *)vl <= v1 &&
+                                   (!li || *(const hge *)vl != v1))
+                                       continue;
+                         hmatch1:
+                               ADD_WITH_CHECK(hge, *(const hge *)vr,
+                                              hge, *(const hge *)c2,
+                                              hge, v2,
+                                              do{if(*(const hge*)c2>0)goto 
nohmatch;else goto hmatch2;}while(0));
+                               if (*(const hge *)vl >= v2 &&
+                                   (!hi || *(const hge *)vl != v2))
+                                       continue;
+                         hmatch2:
+                               break;
+                         nohmatch:
+                               continue;
+                       }
+#endif
                        case TYPE_flt: {
                                dbl v1 = (dbl) *(const flt *) vr, v2;
 
@@ -2148,6 +2229,317 @@ bandjoin(BAT *r1, BAT *r2, BAT *l, BAT *
        return GDK_FAIL;
 }
 
+static gdk_return
+rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, int 
li, int hi)
+{
+       BUN lstart, lend, lcnt;
+       const oid *lcand = NULL, *lcandend = NULL;
+       BUN rstart, rend, rcnt;
+       const oid *rcand = NULL, *rcandend = NULL;
+       const char *lvals, *rlvals, *rhvals;
+       const char *lvars, *rlvars, *rhvars;
+       int lwidth, rlwidth, rhwidth;
+       const void *nil = ATOMnilptr(l->ttype);
+       int (*cmp)(const void *, const void *) = BATatoms[l->ttype].atomCmp;
+       const char *vl, *vrl, *vrh;
+       const oid *p;
+       oid lastr = 0;
+       BUN n, nr, newcap;
+       oid lo, ro;
+       int c;
+       int lskipped = 0;
+       wrd loff = 0;
+       oid lval = oid_nil, rlval = oid_nil, rhval = oid_nil;
+
+       assert(BAThdense(l));
+       assert(BAThdense(rl));
+       assert(BAThdense(rh));
+       assert(ATOMtype(l->ttype) == ATOMtype(rl->ttype));
+       assert(ATOMtype(l->ttype) == ATOMtype(rh->ttype));
+       assert(sl == NULL || sl->tsorted);
+       assert(sr == NULL || sr->tsorted);
+
+       ALGODEBUG fprintf(stderr, "#rangejoin(l=%s#" BUNFMT "[%s]%s%s,"
+                         "rl=%s#" BUNFMT "[%s]%s%s,rh=%s#" BUNFMT "[%s]%s%s,"
+                         "sl=%s#" BUNFMT "%s%s,sr=%s#" BUNFMT "%s%s)\n",
+                         BATgetId(l), BATcount(l), ATOMname(l->ttype),
+                         l->tsorted ? "-sorted" : "",
+                         l->trevsorted ? "-revsorted" : "",
+                         BATgetId(rl), BATcount(rl), ATOMname(rl->ttype),
+                         rl->tsorted ? "-sorted" : "",
+                         rl->trevsorted ? "-revsorted" : "",
+                         BATgetId(rh), BATcount(rh), ATOMname(rh->ttype),
+                         rh->tsorted ? "-sorted" : "",
+                         rh->trevsorted ? "-revsorted" : "",
+                         sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+                         sl && sl->tsorted ? "-sorted" : "",
+                         sl && sl->trevsorted ? "-revsorted" : "",
+                         sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+                         sr && sr->tsorted ? "-sorted" : "",
+                         sr && sr->trevsorted ? "-revsorted" : "");
+
+       CANDINIT(l, sl, lstart, lend, lcnt, lcand, lcandend);
+       CANDINIT(rl, sr, rstart, rend, rcnt, rcand, rcandend);
+
+       lvals = l->ttype == TYPE_void ? NULL : (const char *) Tloc(l, 
BUNfirst(l));
+       rlvals = rl->ttype == TYPE_void ? NULL : (const char *) Tloc(rl, 
BUNfirst(rl));
+       rhvals = rh->ttype == TYPE_void ? NULL : (const char *) Tloc(rh, 
BUNfirst(rh));
+       if (l->tvarsized && l->ttype) {
+               assert(rl->tvarsized && rl->ttype);
+               assert(rh->tvarsized && rh->ttype);
+               lvars = l->T->vheap->base;
+               rlvars = rl->T->vheap->base;
+               rhvars = rh->T->vheap->base;
+       } else {
+               assert(!rl->tvarsized || !rl->ttype);
+               assert(!rh->tvarsized || !rh->ttype);
+               lvars = rlvars = rhvars = NULL;
+       }
+       lwidth = l->T->width;
+       rlwidth = rl->T->width;
+       rhwidth = rh->T->width;
+
+       if (l->ttype == TYPE_void) {
+               if (l->tseqbase == oid_nil) {
+                       /* trivial: nils don't match anything */
+                       return GDK_SUCCEED;
+               }
+               if (lcand) {
+                       lstart = 0;
+                       lend = (BUN) (lcandend - lcand);
+                       lvals = (const char *) lcand;
+                       lcand = NULL;
+                       lwidth = SIZEOF_OID;
+               }
+               loff = (wrd) l->tseqbase - (wrd) l->hseqbase;
+       }
+
+       /* nested loop implementation for range join */
+       for (;;) {
+               if (lcand) {
+                       if (lcand == lcandend)
+                               break;
+                       lo = *lcand++;
+                       vl = VALUE(l, lstart);
+               } else {
+                       if (lstart == lend)
+                               break;
+                       if (lvals) {
+                               vl = VALUE(l, lstart);
+                               if (loff != 0) {
+                                       lval = (oid) (*(const oid *)vl + loff);
+                                       vl = (const char *) &lval;
+                               }
+                       } else {
+                               lval = lstart + l->tseqbase;
+                               vl = (const char *) &lval;
+                       }
+                       lo = lstart++ + l->hseqbase;
+               }
+               if (cmp(vl, nil) == 0)
+                       continue;
+               nr = 0;
+               p = rcand;
+               n = rstart;
+               for (;;) {
+                       if (rcand) {
+                               if (p == rcandend)
+                                       break;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to