Changeset: 16d6012d9375 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=16d6012d9375
Modified Files:
        gdk/gdk_join.c
        gdk/gdk_unique.c
        sql/server/sql_parser.y
Branch: default
Log Message:

Merge with Aug2018 branch.


diffs (truncated from 420 to 300 lines):

diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -138,14 +138,16 @@ joininitresults(BAT **r1p, BAT **r2p, BU
                } else {
                        maxsize = rcnt;
                }
-       } else {
+       } else if (rcnt == 0) {
+               /* nil_on_miss must be true due to previous checks, so
+                * all values on left miss */
+               maxsize = lcnt;
+       } else if (BUN_MAX / lcnt >= rcnt) {
                /* in the worst case we have a full cross product */
-               if (lcnt == 0 || rcnt == 0)
-                       maxsize = nil_on_miss ? lcnt : 0;
-               else if (BUN_MAX / lcnt >= rcnt)
-                       maxsize = lcnt * rcnt;
-               else
-                       maxsize = BUN_MAX;
+               maxsize = lcnt * rcnt;
+       } else {
+               /* a BAT cannot grow larger than BUN_MAX */
+               maxsize = BUN_MAX;
        }
        size = estimate == BUN_NONE ? lcnt : estimate;
        if (size > maxsize)
@@ -221,19 +223,20 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BA
        bool nil_on_miss, bool only_misses, const char *func, lng t0)
 {
        BUN cnt;
-       BAT *r1, *r2;
+       BAT *r1, *r2 = NULL;
 
        if (lstart == lend || !(nil_on_miss | only_misses)) {
                /* return empty BATs */
-               if ((*r1p = BATdense(0, 0, 0)) == NULL)
+               if ((r1 = BATdense(0, 0, 0)) == NULL)
                        return GDK_FAIL;
-               if (r2p && (*r2p = BATdense(0, 0, 0)) == NULL) {
-                       BBPreclaim(*r1p);
-                       *r1p = NULL;
-                       return GDK_FAIL;
+               if (r2p) {
+                       if ((r2 = BATdense(0, 0, 0)) == NULL) {
+                               BBPreclaim(r1);
+                               return GDK_FAIL;
+                       }
+                       *r2p = r2;
                }
-               r1 = *r1p;
-               r2 = r2p ? *r2p : NULL;
+               *r1p = r1;
                ALGODEBUG fprintf(stderr,
                                  "#%s(l=%s,r=%s)=(" ALGOBATFMT "," 
ALGOOPTBATFMT ") " LLFMT "us -- nomatch\n",
                                  func, BATgetId(l), BATgetId(r),
@@ -243,8 +246,6 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BA
        }
 
        if (lcand) {
-               BAT *r1;
-
                cnt = (BUN) (lcandend - lcand);
                r1 = COLnew(0, TYPE_oid, cnt, TRANSIENT);
                if (r1 == NULL)
@@ -265,20 +266,19 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BA
                r1->tnil = false;
                r1->tnonil = true;
                BATsetcount(r1, cnt);
-               *r1p = r1;
        } else {
                cnt = lend - lstart;
-               if ((*r1p = BATdense(0, l->hseqbase + lstart, cnt)) == NULL)
+               if ((r1 = BATdense(0, l->hseqbase + lstart, cnt)) == NULL)
                        return GDK_FAIL;
        }
-       if (r2p &&
-           (*r2p = BATconstant(0, TYPE_void, &oid_nil, cnt, TRANSIENT)) == 
NULL) {
-               BBPreclaim(*r1p);
-               *r1p = NULL;
-               return GDK_FAIL;
+       if (r2p) {
+               if ((r2 = BATconstant(0, TYPE_void, &oid_nil, cnt, TRANSIENT)) 
== NULL) {
+                       BBPreclaim(r1);
+                       return GDK_FAIL;
+               }
+               *r2p = r2;
        }
-       r1 = *r1p;
-       r2 = r2p ? *r2p : NULL;
+       *r1p = r1;
        ALGODEBUG fprintf(stderr,
                          "#%s(l=%s,r=%s)=(" ALGOBATFMT "," ALGOOPTBATFMT ") " 
LLFMT "us -- nomatch\n",
                          func, BATgetId(l), BATgetId(r),
@@ -421,6 +421,12 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l,
 #define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) 
binsearch_lng(indir, offset, (const lng *) vals, lo, hi, (lng) (v), ordering, 
last)
 #endif
 
+/* Implementation of join where the right-hand side is dense, and if
+ * there is a right candidate list, it too is dense.  In case
+ * nil_on_miss is not set, we use a range select (BATselect) to find
+ * the matching values in the left column and then calculate the
+ * corresponding matches from the right.  If nil_on_miss is set, we
+ * need to do some more work. */
 static gdk_return
 mergejoin_void(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
               BUN lstart, BUN lend, BUN lcnt,
@@ -533,7 +539,7 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT
                        BATsetcount(r2, BATcount(r1));
                }
                *r2p = r2;
-               return GDK_SUCCEED;
+               goto doreturn2;
        }
        /* nil_on_miss is set, this means we must have a second output */
        assert(r2p);
@@ -571,7 +577,7 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT
                         * respective OID values that we can store in
                         * r1 and r2; note that r1 will be dense since
                         * all values in l will match something (even
-                        * if nil if nil_on_miss is set) */
+                        * if nil since nil_on_miss is set) */
                        *r1p = r1 = BATdense(0, seq, lcnt);
                        if (r1 == NULL)
                                return GDK_FAIL;
@@ -803,6 +809,7 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT
                virtualize(r1);
        if (r2->tkey && r2->tsorted)
                virtualize(r2);
+  doreturn2:
        ALGODEBUG fprintf(stderr, "#mergejoin_void(l=%s,r=%s)=(" ALGOBATFMT "," 
ALGOOPTBATFMT ") " LLFMT "us\n",
                          BATgetId(l), BATgetId(r),
                          ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
@@ -810,6 +817,8 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT
        return GDK_SUCCEED;
 }
 
+/* Implementation of mergejoin (see below) for the special case that
+ * the values are of type int, and some more conditions are met. */
 static gdk_return
 mergejoin_int(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
              bool nil_matches, BUN estimate, lng t0, bool swapped)
@@ -1104,6 +1113,8 @@ mergejoin_int(BAT **r1p, BAT **r2p, BAT 
        return GDK_FAIL;
 }
 
+/* Implementation of mergejoin (see below) for the special case that
+ * the values are of type lng, and some more conditions are met. */
 static gdk_return
 mergejoin_lng(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
              bool nil_matches, BUN estimate, lng t0, bool swapped)
@@ -1413,9 +1424,6 @@ mergejoin_lng(BAT **r1p, BAT **r2p, BAT 
  * there is a match of l in r, no matter how many matches there are in
  * r; otherwise all matches are returned.
  *
- * maxsize is the absolute maximum size the output BATs can become (if
- * they were to become larger, we have a bug).
- *
  * t0 and swapped are only for debugging (ALGOMASK set in GDKdebug).
  */
 static gdk_return
@@ -2484,6 +2492,8 @@ binsearchcand(const oid *cand, BUN lo, B
                }                                                       \
        } while (false)
 
+/* Implementation of join using a hash lookup of values in the right
+ * column. */
 static gdk_return
 hashjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
         BUN lstart, BUN lend, BUN lcnt,
@@ -3573,26 +3583,44 @@ bandjoin(BAT **r1p, BAT **r2p, BAT *l, B
 
 /* small ordered right, dense left, oid's only, do fetches */
 static gdk_return
-fetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, lng t0)
+fetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BUN lstart, BUN lend,
+         BUN rstart, BUN rend, lng t0)
 {
-       oid lo = l->tseqbase, hi = lo + BATcount(l);
-       BUN b = SORTfndfirst(r, &lo), e = SORTfndlast(r, &hi), p;
+       oid lo = l->tseqbase + lstart, hi = lo + lend;
+       BUN b, e, p;
+       BAT *r1, *r2 = NULL;
 
        ALGODEBUG fprintf(stderr, "#fetchjoin(l=" ALGOBATFMT ","
                          "r=" ALGOBATFMT ")\n",
                          ALGOBATPAR(l), ALGOBATPAR(r));
 
-       if (r2p &&
-           (*r2p = BATdense(0, e == b ? 0 : r->hseqbase + b, e - b)) == NULL) {
-               return GDK_FAIL;
+       if (r->tsorted) {
+               b = SORTfndfirst(r, &lo);
+               e = SORTfndfirst(r, &hi);
+       } else {
+               assert(r->trevsorted);
+               b = SORTfndlast(r, &hi);
+               e = SORTfndlast(r, &lo);
        }
-       BAT *r2 = r2p ? *r2p : NULL;
-       BAT *r1 = *r1p = COLnew(0, TYPE_oid, e - b, TRANSIENT);
-       if (r1 == NULL) {
-               if (r2p)
-                       BBPreclaim(*r2p);
+       if (b < rstart)
+               b = rstart;
+       if (e > rend)
+               e = rend;
+       if (e == b) {
+               return nomatch(r1p, r2p, l, r, lstart, lend, NULL, NULL,
+                              false, false, "fetchjoin", t0);
+       }
+       r1 = COLnew(0, TYPE_oid, e - b, TRANSIENT);
+       if (r1 == NULL)
                return GDK_FAIL;
+       if (r2p) {
+               if ((r2 = BATdense(0, r->hseqbase + b, e - b)) == NULL) {
+                       BBPreclaim(r1);
+                       return GDK_FAIL;
+               }
+               *r2p = r2;
        }
+       *r1p = r1;
        oid *op = (oid *) Tloc(r1, 0);
        const oid *rp = (const oid *) Tloc(r, 0);
        for (p = b; p < e; p++) {
@@ -3615,7 +3643,7 @@ fetchjoin(BAT **r1p, BAT **r2p, BAT *l, 
 static gdk_return
 leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
         bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
-        BUN estimate, const char *name, lng t0)
+        BUN estimate, const char *func, lng t0)
 {
        BUN lstart, lend, lcnt;
        const oid *lcand, *lcandend;
@@ -3632,7 +3660,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
        *r1p = NULL;
        if (r2p)
                *r2p = NULL;
-       if (joinparamcheck(l, r, NULL, sl, sr, name) != GDK_SUCCEED)
+       if (joinparamcheck(l, r, NULL, sl, sr, func) != GDK_SUCCEED)
                return GDK_FAIL;
 
        CANDINIT(l, sl, lstart, lend, lcnt, lcand, lcandend);
@@ -3640,9 +3668,18 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
        lcnt = lcand ? (BUN) (lcandend - lcand) : lend - lstart;
        rcnt = rcand ? (BUN) (rcandend - rcand) : rend - rstart;
 
-       if (lcnt == 0 || (!only_misses && !nil_on_miss && rcnt == 0))
+       if (lcnt == 0 || (!only_misses && !nil_on_miss && rcnt == 0)) {
+               ALGODEBUG fprintf(stderr, "#%s(l=" ALGOBATFMT ","
+                                 "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
+                                 "sr=" ALGOOPTBATFMT ",nil_matches=%d,"
+                                 "nil_on_miss=%d,semi=%d,only_misses=%d)\n",
+                                 func,
+                                 ALGOBATPAR(l), ALGOBATPAR(r),
+                                 ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
+                                 nil_matches, nil_on_miss, semi, only_misses);
                return nomatch(r1p, r2p, l, r, lstart, lend, lcand, lcandend,
-                              nil_on_miss, only_misses, "leftjoin", t0);
+                              nil_on_miss, only_misses, func, t0);
+       }
 
        if (!nil_on_miss && !semi && !only_misses &&
            (lcnt == 1 || (BATordered(l) && BATordered_rev(l)))) {
@@ -3656,27 +3693,29 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
                return mergejoin_void(r1p, r2p, l, r, sl, sr,
                                      lstart, lend, lcnt, lcand, lcandend,
                                      nil_on_miss, only_misses, t0, false);
+       } else if (BATtdense(l)
+                  && lcand == NULL
+                  && rcand == NULL
+                  && !semi
+                  && !nil_matches
+                  && !only_misses
+                  /* && (rcnt * 1024) < lcnt */
+                  && (BATordered(r) || BATordered_rev(r))) {
+               assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */
+               return fetchjoin(r1p, r2p, l, r,
+                                lstart, lend, rstart, rend, t0);
        } else if ((BATordered(r) || BATordered_rev(r))
                   && (BATordered(l)
                       || BATordered_rev(l)
                       || BATtdense(r)
                       || lcnt < 1024
-                      || BATcount(r) * (Tsize(r) + (r->tvheap ? 
r->tvheap->size : 0) + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? 
GDKnr_threads : 1)))
+                      || BATcount(r) * (Tsize(r) + (r->tvheap ? 
r->tvheap->size : 0) + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? 
GDKnr_threads : 1))) {
                return mergejoin(r1p, r2p, l, r, sl, sr,
                                 lstart, lend, lcnt, lcand, lcandend,
                                 rstart, rend, rcnt, rcand, rcandend,
                                 nil_matches, nil_on_miss, semi, only_misses,
                                 estimate, t0, false);
-       if (BATtdense(l)
-           && ATOMtype(l->ttype) == TYPE_oid
-           && sl == NULL
-           && sr == NULL
-           && !semi
-           && !nil_matches
-           && !only_misses
-           && (rcnt * 1024) < lcnt
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to