Changeset: 16d6012d9375 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=16d6012d9375
Modified Files:
gdk/gdk_join.c
gdk/gdk_unique.c
sql/server/sql_parser.y
Branch: default
Log Message:
Merge with Aug2018 branch.
diffs (truncated from 420 to 300 lines):
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -138,14 +138,16 @@ joininitresults(BAT **r1p, BAT **r2p, BU
} else {
maxsize = rcnt;
}
- } else {
+ } else if (rcnt == 0) {
+ /* nil_on_miss must be true due to previous checks, so
+ * all values on left miss */
+ maxsize = lcnt;
+ } else if (BUN_MAX / lcnt >= rcnt) {
/* in the worst case we have a full cross product */
- if (lcnt == 0 || rcnt == 0)
- maxsize = nil_on_miss ? lcnt : 0;
- else if (BUN_MAX / lcnt >= rcnt)
- maxsize = lcnt * rcnt;
- else
- maxsize = BUN_MAX;
+ maxsize = lcnt * rcnt;
+ } else {
+ /* a BAT cannot grow larger than BUN_MAX */
+ maxsize = BUN_MAX;
}
size = estimate == BUN_NONE ? lcnt : estimate;
if (size > maxsize)
@@ -221,19 +223,20 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BA
bool nil_on_miss, bool only_misses, const char *func, lng t0)
{
BUN cnt;
- BAT *r1, *r2;
+ BAT *r1, *r2 = NULL;
if (lstart == lend || !(nil_on_miss | only_misses)) {
/* return empty BATs */
- if ((*r1p = BATdense(0, 0, 0)) == NULL)
+ if ((r1 = BATdense(0, 0, 0)) == NULL)
return GDK_FAIL;
- if (r2p && (*r2p = BATdense(0, 0, 0)) == NULL) {
- BBPreclaim(*r1p);
- *r1p = NULL;
- return GDK_FAIL;
+ if (r2p) {
+ if ((r2 = BATdense(0, 0, 0)) == NULL) {
+ BBPreclaim(r1);
+ return GDK_FAIL;
+ }
+ *r2p = r2;
}
- r1 = *r1p;
- r2 = r2p ? *r2p : NULL;
+ *r1p = r1;
ALGODEBUG fprintf(stderr,
"#%s(l=%s,r=%s)=(" ALGOBATFMT ","
ALGOOPTBATFMT ") " LLFMT "us -- nomatch\n",
func, BATgetId(l), BATgetId(r),
@@ -243,8 +246,6 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BA
}
if (lcand) {
- BAT *r1;
-
cnt = (BUN) (lcandend - lcand);
r1 = COLnew(0, TYPE_oid, cnt, TRANSIENT);
if (r1 == NULL)
@@ -265,20 +266,19 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BA
r1->tnil = false;
r1->tnonil = true;
BATsetcount(r1, cnt);
- *r1p = r1;
} else {
cnt = lend - lstart;
- if ((*r1p = BATdense(0, l->hseqbase + lstart, cnt)) == NULL)
+ if ((r1 = BATdense(0, l->hseqbase + lstart, cnt)) == NULL)
return GDK_FAIL;
}
- if (r2p &&
- (*r2p = BATconstant(0, TYPE_void, &oid_nil, cnt, TRANSIENT)) ==
NULL) {
- BBPreclaim(*r1p);
- *r1p = NULL;
- return GDK_FAIL;
+ if (r2p) {
+ if ((r2 = BATconstant(0, TYPE_void, &oid_nil, cnt, TRANSIENT))
== NULL) {
+ BBPreclaim(r1);
+ return GDK_FAIL;
+ }
+ *r2p = r2;
}
- r1 = *r1p;
- r2 = r2p ? *r2p : NULL;
+ *r1p = r1;
ALGODEBUG fprintf(stderr,
"#%s(l=%s,r=%s)=(" ALGOBATFMT "," ALGOOPTBATFMT ") "
LLFMT "us -- nomatch\n",
func, BATgetId(l), BATgetId(r),
@@ -421,6 +421,12 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l,
#define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last)
binsearch_lng(indir, offset, (const lng *) vals, lo, hi, (lng) (v), ordering,
last)
#endif
+/* Implementation of join where the right-hand side is dense, and if
+ * there is a right candidate list, it too is dense. In case
+ * nil_on_miss is not set, we use a range select (BATselect) to find
+ * the matching values in the left column and then calculate the
+ * corresponding matches from the right. If nil_on_miss is set, we
+ * need to do some more work. */
static gdk_return
mergejoin_void(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
BUN lstart, BUN lend, BUN lcnt,
@@ -533,7 +539,7 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT
BATsetcount(r2, BATcount(r1));
}
*r2p = r2;
- return GDK_SUCCEED;
+ goto doreturn2;
}
/* nil_on_miss is set, this means we must have a second output */
assert(r2p);
@@ -571,7 +577,7 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT
* respective OID values that we can store in
* r1 and r2; note that r1 will be dense since
* all values in l will match something (even
- * if nil if nil_on_miss is set) */
+ * if nil since nil_on_miss is set) */
*r1p = r1 = BATdense(0, seq, lcnt);
if (r1 == NULL)
return GDK_FAIL;
@@ -803,6 +809,7 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT
virtualize(r1);
if (r2->tkey && r2->tsorted)
virtualize(r2);
+ doreturn2:
ALGODEBUG fprintf(stderr, "#mergejoin_void(l=%s,r=%s)=(" ALGOBATFMT ","
ALGOOPTBATFMT ") " LLFMT "us\n",
BATgetId(l), BATgetId(r),
ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
@@ -810,6 +817,8 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT
return GDK_SUCCEED;
}
+/* Implementation of mergejoin (see below) for the special case that
+ * the values are of type int, and some more conditions are met. */
static gdk_return
mergejoin_int(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
bool nil_matches, BUN estimate, lng t0, bool swapped)
@@ -1104,6 +1113,8 @@ mergejoin_int(BAT **r1p, BAT **r2p, BAT
return GDK_FAIL;
}
+/* Implementation of mergejoin (see below) for the special case that
+ * the values are of type lng, and some more conditions are met. */
static gdk_return
mergejoin_lng(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
bool nil_matches, BUN estimate, lng t0, bool swapped)
@@ -1413,9 +1424,6 @@ mergejoin_lng(BAT **r1p, BAT **r2p, BAT
* there is a match of l in r, no matter how many matches there are in
* r; otherwise all matches are returned.
*
- * maxsize is the absolute maximum size the output BATs can become (if
- * they were to become larger, we have a bug).
- *
* t0 and swapped are only for debugging (ALGOMASK set in GDKdebug).
*/
static gdk_return
@@ -2484,6 +2492,8 @@ binsearchcand(const oid *cand, BUN lo, B
} \
} while (false)
+/* Implementation of join using a hash lookup of values in the right
+ * column. */
static gdk_return
hashjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
BUN lstart, BUN lend, BUN lcnt,
@@ -3573,26 +3583,44 @@ bandjoin(BAT **r1p, BAT **r2p, BAT *l, B
/* small ordered right, dense left, oid's only, do fetches */
static gdk_return
-fetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, lng t0)
+fetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BUN lstart, BUN lend,
+ BUN rstart, BUN rend, lng t0)
{
- oid lo = l->tseqbase, hi = lo + BATcount(l);
- BUN b = SORTfndfirst(r, &lo), e = SORTfndlast(r, &hi), p;
+ oid lo = l->tseqbase + lstart, hi = lo + lend;
+ BUN b, e, p;
+ BAT *r1, *r2 = NULL;
ALGODEBUG fprintf(stderr, "#fetchjoin(l=" ALGOBATFMT ","
"r=" ALGOBATFMT ")\n",
ALGOBATPAR(l), ALGOBATPAR(r));
- if (r2p &&
- (*r2p = BATdense(0, e == b ? 0 : r->hseqbase + b, e - b)) == NULL) {
- return GDK_FAIL;
+ if (r->tsorted) {
+ b = SORTfndfirst(r, &lo);
+ e = SORTfndfirst(r, &hi);
+ } else {
+ assert(r->trevsorted);
+ b = SORTfndlast(r, &hi);
+ e = SORTfndlast(r, &lo);
}
- BAT *r2 = r2p ? *r2p : NULL;
- BAT *r1 = *r1p = COLnew(0, TYPE_oid, e - b, TRANSIENT);
- if (r1 == NULL) {
- if (r2p)
- BBPreclaim(*r2p);
+ if (b < rstart)
+ b = rstart;
+ if (e > rend)
+ e = rend;
+ if (e == b) {
+ return nomatch(r1p, r2p, l, r, lstart, lend, NULL, NULL,
+ false, false, "fetchjoin", t0);
+ }
+ r1 = COLnew(0, TYPE_oid, e - b, TRANSIENT);
+ if (r1 == NULL)
return GDK_FAIL;
+ if (r2p) {
+ if ((r2 = BATdense(0, r->hseqbase + b, e - b)) == NULL) {
+ BBPreclaim(r1);
+ return GDK_FAIL;
+ }
+ *r2p = r2;
}
+ *r1p = r1;
oid *op = (oid *) Tloc(r1, 0);
const oid *rp = (const oid *) Tloc(r, 0);
for (p = b; p < e; p++) {
@@ -3615,7 +3643,7 @@ fetchjoin(BAT **r1p, BAT **r2p, BAT *l,
static gdk_return
leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
- BUN estimate, const char *name, lng t0)
+ BUN estimate, const char *func, lng t0)
{
BUN lstart, lend, lcnt;
const oid *lcand, *lcandend;
@@ -3632,7 +3660,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
*r1p = NULL;
if (r2p)
*r2p = NULL;
- if (joinparamcheck(l, r, NULL, sl, sr, name) != GDK_SUCCEED)
+ if (joinparamcheck(l, r, NULL, sl, sr, func) != GDK_SUCCEED)
return GDK_FAIL;
CANDINIT(l, sl, lstart, lend, lcnt, lcand, lcandend);
@@ -3640,9 +3668,18 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
lcnt = lcand ? (BUN) (lcandend - lcand) : lend - lstart;
rcnt = rcand ? (BUN) (rcandend - rcand) : rend - rstart;
- if (lcnt == 0 || (!only_misses && !nil_on_miss && rcnt == 0))
+ if (lcnt == 0 || (!only_misses && !nil_on_miss && rcnt == 0)) {
+ ALGODEBUG fprintf(stderr, "#%s(l=" ALGOBATFMT ","
+ "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
+ "sr=" ALGOOPTBATFMT ",nil_matches=%d,"
+ "nil_on_miss=%d,semi=%d,only_misses=%d)\n",
+ func,
+ ALGOBATPAR(l), ALGOBATPAR(r),
+ ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
+ nil_matches, nil_on_miss, semi, only_misses);
return nomatch(r1p, r2p, l, r, lstart, lend, lcand, lcandend,
- nil_on_miss, only_misses, "leftjoin", t0);
+ nil_on_miss, only_misses, func, t0);
+ }
if (!nil_on_miss && !semi && !only_misses &&
(lcnt == 1 || (BATordered(l) && BATordered_rev(l)))) {
@@ -3656,27 +3693,29 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
return mergejoin_void(r1p, r2p, l, r, sl, sr,
lstart, lend, lcnt, lcand, lcandend,
nil_on_miss, only_misses, t0, false);
+ } else if (BATtdense(l)
+ && lcand == NULL
+ && rcand == NULL
+ && !semi
+ && !nil_matches
+ && !only_misses
+ /* && (rcnt * 1024) < lcnt */
+ && (BATordered(r) || BATordered_rev(r))) {
+ assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */
+ return fetchjoin(r1p, r2p, l, r,
+ lstart, lend, rstart, rend, t0);
} else if ((BATordered(r) || BATordered_rev(r))
&& (BATordered(l)
|| BATordered_rev(l)
|| BATtdense(r)
|| lcnt < 1024
- || BATcount(r) * (Tsize(r) + (r->tvheap ?
r->tvheap->size : 0) + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ?
GDKnr_threads : 1)))
+ || BATcount(r) * (Tsize(r) + (r->tvheap ?
r->tvheap->size : 0) + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ?
GDKnr_threads : 1))) {
return mergejoin(r1p, r2p, l, r, sl, sr,
lstart, lend, lcnt, lcand, lcandend,
rstart, rend, rcnt, rcand, rcandend,
nil_matches, nil_on_miss, semi, only_misses,
estimate, t0, false);
- if (BATtdense(l)
- && ATOMtype(l->ttype) == TYPE_oid
- && sl == NULL
- && sr == NULL
- && !semi
- && !nil_matches
- && !only_misses
- && (rcnt * 1024) < lcnt
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list