Changeset: 58f7c1e4d490 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=58f7c1e4d490
Added Files:
sql/test/BugTracker-2016/Tests/All
sql/test/BugTracker-2016/Tests/LEFT-JOIN_with_OR_conditions_triggers_assertion.Bug-3908.sql
sql/test/BugTracker-2016/Tests/LEFT-JOIN_with_OR_conditions_triggers_assertion.Bug-3908.stable.err
sql/test/BugTracker-2016/Tests/LEFT-JOIN_with_OR_conditions_triggers_assertion.Bug-3908.stable.out
sql/test/BugTracker-2016/Tests/incorrect_column_name_in_OR_condition_of_LEFT-JOIN_crashes_mserver.Bug-3909.sql
sql/test/BugTracker-2016/Tests/incorrect_column_name_in_OR_condition_of_LEFT-JOIN_crashes_mserver.Bug-3909.stable.err
sql/test/BugTracker-2016/Tests/incorrect_column_name_in_OR_condition_of_LEFT-JOIN_crashes_mserver.Bug-3909.stable.out
Modified Files:
gdk/gdk_join.c
monetdb5/optimizer/Tests/All
monetdb5/optimizer/Tests/projectionchain.malC
monetdb5/optimizer/Tests/projectionchain.stable.err
monetdb5/optimizer/Tests/projectionchain.stable.out
testing/Mfilter.py
testing/Mtest.py.in
Branch: mosaic
Log Message:
Merge with default
diffs (truncated from 1246 to 300 lines):
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -195,16 +195,14 @@ joininitresults(BAT **r1p, BAT **r2p, BU
#define BINSEARCHFUNC(TYPE) \
static inline BUN \
binsearch_##TYPE(const oid *rcand, oid offset, const TYPE *rvals, \
- BUN lo, BUN hi, const void *vp, int ordering, int last) \
+ BUN lo, BUN hi, TYPE v, int ordering, int last) \
{ \
BUN mid; \
- TYPE v, x; \
+ TYPE x; \
\
assert(ordering == 1 || ordering == -1); \
assert(lo <= hi); \
\
- v = *(const TYPE *) vp; /* value we're searching for */ \
- \
if (ordering > 0) { \
if (rcand) { \
if (last) { \
@@ -355,10 +353,10 @@ BINSEARCHFUNC(hge)
BINSEARCHFUNC(flt)
BINSEARCHFUNC(dbl)
#if SIZEOF_OID == SIZEOF_INT
-#define binsearch_oid(rcand, offset, rvals, lo, hi, vp, ordering, last)
binsearch_int(rcand, offset, (const int *) rvals, lo, hi, (const int *) (vp),
ordering, last)
+#define binsearch_oid(rcand, offset, rvals, lo, hi, v, ordering, last)
binsearch_int(rcand, offset, (const int *) rvals, lo, hi, (int) (v), ordering,
last)
#endif
#if SIZEOF_OID == SIZEOF_LNG
-#define binsearch_oid(rcand, offset, rvals, lo, hi, vp, ordering, last)
binsearch_lng(rcand, offset, (const lng *) rvals, lo, hi, (const lng *) (vp),
ordering, last)
+#define binsearch_oid(rcand, offset, rvals, lo, hi, v, ordering, last)
binsearch_lng(rcand, offset, (const lng *) rvals, lo, hi, (lng) (v), ordering,
last)
#endif
/* Do a binary search for the first/last occurrence of v between lo and hi
@@ -385,10 +383,10 @@ binsearch(const oid *rcand, oid offset,
switch (type) {
case TYPE_bte:
return binsearch_bte(rcand, offset, (const bte *) rvals,
- lo, hi, (const bte *) v, ordering, last);
+ lo, hi, *(const bte *) v, ordering, last);
case TYPE_sht:
return binsearch_sht(rcand, offset, (const sht *) rvals,
- lo, hi, (const sht *) v, ordering, last);
+ lo, hi, *(const sht *) v, ordering, last);
case TYPE_int:
#if SIZEOF_WRD == SIZEOF_INT
case TYPE_wrd:
@@ -397,7 +395,7 @@ binsearch(const oid *rcand, oid offset,
case TYPE_oid:
#endif
return binsearch_int(rcand, offset, (const int *) rvals,
- lo, hi, (const int *) v, ordering, last);
+ lo, hi, *(const int *) v, ordering, last);
case TYPE_lng:
#if SIZEOF_WRD == SIZEOF_LNG
case TYPE_wrd:
@@ -406,18 +404,18 @@ binsearch(const oid *rcand, oid offset,
case TYPE_oid:
#endif
return binsearch_lng(rcand, offset, (const lng *) rvals,
- lo, hi, (const lng *) v, ordering, last);
+ lo, hi, *(const lng *) v, ordering, last);
#ifdef HAVE_HGE
case TYPE_hge:
return binsearch_hge(rcand, offset, (const hge *) rvals,
- lo, hi, (const hge *) v, ordering, last);
+ lo, hi, *(const hge *) v, ordering, last);
#endif
case TYPE_flt:
return binsearch_flt(rcand, offset, (const flt *) rvals,
- lo, hi, (const flt *) v, ordering, last);
+ lo, hi, *(const flt *) v, ordering, last);
case TYPE_dbl:
return binsearch_dbl(rcand, offset, (const dbl *) rvals,
- lo, hi, (const dbl *) v, ordering, last);
+ lo, hi, *(const dbl *) v, ordering, last);
}
if ((c = ordering * cmp(VALUE(r, rcand ? rcand[lo] - offset : lo), v))
> 0 ||
@@ -746,7 +744,7 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
if (only_misses) {
for (i = 0; i < cnt && lvals[i] < lo; i++)
APPEND(r1, lvals[i]);
- i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1, &hi, 1,
0);
+ i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1, hi, 1, 0);
for (; i < cnt; i++)
APPEND(r1, lvals[i]);
} else {
@@ -761,7 +759,7 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
r2->tkey = i > 1;
}
} else {
- i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1,
&lo, 1, 0);
+ i = binsearch_oid(NULL, 0, lvals, 0, cnt - 1,
lo, 1, 0);
}
for (; i < cnt && lvals[i] < hi; i++) {
APPEND(r1, lvals[i]);
@@ -812,11 +810,11 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
/* first restrict candidate list (lcand and
* cnt) to section that refers to l */
o = l->hseqbase;
- i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, &o, 1, 0);
+ i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, o, 1, 0);
lcand += i;
cnt -= i;
o = l->hseqbase + BATcount(l);
- i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, &o, 1, 0);
+ i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, o, 1, 0);
cnt -= i;
if (BATextend(r1, cnt) != GDK_SUCCEED)
@@ -1003,6 +1001,608 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
* r; otherwise all matches are returned.
*/
static gdk_return
+mergejoin_int(BAT *r1, BAT *r2, BAT *l, BAT *r,
+ int nil_matches, BUN maxsize, lng t0, int swapped)
+{
+ BUN lstart, lend;
+ BUN rstart, rend;
+ BUN lscan, rscan; /* opportunistic scan window */
+ const int *lvals, *rvals;
+ int v;
+ BUN nl, nr;
+ oid lv;
+ BUN i;
+
+ ALGODEBUG fprintf(stderr, "#mergejoin_int(l=%s#" BUNFMT "[%s]%s%s%s,"
+ "r=%s#" BUNFMT "[%s]%s%s%s)%s\n",
+ BATgetId(l), BATcount(l), ATOMname(l->ttype),
+ l->tsorted ? "-sorted" : "",
+ l->trevsorted ? "-revsorted" : "",
+ l->tkey & 1 ? "-key" : "",
+ BATgetId(r), BATcount(r), ATOMname(r->ttype),
+ r->tsorted ? "-sorted" : "",
+ r->trevsorted ? "-revsorted" : "",
+ r->tkey & 1 ? "-key" : "",
+ swapped ? " swapped" : "");
+
+ assert(BAThdense(l));
+ assert(BAThdense(r));
+ assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
+ assert(r->tsorted || r->trevsorted);
+
+ lstart = rstart = 0;
+ lend = BATcount(l);
+ rend = BATcount(r);
+ lvals = (const int *) Tloc(l, BUNfirst(l));
+ rvals = (const int *) Tloc(r, BUNfirst(r));
+ assert(!r->tvarsized || !r->ttype);
+
+ /* basic properties will be adjusted if necessary later on,
+ * they were initially set by joininitresults() */
+
+ if (lend == 0 || rend == 0) {
+ /* there are no matches */
+ return nomatch(r1, r2, l, r, lstart, lend, NULL, NULL,
+ 0, 0, "mergejoin_int", t0);
+ }
+
+ /* determine opportunistic scan window for l and r */
+ for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
+ nl >>= 1;
+ for (nr = rend - rstart, rscan = 4; nr > 0; rscan++)
+ nr >>= 1;
+
+ if (!nil_matches) {
+ /* skip over nils at the start of the columns */
+ if (lscan < lend - lstart && lvals[lstart + lscan] == int_nil) {
+ lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
+ lend - 1, int_nil, 1, 1);
+ } else {
+ while (lvals[lstart] == int_nil)
+ lstart++;
+ }
+ if (rscan < rend - rstart && rvals[rstart + rscan] == int_nil) {
+ rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
+ rend - 1, int_nil, 1, 1);
+ } else {
+ while (rvals[rstart] == int_nil)
+ rstart++;
+ }
+ }
+ /* from here on we don't have to worry about nil values */
+
+ while (lstart < lend && rstart < rend) {
+ v = rvals[rstart];
+
+ if (lscan < lend - lstart && lvals[lstart + lscan] < v) {
+ lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
+ lend - 1, v, 1, 0);
+ } else {
+ /* scan l for v */
+ while (lstart < lend && lvals[lstart] < v)
+ lstart++;
+ }
+ if (lstart >= lend) {
+ /* nothing found */
+ break;
+ }
+
+ /* Here we determine the next value in l that we are
+ * going to try to match in r. We will also count the
+ * number of occurrences in l of that value.
+ * Afterwards, v points to the value and nl is the
+ * number of times it occurs. Also, lstart will
+ * point to the next value to be considered (ready for
+ * the next iteration).
+ * If there are many equal values in l (more than
+ * lscan), we will use binary search to find the end
+ * of the sequence. Obviously, we can do this only if
+ * l is actually sorted (lscan > 0). */
+ nl = 1; /* we'll match (at least) one in l */
+ nr = 0; /* maybe we won't match anything in r */
+ v = lvals[lstart];
+ if (l->tkey) {
+ /* if l is key, there is a single value */
+ lstart++;
+ } else if (lscan < lend - lstart &&
+ v == lvals[lstart + lscan]) {
+ /* lots of equal values: use binary search to
+ * find end */
+ nl = binsearch_int(NULL, 0, lvals, lstart + lscan,
+ lend - 1, v, 1, 1);
+ nl -= lstart;
+ lstart += nl;
+ } else {
+ /* just scan */
+ while (++lstart < lend && v == lvals[lstart])
+ nl++;
+ }
+ /* lstart points one beyond the value we're
+ * going to match: ready for the next iteration. */
+
+ /* First we find the first value in r that is at
+ * least as large as v, then we find the first
+ * value in r that is larger than v. The difference
+ * is the number of values equal to v and is stored in
+ * nr.
+ * We will use binary search on r to find both ends of
+ * the sequence of values that are equal to v in case
+ * the position is "too far" (more than rscan
+ * away). */
+
+ /* first find the location of the first value in r
+ * that is >= v, then find the location of the first
+ * value in r that is > v; the difference is the
+ * number of values equal to v */
+
+ /* look ahead a little (rscan) in r to see whether
+ * we're better off doing a binary search */
+ if (rscan < rend - rstart && rvals[rstart + rscan] < v) {
+ /* value too far away in r: use binary
+ * search */
+ rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
+ rend - 1, v, 1, 0);
+ } else {
+ /* scan r for v */
+ while (rstart < rend && rvals[rstart] < v)
+ rstart++;
+ }
+ if (rstart == rend) {
+ /* nothing found */
+ break;
+ }
+
+ /* now find the end of the sequence of equal values v */
+
+ /* if r is key, there is zero or one match, otherwise
+ * look ahead a little (rscan) in r to see whether
+ * we're better off doing a binary search */
+ if (r->tkey) {
+ if (rstart < rend && v == rvals[rstart]) {
+ nr = 1;
+ rstart++;
+ }
+ } else if (rscan < rend - rstart &&
+ v == rvals[rstart + rscan]) {
+ /* range too large: use binary search */
+ nr = binsearch_int(NULL, 0, rvals, rstart + rscan,
+ rend - 1, v, 1, 1);
+ nr -= rstart;
+ rstart += nr;
+ } else {
+ /* scan r for end of range */
+ while (rstart < rend && v == rvals[rstart]) {
+ nr++;
+ rstart++;
+ }
+ }
+ /* rstart points to first value > v or end of
+ * r, and nr is the number of values in r that
+ * are equal to v */
+ if (nr == 0) {
+ /* no entries in r found */
+ continue;
+ }
+ /* make space: nl values in l match nr values in r, so
+ * we need to add nl * nr values in the results */
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list