Changeset: d159bf94ef97 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d159bf94ef97
Modified Files:
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_firstn.c
gdk/gdk_group.c
gdk/gdk_join.c
gdk/gdk_select.c
gdk/gdk_unique.c
sql/backends/monet5/sql.c
Branch: default
Log Message:
Merge with Jul2015 branch.
diffs (truncated from 551 to 300 lines):
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1320,7 +1320,7 @@ BUNfnd(BAT *b, const void *v)
if (BATtvoid(b))
return BUNfndVOID(b, v);
if (!BATcheckhash(b)) {
- if (BATtordered(b) || BATtrevordered(b))
+ if (BATordered(b) || BATordered_rev(b))
return SORTfnd(b, v);
}
bi = bat_iterator(b);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -838,21 +838,97 @@ BATslice(BAT *b, BUN l, BUN h)
return NULL;
}
-/* Return whether the BAT is ordered or not. */
+/* Return whether the BAT is ordered or not. If we don't know, invest
+* in a scan and record the results in the bat descriptor. */
int
BATordered(BAT *b)
{
- if (!b->tsorted)
- BATderiveTailProps(b, 0);
+ lng t0 = GDKusec();
+
+ if (b->ttype == TYPE_void)
+ return 1;
+ /* In order that multiple threads don't scan the same BAT at
+ * the same time (happens a lot with mitosis/mergetable), we
+ * use a lock. We reuse the hash lock for this, not because
+ * this scanning interferes with hashes, but because it's
+ * there, and not so likely to be used at the same time. */
+ MT_lock_set(&GDKhashLock(abs(b->batCacheid)));
+ if (!b->tsorted && b->T->nosorted == 0) {
+ BATiter bi = bat_iterator(b);
+ int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
+ BUN p, q;
+ b->batDirtydesc = 1;
+ switch (ATOMbasetype(b->ttype)) {
+ case TYPE_int: {
+ const int *iptr = (const int *) Tloc(b, 0);
+ for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+ if (iptr[p - 1] > iptr[p]) {
+ b->T->nosorted = p;
+ ALGODEBUG fprintf(stderr, "#BATordered:
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p,
BATgetId(b), BATcount(b), GDKusec() - t0);
+ goto doreturn;
+ }
+ }
+ break;
+ }
+ case TYPE_lng: {
+ const lng *lptr = (const lng *) Tloc(b, 0);
+ for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+ if (lptr[p - 1] > lptr[p]) {
+ b->T->nosorted = p;
+ ALGODEBUG fprintf(stderr, "#BATordered:
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p,
BATgetId(b), BATcount(b), GDKusec() - t0);
+ goto doreturn;
+ }
+ }
+ break;
+ }
+ default:
+ for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+ if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) >
0) {
+ b->T->nosorted = p;
+ ALGODEBUG fprintf(stderr, "#BATordered:
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p,
BATgetId(b), BATcount(b), GDKusec() - t0);
+ goto doreturn;
+ }
+ }
+ break;
+ }
+ b->tsorted = 1;
+ ALGODEBUG fprintf(stderr, "#BATordered: fixed sorted for %s#"
BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0);
+ }
+ doreturn:
+ MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
return b->tsorted;
}
-/* Return whether the BAT is reverse ordered or not. */
+/* Return whether the BAT is reverse ordered or not. If we don't
+ * know, invest in a scan and record the results in the bat
+ * descriptor. */
int
BATordered_rev(BAT *b)
{
- if (!b->trevsorted)
- BATderiveTailProps(b, 0);
+ lng t0 = GDKusec();
+
+ if (b == NULL)
+ return 0;
+ if (b->ttype == TYPE_void)
+ return b->tseqbase == oid_nil;
+ MT_lock_set(&GDKhashLock(abs(b->batCacheid)));
+ if (!b->trevsorted && b->T->norevsorted == 0) {
+ BATiter bi = bat_iterator(b);
+ int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
+ BUN p, q;
+ b->batDirtydesc = 1;
+ for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+ if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
+ b->T->norevsorted = p;
+ ALGODEBUG fprintf(stderr, "#BATordered_rev:
fixed norevsorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p,
BATgetId(b), BATcount(b), GDKusec() - t0);
+ goto doreturn;
+ }
+ }
+ b->trevsorted = 1;
+ ALGODEBUG fprintf(stderr, "#BATordered_rev: fixed revsorted for
%s#" BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0);
+ }
+ doreturn:
+ MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
return b->trevsorted;
}
diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c
--- a/gdk/gdk_firstn.c
+++ b/gdk/gdk_firstn.c
@@ -154,7 +154,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
BATseqbase(BATmirror(bn), start + b->hseqbase);
return bn;
}
- if (b->tsorted || b->trevsorted) {
+ /* note, we want to do bot calls */
+ if (BATordered(b) | BATordered_rev(b)) {
/* trivial: b is sorted so we just need to return the
* initial or final part of it (or of the candidate
* list) */
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -444,7 +444,7 @@ BATgroup_internal(BAT **groups, BAT **ex
}
return GDK_SUCCEED;
}
- if (b->tsorted && b->trevsorted) {
+ if (BATordered(b) && BATordered_rev(b)) {
/* all values are equal */
if (g == NULL) {
/* there's only a single group: 0 */
@@ -581,8 +581,8 @@ BATgroup_internal(BAT **groups, BAT **ex
}
}
- if (((b->tsorted || b->trevsorted) &&
- (g == NULL || g->tsorted || g->trevsorted)) ||
+ if (((BATordered(b) || BATordered_rev(b)) &&
+ (g == NULL || BATordered(g) || BATordered_rev(g))) ||
subsorted) {
/* we only need to compare each entry with the previous */
ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
@@ -634,7 +634,7 @@ BATgroup_internal(BAT **groups, BAT **ex
gn->tsorted = 1;
*groups = gn;
- } else if (b->tsorted || b->trevsorted) {
+ } else if (BATordered(b) || BATordered_rev(b)) {
BUN i, j;
BUN *pgrp;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -458,9 +458,33 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
{
BUN cnt;
+ r1->tkey = 1;
+ r1->T->nokey[0] = r1->T->nokey[1] = 0;
+ r1->tsorted = 1;
+ r1->T->nosorted = 0;
+ r1->tdense = 0;
+ r1->T->nodense = 0;
+ r1->T->nil = 0;
+ r1->T->nonil = 1;
+ if (r2) {
+ r2->tkey = 1;
+ r2->T->nokey[0] = r2->T->nokey[1] = 0;
+ r2->tsorted = 1;
+ r2->T->nosorted = 0;
+ r2->tdense = 0;
+ r2->T->nodense = 0;
+ r2->T->nil = 0;
+ r2->T->nonil = 1;
+ }
if (lstart == lend || !(nil_on_miss | only_misses)) {
virtualize(r1);
- virtualize(r2);
+ r1->trevsorted = 1;
+ r1->T->norevsorted = 0;
+ if (r2) {
+ virtualize(r2);
+ r2->trevsorted = 1;
+ r2->T->norevsorted = 0;
+ }
return GDK_SUCCEED;
}
if (lcand) {
@@ -469,12 +493,6 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
goto bailout;
memcpy(Tloc(r1, BUNfirst(r1)), lcand, (lcandend - lcand) *
sizeof(oid));
BATsetcount(r1, cnt);
- r1->tkey = 1;
- r1->tsorted = 1;
- r1->trevsorted = BATcount(r1) <= 1;
- r1->tdense = 0;
- r1->T->nil = 0;
- r1->T->nonil = 1;
} else {
cnt = lend - lstart;
HEAPfree(&r1->T->heap, 1);
@@ -482,12 +500,12 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
r1->tvarsized = 1;
r1->T->width = 0;
r1->T->shift = 0;
- r1->tdense = 0;
if (BATextend(r1, cnt) != GDK_SUCCEED)
goto bailout;
BATsetcount(r1, cnt);
BATseqbase(BATmirror(r1), lstart + l->hseqbase);
}
+ r1->T->norevsorted = !(r1->trevsorted = BATcount(r1) <= 1);
BATseqbase(r1, 0);
if (r2) {
HEAPfree(&r2->T->heap, 1);
@@ -495,7 +513,6 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
r2->tvarsized = 1;
r2->T->width = 0;
r2->T->shift = 0;
- r2->tdense = 0;
if (BATextend(r2, cnt) != GDK_SUCCEED)
goto bailout;
BATsetcount(r2, cnt);
@@ -1109,10 +1126,12 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT
lordering = 1;
rordering = r->tsorted ? 1 : -1;
/* if l not sorted, we only know for sure that r2 is
- * key if l is, and that r1 is key if r is */
+ * key if l is, and that r1 is key if r is; r1 is also
+ * key in the case of a semi-join or anti-semi-join
+ * (only_misses) */
if (r2)
r2->tkey = l->tkey != 0;
- r1->tkey = r->tkey != 0;
+ r1->tkey = (r->tkey != 0) | semi | only_misses;
}
/* determine opportunistic scan window for r; if l is not
* sorted this is only used to find range of equal values */
@@ -3082,7 +3101,7 @@ subleftjoin(BAT **r1p, BAT **r2p, BAT *l
/* use special implementation for dense right-hand side */
return mergejoin_void(r1, r2, l, r, sl, sr,
nil_on_miss, only_misses, t0);
- } else if ((r->tsorted || r->trevsorted) &&
+ } else if ((BATordered(r) || BATordered_rev(r)) &&
(BATtdense(r) ||
lcount < 1024 ||
BATcount(r) * (Tsize(r) + (r->T->vheap ? r->T->vheap->size
: 0) + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads :
1)))
@@ -3267,7 +3286,8 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
} else if (BATtdense(l) && (sl == NULL || BATtdense(sl))) {
/* use special implementation for dense right-hand side */
return mergejoin_void(r2, r1, r, l, sr, sl, 0, 0, t0);
- } else if ((l->tsorted || l->trevsorted) && (r->tsorted ||
r->trevsorted)) {
+ } else if ((BATordered(l) || BATordered_rev(l)) &&
+ (BATordered(r) || BATordered_rev(r))) {
/* both sorted, smallest on left */
if (BATcount(l) <= BATcount(r))
return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0,
0, 0, maxsize, t0, 0);
@@ -3285,14 +3305,14 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
/* only right has hash, don't swap */
swap = 0;
reason = "right has hash";
- } else if ((l->tsorted || l->trevsorted) &&
+ } else if ((BATordered(l) || BATordered_rev(l)) &&
(l->ttype == TYPE_void || rcount < 1024 || MIN(lsize, rsize)
> mem_size)) {
/* only left is sorted, swap; but only if right is
* "large" and the smaller of the two isn't too large
* (i.e. prefer hash over binary search, but only if
* the hash table doesn't cause thrashing) */
return mergejoin(r2, r1, r, l, sr, sl, nil_matches, 0, 0, 0,
maxsize, t0, 1);
- } else if ((r->tsorted || r->trevsorted) &&
+ } else if ((BATordered(r) || BATordered_rev(r)) &&
(r->ttype == TYPE_void || lcount < 1024 || MIN(lsize, rsize)
> mem_size)) {
/* only right is sorted, don't swap; but only if left
* is "large" and the smaller of the two isn't too
@@ -3387,12 +3407,12 @@ BATrangejoin(BAT **r1p, BAT **r2p, BAT *
#define project_loop(TYPE) \
static gdk_return \
-project_##TYPE(BAT *bn, BAT *l, BAT *r, int nilcheck, int sortcheck) \
+project_##TYPE(BAT *bn, BAT *l, BAT *r, int nilcheck) \
{ \
oid lo, hi; \
const TYPE *restrict rt; \
TYPE *restrict bt; \
- TYPE v, prev = 0; \
+ TYPE v; \
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list