Changeset: 3a84bdcb4e00 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3a84bdcb4e00
Modified Files:
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_firstn.c
gdk/gdk_group.c
gdk/gdk_join.c
gdk/gdk_select.c
gdk/gdk_unique.c
sql/backends/monet5/sql.c
Branch: Jul2015
Log Message:
Just before doing an operation that can benefit from sortedness, check.
diffs (truncated from 760 to 300 lines):
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1863,7 +1863,7 @@ BUNfnd(BAT *b, const void *v)
if (BATtvoid(b))
return BUNfndVOID(b, v);
if (!BATcheckhash(b)) {
- if (BATtordered(b) || BATtrevordered(b))
+ if (BATordered(BATmirror(b)) || BATordered_rev(BATmirror(b)))
return SORTfnd(b, v);
}
bi = bat_iterator(b);
@@ -1943,12 +1943,12 @@ BUNlocate(BAT *b, const void *x, const v
/* sometimes BUNlocate is just about a single column */
if (y &&
- BAThordered(b) &&
+ BATordered(b) &&
(*hcmp) (x, BUNhead(bi, p)) == 0 &&
(*hcmp) (x, BUNhead(bi, q - 1)) == 0)
usemirror();
if (y == NULL ||
- (BATtordered(b) &&
+ (BATordered(BATmirror(b)) &&
(*tcmp) (y, BUNtail(bi, p)) == 0 &&
(*tcmp) (y, BUNtail(bi, q - 1)) == 0)) {
return BUNfnd(BATmirror(b), x);
@@ -1970,11 +1970,11 @@ BUNlocate(BAT *b, const void *x, const v
}
/* next, try to restrict the range using sorted columns */
- if (BATtordered(b) || BATtrevordered(b)) {
+ if (BATordered(BATmirror(b)) || BATordered_rev(BATmirror(b))) {
p = SORTfndfirst(b, y);
q = SORTfndlast(b, y);
}
- if (BAThordered(b) || BAThrevordered(b)) {
+ if (BATordered(b) || BATordered_rev(b)) {
BUN mp = SORTfndfirst(BATmirror(b), x);
BUN mq = SORTfndlast(BATmirror(b), x);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -1095,21 +1095,97 @@ BATslice(BAT *b, BUN l, BUN h)
return NULL;
}
-/* Return whether the BAT is ordered or not. */
+/* Return whether the BAT is ordered or not. If we don't know, invest
+ * in a scan and record the results in the bat descriptor. */
int
BATordered(BAT *b)
{
- if (!b->hsorted)
- BATderiveHeadProps(b, 0);
+ lng t0 = GDKusec();
+
+ if (b->htype == TYPE_void)
+ return 1;
+ /* In order that multiple threads don't scan the same BAT at
+ * the same time (happens a lot with mitosis/mergetable), we
+ * use a lock. We reuse the hash lock for this, not because
+ * this scanning interferes with hashes, but because it's
+ * there, and not so likely to be used at the same time. */
+ MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BATordered");
+ if (!b->hsorted && b->H->nosorted == 0) {
+ BATiter bi = bat_iterator(b);
+ int (*cmpf)(const void *, const void *) = ATOMcompare(b->htype);
+ BUN p, q;
+ b->batDirtydesc = 1;
+ switch (ATOMbasetype(b->htype)) {
+ case TYPE_int: {
+ const int *iptr = (const int *) Hloc(b, 0);
+ for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+ if (iptr[p - 1] > iptr[p]) {
+ b->H->nosorted = p;
+ ALGODEBUG fprintf(stderr, "#BATordered:
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p,
BATgetId(b), BATcount(b), GDKusec() - t0);
+ goto doreturn;
+ }
+ }
+ break;
+ }
+ case TYPE_lng: {
+ const lng *lptr = (const lng *) Hloc(b, 0);
+ for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+ if (lptr[p - 1] > lptr[p]) {
+ b->H->nosorted = p;
+ ALGODEBUG fprintf(stderr, "#BATordered:
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p,
BATgetId(b), BATcount(b), GDKusec() - t0);
+ goto doreturn;
+ }
+ }
+ break;
+ }
+ default:
+ for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+ if (cmpf(BUNhead(bi, p - 1), BUNhead(bi, p)) >
0) {
+ b->H->nosorted = p;
+ ALGODEBUG fprintf(stderr, "#BATordered:
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p,
BATgetId(b), BATcount(b), GDKusec() - t0);
+ goto doreturn;
+ }
+ }
+ break;
+ }
+ b->hsorted = 1;
+ ALGODEBUG fprintf(stderr, "#BATordered: fixed sorted for %s#"
BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0);
+ }
+ doreturn:
+ MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BATordered");
return b->hsorted;
}
-/* Return whether the BAT is reverse ordered or not. */
+/* Return whether the BAT is reverse ordered or not. If we don't
+ * know, invest in a scan and record the results in the bat
+ * descriptor. */
int
BATordered_rev(BAT *b)
{
- if (!b->hrevsorted)
- BATderiveHeadProps(b, 0);
+ lng t0 = GDKusec();
+
+ if (b == NULL)
+ return 0;
+ if (b->htype == TYPE_void)
+ return b->hseqbase == oid_nil;
+ MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BATordered_rev");
+ if (!b->hrevsorted && b->H->norevsorted == 0) {
+ BATiter bi = bat_iterator(b);
+ int (*cmpf)(const void *, const void *) = ATOMcompare(b->htype);
+ BUN p, q;
+ b->batDirtydesc = 1;
+ for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+ if (cmpf(BUNhead(bi, p - 1), BUNhead(bi, p)) < 0) {
+ b->H->norevsorted = p;
+ ALGODEBUG fprintf(stderr, "#BATordered_rev:
fixed norevsorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p,
BATgetId(b), BATcount(b), GDKusec() - t0);
+ goto doreturn;
+ }
+ }
+ b->hrevsorted = 1;
+ ALGODEBUG fprintf(stderr, "#BATordered_rev: fixed revsorted for
%s#" BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0);
+ }
+ doreturn:
+ MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BATordered_rev");
return b->hrevsorted;
}
diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c
--- a/gdk/gdk_firstn.c
+++ b/gdk/gdk_firstn.c
@@ -154,7 +154,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
BATseqbase(BATmirror(bn), start + b->hseqbase);
return bn;
}
- if (b->tsorted || b->trevsorted) {
+ /* note, we want to do both calls */
+ if (BATordered(BATmirror(b)) | BATordered_rev(BATmirror(b))) {
/* trivial: b is sorted so we just need to return the
* initial or final part of it (or of the candidate
* list) */
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -444,7 +444,7 @@ BATgroup_internal(BAT **groups, BAT **ex
}
return GDK_SUCCEED;
}
- if (b->tsorted && b->trevsorted) {
+ if (BATordered(BATmirror(b)) && BATordered_rev(BATmirror(b))) {
/* all values are equal */
if (g == NULL) {
/* there's only a single group: 0 */
@@ -578,8 +578,8 @@ BATgroup_internal(BAT **groups, BAT **ex
}
}
- if (((b->tsorted || b->trevsorted) &&
- (g == NULL || g->tsorted || g->trevsorted)) ||
+ if (((BATordered(BATmirror(b)) || BATordered_rev(BATmirror(b))) &&
+ (g == NULL || BATordered(BATmirror(g)) ||
BATordered_rev(BATmirror(g)))) ||
subsorted) {
/* we only need to compare each entry with the previous */
ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
@@ -631,7 +631,7 @@ BATgroup_internal(BAT **groups, BAT **ex
gn->tsorted = 1;
*groups = gn;
- } else if (b->tsorted || b->trevsorted) {
+ } else if (BATordered(BATmirror(b)) || BATordered_rev(BATmirror(b))) {
BUN i, j;
BUN *pgrp;
@@ -832,7 +832,7 @@ BATgroup_internal(BAT **groups, BAT **ex
break;
}
} else {
- bit gc = g && (g->tsorted || g->trevsorted);
+ bit gc = g && (BATordered(BATmirror(g)) ||
BATordered_rev(BATmirror(g)));
const char *nme;
size_t nmelen;
Heap *hp = NULL;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -439,9 +439,29 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
{
BUN cnt;
+ r1->tkey = 1;
+ r1->T->nokey[0] = r1->T->nokey[1] = 0;
+ r1->tsorted = 1;
+ r1->T->nosorted = 0;
+ r1->tdense = 0;
+ r1->T->nodense = 0;
+ r1->T->nil = 0;
+ r1->T->nonil = 1;
+ r2->tkey = 1;
+ r2->T->nokey[0] = r2->T->nokey[1] = 0;
+ r2->tsorted = 1;
+ r2->T->nosorted = 0;
+ r2->tdense = 0;
+ r2->T->nodense = 0;
+ r2->T->nil = 0;
+ r2->T->nonil = 1;
if (lstart == lend || (!must_match && !nil_on_miss)) {
virtualize(r1);
+ r1->trevsorted = 1;
+ r1->T->norevsorted = 0;
virtualize(r2);
+ r2->trevsorted = 1;
+ r2->T->norevsorted = 0;
return GDK_SUCCEED;
}
if (must_match) {
@@ -455,12 +475,6 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
goto bailout;
memcpy(Tloc(r1, BUNfirst(r1)), lcand, (lcandend - lcand) *
sizeof(oid));
BATsetcount(r1, cnt);
- r1->tkey = 1;
- r1->tsorted = 1;
- r1->trevsorted = BATcount(r1) <= 1;
- r1->tdense = 0;
- r1->T->nil = 0;
- r1->T->nonil = 1;
} else {
cnt = lend - lstart;
HEAPfree(&r1->T->heap, 1);
@@ -468,18 +482,17 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
r1->tvarsized = 1;
r1->T->width = 0;
r1->T->shift = 0;
- r1->tdense = 0;
if (BATextend(r1, cnt) != GDK_SUCCEED)
goto bailout;
BATsetcount(r1, cnt);
BATseqbase(BATmirror(r1), lstart + l->hseqbase);
}
+ r1->T->norevsorted = !(r1->trevsorted = BATcount(r1) <= 1);
HEAPfree(&r2->T->heap, 1);
r2->ttype = TYPE_void;
r2->tvarsized = 1;
r2->T->width = 0;
r2->T->shift = 0;
- r2->tdense = 0;
if (BATextend(r2, cnt) != GDK_SUCCEED)
goto bailout;
BATsetcount(r2, cnt);
@@ -528,6 +541,12 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
assert(BATcount(l) > 0);
assert(BATtdense(r));
assert(BATcount(r) > 0);
+ /* some output properties are easy: r is dense and hence key,
+ * so r1 is key; r1 is sorted; r1 does not contain nils */
+ r1->tsorted = 1;
+ r1->tkey = 1;
+ r1->T->nil = 0;
+ r1->T->nonil = 1;
/* figure out range [lo..hi) of values in r that we need to match */
lo = r->tseqbase;
hi = lo + BATcount(r);
@@ -614,27 +633,29 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
goto bailout;
for (o = seq - l->hseqbase + l->tseqbase; o <
lo; o++)
APPEND(r2, oid_nil);
+ if (BATcount(r2) > 0 && hi - lo > 0)
+ r2->T->norevsorted = BUNlast(r2);
+ else if (hi - lo > 1)
+ r2->T->norevsorted = BUNlast(r2) + 1;
for (o = lo; o < hi; o++)
APPEND(r2, o - r->tseqbase +
r->hseqbase);
+ if (BATcount(r2) > 0 && BATcount(r2) < cnt) {
+ /* nils are smaller than
+ * non-nils, so so far r2 is
+ * sorted, it becomes unsorted
+ * if more nils are to
+ * follow */
+ r2->T->nosorted = BUNlast(r2);
+ }
for (o = BATcount(r2); o < cnt; o++)
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list