Changeset: 3a84bdcb4e00 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3a84bdcb4e00
Modified Files:
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_firstn.c
        gdk/gdk_group.c
        gdk/gdk_join.c
        gdk/gdk_select.c
        gdk/gdk_unique.c
        sql/backends/monet5/sql.c
Branch: Jul2015
Log Message:

Just before doing an operation that can benefit from sortedness, check.


diffs (truncated from 760 to 300 lines):

diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1863,7 +1863,7 @@ BUNfnd(BAT *b, const void *v)
        if (BATtvoid(b))
                return BUNfndVOID(b, v);
        if (!BATcheckhash(b)) {
-               if (BATtordered(b) || BATtrevordered(b))
+               if (BATordered(BATmirror(b)) || BATordered_rev(BATmirror(b)))
                        return SORTfnd(b, v);
        }
        bi = bat_iterator(b);
@@ -1943,12 +1943,12 @@ BUNlocate(BAT *b, const void *x, const v
 
        /* sometimes BUNlocate is just about a single column */
        if (y &&
-           BAThordered(b) &&
+           BATordered(b) &&
            (*hcmp) (x, BUNhead(bi, p)) == 0 &&
            (*hcmp) (x, BUNhead(bi, q - 1)) == 0)
                usemirror();
        if (y == NULL ||
-           (BATtordered(b) &&
+           (BATordered(BATmirror(b)) &&
             (*tcmp) (y, BUNtail(bi, p)) == 0 &&
             (*tcmp) (y, BUNtail(bi, q - 1)) == 0)) {
                return BUNfnd(BATmirror(b), x);
@@ -1970,11 +1970,11 @@ BUNlocate(BAT *b, const void *x, const v
        }
 
        /* next, try to restrict the range using sorted columns */
-       if (BATtordered(b) || BATtrevordered(b)) {
+       if (BATordered(BATmirror(b)) || BATordered_rev(BATmirror(b))) {
                p = SORTfndfirst(b, y);
                q = SORTfndlast(b, y);
        }
-       if (BAThordered(b) || BAThrevordered(b)) {
+       if (BATordered(b) || BATordered_rev(b)) {
                BUN mp = SORTfndfirst(BATmirror(b), x);
                BUN mq = SORTfndlast(BATmirror(b), x);
 
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -1095,21 +1095,97 @@ BATslice(BAT *b, BUN l, BUN h)
        return NULL;
 }
 
-/* Return whether the BAT is ordered or not.  */
+/* Return whether the BAT is ordered or not.  If we don't know, invest
+ * in a scan and record the results in the bat descriptor.  */
 int
 BATordered(BAT *b)
 {
-       if (!b->hsorted)
-               BATderiveHeadProps(b, 0);
+       lng t0 = GDKusec();
+
+       if (b->htype == TYPE_void)
+               return 1;
+       /* In order that multiple threads don't scan the same BAT at
+        * the same time (happens a lot with mitosis/mergetable), we
+        * use a lock.  We reuse the hash lock for this, not because
+        * this scanning interferes with hashes, but because it's
+        * there, and not so likely to be used at the same time. */
+       MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BATordered");
+       if (!b->hsorted && b->H->nosorted == 0) {
+               BATiter bi = bat_iterator(b);
+               int (*cmpf)(const void *, const void *) = ATOMcompare(b->htype);
+               BUN p, q;
+               b->batDirtydesc = 1;
+               switch (ATOMbasetype(b->htype)) {
+               case TYPE_int: {
+                       const int *iptr = (const int *) Hloc(b, 0);
+                       for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+                               if (iptr[p - 1] > iptr[p]) {
+                                       b->H->nosorted = p;
+                                       ALGODEBUG fprintf(stderr, "#BATordered: 
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, 
BATgetId(b), BATcount(b), GDKusec() - t0);
+                                       goto doreturn;
+                               }
+                       }
+                       break;
+               }
+               case TYPE_lng: {
+                       const lng *lptr = (const lng *) Hloc(b, 0);
+                       for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+                               if (lptr[p - 1] > lptr[p]) {
+                                       b->H->nosorted = p;
+                                       ALGODEBUG fprintf(stderr, "#BATordered: 
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, 
BATgetId(b), BATcount(b), GDKusec() - t0);
+                                       goto doreturn;
+                               }
+                       }
+                       break;
+               }
+               default:
+                       for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+                               if (cmpf(BUNhead(bi, p - 1), BUNhead(bi, p)) > 
0) {
+                                       b->H->nosorted = p;
+                                       ALGODEBUG fprintf(stderr, "#BATordered: 
fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, 
BATgetId(b), BATcount(b), GDKusec() - t0);
+                                       goto doreturn;
+                               }
+                       }
+                       break;
+               }
+               b->hsorted = 1;
+               ALGODEBUG fprintf(stderr, "#BATordered: fixed sorted for %s#" 
BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0);
+       }
+  doreturn:
+       MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BATordered");
        return b->hsorted;
 }
 
-/* Return whether the BAT is reverse ordered or not.  */
+/* Return whether the BAT is reverse ordered or not.  If we don't
+ * know, invest in a scan and record the results in the bat
+ * descriptor.  */
 int
 BATordered_rev(BAT *b)
 {
-       if (!b->hrevsorted)
-               BATderiveHeadProps(b, 0);
+       lng t0 = GDKusec();
+
+       if (b == NULL)
+               return 0;
+       if (b->htype == TYPE_void)
+               return b->hseqbase == oid_nil;
+       MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BATordered_rev");
+       if (!b->hrevsorted && b->H->norevsorted == 0) {
+               BATiter bi = bat_iterator(b);
+               int (*cmpf)(const void *, const void *) = ATOMcompare(b->htype);
+               BUN p, q;
+               b->batDirtydesc = 1;
+               for (q = BUNlast(b), p = BUNfirst(b) + 1; p < q; p++) {
+                       if (cmpf(BUNhead(bi, p - 1), BUNhead(bi, p)) < 0) {
+                               b->H->norevsorted = p;
+                               ALGODEBUG fprintf(stderr, "#BATordered_rev: 
fixed norevsorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, 
BATgetId(b), BATcount(b), GDKusec() - t0);
+                               goto doreturn;
+                       }
+               }
+               b->hrevsorted = 1;
+               ALGODEBUG fprintf(stderr, "#BATordered_rev: fixed revsorted for 
%s#" BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0);
+       }
+  doreturn:
+       MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BATordered_rev");
        return b->hrevsorted;
 }
 
diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c
--- a/gdk/gdk_firstn.c
+++ b/gdk/gdk_firstn.c
@@ -154,7 +154,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, 
                BATseqbase(BATmirror(bn), start + b->hseqbase);
                return bn;
        }
-       if (b->tsorted || b->trevsorted) {
+       /* note, we want to do both calls */
+       if (BATordered(BATmirror(b)) | BATordered_rev(BATmirror(b))) {
                /* trivial: b is sorted so we just need to return the
                 * initial or final part of it (or of the candidate
                 * list) */
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -444,7 +444,7 @@ BATgroup_internal(BAT **groups, BAT **ex
                }
                return GDK_SUCCEED;
        }
-       if (b->tsorted && b->trevsorted) {
+       if (BATordered(BATmirror(b)) && BATordered_rev(BATmirror(b))) {
                /* all values are equal */
                if (g == NULL) {
                        /* there's only a single group: 0 */
@@ -578,8 +578,8 @@ BATgroup_internal(BAT **groups, BAT **ex
                }
        }
 
-       if (((b->tsorted || b->trevsorted) &&
-            (g == NULL || g->tsorted || g->trevsorted)) ||
+       if (((BATordered(BATmirror(b)) || BATordered_rev(BATmirror(b))) &&
+            (g == NULL || BATordered(BATmirror(g)) || 
BATordered_rev(BATmirror(g)))) ||
            subsorted) {
                /* we only need to compare each entry with the previous */
                ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
@@ -631,7 +631,7 @@ BATgroup_internal(BAT **groups, BAT **ex
 
                gn->tsorted = 1;
                *groups = gn;
-       } else if (b->tsorted || b->trevsorted) {
+       } else if (BATordered(BATmirror(b)) || BATordered_rev(BATmirror(b))) {
                BUN i, j;
                BUN *pgrp;
 
@@ -832,7 +832,7 @@ BATgroup_internal(BAT **groups, BAT **ex
                        break;
                }
        } else {
-               bit gc = g && (g->tsorted || g->trevsorted);
+               bit gc = g && (BATordered(BATmirror(g)) || 
BATordered_rev(BATmirror(g)));
                const char *nme;
                size_t nmelen;
                Heap *hp = NULL;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -439,9 +439,29 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
 {
        BUN cnt;
 
+       r1->tkey = 1;
+       r1->T->nokey[0] = r1->T->nokey[1] = 0;
+       r1->tsorted = 1;
+       r1->T->nosorted = 0;
+       r1->tdense = 0;
+       r1->T->nodense = 0;
+       r1->T->nil = 0;
+       r1->T->nonil = 1;
+       r2->tkey = 1;
+       r2->T->nokey[0] = r2->T->nokey[1] = 0;
+       r2->tsorted = 1;
+       r2->T->nosorted = 0;
+       r2->tdense = 0;
+       r2->T->nodense = 0;
+       r2->T->nil = 0;
+       r2->T->nonil = 1;
        if (lstart == lend || (!must_match && !nil_on_miss)) {
                virtualize(r1);
+               r1->trevsorted = 1;
+               r1->T->norevsorted = 0;
                virtualize(r2);
+               r2->trevsorted = 1;
+               r2->T->norevsorted = 0;
                return GDK_SUCCEED;
        }
        if (must_match) {
@@ -455,12 +475,6 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
                        goto bailout;
                memcpy(Tloc(r1, BUNfirst(r1)), lcand, (lcandend - lcand) * 
sizeof(oid));
                BATsetcount(r1, cnt);
-               r1->tkey = 1;
-               r1->tsorted = 1;
-               r1->trevsorted = BATcount(r1) <= 1;
-               r1->tdense = 0;
-               r1->T->nil = 0;
-               r1->T->nonil = 1;
        } else {
                cnt = lend - lstart;
                HEAPfree(&r1->T->heap, 1);
@@ -468,18 +482,17 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
                r1->tvarsized = 1;
                r1->T->width = 0;
                r1->T->shift = 0;
-               r1->tdense = 0;
                if (BATextend(r1, cnt) != GDK_SUCCEED)
                        goto bailout;
                BATsetcount(r1, cnt);
                BATseqbase(BATmirror(r1), lstart + l->hseqbase);
        }
+       r1->T->norevsorted = !(r1->trevsorted = BATcount(r1) <= 1);
        HEAPfree(&r2->T->heap, 1);
        r2->ttype = TYPE_void;
        r2->tvarsized = 1;
        r2->T->width = 0;
        r2->T->shift = 0;
-       r2->tdense = 0;
        if (BATextend(r2, cnt) != GDK_SUCCEED)
                goto bailout;
        BATsetcount(r2, cnt);
@@ -528,6 +541,12 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
        assert(BATcount(l) > 0);
        assert(BATtdense(r));
        assert(BATcount(r) > 0);
+       /* some output properties are easy: r is dense and hence key,
+        * so r1 is key; r1 is sorted; r1 does not contain nils */
+       r1->tsorted = 1;
+       r1->tkey = 1;
+       r1->T->nil = 0;
+       r1->T->nonil = 1;
        /* figure out range [lo..hi) of values in r that we need to match */
        lo = r->tseqbase;
        hi = lo + BATcount(r);
@@ -614,27 +633,29 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                                        goto bailout;
                                for (o = seq - l->hseqbase + l->tseqbase; o < 
lo; o++)
                                        APPEND(r2, oid_nil);
+                               if (BATcount(r2) > 0 && hi - lo > 0)
+                                       r2->T->norevsorted = BUNlast(r2);
+                               else if (hi - lo > 1)
+                                       r2->T->norevsorted = BUNlast(r2) + 1;
                                for (o = lo; o < hi; o++)
                                        APPEND(r2, o - r->tseqbase + 
r->hseqbase);
+                               if (BATcount(r2) > 0 && BATcount(r2) < cnt) {
+                                       /* nils are smaller than
+                                        * non-nils, so so far r2 is
+                                        * sorted, it becomes unsorted
+                                        * if more nils are to
+                                        * follow */
+                                       r2->T->nosorted = BUNlast(r2);
+                               }
                                for (o = BATcount(r2); o < cnt; o++)
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to