Changeset: e2cfbde56683 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e2cfbde56683
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_project.c
        monetdb5/modules/mal/projectionpath.c
Branch: default
Log Message:

Implemented BATprojectchain to do a chain of BATproject without intermediates.


diffs (truncated from 483 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -154,6 +154,7 @@ gdk_return BATprintcolumns(stream *s, in
 gdk_return BATprintf(stream *f, BAT *b);
 gdk_return BATprod(void *res, int tp, BAT *b, BAT *s, int skip_nils, int 
abort_on_error, int nil_if_empty);
 BAT *BATproject(BAT *l, BAT *r);
+BAT *BATprojectchain(BAT **bats);
 gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh, BAT 
*sl, BAT *sr, int li, int hi, BUN estimate);
 gdk_return BATreplace(BAT *b, BAT *p, BAT *n, bit force);
 void BATroles(BAT *b, const char *hnme, const char *tnme);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2969,6 +2969,7 @@ gdk_export gdk_return BATjoin(BAT **r1p,
 gdk_export gdk_return BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
 gdk_export gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT 
*rh, BAT *sl, BAT *sr, int li, int hi, BUN estimate);
 gdk_export BAT *BATproject(BAT *l, BAT *r);
+gdk_export BAT *BATprojectchain(BAT **bats);
 
 gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
 
diff --git a/gdk/gdk_project.c b/gdk/gdk_project.c
--- a/gdk/gdk_project.c
+++ b/gdk/gdk_project.c
@@ -404,3 +404,288 @@ BATproject(BAT *l, BAT *r)
        BBPreclaim(bn);
        return NULL;
 }
+
+/* Calculate a chain of BATproject calls.
+ * The argument is a NULL-terminated array of BAT pointers.
+ * This function is equivalent to a sequence of calls
+ * bn = BATproject(bats[0], bats[1]);
+ * bn = BATproject(bn, bats[2]);
+ * ...
+ * bn = BATproject(bn, bats[n-1]);
+ * return bn;
+ * where none of the intermediates are actually produced (and bats[n]==NULL).
+ * Note that all BATs except the last must be oid/void tailed.
+ */
+BAT *
+BATprojectchain(BAT **bats)
+{
+       /* For each BAT we remember some important details, however,
+        * dense-tailed BATs are optimized away in this list by
+        * combining their details with the following BAT's details.
+        * For each element in the chain, the value must be in the
+        * range [hlo..hlo+cnt) of the following element.  If a BAT in
+        * the chain is dense-tailed, the value tseq is the lowest
+        * value (corresponding with hlo).  Since dense-tailed BATs
+        * are combined with their successors, tseq will only be used
+        * for the last element. */
+       struct {
+               const oid *vals; /* if not dense, start of relevant tail values 
*/
+               BAT *b;         /* the BAT */
+               oid hlo;        /* lowest allowed oid to index the BAT */
+               BUN cnt;        /* size of allowed index range */
+       } *ba;
+       int i, n, tpe;
+       BAT *b, *bn;
+       oid o;
+       const void *nil;        /* nil representation for last BAT */
+       BUN p, cnt, off;
+       oid hseq, tseq;
+       int allnil = 0;
+
+       /* count number of participating BATs and allocate some
+        * temporary work space */
+       for (n = 0; bats[n]; n++)
+               ;
+       ba = GDKmalloc(sizeof(*ba) * n);
+       b = *bats++;
+       cnt = BATcount(b);      /* this will be the size of the output */
+       hseq = b->hseqbase;     /* this will be the seqbase of the output */
+       tseq = oid_nil;         /* initialize, but overwritten before use */
+       for (i = n = 0; b != NULL; n++, i++) {
+               assert(BAThdense(b));
+               if (n > 0 && ba[i-1].vals == NULL) {
+                       /* previous BAT was dense-tailed: we will
+                        * combine it with this one */
+                       i--;
+                       assert(off == 0);
+                       if (tseq + ba[i].cnt > b->hseqbase + BATcount(b)) {
+                               if (tseq > b->hseqbase + BATcount(b))
+                                       ba[i].cnt = 0;
+                               else
+                                       ba[i].cnt = b->hseqbase + BATcount(b) - 
tseq;
+                       }
+                       if (BATtdense(b)) {
+                               if (tseq > b->hseqbase) {
+                                       tseq = tseq - b->hseqbase + b->tseqbase;
+                               } else if (tseq < b->hseqbase) {
+                                       if (b->hseqbase - tseq > ba[i].cnt) {
+                                               ba[i].cnt = 0;
+                                       } else {
+                                               ba[i].hlo += b->hseqbase - tseq;
+                                               ba[i].cnt -= b->hseqbase - tseq;
+                                               tseq = b->tseqbase;
+                                       }
+                               } else {
+                                       tseq = b->tseqbase;
+                               }
+                       } else {
+                               if (tseq > b->hseqbase) {
+                                       off = tseq - b->hseqbase;
+                               } else if (tseq < b->hseqbase) {
+                                       if (b->hseqbase - tseq > ba[i].cnt) {
+                                               ba[i].cnt = 0;
+                                       } else {
+                                               ba[i].hlo += b->hseqbase - tseq;
+                                               ba[i].cnt -= b->hseqbase - tseq;
+                                       }
+                               }
+                               ba[i].vals = (const oid *) Tloc(b, BUNfirst(b) 
+ off);
+                       }
+               } else {
+                       ba[i].hlo = b->hseqbase;
+                       ba[i].cnt = BATcount(b);
+                       off = 0;
+                       if (BATtdense(b)) {
+                               tseq = b->tseqbase;
+                               ba[i].vals = NULL;
+                               if (b->tseqbase == oid_nil)
+                                       allnil = 1;
+                       } else {
+                               tseq = oid_nil;
+                               ba[i].vals = (const oid *) Tloc(b, BUNfirst(b));
+                       }
+               }
+               ba[i].b = b;
+               if ((ba[i].cnt == 0 && cnt > 0) ||
+                   (i == 0 && (ba[0].cnt < cnt || ba[0].hlo != hseq))) {
+                       GDKerror("BATprojectchain: does not match always\n");
+                       GDKfree(ba);
+                       return NULL;
+               }
+               b = *bats++;
+       }
+       assert(n >= 1);         /* not too few inputs */
+       b = bats[-2];           /* the last BAT in the list (bats[-1]==NULL) */
+       tpe = b->ttype;         /* its type */
+       if (i == 1) {
+               /* only dense-tailed BATs before last: we can return a
+                * slice and manipulate offsets and head seqbase */
+               ALGODEBUG fprintf(stderr, "#BATprojectchain with %d BATs, size 
"BUNFMT", type %s, using BATslice("BUNFMT","BUNFMT")\n", n, cnt, ATOMname(tpe), 
off, off + cnt);
+               bn = BATslice(b, off, off + cnt);
+               if (bn == NULL) {
+                       GDKfree(ba);
+                       return NULL;
+               }
+               BATseqbase(bn, hseq);
+               if (bn->ttype == TYPE_void)
+                       BATseqbase(BATmirror(bn), tseq);
+               GDKfree(ba);
+               return bn;
+       }
+       nil = ATOMnilptr(tpe);
+       if (allnil) {
+               /* somewhere on the way we encountered a void-nil BAT */
+               ALGODEBUG fprintf(stderr, "#BATprojectchain with %d BATs, size 
"BUNFMT", type %s, all nil\n", n, cnt, ATOMname(tpe));
+               bn = BATconstant(tpe, nil, cnt, TRANSIENT);
+               GDKfree(ba);
+               return bn;
+       }
+       ALGODEBUG fprintf(stderr, "#BATprojectchain with %d (%d) BATs, size 
"BUNFMT", type %s\n", n, i, cnt, ATOMname(tpe));
+       bn = BATnew(TYPE_void, ATOMtype(tpe), cnt, TRANSIENT);
+       if (bn == NULL || cnt == 0) {
+               GDKfree(ba);
+               return bn;
+       }
+       bn->T->nil = bn->T->nonil = 0; /* we're not paying attention to this */
+       n = i - 1;              /* ba[n] is last BAT */
+
+/* figure out the "other" type, i.e. not compatible with oid */
+#if SIZEOF_OID == SIZEOF_INT
+#define OTPE   lng
+#define TOTPE  TYPE_lng
+#else
+#define OTPE   int
+#define TOTPE  TYPE_int
+#endif
+       if (ATOMstorage(bn->ttype) == ATOMstorage(TYPE_oid)) {
+               /* oids all the way (or the final tail type is a fixed
+                * sized atom the same size as oid) */
+               oid *restrict v = (oid *) Tloc(bn, BUNfirst(bn));
+
+               if (ba[n].vals == NULL) {
+                       /* last BAT is dense-tailed */
+                       lng offset = 0;
+
+                       offset = (lng) tseq - (lng) ba[n].hlo;
+                       ba[n].cnt += ba[n].hlo; /* upper bound of last BAT */
+                       for (p = 0; p < cnt; p++) {
+                               o = ba[0].vals[p];
+                               for (i = 1; i < n; i++) {
+                                       o -= ba[i].hlo;
+                                       if (o >= ba[i].cnt) {
+                                               if (o == oid_nil - ba[i].hlo) {
+                                                       bn->T->nil = 1;
+                                                       o = oid_nil;
+                                                       break;
+                                               }
+                                               GDKerror("BATprojectchain: does 
not match always\n");
+                                               goto bunins_failed;
+                                       }
+                                       o = ba[i].vals[o];
+                               }
+                               if (o == oid_nil) {
+                                       *v++ = *(oid *) nil;
+                               } else {
+                                       if (o < ba[n].hlo || o >= ba[n].cnt) {
+                                               GDKerror("BATprojectchain: does 
not match always\n");
+                                               goto bunins_failed;
+                                       }
+                                       *v++ = o + offset;
+                               }
+                       }
+               } else {
+                       /* last BAT is materialized */
+                       for (p = 0; p < cnt; p++) {
+                               o = ba[0].vals[p];
+                               for (i = 1; i <= n; i++) { /* note "<=" */
+                                       o -= ba[i].hlo;
+                                       if (o >= ba[i].cnt) {
+                                               if (o == oid_nil - ba[i].hlo) {
+                                                       bn->T->nil = 1;
+                                                       o = oid_nil;
+                                                       break;
+                                               }
+                                               GDKerror("BATprojectchain: does 
not match always\n");
+                                               goto bunins_failed;
+                                       }
+                                       o = ba[i].vals[o];
+                               }
+                               *v++ = o == oid_nil ? *(oid *) nil : o;
+                       }
+               }
+               assert(v == (oid *) Tloc(bn, BUNfirst(bn) + cnt));
+       } else if (ATOMstorage(b->ttype) == ATOMstorage(TOTPE)) {
+               /* one special case for a fixed sized BAT */
+               const OTPE *src = (const OTPE *) Tloc(b, BUNfirst(b) + off);
+               OTPE *restrict dst = (OTPE *) Tloc(bn, BUNfirst(bn));
+
+               for (p = 0; p < cnt; p++) {
+                       o = ba[0].vals[p];
+                       for (i = 1; i < n; i++) {
+                               o -= ba[i].hlo;
+                               if (o >= ba[i].cnt) {
+                                       if (o == oid_nil - ba[i].hlo) {
+                                               bn->T->nil = 1;
+                                               o = oid_nil;
+                                               break;
+                                       }
+                                       GDKerror("BATprojectchain: does not 
match always\n");
+                                       goto bunins_failed;
+                               }
+                               o = ba[i].vals[o];
+                       }
+                       if (o == oid_nil) {
+                               *dst++ = * (OTPE *) nil;
+                       } else {
+                               o -= ba[n].hlo;
+                               if (o >= ba[n].cnt) {
+                                       GDKerror("BATprojectchain: does not 
match always\n");
+                                       goto bunins_failed;
+                               }
+                               *dst++ = src[o];
+                       }
+               }
+       } else {
+               /* generic code */
+               BATiter bi = bat_iterator(b);
+               const void *v = nil; /* make compiler happy with init */
+
+               off += BUNfirst(b);
+               for (p = 0; p < cnt; p++) {
+                       o = ba[0].vals[p];
+                       for (i = 1; i < n; i++) {
+                               o -= ba[i].hlo;
+                               if (o >= ba[i].cnt) {
+                                       if (o == oid_nil - ba[i].hlo) {
+                                               bn->T->nil = 1;
+                                               v = nil;
+                                               o = oid_nil;
+                                               break;
+                                       }
+                                       GDKerror("BATprojectchain: does not 
match always\n");
+                                       goto bunins_failed;
+                               }
+                               o = ba[i].vals[o];
+                       }
+                       if (o != oid_nil) {
+                               o -= ba[n].hlo;
+                               if (o >= ba[n].cnt) {
+                                       GDKerror("BATprojectchain: does not 
match always\n");
+                                       goto bunins_failed;
+                               }
+                               v = BUNtail(bi, o + off);
+                       }
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to