Changeset: e2cfbde56683 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e2cfbde56683
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_project.c
monetdb5/modules/mal/projectionpath.c
Branch: default
Log Message:
Implemented BATprojectchain to do a chain of BATproject without intermediates.
diffs (truncated from 483 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -154,6 +154,7 @@ gdk_return BATprintcolumns(stream *s, in
gdk_return BATprintf(stream *f, BAT *b);
gdk_return BATprod(void *res, int tp, BAT *b, BAT *s, int skip_nils, int
abort_on_error, int nil_if_empty);
BAT *BATproject(BAT *l, BAT *r);
+BAT *BATprojectchain(BAT **bats);
gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh, BAT
*sl, BAT *sr, int li, int hi, BUN estimate);
gdk_return BATreplace(BAT *b, BAT *p, BAT *n, bit force);
void BATroles(BAT *b, const char *hnme, const char *tnme);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2969,6 +2969,7 @@ gdk_export gdk_return BATjoin(BAT **r1p,
gdk_export gdk_return BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
gdk_export gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT
*rh, BAT *sl, BAT *sr, int li, int hi, BUN estimate);
gdk_export BAT *BATproject(BAT *l, BAT *r);
+gdk_export BAT *BATprojectchain(BAT **bats);
gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
diff --git a/gdk/gdk_project.c b/gdk/gdk_project.c
--- a/gdk/gdk_project.c
+++ b/gdk/gdk_project.c
@@ -404,3 +404,288 @@ BATproject(BAT *l, BAT *r)
BBPreclaim(bn);
return NULL;
}
+
+/* Calculate a chain of BATproject calls.
+ * The argument is a NULL-terminated array of BAT pointers.
+ * This function is equivalent to a sequence of calls
+ * bn = BATproject(bats[0], bats[1]);
+ * bn = BATproject(bn, bats[2]);
+ * ...
+ * bn = BATproject(bn, bats[n-1]);
+ * return bn;
+ * where none of the intermediates are actually produced (and bats[n]==NULL).
+ * Note that all BATs except the last must be oid/void tailed.
+ */
+BAT *
+BATprojectchain(BAT **bats)
+{
+ /* For each BAT we remember some important details, however,
+ * dense-tailed BATs are optimized away in this list by
+ * combining their details with the following BAT's details.
+ * For each element in the chain, the value must be in the
+ * range [hlo..hlo+cnt) of the following element. If a BAT in
+ * the chain is dense-tailed, the value tseq is the lowest
+ * value (corresponding with hlo). Since dense-tailed BATs
+ * are combined with their successors, tseq will only be used
+ * for the last element. */
+ struct {
+ const oid *vals; /* if not dense, start of relevant tail values
*/
+ BAT *b; /* the BAT */
+ oid hlo; /* lowest allowed oid to index the BAT */
+ BUN cnt; /* size of allowed index range */
+ } *ba;
+ int i, n, tpe;
+ BAT *b, *bn;
+ oid o;
+ const void *nil; /* nil representation for last BAT */
+ BUN p, cnt, off;
+ oid hseq, tseq;
+ int allnil = 0;
+
+ /* count number of participating BATs and allocate some
+ * temporary work space */
+ for (n = 0; bats[n]; n++)
+ ;
+ ba = GDKmalloc(sizeof(*ba) * n);
+ b = *bats++;
+ cnt = BATcount(b); /* this will be the size of the output */
+ hseq = b->hseqbase; /* this will be the seqbase of the output */
+ tseq = oid_nil; /* initialize, but overwritten before use */
+ for (i = n = 0; b != NULL; n++, i++) {
+ assert(BAThdense(b));
+ if (n > 0 && ba[i-1].vals == NULL) {
+ /* previous BAT was dense-tailed: we will
+ * combine it with this one */
+ i--;
+ assert(off == 0);
+ if (tseq + ba[i].cnt > b->hseqbase + BATcount(b)) {
+ if (tseq > b->hseqbase + BATcount(b))
+ ba[i].cnt = 0;
+ else
+ ba[i].cnt = b->hseqbase + BATcount(b) -
tseq;
+ }
+ if (BATtdense(b)) {
+ if (tseq > b->hseqbase) {
+ tseq = tseq - b->hseqbase + b->tseqbase;
+ } else if (tseq < b->hseqbase) {
+ if (b->hseqbase - tseq > ba[i].cnt) {
+ ba[i].cnt = 0;
+ } else {
+ ba[i].hlo += b->hseqbase - tseq;
+ ba[i].cnt -= b->hseqbase - tseq;
+ tseq = b->tseqbase;
+ }
+ } else {
+ tseq = b->tseqbase;
+ }
+ } else {
+ if (tseq > b->hseqbase) {
+ off = tseq - b->hseqbase;
+ } else if (tseq < b->hseqbase) {
+ if (b->hseqbase - tseq > ba[i].cnt) {
+ ba[i].cnt = 0;
+ } else {
+ ba[i].hlo += b->hseqbase - tseq;
+ ba[i].cnt -= b->hseqbase - tseq;
+ }
+ }
+ ba[i].vals = (const oid *) Tloc(b, BUNfirst(b)
+ off);
+ }
+ } else {
+ ba[i].hlo = b->hseqbase;
+ ba[i].cnt = BATcount(b);
+ off = 0;
+ if (BATtdense(b)) {
+ tseq = b->tseqbase;
+ ba[i].vals = NULL;
+ if (b->tseqbase == oid_nil)
+ allnil = 1;
+ } else {
+ tseq = oid_nil;
+ ba[i].vals = (const oid *) Tloc(b, BUNfirst(b));
+ }
+ }
+ ba[i].b = b;
+ if ((ba[i].cnt == 0 && cnt > 0) ||
+ (i == 0 && (ba[0].cnt < cnt || ba[0].hlo != hseq))) {
+ GDKerror("BATprojectchain: does not match always\n");
+ GDKfree(ba);
+ return NULL;
+ }
+ b = *bats++;
+ }
+ assert(n >= 1); /* not too few inputs */
+ b = bats[-2]; /* the last BAT in the list (bats[-1]==NULL) */
+ tpe = b->ttype; /* its type */
+ if (i == 1) {
+ /* only dense-tailed BATs before last: we can return a
+ * slice and manipulate offsets and head seqbase */
+ ALGODEBUG fprintf(stderr, "#BATprojectchain with %d BATs, size
"BUNFMT", type %s, using BATslice("BUNFMT","BUNFMT")\n", n, cnt, ATOMname(tpe),
off, off + cnt);
+ bn = BATslice(b, off, off + cnt);
+ if (bn == NULL) {
+ GDKfree(ba);
+ return NULL;
+ }
+ BATseqbase(bn, hseq);
+ if (bn->ttype == TYPE_void)
+ BATseqbase(BATmirror(bn), tseq);
+ GDKfree(ba);
+ return bn;
+ }
+ nil = ATOMnilptr(tpe);
+ if (allnil) {
+ /* somewhere on the way we encountered a void-nil BAT */
+ ALGODEBUG fprintf(stderr, "#BATprojectchain with %d BATs, size
"BUNFMT", type %s, all nil\n", n, cnt, ATOMname(tpe));
+ bn = BATconstant(tpe, nil, cnt, TRANSIENT);
+ GDKfree(ba);
+ return bn;
+ }
+ ALGODEBUG fprintf(stderr, "#BATprojectchain with %d (%d) BATs, size
"BUNFMT", type %s\n", n, i, cnt, ATOMname(tpe));
+ bn = BATnew(TYPE_void, ATOMtype(tpe), cnt, TRANSIENT);
+ if (bn == NULL || cnt == 0) {
+ GDKfree(ba);
+ return bn;
+ }
+ bn->T->nil = bn->T->nonil = 0; /* we're not paying attention to this */
+ n = i - 1; /* ba[n] is last BAT */
+
+/* figure out the "other" type, i.e. not compatible with oid */
+#if SIZEOF_OID == SIZEOF_INT
+#define OTPE lng
+#define TOTPE TYPE_lng
+#else
+#define OTPE int
+#define TOTPE TYPE_int
+#endif
+ if (ATOMstorage(bn->ttype) == ATOMstorage(TYPE_oid)) {
+ /* oids all the way (or the final tail type is a fixed
+ * sized atom the same size as oid) */
+ oid *restrict v = (oid *) Tloc(bn, BUNfirst(bn));
+
+ if (ba[n].vals == NULL) {
+ /* last BAT is dense-tailed */
+ lng offset = 0;
+
+ offset = (lng) tseq - (lng) ba[n].hlo;
+ ba[n].cnt += ba[n].hlo; /* upper bound of last BAT */
+ for (p = 0; p < cnt; p++) {
+ o = ba[0].vals[p];
+ for (i = 1; i < n; i++) {
+ o -= ba[i].hlo;
+ if (o >= ba[i].cnt) {
+ if (o == oid_nil - ba[i].hlo) {
+ bn->T->nil = 1;
+ o = oid_nil;
+ break;
+ }
+ GDKerror("BATprojectchain: does
not match always\n");
+ goto bunins_failed;
+ }
+ o = ba[i].vals[o];
+ }
+ if (o == oid_nil) {
+ *v++ = *(oid *) nil;
+ } else {
+ if (o < ba[n].hlo || o >= ba[n].cnt) {
+ GDKerror("BATprojectchain: does
not match always\n");
+ goto bunins_failed;
+ }
+ *v++ = o + offset;
+ }
+ }
+ } else {
+ /* last BAT is materialized */
+ for (p = 0; p < cnt; p++) {
+ o = ba[0].vals[p];
+ for (i = 1; i <= n; i++) { /* note "<=" */
+ o -= ba[i].hlo;
+ if (o >= ba[i].cnt) {
+ if (o == oid_nil - ba[i].hlo) {
+ bn->T->nil = 1;
+ o = oid_nil;
+ break;
+ }
+ GDKerror("BATprojectchain: does
not match always\n");
+ goto bunins_failed;
+ }
+ o = ba[i].vals[o];
+ }
+ *v++ = o == oid_nil ? *(oid *) nil : o;
+ }
+ }
+ assert(v == (oid *) Tloc(bn, BUNfirst(bn) + cnt));
+ } else if (ATOMstorage(b->ttype) == ATOMstorage(TOTPE)) {
+ /* one special case for a fixed sized BAT */
+ const OTPE *src = (const OTPE *) Tloc(b, BUNfirst(b) + off);
+ OTPE *restrict dst = (OTPE *) Tloc(bn, BUNfirst(bn));
+
+ for (p = 0; p < cnt; p++) {
+ o = ba[0].vals[p];
+ for (i = 1; i < n; i++) {
+ o -= ba[i].hlo;
+ if (o >= ba[i].cnt) {
+ if (o == oid_nil - ba[i].hlo) {
+ bn->T->nil = 1;
+ o = oid_nil;
+ break;
+ }
+ GDKerror("BATprojectchain: does not
match always\n");
+ goto bunins_failed;
+ }
+ o = ba[i].vals[o];
+ }
+ if (o == oid_nil) {
+ *dst++ = * (OTPE *) nil;
+ } else {
+ o -= ba[n].hlo;
+ if (o >= ba[n].cnt) {
+ GDKerror("BATprojectchain: does not
match always\n");
+ goto bunins_failed;
+ }
+ *dst++ = src[o];
+ }
+ }
+ } else {
+ /* generic code */
+ BATiter bi = bat_iterator(b);
+ const void *v = nil; /* make compiler happy with init */
+
+ off += BUNfirst(b);
+ for (p = 0; p < cnt; p++) {
+ o = ba[0].vals[p];
+ for (i = 1; i < n; i++) {
+ o -= ba[i].hlo;
+ if (o >= ba[i].cnt) {
+ if (o == oid_nil - ba[i].hlo) {
+ bn->T->nil = 1;
+ v = nil;
+ o = oid_nil;
+ break;
+ }
+ GDKerror("BATprojectchain: does not
match always\n");
+ goto bunins_failed;
+ }
+ o = ba[i].vals[o];
+ }
+ if (o != oid_nil) {
+ o -= ba[n].hlo;
+ if (o >= ba[n].cnt) {
+ GDKerror("BATprojectchain: does not
match always\n");
+ goto bunins_failed;
+ }
+ v = BUNtail(bi, o + off);
+ }
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list