Changeset: 4c7b14849621 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4c7b14849621
Modified Files:
        gdk/gdk.h
        gdk/gdk_batop.c
        gdk/gdk_join.c
        gdk/gdk_project.c
        gdk/gdk_select.c
        sql/backends/monet5/sql.c
Branch: cand
Log Message:

initial steps on delete lists


diffs (truncated from 373 to 300 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -767,6 +767,9 @@ gdk_export void *VALget(ValPtr v);
 gdk_export int VALcmp(const ValRecord *p, const ValRecord *q);
 gdk_export int VALisnil(const ValRecord *v);
 
+#define CAND_LIST 1
+#define CAND_NEG 2
+#define CAND_BIT 3
 /*
  * @- The BAT record
  * The elements of the BAT structure are introduced in the remainder.
@@ -851,7 +854,8 @@ typedef struct {
         restricted:2,          /* access privileges */
         persistence:1,         /* should the BAT persist on disk? */
         role:8,                /* role of the bat */
-        unused:15;             /* value=0 for now */
+        cand:2,                /* 0 bat, 1 candidate list, 2 neg cand list, 3 
compressed bit vector */ 
+        unused:13;             /* value=0 for now */
        int sharecnt;           /* incoming view count */
 
        /* delta status administration */
@@ -860,6 +864,8 @@ typedef struct {
        BUN inserted;           /* start of inserted elements */
        BUN count;              /* tuple count */
        BUN capacity;           /* tuple capacity */
+       BUN negfirst;           /* seqbase */
+       BUN negcount;           /* total count of CAND_NEG list */
 } BATrec;
 
 typedef struct PROPrec PROPrec;
@@ -3087,6 +3093,7 @@ gdk_export BAT *BATunique(BAT *b, BAT *s
 
 gdk_export BAT *BATmergecand(BAT *a, BAT *b);
 gdk_export BAT *BATintersectcand(BAT *a, BAT *b);
+gdk_export BAT *BATminuscand(BAT *a, BAT *b);
 
 gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, 
BAT *grps, BUN n, int asc, int distinct);
 
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -1724,3 +1724,94 @@ BATintersectcand(BAT *a, BAT *b)
        bn->T->nonil = 1;
        return virtualize(bn);
 }
+
+/* minus two candidate lists and produce a new one
+ *
+ * candidate lists are VOID-headed BATs with an OID tail which is
+ * sorted and unique.
+ */
+BAT *
+BATminuscand(BAT *a, BAT *b)
+{
+       BAT *bn;
+       const oid *restrict ap, *restrict bp, *ape, *bpe;
+       oid *restrict p;
+       BUN cnt;
+
+       BATcheck(a, "BATminuscand", NULL);
+       BATcheck(b, "BATminuscand", NULL);
+       assert(a->htype == TYPE_void);
+       assert(b->htype == TYPE_void);
+       assert(ATOMtype(a->ttype) == TYPE_oid);
+       assert(ATOMtype(b->ttype) == TYPE_oid);
+       assert(a->tsorted);
+       assert(b->tsorted);
+       assert(a->tkey);
+       //assert(b->tkey);
+       assert(a->T->nonil);
+       assert(b->T->nonil);
+
+       if (BATcount(a) == 0) {
+               BAT *x = newdensecand(0, 0);
+               BATsetcount(x,0);
+               return x;
+       }
+       if (BATcount(b) == 0) {
+               BBPfix(a->batCacheid);
+               return a;
+       }
+
+       cnt = BATcount(a);
+       if (BATcount(b) < cnt)
+               cnt -= BATcount(b);
+       bn = BATnew(TYPE_void, TYPE_oid, cnt, TRANSIENT);
+       if (bn == NULL)
+               return NULL;
+       p = (oid *) Tloc(bn, BUNfirst(bn));
+       assert(b->ttype == TYPE_oid);
+       if (a->ttype == TYPE_void) {
+               oid c = a->tseqbase, ce = c + BATcount(a), cur = 0;
+               /* a->ttype == TYPE_void, b->ttype == TYPE_oid */
+               bp = (const oid *) Tloc(b, BUNfirst(b));
+               bpe = bp + BATcount(b);
+               cur = *bp;
+               while (c < ce && bp <= bpe) {
+                       if (c < cur) {
+                               *p++ = c++;
+                       } else {
+                               if (c == cur)
+                                       c++;
+                               cur = *bp++;
+                       }
+               }
+               while (c<ce)
+                       *p++ = c++;
+       } else {
+               /* a->ttype == TYPE_oid, b->ttype == TYPE_oid */
+               ap = (const oid *) Tloc(a, BUNfirst(a));
+               ape = ap + BATcount(a);
+               bp = (const oid *) Tloc(b, BUNfirst(b));
+               bpe = bp + BATcount(b);
+               while (ap < ape && bp <= bpe) {
+                       if (*ap < *bp)
+                               *p++ = *ap++;
+                       else {
+                               if (*ap == *bp)
+                                       ap++;
+                               bp++;
+                       }
+               }
+               while (ap<ape)
+                       *p++ = *ap++;
+       }
+
+       /* properties */
+       BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, BUNfirst(bn))));
+       BATseqbase(bn, 0);
+       bn->trevsorted = BATcount(bn) <= 1;
+       bn->tsorted = 1;
+       bn->tkey = 1;
+       bn->T->nil = 0;
+       bn->T->nonil = 1;
+       return virtualize(bn);
+}
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3883,6 +3883,10 @@ BATouterjoin(BAT **r1p, BAT **r2p, BAT *
 gdk_return
 BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int 
nil_matches, BUN estimate)
 {
+       if (r->S->cand == CAND_NEG && !sl && !sr && !nil_matches) {
+               *r1p = BATminuscand(l,r);
+               return GDK_SUCCEED;
+       }
        return subleftjoin(r1p, r2p, l, r, sl, sr, nil_matches,
                           0, 1, 0, estimate, "BATsemijoin", GDKusec());
 }
diff --git a/gdk/gdk_project.c b/gdk/gdk_project.c
--- a/gdk/gdk_project.c
+++ b/gdk/gdk_project.c
@@ -183,8 +183,88 @@ bunins_failed:
        return GDK_FAIL;
 }
 
+static BAT *
+candidates(BAT *d)
+{
+       BAT *tids = BATnew(TYPE_void, TYPE_void, 0, TRANSIENT);
+
+       if (!tids)
+               return NULL;
+       tids->H->seq = d->S->negfirst;
+       tids->T->seq = d->S->negfirst;
+       BATsetcount(tids, d->S->negcount);
+       tids->H->revsorted = 0;
+       tids->T->revsorted = 0;
+
+       tids->T->key = 1;
+       tids->T->dense = 1;
+       tids->H->key = 1;
+       tids->H->dense = 1;
+
+       if (BATcount(d)) {
+               BAT *diff = BATdiff(tids, d, NULL, NULL, 0, BUN_NONE);
+
+               BBPunfix(tids->batCacheid);
+               BATseqbase(diff, d->S->negfirst);
+               tids = diff;
+       }
+       return tids;
+}
+
+static BAT * BATproject_pos(BAT *l, BAT *r);
 BAT *
-BATproject(BAT *l, BAT *r)
+BATproject(BAT *s, BAT *b) 
+{
+       if (s->S->cand == CAND_NEG) {
+               if (BATcount(s) == 0) {
+                       BBPfix(b->batCacheid);
+                       return b;
+               } else {
+                       BUN cnt = BATcount(b);
+                       BAT *n; 
+                       oid h = b->hseqbase, off = h, end = h + BATcount(b);
+                       oid *d = (oid*)Tloc(s,0), *e = d + BATcount(s);
+                       
+                       if (BATcount(s) < cnt)
+                               cnt -= BATcount(s);
+                       n = BATnew( TYPE_void, b->ttype, cnt, TRANSIENT);
+                       if (!n)
+                               return n;
+                       assert(s->T->type == TYPE_oid);
+                       /* later optimize (assume small delete lists) */
+                       for(;d<e && *d < h; d++) 
+                               ;
+                       if (*d == h) {
+                               h = *d+1;
+                               d++;
+                       }
+                       BATseqbase(n, s->S->negfirst);
+                       for(;d<e && *d < end; d++){
+                               BATappend(n, BATslice(b,h-off,*d-off), FALSE);
+                               h = *d+1;
+                       }
+                       if (h < end)
+                               BATappend(n, BATslice(b,h-off,end-off), FALSE);
+                       return n;
+               }
+       } else {
+               BAT *ob = b, *r;
+
+               if (b->S->cand == CAND_NEG) { /* convert to CAND_POS */
+                       b = candidates(b);
+
+                       if (!b)
+                               return NULL;
+               }
+               r = BATproject_pos(s, b);
+               if (ob != b)
+                       BBPunfix(b->batCacheid);
+               return r;
+       }
+}
+       
+static BAT *
+BATproject_pos(BAT *l, BAT *r)
 {
        BAT *bn;
        oid lo, hi;
@@ -439,13 +519,40 @@ BATprojectchain(BAT **bats)
        const void *nil;        /* nil representation for last BAT */
        BUN p, cnt, off;
        oid hseq, tseq;
-       int allnil = 0, nonil = 1;
+       int allnil = 0, nonil = 1, neg = 0;
        int stringtrick = 0;
 
        /* count number of participating BATs and allocate some
         * temporary work space */
-       for (n = 0; bats[n]; n++)
-               ;
+       for (n = 0; bats[n]; n++) {
+               /* fallback BATprojects */
+               if (bats[n]->S->cand == CAND_NEG) 
+                       neg = 1;
+       }
+       if (neg) {
+               BAT *cur = bats[0];
+               BBPfix(cur->batCacheid);
+               if (cur->S->cand == CAND_NEG)
+                       assert(0);
+               for (i = 1; i<n; i++ ) {
+                       if (bats[i]->S->cand == CAND_NEG) {
+                               BAT *c = candidates(bats[i]);
+
+                               if (!c)
+                                       return NULL;
+                               b = BATproject(cur, c);
+                               assert(b);
+                               BBPunfix(c->batCacheid);
+                               //b = BATminuscand(cur, bats[i]);
+                       } else {
+                               b = BATproject(cur, bats[i]);
+                               assert(b);
+                       }
+                       BBPunfix(cur->batCacheid);
+                       cur = b;
+               }
+               return cur;
+       }
        ba = GDKmalloc(sizeof(*ba) * n);
        b = *bats++;
        cnt = BATcount(b);      /* this will be the size of the output */
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1207,8 +1207,8 @@ BAT_scanselect(BAT *b, BAT *s, BAT *bn, 
                /* in the case where equi==1, the check is x == *tl */  \
        } while (0)
 
-BAT *
-BATselect(BAT *b, BAT *s, const void *tl, const void *th,
+static BAT *
+BATselect_pos(BAT *b, BAT *s, const void *tl, const void *th,
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to