Changeset: 4c7b14849621 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4c7b14849621
Modified Files:
gdk/gdk.h
gdk/gdk_batop.c
gdk/gdk_join.c
gdk/gdk_project.c
gdk/gdk_select.c
sql/backends/monet5/sql.c
Branch: cand
Log Message:
initial steps on delete lists
diffs (truncated from 373 to 300 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -767,6 +767,9 @@ gdk_export void *VALget(ValPtr v);
gdk_export int VALcmp(const ValRecord *p, const ValRecord *q);
gdk_export int VALisnil(const ValRecord *v);
+#define CAND_LIST 1
+#define CAND_NEG 2
+#define CAND_BIT 3
/*
* @- The BAT record
* The elements of the BAT structure are introduced in the remainder.
@@ -851,7 +854,8 @@ typedef struct {
restricted:2, /* access privileges */
persistence:1, /* should the BAT persist on disk? */
role:8, /* role of the bat */
- unused:15; /* value=0 for now */
+ cand:2, /* 0 bat, 1 candidate list, 2 neg cand list, 3
compressed bit vector */
+ unused:13; /* value=0 for now */
int sharecnt; /* incoming view count */
/* delta status administration */
@@ -860,6 +864,8 @@ typedef struct {
BUN inserted; /* start of inserted elements */
BUN count; /* tuple count */
BUN capacity; /* tuple capacity */
+ BUN negfirst; /* seqbase */
+ BUN negcount; /* total count of CAND_NEG list */
} BATrec;
typedef struct PROPrec PROPrec;
@@ -3087,6 +3093,7 @@ gdk_export BAT *BATunique(BAT *b, BAT *s
gdk_export BAT *BATmergecand(BAT *a, BAT *b);
gdk_export BAT *BATintersectcand(BAT *a, BAT *b);
+gdk_export BAT *BATminuscand(BAT *a, BAT *b);
gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands,
BAT *grps, BUN n, int asc, int distinct);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -1724,3 +1724,94 @@ BATintersectcand(BAT *a, BAT *b)
bn->T->nonil = 1;
return virtualize(bn);
}
+
+/* minus two candidate lists and produce a new one
+ *
+ * candidate lists are VOID-headed BATs with an OID tail which is
+ * sorted and unique.
+ */
+BAT *
+BATminuscand(BAT *a, BAT *b)
+{
+ BAT *bn;
+ const oid *restrict ap, *restrict bp, *ape, *bpe;
+ oid *restrict p;
+ BUN cnt;
+
+ BATcheck(a, "BATminuscand", NULL);
+ BATcheck(b, "BATminuscand", NULL);
+ assert(a->htype == TYPE_void);
+ assert(b->htype == TYPE_void);
+ assert(ATOMtype(a->ttype) == TYPE_oid);
+ assert(ATOMtype(b->ttype) == TYPE_oid);
+ assert(a->tsorted);
+ assert(b->tsorted);
+ assert(a->tkey);
+ //assert(b->tkey);
+ assert(a->T->nonil);
+ assert(b->T->nonil);
+
+ if (BATcount(a) == 0) {
+ BAT *x = newdensecand(0, 0);
+ BATsetcount(x,0);
+ return x;
+ }
+ if (BATcount(b) == 0) {
+ BBPfix(a->batCacheid);
+ return a;
+ }
+
+ cnt = BATcount(a);
+ if (BATcount(b) < cnt)
+ cnt -= BATcount(b);
+ bn = BATnew(TYPE_void, TYPE_oid, cnt, TRANSIENT);
+ if (bn == NULL)
+ return NULL;
+ p = (oid *) Tloc(bn, BUNfirst(bn));
+ assert(b->ttype == TYPE_oid);
+ if (a->ttype == TYPE_void) {
+ oid c = a->tseqbase, ce = c + BATcount(a), cur = 0;
+ /* a->ttype == TYPE_void, b->ttype == TYPE_oid */
+ bp = (const oid *) Tloc(b, BUNfirst(b));
+ bpe = bp + BATcount(b);
+ cur = *bp;
+ while (c < ce && bp <= bpe) {
+ if (c < cur) {
+ *p++ = c++;
+ } else {
+ if (c == cur)
+ c++;
+ cur = *bp++;
+ }
+ }
+ while (c<ce)
+ *p++ = c++;
+ } else {
+ /* a->ttype == TYPE_oid, b->ttype == TYPE_oid */
+ ap = (const oid *) Tloc(a, BUNfirst(a));
+ ape = ap + BATcount(a);
+ bp = (const oid *) Tloc(b, BUNfirst(b));
+ bpe = bp + BATcount(b);
+ while (ap < ape && bp <= bpe) {
+ if (*ap < *bp)
+ *p++ = *ap++;
+ else {
+ if (*ap == *bp)
+ ap++;
+ bp++;
+ }
+ }
+ while (ap<ape)
+ *p++ = *ap++;
+ }
+
+ /* properties */
+ BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, BUNfirst(bn))));
+ BATseqbase(bn, 0);
+ bn->trevsorted = BATcount(bn) <= 1;
+ bn->tsorted = 1;
+ bn->tkey = 1;
+ bn->T->nil = 0;
+ bn->T->nonil = 1;
+ return virtualize(bn);
+}
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3883,6 +3883,10 @@ BATouterjoin(BAT **r1p, BAT **r2p, BAT *
gdk_return
BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int
nil_matches, BUN estimate)
{
+ if (r->S->cand == CAND_NEG && !sl && !sr && !nil_matches) {
+ *r1p = BATminuscand(l,r);
+ return GDK_SUCCEED;
+ }
return subleftjoin(r1p, r2p, l, r, sl, sr, nil_matches,
0, 1, 0, estimate, "BATsemijoin", GDKusec());
}
diff --git a/gdk/gdk_project.c b/gdk/gdk_project.c
--- a/gdk/gdk_project.c
+++ b/gdk/gdk_project.c
@@ -183,8 +183,88 @@ bunins_failed:
return GDK_FAIL;
}
+static BAT *
+candidates(BAT *d)
+{
+ BAT *tids = BATnew(TYPE_void, TYPE_void, 0, TRANSIENT);
+
+ if (!tids)
+ return NULL;
+ tids->H->seq = d->S->negfirst;
+ tids->T->seq = d->S->negfirst;
+ BATsetcount(tids, d->S->negcount);
+ tids->H->revsorted = 0;
+ tids->T->revsorted = 0;
+
+ tids->T->key = 1;
+ tids->T->dense = 1;
+ tids->H->key = 1;
+ tids->H->dense = 1;
+
+ if (BATcount(d)) {
+ BAT *diff = BATdiff(tids, d, NULL, NULL, 0, BUN_NONE);
+
+ BBPunfix(tids->batCacheid);
+ BATseqbase(diff, d->S->negfirst);
+ tids = diff;
+ }
+ return tids;
+}
+
+static BAT * BATproject_pos(BAT *l, BAT *r);
BAT *
-BATproject(BAT *l, BAT *r)
+BATproject(BAT *s, BAT *b)
+{
+ if (s->S->cand == CAND_NEG) {
+ if (BATcount(s) == 0) {
+ BBPfix(b->batCacheid);
+ return b;
+ } else {
+ BUN cnt = BATcount(b);
+ BAT *n;
+ oid h = b->hseqbase, off = h, end = h + BATcount(b);
+ oid *d = (oid*)Tloc(s,0), *e = d + BATcount(s);
+
+ if (BATcount(s) < cnt)
+ cnt -= BATcount(s);
+ n = BATnew( TYPE_void, b->ttype, cnt, TRANSIENT);
+ if (!n)
+ return n;
+ assert(s->T->type == TYPE_oid);
+ /* later optimize (assume small delete lists) */
+ for(;d<e && *d < h; d++)
+ ;
+ if (*d == h) {
+ h = *d+1;
+ d++;
+ }
+ BATseqbase(n, s->S->negfirst);
+ for(;d<e && *d < end; d++){
+ BATappend(n, BATslice(b,h-off,*d-off), FALSE);
+ h = *d+1;
+ }
+ if (h < end)
+ BATappend(n, BATslice(b,h-off,end-off), FALSE);
+ return n;
+ }
+ } else {
+ BAT *ob = b, *r;
+
+ if (b->S->cand == CAND_NEG) { /* convert to CAND_POS */
+ b = candidates(b);
+
+ if (!b)
+ return NULL;
+ }
+ r = BATproject_pos(s, b);
+ if (ob != b)
+ BBPunfix(b->batCacheid);
+ return r;
+ }
+}
+
+static BAT *
+BATproject_pos(BAT *l, BAT *r)
{
BAT *bn;
oid lo, hi;
@@ -439,13 +519,40 @@ BATprojectchain(BAT **bats)
const void *nil; /* nil representation for last BAT */
BUN p, cnt, off;
oid hseq, tseq;
- int allnil = 0, nonil = 1;
+ int allnil = 0, nonil = 1, neg = 0;
int stringtrick = 0;
/* count number of participating BATs and allocate some
* temporary work space */
- for (n = 0; bats[n]; n++)
- ;
+ for (n = 0; bats[n]; n++) {
+ /* fallback BATprojects */
+ if (bats[n]->S->cand == CAND_NEG)
+ neg = 1;
+ }
+ if (neg) {
+ BAT *cur = bats[0];
+ BBPfix(cur->batCacheid);
+ if (cur->S->cand == CAND_NEG)
+ assert(0);
+ for (i = 1; i<n; i++ ) {
+ if (bats[i]->S->cand == CAND_NEG) {
+ BAT *c = candidates(bats[i]);
+
+ if (!c)
+ return NULL;
+ b = BATproject(cur, c);
+ assert(b);
+ BBPunfix(c->batCacheid);
+ //b = BATminuscand(cur, bats[i]);
+ } else {
+ b = BATproject(cur, bats[i]);
+ assert(b);
+ }
+ BBPunfix(cur->batCacheid);
+ cur = b;
+ }
+ return cur;
+ }
ba = GDKmalloc(sizeof(*ba) * n);
b = *bats++;
cnt = BATcount(b); /* this will be the size of the output */
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1207,8 +1207,8 @@ BAT_scanselect(BAT *b, BAT *s, BAT *bn,
/* in the case where equi==1, the check is x == *tl */ \
} while (0)
-BAT *
-BATselect(BAT *b, BAT *s, const void *tl, const void *th,
+static BAT *
+BATselect_pos(BAT *b, BAT *s, const void *tl, const void *th,
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list