Changeset: 4ec4bbbf8d54 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4ec4bbbf8d54
Modified Files:
clients/Tests/MAL-signatures.stable.out
clients/Tests/MAL-signatures.stable.out.int128
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_align.c
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_cand.h
gdk/gdk_firstn.c
gdk/gdk_group.c
gdk/gdk_join.c
gdk/gdk_logger.c
gdk/gdk_private.h
gdk/gdk_project.c
gdk/gdk_sample.c
gdk/gdk_select.c
gdk/gdk_unique.c
monetdb5/modules/mal/mat.c
monetdb5/modules/mal/pcre.c
monetdb5/modules/mal/remote.c
monetdb5/modules/mal/remote.mal
sql/backends/monet5/generator/generator.c
sql/backends/monet5/sql.c
sql/storage/bat/bat_table.c
Branch: candidate-type
Log Message:
Implemented a batIscand property.
diffs (truncated from 1217 to 300 lines):
diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -10765,6 +10765,7 @@ Ready.
[ "remote", "batbincopy", "pattern remote.batbincopy():bat[:any] ",
"RMTbincopyfrom;", "store the binary BAT data in the BBP and return as
BAT" ]
[ "remote", "batbincopy", "pattern remote.batbincopy(b:bat[:any]):void ",
"RMTbincopyto;", "dump BAT b in binary form to the stream" ]
[ "remote", "batload", "pattern remote.batload(tt:any_1,
size:int):bat[:any_1] ", "RMTbatload;", "create a BAT of the given type
and size, and load values from the input stream" ]
+[ "remote", "batload", "pattern remote.batload(tt:any_1, size:int,
iscand:bit):bat[:any_1] ", "RMTbatload;", "create a BAT of the given type and
size, and load values from the input stream" ]
[ "remote", "bintype", "pattern remote.bintype():void ",
"RMTbintype;", "print the binary type of this mserver5" ]
[ "remote", "connect", "command remote.connect(uri:str, user:str,
passwd:str):str ", "RMTconnect;", "returns a newly created connection for
uri, using user name and password" ]
[ "remote", "connect", "command remote.connect(uri:str, user:str,
passwd:str, scen:str):str ", "RMTconnectScen;", "returns a newly created
connection for uri, using user name, password and scenario" ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -14976,6 +14976,7 @@ Ready.
[ "remote", "batbincopy", "pattern remote.batbincopy():bat[:any] ",
"RMTbincopyfrom;", "store the binary BAT data in the BBP and return as
BAT" ]
[ "remote", "batbincopy", "pattern remote.batbincopy(b:bat[:any]):void ",
"RMTbincopyto;", "dump BAT b in binary form to the stream" ]
[ "remote", "batload", "pattern remote.batload(tt:any_1,
size:int):bat[:any_1] ", "RMTbatload;", "create a BAT of the given type
and size, and load values from the input stream" ]
+[ "remote", "batload", "pattern remote.batload(tt:any_1, size:int,
iscand:bit):bat[:any_1] ", "RMTbatload;", "create a BAT of the given type and
size, and load values from the input stream" ]
[ "remote", "bintype", "pattern remote.bintype():void ",
"RMTbintype;", "print the binary type of this mserver5" ]
[ "remote", "connect", "command remote.connect(uri:str, user:str,
passwd:str):str ", "RMTconnect;", "returns a newly created connection for
uri, using user name and password" ]
[ "remote", "connect", "command remote.connect(uri:str, user:str,
passwd:str, scen:str):str ", "RMTconnectScen;", "returns a newly created
connection for uri, using user name, password and scenario" ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -117,6 +117,7 @@ BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BA
gdk_return BATextend(BAT *b, BUN newcap)
__attribute__((__warn_unused_result__));
void BATfakeCommit(BAT *b);
gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps,
BUN n, int asc, int distinct) __attribute__((__warn_unused_result__));
+BAT *BATfixcand(BAT *bn);
int BATgetaccess(BAT *b);
PROPrec *BATgetprop(BAT *b, int idx);
gdk_return BATgroup(BAT **groups, BAT **extents, BAT **histo, BAT *b, BAT *s,
BAT *g, BAT *e, BAT *h) __attribute__((__warn_unused_result__));
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -741,8 +741,9 @@ typedef struct {
descdirty:1, /* bat descriptor dirty marker */
restricted:2, /* access privileges */
persistence:1, /* should the BAT persist on disk? */
+ iscand:1, /* BAT is a candidate list */
role:8, /* role of the bat */
- unused:17; /* value=0 for now (sneakily used by mat.c) */
+ unused:16; /* value=0 for now (sneakily used by mat.c) */
int sharecnt; /* incoming view count */
/* delta status administration */
@@ -822,6 +823,7 @@ typedef struct BATiter {
#define batCapacity S.capacity
#define batSharecnt S.sharecnt
#define batRestricted S.restricted
+#define batIscand S.iscand
#define batRole S.role
#define creator_tid S.tid
#define ttype T.type
@@ -2761,6 +2763,7 @@ gdk_export BAT *BATslice(BAT *b, BUN low
gdk_export BAT *BATunique(BAT *b, BAT *s);
+gdk_export BAT *BATfixcand(BAT *bn);
gdk_export BAT *BATmergecand(BAT *a, BAT *b);
gdk_export BAT *BATintersectcand(BAT *a, BAT *b);
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -101,6 +101,7 @@ VIEWcreate(oid seq, BAT *b)
bn->batInserted = b->batInserted;
bn->batCount = b->batCount;
bn->batCapacity = b->batCapacity;
+ bn->batIscand = b->batIscand;
bn->T = b->T;
if (tp)
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -854,6 +854,7 @@ COLcopy(BAT *b, int tt, int writable, in
}
if (writable != TRUE)
bn->batRestricted = BAT_READ;
+ bn->batIscand = b->batIscand;
return bn;
bunins_failed:
BBPreclaim(bn);
@@ -914,6 +915,8 @@ setcolprops(BAT *b, const void *x)
/* x may only be NULL if the column type is VOID */
assert(x != NULL || b->ttype == TYPE_void);
+ /* nil not allowed for CND */
+ assert(!isnil || !b->batIscand);
if (b->batCount == 0) {
/* first value */
b->tsorted = b->trevsorted = ATOMlinear(b->ttype) != 0;
@@ -959,6 +962,9 @@ setcolprops(BAT *b, const void *x)
prv = BUNtail(bi, pos - 1);
cmp = ATOMcmp(b->ttype, prv, x);
+ /* candidate lists must be strictly ascending */
+ assert(cmp < 0 || !b->batIscand);
+
if (!b->tunique && /* assume outside check if tunique */
b->tkey &&
(cmp == 0 || /* definitely not KEY */
@@ -1070,6 +1076,9 @@ BUNdelete(BAT *b, oid o)
BUN p;
BATiter bi = bat_iterator(b);
+ /* this function messes with the order of the rows, so no
+ * candidate lists allowed */
+ assert(!b->batIscand);
assert(!is_oid_nil(b->hseqbase) || BATcount(b) == 0);
if (o < b->hseqbase || o >= b->hseqbase + BATcount(b)) {
/* value already not there */
@@ -2022,6 +2031,15 @@ BATassertProps(BAT *b)
assert(b->theap.size >> b->tshift >= b->batCapacity);
}
+ /* candidate lists must have certain properties */
+ if (b->batIscand) {
+ assert(ATOMtype(b->ttype) == TYPE_oid);
+ assert(b->tnonil);
+ assert(b->tsorted);
+ assert(b->tkey);
+ assert(b->batRole == TRANSIENT);
+ }
+
/* void and str imply varsized */
if (b->ttype == TYPE_void ||
ATOMstorage(b->ttype) == TYPE_str)
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -516,6 +516,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bit fo
}
assert(b->batCacheid > 0);
assert(b->theap.parentid == 0);
+ assert(!b->batIscand || n->batIscand);
ALIGNapp(b, "BATappend", force, GDK_FAIL);
@@ -608,6 +609,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bit fo
if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
n->ttype == TYPE_void && is_oid_nil(n->tseqbase)) {
/* both b and n are void/nil */
+ assert(!b->batIscand);
BATtseqbase(b, oid_nil);
BATsetcount(b, BATcount(b) + cnt);
if (b->tunique)
@@ -667,6 +669,9 @@ BATappend(BAT *b, BAT *n, BAT *s, bit fo
BATiter ni = bat_iterator(n);
BATiter bi = bat_iterator(b);
int xx = ATOMcmp(b->ttype, BUNtail(ni, start), BUNtail(bi,
last));
+ /* when appending candidate lists, the first of n must
+ * be larger than the last of b */
+ assert(!b->batIscand || xx > 0);
if (BATtordered(b) && (!BATtordered(n) || xx < 0)) {
b->tsorted = FALSE;
b->tnosorted = 0;
@@ -928,7 +933,7 @@ BATslice(BAT *b, BUN l, BUN h)
bn = COLnew((oid) (b->hseqbase + low), BATtdense(b) ? TYPE_void
: b->ttype, h - l, TRANSIENT);
if (bn == NULL) {
- return bn;
+ return NULL;
}
if (bn->ttype == TYPE_void ||
(!bn->tvarsized &&
@@ -991,6 +996,7 @@ BATslice(BAT *b, BUN l, BUN h)
bn->tnil = 0; /* we just don't know */
bn->tnosorted = 0;
bn->tnokey[0] = bn->tnokey[1] = 0;
+ bn->batIscand = b->batIscand;
return bn;
bunins_failed:
BBPreclaim(bn);
@@ -1926,49 +1932,54 @@ newdensecand(oid first, oid last)
{
if (last < first)
first = last = 0; /* empty range */
- return BATdense(0, first, last - first);
+ return BATfixcand(BATdense(0, first, last - first));
}
-/* merge two candidate lists and produce a new one
- *
- * candidate lists are VOID-headed BATs with an OID tail which is
- * sorted and unique.
- */
+/* merge two candidate lists and produce a new one */
BAT *
BATmergecand(BAT *a, BAT *b)
{
BAT *bn;
const oid *restrict ap, *restrict bp, *ape, *bpe;
oid *restrict p, i;
- oid af, al, bf, bl;
- BATiter ai, bi;
- bit ad, bd;
+ oid af, al, bf, bl; /* first and last values of a and b */
+ bool ad, bd; /* whether a and b are dense */
BATcheck(a, "BATmergecand", NULL);
BATcheck(b, "BATmergecand", NULL);
assert(ATOMtype(a->ttype) == TYPE_oid);
assert(ATOMtype(b->ttype) == TYPE_oid);
- assert(BATcount(a) <= 1 || a->tsorted);
- assert(BATcount(b) <= 1 || b->tsorted);
- assert(BATcount(a) <= 1 || a->tkey);
- assert(BATcount(b) <= 1 || b->tkey);
+ assert(a->tsorted);
+ assert(b->tsorted);
+ assert(a->tkey);
+ assert(b->tkey);
assert(a->tnonil);
assert(b->tnonil);
+ assert(a->batIscand || BATcount(a) == 0);
+ assert(b->batIscand || BATcount(b) == 0);
/* we can return a if b is empty (and v.v.) */
if (BATcount(a) == 0) {
- return COLcopy(b, b->ttype, 0, TRANSIENT);
+ return BATfixcand(COLcopy(b, b->ttype, 0, TRANSIENT));
}
if (BATcount(b) == 0) {
- return COLcopy(a, a->ttype, 0, TRANSIENT);
+ return BATfixcand(COLcopy(a, a->ttype, 0, TRANSIENT));
}
/* we can return a if a fully covers b (and v.v) */
- ai = bat_iterator(a);
- bi = bat_iterator(b);
- af = *(oid*) BUNtail(ai, 0);
- bf = *(oid*) BUNtail(bi, 0);
- al = *(oid*) BUNtail(ai, BUNlast(a) - 1);
- bl = *(oid*) BUNtail(bi, BUNlast(b) - 1);
+ if (BATtdense(a)) {
+ af = a->tseqbase;
+ al = af + BATcount(a) - 1;
+ } else {
+ af = *(oid *) Tloc(a, 0);
+ al = *(oid *) Tloc(a, BUNlast(a) - 1);
+ }
+ if (BATtdense(b)) {
+ bf = b->tseqbase;
+ bl = bf + BATcount(b) - 1;
+ } else {
+ bf = *(oid *) Tloc(b, 0);
+ bl = *(oid *) Tloc(b, BUNlast(b) - 1);
+ }
ad = (af + BATcount(a) - 1 == al); /* i.e., dense */
bd = (bf + BATcount(b) - 1 == bl); /* i.e., dense */
if (ad && bd) {
@@ -1995,8 +2006,8 @@ BATmergecand(BAT *a, BAT *b)
if (bn == NULL)
return NULL;
p = (oid *) Tloc(bn, 0);
- if (a->ttype == TYPE_void && b->ttype == TYPE_void) {
- /* both lists are VOID */
+ if (BATtdense(a) && BATtdense(b)) {
+ /* both lists are dense */
if (a->tseqbase > b->tseqbase) {
BAT *t = a;
@@ -2010,14 +2021,14 @@ BATmergecand(BAT *a, BAT *b)
i < b->tseqbase + BATcount(b);
i++)
*p++ = i;
- } else if (a->ttype == TYPE_void || b->ttype == TYPE_void) {
- if (b->ttype == TYPE_void) {
+ } else if (BATtdense(a) || BATtdense(b)) {
+ if (BATtdense(b)) {
BAT *t = a;
a = b;
b = t;
}
- /* a->ttype == TYPE_void, b->ttype == TYPE_oid */
+ /* a is dense, b->ttype == TYPE_oid */
bp = (const oid *) Tloc(b, 0);
bpe = bp + BATcount(b);
while (bp < bpe && *bp < a->tseqbase)
@@ -2057,14 +2068,10 @@ BATmergecand(BAT *a, BAT *b)
bn->tkey = 1;
bn->tnil = 0;
bn->tnonil = 1;
- return virtualize(bn);
+ return BATfixcand(bn);
}
-/* intersect two candidate lists and produce a new one
- *
- * candidate lists are VOID-headed BATs with an OID tail which is
- * sorted and unique.
- */
+/* intersect two candidate lists and produce a new one */
BAT *
BATintersectcand(BAT *a, BAT *b)
{
@@ -2072,7 +2079,6 @@ BATintersectcand(BAT *a, BAT *b)
const oid *restrict ap, *restrict bp, *ape, *bpe;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list