Changeset: 4ec4bbbf8d54 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4ec4bbbf8d54
Modified Files:
        clients/Tests/MAL-signatures.stable.out
        clients/Tests/MAL-signatures.stable.out.int128
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_align.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_cand.h
        gdk/gdk_firstn.c
        gdk/gdk_group.c
        gdk/gdk_join.c
        gdk/gdk_logger.c
        gdk/gdk_private.h
        gdk/gdk_project.c
        gdk/gdk_sample.c
        gdk/gdk_select.c
        gdk/gdk_unique.c
        monetdb5/modules/mal/mat.c
        monetdb5/modules/mal/pcre.c
        monetdb5/modules/mal/remote.c
        monetdb5/modules/mal/remote.mal
        sql/backends/monet5/generator/generator.c
        sql/backends/monet5/sql.c
        sql/storage/bat/bat_table.c
Branch: candidate-type
Log Message:

Implemented a batIscand property.


diffs (truncated from 1217 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -10765,6 +10765,7 @@ Ready.
 [ "remote",    "batbincopy",   "pattern remote.batbincopy():bat[:any] ",       "RMTbincopyfrom;",      "store the binary BAT data in the BBP and return as BAT"        ]
 [ "remote",    "batbincopy",   "pattern remote.batbincopy(b:bat[:any]):void ", "RMTbincopyto;",        "dump BAT b in binary form to the stream"       ]
 [ "remote",    "batload",      "pattern remote.batload(tt:any_1, size:int):bat[:any_1] ",      "RMTbatload;",  "create a BAT of the given type and size, and load values from the input stream"        ]
+[ "remote",    "batload",      "pattern remote.batload(tt:any_1, size:int, iscand:bit):bat[:any_1] ",  "RMTbatload;",  "create a BAT of the given type and size, and load values from the input stream"        ]
 [ "remote",    "bintype",      "pattern remote.bintype():void ",       "RMTbintype;",  "print the binary type of this mserver5"        ]
 [ "remote",    "connect",      "command remote.connect(uri:str, user:str, passwd:str):str ",   "RMTconnect;",  "returns a newly created connection for uri, using user name and password"      ]
 [ "remote",    "connect",      "command remote.connect(uri:str, user:str, passwd:str, scen:str):str ", "RMTconnectScen;",      "returns a newly created connection for uri, using user name, password and scenario"    ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -14976,6 +14976,7 @@ Ready.
 [ "remote",    "batbincopy",   "pattern remote.batbincopy():bat[:any] ",       "RMTbincopyfrom;",      "store the binary BAT data in the BBP and return as BAT"        ]
 [ "remote",    "batbincopy",   "pattern remote.batbincopy(b:bat[:any]):void ", "RMTbincopyto;",        "dump BAT b in binary form to the stream"       ]
 [ "remote",    "batload",      "pattern remote.batload(tt:any_1, size:int):bat[:any_1] ",      "RMTbatload;",  "create a BAT of the given type and size, and load values from the input stream"        ]
+[ "remote",    "batload",      "pattern remote.batload(tt:any_1, size:int, iscand:bit):bat[:any_1] ",  "RMTbatload;",  "create a BAT of the given type and size, and load values from the input stream"        ]
 [ "remote",    "bintype",      "pattern remote.bintype():void ",       "RMTbintype;",  "print the binary type of this mserver5"        ]
 [ "remote",    "connect",      "command remote.connect(uri:str, user:str, passwd:str):str ",   "RMTconnect;",  "returns a newly created connection for uri, using user name and password"      ]
 [ "remote",    "connect",      "command remote.connect(uri:str, user:str, passwd:str, scen:str):str ", "RMTconnectScen;",      "returns a newly created connection for uri, using user name, password and scenario"    ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -117,6 +117,7 @@ BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BA
 gdk_return BATextend(BAT *b, BUN newcap) __attribute__((__warn_unused_result__));
 void BATfakeCommit(BAT *b);
 gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, int asc, int distinct) __attribute__((__warn_unused_result__));
+BAT *BATfixcand(BAT *bn);
 int BATgetaccess(BAT *b);
 PROPrec *BATgetprop(BAT *b, int idx);
 gdk_return BATgroup(BAT **groups, BAT **extents, BAT **histo, BAT *b, BAT *s, BAT *g, BAT *e, BAT *h) __attribute__((__warn_unused_result__));
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -741,8 +741,9 @@ typedef struct {
         descdirty:1,           /* bat descriptor dirty marker */
         restricted:2,          /* access privileges */
         persistence:1,         /* should the BAT persist on disk? */
+        iscand:1,              /* BAT is a candidate list */
         role:8,                /* role of the bat */
-        unused:17;             /* value=0 for now (sneakily used by mat.c) */
+        unused:16;             /* value=0 for now (sneakily used by mat.c) */
        int sharecnt;           /* incoming view count */
 
        /* delta status administration */
@@ -822,6 +823,7 @@ typedef struct BATiter {
 #define batCapacity    S.capacity
 #define batSharecnt    S.sharecnt
 #define batRestricted  S.restricted
+#define batIscand      S.iscand
 #define batRole                S.role
 #define creator_tid    S.tid
 #define ttype          T.type
@@ -2761,6 +2763,7 @@ gdk_export BAT *BATslice(BAT *b, BUN low
 
 gdk_export BAT *BATunique(BAT *b, BAT *s);
 
+gdk_export BAT *BATfixcand(BAT *bn);
 gdk_export BAT *BATmergecand(BAT *a, BAT *b);
 gdk_export BAT *BATintersectcand(BAT *a, BAT *b);
 
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -101,6 +101,7 @@ VIEWcreate(oid seq, BAT *b)
        bn->batInserted = b->batInserted;
        bn->batCount = b->batCount;
        bn->batCapacity = b->batCapacity;
+       bn->batIscand = b->batIscand;
        bn->T = b->T;
 
        if (tp)
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -854,6 +854,7 @@ COLcopy(BAT *b, int tt, int writable, in
        }
        if (writable != TRUE)
                bn->batRestricted = BAT_READ;
+       bn->batIscand = b->batIscand;
        return bn;
       bunins_failed:
        BBPreclaim(bn);
@@ -914,6 +915,8 @@ setcolprops(BAT *b, const void *x)
 
        /* x may only be NULL if the column type is VOID */
        assert(x != NULL || b->ttype == TYPE_void);
+       /* nil not allowed for CND */
+       assert(!isnil || !b->batIscand);
        if (b->batCount == 0) {
                /* first value */
                b->tsorted = b->trevsorted = ATOMlinear(b->ttype) != 0;
@@ -959,6 +962,9 @@ setcolprops(BAT *b, const void *x)
                prv = BUNtail(bi, pos - 1);
                cmp = ATOMcmp(b->ttype, prv, x);
 
+               /* candidate lists must be strictly ascending */
+               assert(cmp < 0 || !b->batIscand);
+
                if (!b->tunique && /* assume outside check if tunique */
                    b->tkey &&
                    (cmp == 0 || /* definitely not KEY */
@@ -1070,6 +1076,9 @@ BUNdelete(BAT *b, oid o)
        BUN p;
        BATiter bi = bat_iterator(b);
 
+       /* this function messes with the order of the rows, so no
+        * candidate lists allowed */
+       assert(!b->batIscand);
        assert(!is_oid_nil(b->hseqbase) || BATcount(b) == 0);
        if (o < b->hseqbase || o >= b->hseqbase + BATcount(b)) {
                /* value already not there */
@@ -2022,6 +2031,15 @@ BATassertProps(BAT *b)
                assert(b->theap.size >> b->tshift >= b->batCapacity);
        }
 
+       /* candidate lists must have certain properties */
+       if (b->batIscand) {
+               assert(ATOMtype(b->ttype) == TYPE_oid);
+               assert(b->tnonil);
+               assert(b->tsorted);
+               assert(b->tkey);
+               assert(b->batRole == TRANSIENT);
+       }
+
        /* void and str imply varsized */
        if (b->ttype == TYPE_void ||
            ATOMstorage(b->ttype) == TYPE_str)
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -516,6 +516,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bit fo
        }
        assert(b->batCacheid > 0);
        assert(b->theap.parentid == 0);
+       assert(!b->batIscand || n->batIscand);
 
        ALIGNapp(b, "BATappend", force, GDK_FAIL);
 
@@ -608,6 +609,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bit fo
                if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
                    n->ttype == TYPE_void && is_oid_nil(n->tseqbase)) {
                        /* both b and n are void/nil */
+                       assert(!b->batIscand);
                        BATtseqbase(b, oid_nil);
                        BATsetcount(b, BATcount(b) + cnt);
                        if (b->tunique)
@@ -667,6 +669,9 @@ BATappend(BAT *b, BAT *n, BAT *s, bit fo
                BATiter ni = bat_iterator(n);
                BATiter bi = bat_iterator(b);
                int xx = ATOMcmp(b->ttype, BUNtail(ni, start), BUNtail(bi, last));
+               /* when appending candidate lists, the first of n must
+                * be larger than the last of b */
+               assert(!b->batIscand || xx > 0);
                if (BATtordered(b) && (!BATtordered(n) || xx < 0)) {
                        b->tsorted = FALSE;
                        b->tnosorted = 0;
@@ -928,7 +933,7 @@ BATslice(BAT *b, BUN l, BUN h)
 
                bn = COLnew((oid) (b->hseqbase + low), BATtdense(b) ? TYPE_void : b->ttype, h - l, TRANSIENT);
                if (bn == NULL) {
-                       return bn;
+                       return NULL;
                }
                if (bn->ttype == TYPE_void ||
                    (!bn->tvarsized &&
@@ -991,6 +996,7 @@ BATslice(BAT *b, BUN l, BUN h)
        bn->tnil = 0;           /* we just don't know */
        bn->tnosorted = 0;
        bn->tnokey[0] = bn->tnokey[1] = 0;
+       bn->batIscand = b->batIscand;
        return bn;
       bunins_failed:
        BBPreclaim(bn);
@@ -1926,49 +1932,54 @@ newdensecand(oid first, oid last)
 {
        if (last < first)
                first = last = 0; /* empty range */
-       return BATdense(0, first, last - first);
+       return BATfixcand(BATdense(0, first, last - first));
 }
 
-/* merge two candidate lists and produce a new one
- *
- * candidate lists are VOID-headed BATs with an OID tail which is
- * sorted and unique.
- */
+/* merge two candidate lists and produce a new one */
 BAT *
 BATmergecand(BAT *a, BAT *b)
 {
        BAT *bn;
        const oid *restrict ap, *restrict bp, *ape, *bpe;
        oid *restrict p, i;
-       oid af, al, bf, bl;
-       BATiter ai, bi;
-       bit ad, bd;
+       oid af, al, bf, bl;     /* first and last values of a and b */
+       bool ad, bd;            /* whether a and b are dense */
 
        BATcheck(a, "BATmergecand", NULL);
        BATcheck(b, "BATmergecand", NULL);
        assert(ATOMtype(a->ttype) == TYPE_oid);
        assert(ATOMtype(b->ttype) == TYPE_oid);
-       assert(BATcount(a) <= 1 || a->tsorted);
-       assert(BATcount(b) <= 1 || b->tsorted);
-       assert(BATcount(a) <= 1 || a->tkey);
-       assert(BATcount(b) <= 1 || b->tkey);
+       assert(a->tsorted);
+       assert(b->tsorted);
+       assert(a->tkey);
+       assert(b->tkey);
        assert(a->tnonil);
        assert(b->tnonil);
+       assert(a->batIscand || BATcount(a) == 0);
+       assert(b->batIscand || BATcount(b) == 0);
 
        /* we can return a if b is empty (and v.v.) */
        if (BATcount(a) == 0) {
-               return COLcopy(b, b->ttype, 0, TRANSIENT);
+               return BATfixcand(COLcopy(b, b->ttype, 0, TRANSIENT));
        }
        if (BATcount(b) == 0) {
-               return COLcopy(a, a->ttype, 0, TRANSIENT);
+               return BATfixcand(COLcopy(a, a->ttype, 0, TRANSIENT));
        }
        /* we can return a if a fully covers b (and v.v) */
-       ai = bat_iterator(a);
-       bi = bat_iterator(b);
-       af = *(oid*) BUNtail(ai, 0);
-       bf = *(oid*) BUNtail(bi, 0);
-       al = *(oid*) BUNtail(ai, BUNlast(a) - 1);
-       bl = *(oid*) BUNtail(bi, BUNlast(b) - 1);
+       if (BATtdense(a)) {
+               af = a->tseqbase;
+               al = af + BATcount(a) - 1;
+       } else {
+               af = *(oid *) Tloc(a, 0);
+               al = *(oid *) Tloc(a, BUNlast(a) - 1);
+       }
+       if (BATtdense(b)) {
+               bf = b->tseqbase;
+               bl = bf + BATcount(b) - 1;
+       } else {
+               bf = *(oid *) Tloc(b, 0);
+               bl = *(oid *) Tloc(b, BUNlast(b) - 1);
+       }
        ad = (af + BATcount(a) - 1 == al); /* i.e., dense */
        bd = (bf + BATcount(b) - 1 == bl); /* i.e., dense */
        if (ad && bd) {
@@ -1995,8 +2006,8 @@ BATmergecand(BAT *a, BAT *b)
        if (bn == NULL)
                return NULL;
        p = (oid *) Tloc(bn, 0);
-       if (a->ttype == TYPE_void && b->ttype == TYPE_void) {
-               /* both lists are VOID */
+       if (BATtdense(a) && BATtdense(b)) {
+               /* both lists are dense */
                if (a->tseqbase > b->tseqbase) {
                        BAT *t = a;
 
@@ -2010,14 +2021,14 @@ BATmergecand(BAT *a, BAT *b)
                     i < b->tseqbase + BATcount(b);
                     i++)
                        *p++ = i;
-       } else if (a->ttype == TYPE_void || b->ttype == TYPE_void) {
-               if (b->ttype == TYPE_void) {
+       } else if (BATtdense(a) || BATtdense(b)) {
+               if (BATtdense(b)) {
                        BAT *t = a;
 
                        a = b;
                        b = t;
                }
-               /* a->ttype == TYPE_void, b->ttype == TYPE_oid */
+               /* a is dense, b->ttype == TYPE_oid */
                bp = (const oid *) Tloc(b, 0);
                bpe = bp + BATcount(b);
                while (bp < bpe && *bp < a->tseqbase)
@@ -2057,14 +2068,10 @@ BATmergecand(BAT *a, BAT *b)
        bn->tkey = 1;
        bn->tnil = 0;
        bn->tnonil = 1;
-       return virtualize(bn);
+       return BATfixcand(bn);
 }
 
-/* intersect two candidate lists and produce a new one
- *
- * candidate lists are VOID-headed BATs with an OID tail which is
- * sorted and unique.
- */
+/* intersect two candidate lists and produce a new one */
 BAT *
 BATintersectcand(BAT *a, BAT *b)
 {
@@ -2072,7 +2079,6 @@ BATintersectcand(BAT *a, BAT *b)
        const oid *restrict ap, *restrict bp, *ape, *bpe;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to