Changeset: 96ee870d1d0e for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=96ee870d1d0e
Modified Files:
	clients/Tests/MAL-signatures.stable.out
	clients/Tests/MAL-signatures.stable.out.int128
	gdk/ChangeLog.Dec2016
	gdk/gdk.h
	gdk/gdk_align.c
	gdk/gdk_bat.c
	gdk/gdk_batop.c
	gdk/gdk_calc.c
	gdk/gdk_join.c
	gdk/gdk_project.c
	gdk/gdk_search.c
	monetdb5/modules/kernel/bat5.c
	monetdb5/modules/kernel/bat5.mal
Branch: Dec2016
Log Message:
Split the 2 bit tkey field into a 1 bit tkey and a 1 bit tunique field. The tunique field holds the old tkey&BOUND2BTRUE value. Also, in bat.setKey, test for uniqueness when setting the property. diffs (truncated from 812 to 300 lines): diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -599,7 +599,7 @@ Ready. [ "bat", "setColumn", "command bat.setColumn(b:bat[:any_1],t:str):void ", "BKCsetColumn;", "Give a logical name to the tail column of a BAT." ] [ "bat", "setHash", "command bat.setHash(b:bat[:any_1]):bit ", "BKCsetHash;", "Create a hash structure on the column" ] [ "bat", "setImprints", "command bat.setImprints(b:bat[:any_1]):bit ", "BKCsetImprints;", "Create an imprints structure on the column" ] -[ "bat", "setKey", "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ", "BKCsetkey;", "Sets the 'key' property of the tail column to 'mode'. In 'key' mode, \n the kernel will silently block insertions that cause a duplicate \n entries in the head column. KNOWN BUG:when 'key' is set to TRUE, this \n\t\tfunction does not automatically eliminate duplicates. " ] +[ "bat", "setKey", "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ", "BKCsetkey;", "Sets the 'key' property of the tail column to 'mode'. In 'key' mode,\n the kernel will silently block insertions that cause a duplicate\n entry in the head column." ] [ "bat", "setName", "command bat.setName(b:bat[:any_1],s:str):void ", "BKCsetName;", "Give a logical name to a BAT. " ] [ "bat", "setPersistent", "command bat.setPersistent(b:bat[:any_1]):void ", "BKCsetPersistent;", "Make the BAT persistent." ] [ "bat", "setTransient", "command bat.setTransient(b:bat[:any_1]):void ", "BKCsetTransient;", "Make the BAT transient. Returns \n\tboolean which indicates if the\nBAT administration has indeed changed." 
] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -705,7 +705,7 @@ Ready. [ "bat", "setColumn", "command bat.setColumn(b:bat[:any_1],t:str):void ", "BKCsetColumn;", "Give a logical name to the tail column of a BAT." ] [ "bat", "setHash", "command bat.setHash(b:bat[:any_1]):bit ", "BKCsetHash;", "Create a hash structure on the column" ] [ "bat", "setImprints", "command bat.setImprints(b:bat[:any_1]):bit ", "BKCsetImprints;", "Create an imprints structure on the column" ] -[ "bat", "setKey", "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ", "BKCsetkey;", "Sets the 'key' property of the tail column to 'mode'. In 'key' mode, \n the kernel will silently block insertions that cause a duplicate \n entries in the head column. KNOWN BUG:when 'key' is set to TRUE, this \n\t\tfunction does not automatically eliminate duplicates. " ] +[ "bat", "setKey", "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ", "BKCsetkey;", "Sets the 'key' property of the tail column to 'mode'. In 'key' mode,\n the kernel will silently block insertions that cause a duplicate\n entry in the head column." ] [ "bat", "setName", "command bat.setName(b:bat[:any_1],s:str):void ", "BKCsetName;", "Give a logical name to a BAT. " ] [ "bat", "setPersistent", "command bat.setPersistent(b:bat[:any_1]):void ", "BKCsetPersistent;", "Make the BAT persistent." ] [ "bat", "setTransient", "command bat.setTransient(b:bat[:any_1]):void ", "BKCsetTransient;", "Make the BAT transient. Returns \n\tboolean which indicates if the\nBAT administration has indeed changed." 
] diff --git a/gdk/ChangeLog.Dec2016 b/gdk/ChangeLog.Dec2016 --- a/gdk/ChangeLog.Dec2016 +++ b/gdk/ChangeLog.Dec2016 @@ -4,6 +4,9 @@ * Thu Dec 1 2016 Sjoerd Mullender <sjo...@acm.org> - The tnokey values must now be 0 if it is not known whether all values in a column are distinct. +- The 2-bit tkey field in the bat descriptor has been split into two + single bit fields: tkey and tunique. The old tkey&BOUND2BTRUE value + is now stored in tunique. * Wed Oct 26 2016 Sjoerd Mullender <sjo...@acm.org> - Implemented conversion to str from any type (not just the internal diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -513,7 +513,6 @@ #define TRUE true #define FALSE false #endif -#define BOUND2BTRUE 2 /* TRUE, and bound to be so */ #define IDLENGTH 64 /* maximum BAT id length */ #define BATMARGIN 1.2 /* extra free margin for new heaps */ @@ -764,7 +763,8 @@ gdk_export int VALisnil(const ValRecord * // Tail properties * int ttype; // Tail type number * str tident; // name for tail column - * bit tkey; // tail values should be unique? + * bit tkey; // tail values are unique + * bit tunique; // tail values must be kept unique * bit tnonil; // tail has no nils * bit tsorted; // are tail values currently ordered? * bit tvarsized; // for speed: tail type is varsized? @@ -824,7 +824,8 @@ typedef struct { bte shift; /* log2 of bunwidth */ unsigned int varsized:1, /* varsized (1) or fixedsized (0) */ - key:2, /* duplicates allowed? 
*/ + key:1, /* no duplicate values present */ + unique:1, /* no duplicate values allowed */ dense:1, /* OID only: only consecutive values */ nonil:1, /* there are no nils in the column */ nil:1, /* there is a nil in the column */ @@ -890,6 +891,7 @@ typedef struct BATiter { #define creator_tid S.tid #define ttype T.type #define tkey T.key +#define tunique T.unique #define tvarsized T.varsized #define tseqbase T.seq #define tsorted T.sorted diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c --- a/gdk/gdk_align.c +++ b/gdk/gdk_align.c @@ -417,8 +417,8 @@ VIEWreset(BAT *b) b->theap.parentid = 0; b->batRestricted = BAT_WRITE; - /* reset BOUND2BTRUE */ b->tkey = BATtkey(v); + b->tunique = 0; /* copy the heaps */ b->theap = tail; @@ -442,8 +442,8 @@ VIEWreset(BAT *b) b->batCopiedtodisk = 0; b->batDirty = 1; - /* reset BOUND2KEY */ b->tkey = BATtkey(v); + b->tunique = 0; /* make the BAT empty and insert all again */ DELTAinit(b); diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -88,6 +88,7 @@ BATcreatedesc(oid hseq, int tt, int heap bn->ttype = tt; bn->tkey = FALSE; + bn->tunique = FALSE; bn->tnonil = TRUE; bn->tnil = FALSE; bn->tsorted = bn->trevsorted = ATOMlinear(tt) != 0; @@ -909,7 +910,7 @@ setcolprops(BAT *b, const void *x) /* first value */ b->tsorted = b->trevsorted = ATOMlinear(b->ttype) != 0; b->tnosorted = b->tnorevsorted = 0; - b->tkey |= 1; + b->tkey = 1; b->tnokey[0] = b->tnokey[1] = 0; b->tnodense = 0; if (b->ttype == TYPE_void) { @@ -954,7 +955,8 @@ setcolprops(BAT *b, const void *x) prv = BUNtail(bi, pos - 1); cmp = atom_CMP(prv, x, b->ttype); - if (b->tkey == 1 && /* assume outside check if BOUND2BTRUE */ + if (!b->tunique && /* assume outside check if tunique */ + b->tkey && (cmp == 0 || /* definitely not KEY */ (b->batCount > 1 && /* can't guarantee KEY if unordered */ ((b->tsorted && cmp > 0) || @@ -1002,7 +1004,7 @@ BUNappend(BAT *b, const void *t, bit for BATcheck(b, "BUNappend", GDK_FAIL); assert(!isVIEW(b)); - if 
((b->tkey & BOUND2BTRUE) && BUNfnd(b, t) != BUN_NONE) { + if (b->tunique && BUNfnd(b, t) != BUN_NONE) { return GDK_SUCCEED; } @@ -1094,7 +1096,7 @@ BUNdelete(BAT *b, oid o) b->batCount--; if (b->batCount <= 1) { /* some trivial properties */ - b->tkey |= 1; + b->tkey = 1; b->tsorted = b->trevsorted = 1; b->tnosorted = b->tnorevsorted = 0; if (b->batCount == 0) { @@ -1185,7 +1187,7 @@ BUNinplace(BAT *b, BUN p, const void *t, } } else if (b->tnorevsorted >= p) b->tnorevsorted = 0; - if (((b->ttype != TYPE_void) & b->tkey & !(b->tkey & BOUND2BTRUE)) && b->batCount > 1) { + if (((b->ttype != TYPE_void) & b->tkey & !b->tunique) && b->batCount > 1) { BATkey(b, FALSE); } if (b->tnonil) @@ -1211,7 +1213,7 @@ BUNreplace(BAT *b, oid id, const void *t if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) return GDK_SUCCEED; - if ((b->tkey & BOUND2BTRUE) && BUNfnd(b, t) != BUN_NONE) { + if (b->tunique && BUNfnd(b, t) != BUN_NONE) { return GDK_SUCCEED; } if (b->ttype == TYPE_void) { @@ -1236,7 +1238,7 @@ void_inplace(BAT *b, oid id, const void GDKerror("void_inplace: id out of range\n"); return GDK_FAIL; } - if ((b->tkey & BOUND2BTRUE) && BUNfnd(b, val) != BUN_NONE) + if (b->tunique && BUNfnd(b, val) != BUN_NONE) return GDK_SUCCEED; if (b->ttype == TYPE_void) return GDK_SUCCEED; @@ -1433,11 +1435,9 @@ BATkey(BAT *b, int flag) { BATcheck(b, "BATkey", GDK_FAIL); assert(b->batCacheid > 0); + assert(flag == 0 || flag == 1); + assert(!b->tunique || flag); if (b->ttype == TYPE_void) { - if (b->tseqbase == oid_nil && flag == BOUND2BTRUE) { - GDKerror("BATkey: nil-column cannot be kept unique.\n"); - return GDK_FAIL; - } if (b->tseqbase != oid_nil && flag == FALSE) { GDKerror("BATkey: dense column must be unique.\n"); return GDK_FAIL; @@ -1447,11 +1447,9 @@ BATkey(BAT *b, int flag) return GDK_FAIL; } } - if (flag) - flag |= (1 | b->tkey); - if (b->tkey != flag) + if (b->tkey != (flag != 0)) b->batDirtydesc = TRUE; - b->tkey = flag; + b->tkey = flag != 0; if (!flag) b->tdense = 0; 
else @@ -2005,10 +2003,10 @@ BATmode(BAT *b, int mode) * revsorted The column is reversely sorted (descending). If * also sorted, then all values are equal. * - * The "key" property consists of two bits. The lower bit, when set, - * indicates that all values in the column are distinct. The upper - * bit, when set, indicates that all values must be distinct - * (BOUND2BTRUE). + * In addition there is a property "unique" that, when set, indicates + * that values must be kept unique (and hence that the "key" property + * must be set). This property is only used when changing (adding, + * replacing) values. * * Note that the functions BATtseqbase and BATkey also set more * properties than you might suspect. When setting properties on a @@ -2047,8 +2045,7 @@ BATassertProps(BAT *b) assert(b->ttype >= TYPE_void); assert(b->ttype < GDKatomcnt); assert(b->ttype != TYPE_bat); - /* if BOUND2BTRUE is set, then so must the low order bit */ - assert(!(b->tkey & BOUND2BTRUE) || (b->tkey & 1)); /* tkey != 2 */ + assert(!b->tunique || b->tkey); /* if unique, then key */ assert(isVIEW(b) || b->ttype == TYPE_void || BBPfarms[b->theap.farmid].roles & (1 << b->batRole)); diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -367,12 +367,12 @@ BATappend(BAT *b, BAT *n, bit force) ALIGNapp(b, "BATappend", force, GDK_FAIL); BATcompatible(b, n, GDK_FAIL, "BATappend"); - if (b->tkey & BOUND2BTRUE) { - /* if b has the BOUND2BTRUE bit set, only insert - * values from n that don't already occur in b, and - * make sure we don't insert any duplicates either; we - * do this by calculating a subset of n that complies - * with this */ + if (b->tunique) { + /* if b has the unique bit set, only insert values + * from n that don't already occur in b, and make sure + * we don't insert any duplicates either; we do this + * by calculating a subset of n that complies with + * this */ BAT *d, *u; d = BATdiff(n, b, NULL, NULL, 1, BUN_NONE); @@ -395,7 +395,7 @@ 
BATappend(BAT *b, BAT *n, bit force) } if (BUNlast(b) + BATcount(n) > BUN_MAX) { - if (b->tkey & BOUND2BTRUE) + if (b->tunique) BBPunfix(n->batCacheid); GDKerror("BATappend: combined BATs too large\n"); return GDK_FAIL; @@ -430,13 +430,13 @@ BATappend(BAT *b, BAT *n, bit force) if (BATtdense(n) && BATcount(b) + b->tseqbase == f) { sz += BATcount(b); BATsetcount(b, sz); - if (b->tkey & BOUND2BTRUE) + if (b->tunique) BBPunfix(n->batCacheid); return GDK_SUCCEED; } /* we need to materialize the tail */ if (BATmaterialize(b) != GDK_SUCCEED) { - if (b->tkey & BOUND2BTRUE) + if (b->tunique) BBPunfix(n->batCacheid); return GDK_FAIL; } @@ -463,9 +463,9 @@ BATappend(BAT *b, BAT *n, bit force) } b->tdense = n->tdense; b->tnodense = n->tnodense; - b->tkey |= (n->tkey & TRUE); - /* if BOUND2BTRUE, uniqueness is guaranteed above */ - if ((b->tkey & BOUND2BTRUE) == 0) { + /* if tunique, uniqueness is guaranteed above */ + b->tkey = n->tkey | b->tunique; + if (!b->tunique) { b->tnokey[0] = n->tnokey[0]; b->tnokey[1] = n->tnokey[1]; } @@ -488,9 +488,10 @@ BATappend(BAT *b, BAT *n, bit force) b->trevsorted = FALSE; b->tnorevsorted = 0; } - if (b->tkey == 1 && + if (!b->tunique && /* uniqueness is guaranteed above */ + b->tkey && (!(BATtordered(b) || BATtrevordered(b)) || - n->tkey == 0 || xx == 0)) { + !n->tkey || xx == 0)) { BATkey(b, FALSE); } if (b->ttype != TYPE_void && b->tsorted && b->tdense && _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list