Changeset: 6c17dc3a1d31 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6c17dc3a1d31
Modified Files:
	clients/Tests/MAL-signatures.stable.out
	clients/Tests/MAL-signatures.stable.out.int128
	gdk/ChangeLog.Dec2016
	gdk/gdk.h
	gdk/gdk_align.c
	gdk/gdk_bat.c
	gdk/gdk_batop.c
	gdk/gdk_bbp.c
	gdk/gdk_calc.c
	gdk/gdk_hash.h
	gdk/gdk_join.c
	gdk/gdk_project.c
	gdk/gdk_search.c
	monetdb5/modules/kernel/bat5.c
	monetdb5/modules/kernel/bat5.mal
	monetdb5/modules/mal/tablet.c
Branch: default
Log Message:
Merge with Dec2016 branch. diffs (truncated from 1027 to 300 lines): diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -599,7 +599,7 @@ Ready. [ "bat", "setColumn", "command bat.setColumn(b:bat[:any_1],t:str):void ", "BKCsetColumn;", "Give a logical name to the tail column of a BAT." ] [ "bat", "setHash", "command bat.setHash(b:bat[:any_1]):bit ", "BKCsetHash;", "Create a hash structure on the column" ] [ "bat", "setImprints", "command bat.setImprints(b:bat[:any_1]):bit ", "BKCsetImprints;", "Create an imprints structure on the column" ] -[ "bat", "setKey", "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ", "BKCsetkey;", "Sets the 'key' property of the tail column to 'mode'. In 'key' mode, \n the kernel will silently block insertions that cause a duplicate \n entries in the head column. KNOWN BUG:when 'key' is set to TRUE, this \n\t\tfunction does not automatically eliminate duplicates. " ] +[ "bat", "setKey", "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ", "BKCsetkey;", "Sets the 'key' property of the tail column to 'mode'. In 'key' mode,\n the kernel will silently block insertions that cause a duplicate\n entry in the head column." ] [ "bat", "setName", "command bat.setName(b:bat[:any_1],s:str):void ", "BKCsetName;", "Give a logical name to a BAT. " ] [ "bat", "setPersistent", "command bat.setPersistent(b:bat[:any_1]):void ", "BKCsetPersistent;", "Make the BAT persistent." ] [ "bat", "setTransient", "command bat.setTransient(b:bat[:any_1]):void ", "BKCsetTransient;", "Make the BAT transient. Returns \n\tboolean which indicates if the\nBAT administration has indeed changed." 
] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -705,7 +705,7 @@ Ready. [ "bat", "setColumn", "command bat.setColumn(b:bat[:any_1],t:str):void ", "BKCsetColumn;", "Give a logical name to the tail column of a BAT." ] [ "bat", "setHash", "command bat.setHash(b:bat[:any_1]):bit ", "BKCsetHash;", "Create a hash structure on the column" ] [ "bat", "setImprints", "command bat.setImprints(b:bat[:any_1]):bit ", "BKCsetImprints;", "Create an imprints structure on the column" ] -[ "bat", "setKey", "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ", "BKCsetkey;", "Sets the 'key' property of the tail column to 'mode'. In 'key' mode, \n the kernel will silently block insertions that cause a duplicate \n entries in the head column. KNOWN BUG:when 'key' is set to TRUE, this \n\t\tfunction does not automatically eliminate duplicates. " ] +[ "bat", "setKey", "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ", "BKCsetkey;", "Sets the 'key' property of the tail column to 'mode'. In 'key' mode,\n the kernel will silently block insertions that cause a duplicate\n entry in the head column." ] [ "bat", "setName", "command bat.setName(b:bat[:any_1],s:str):void ", "BKCsetName;", "Give a logical name to a BAT. " ] [ "bat", "setPersistent", "command bat.setPersistent(b:bat[:any_1]):void ", "BKCsetPersistent;", "Make the BAT persistent." ] [ "bat", "setTransient", "command bat.setTransient(b:bat[:any_1]):void ", "BKCsetTransient;", "Make the BAT transient. Returns \n\tboolean which indicates if the\nBAT administration has indeed changed." 
] diff --git a/gdk/ChangeLog.Dec2016 b/gdk/ChangeLog.Dec2016 --- a/gdk/ChangeLog.Dec2016 +++ b/gdk/ChangeLog.Dec2016 @@ -1,6 +1,13 @@ # ChangeLog file for MonetDB # This file is updated with Maddlog +* Thu Dec 1 2016 Sjoerd Mullender <sjo...@acm.org> +- The tnokey values must now be 0 if it is not known whether all values + in a column are distinct. +- The 2-bit tkey field in the bat descriptor has been split into two + single bit fields: tkey and tunique. The old tkey&BOUND2BTRUE value + is now stored in tunique. + * Wed Oct 26 2016 Sjoerd Mullender <sjo...@acm.org> - Implemented conversion to str from any type (not just the internal types). diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -513,7 +513,6 @@ #define TRUE true #define FALSE false #endif -#define BOUND2BTRUE 2 /* TRUE, and bound to be so */ #define IDLENGTH 64 /* maximum BAT id length */ #define BATMARGIN 1.2 /* extra free margin for new heaps */ @@ -764,7 +763,8 @@ gdk_export int VALisnil(const ValRecord * // Tail properties * int ttype; // Tail type number * str tident; // name for tail column - * bit tkey; // tail values should be unique? + * bit tkey; // tail values are unique + * bit tunique; // tail values must be kept unique * bit tnonil; // tail has no nils * bit tsorted; // are tail values currently ordered? * bit tvarsized; // for speed: tail type is varsized? @@ -824,14 +824,15 @@ typedef struct { bte shift; /* log2 of bunwidth */ unsigned int varsized:1, /* varsized (1) or fixedsized (0) */ - key:2, /* duplicates allowed? 
*/ + key:1, /* no duplicate values present */ + unique:1, /* no duplicate values allowed */ dense:1, /* OID only: only consecutive values */ nonil:1, /* there are no nils in the column */ nil:1, /* there is a nil in the column */ sorted:1, /* column is sorted in ascending order */ revsorted:1; /* column is sorted in descending order */ oid align; /* OID for sync alignment */ - BUN nokey[2]; /* positions that prove key ==FALSE */ + BUN nokey[2]; /* positions that prove key==FALSE */ BUN nosorted; /* position that proves sorted==FALSE */ BUN norevsorted; /* position that proves revsorted==FALSE */ BUN nodense; /* position that proves dense==FALSE */ @@ -855,7 +856,8 @@ typedef struct { #define GDKLIBRARY_OLDWKB 061031 /* old geom WKB format */ #define GDKLIBRARY_INSERTED 061032 /* inserted and deleted in BBP.dir */ #define GDKLIBRARY_HEADED 061033 /* head properties are stored */ -#define GDKLIBRARY 061034 +#define GDKLIBRARY_NOKEY 061034 /* nokey values can't be trusted */ +#define GDKLIBRARY 061035 typedef struct BAT { /* static bat properties */ @@ -889,6 +891,7 @@ typedef struct BATiter { #define creator_tid S.tid #define ttype T.type #define tkey T.key +#define tunique T.unique #define tvarsized T.varsized #define tseqbase T.seq #define tsorted T.sorted diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c --- a/gdk/gdk_align.c +++ b/gdk/gdk_align.c @@ -417,8 +417,8 @@ VIEWreset(BAT *b) b->theap.parentid = 0; b->batRestricted = BAT_WRITE; - /* reset BOUND2BTRUE */ b->tkey = BATtkey(v); + b->tunique = 0; /* copy the heaps */ b->theap = tail; @@ -442,8 +442,8 @@ VIEWreset(BAT *b) b->batCopiedtodisk = 0; b->batDirty = 1; - /* reset BOUND2KEY */ b->tkey = BATtkey(v); + b->tunique = 0; /* make the BAT empty and insert all again */ DELTAinit(b); diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -88,6 +88,7 @@ BATcreatedesc(oid hseq, int tt, int heap bn->ttype = tt; bn->tkey = FALSE; + bn->tunique = FALSE; bn->tnonil = TRUE; bn->tnil = 
FALSE; bn->tsorted = bn->trevsorted = ATOMlinear(tt) != 0; @@ -909,7 +910,7 @@ setcolprops(BAT *b, const void *x) /* first value */ b->tsorted = b->trevsorted = ATOMlinear(b->ttype) != 0; b->tnosorted = b->tnorevsorted = 0; - b->tkey |= 1; + b->tkey = 1; b->tnokey[0] = b->tnokey[1] = 0; b->tnodense = 0; if (b->ttype == TYPE_void) { @@ -954,7 +955,8 @@ setcolprops(BAT *b, const void *x) prv = BUNtail(bi, pos - 1); cmp = atom_CMP(prv, x, b->ttype); - if (b->tkey == 1 && /* assume outside check if BOUND2BTRUE */ + if (!b->tunique && /* assume outside check if tunique */ + b->tkey && (cmp == 0 || /* definitely not KEY */ (b->batCount > 1 && /* can't guarantee KEY if unordered */ ((b->tsorted && cmp > 0) || @@ -1002,7 +1004,7 @@ BUNappend(BAT *b, const void *t, bit for BATcheck(b, "BUNappend", GDK_FAIL); assert(!isVIEW(b)); - if ((b->tkey & BOUND2BTRUE) && BUNfnd(b, t) != BUN_NONE) { + if (b->tunique && BUNfnd(b, t) != BUN_NONE) { return GDK_SUCCEED; } @@ -1042,10 +1044,10 @@ BUNappend(BAT *b, const void *t, bit for IMPSdestroy(b); /* no support for inserts in imprints yet */ OIDXdestroy(b); - - /* first adapt the hashes; then the user-defined accelerators. - * REASON: some accelerator updates (qsignature) use the hashes! 
- */ + if (b->thash == (Hash *) 1) { + /* don't bother first loading the hash to then change it */ + HASHdestroy(b); + } if (b->thash) { HASHins(b, p, t); if (tsize && tsize != b->tvheap->size) @@ -1094,7 +1096,7 @@ BUNdelete(BAT *b, oid o) b->batCount--; if (b->batCount <= 1) { /* some trivial properties */ - b->tkey |= 1; + b->tkey = 1; b->tsorted = b->trevsorted = 1; b->tnosorted = b->tnorevsorted = 0; if (b->batCount == 0) { @@ -1185,7 +1187,7 @@ BUNinplace(BAT *b, BUN p, const void *t, } } else if (b->tnorevsorted >= p) b->tnorevsorted = 0; - if (((b->ttype != TYPE_void) & b->tkey & !(b->tkey & BOUND2BTRUE)) && b->batCount > 1) { + if (((b->ttype != TYPE_void) & b->tkey & !b->tunique) && b->batCount > 1) { BATkey(b, FALSE); } if (b->tnonil) @@ -1211,7 +1213,7 @@ BUNreplace(BAT *b, oid id, const void *t if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) return GDK_SUCCEED; - if ((b->tkey & BOUND2BTRUE) && BUNfnd(b, t) != BUN_NONE) { + if (b->tunique && BUNfnd(b, t) != BUN_NONE) { return GDK_SUCCEED; } if (b->ttype == TYPE_void) { @@ -1236,7 +1238,7 @@ void_inplace(BAT *b, oid id, const void GDKerror("void_inplace: id out of range\n"); return GDK_FAIL; } - if ((b->tkey & BOUND2BTRUE) && BUNfnd(b, val) != BUN_NONE) + if (b->tunique && BUNfnd(b, val) != BUN_NONE) return GDK_SUCCEED; if (b->ttype == TYPE_void) return GDK_SUCCEED; @@ -1433,11 +1435,9 @@ BATkey(BAT *b, int flag) { BATcheck(b, "BATkey", GDK_FAIL); assert(b->batCacheid > 0); + assert(flag == 0 || flag == 1); + assert(!b->tunique || flag); if (b->ttype == TYPE_void) { - if (b->tseqbase == oid_nil && flag == BOUND2BTRUE) { - GDKerror("BATkey: nil-column cannot be kept unique.\n"); - return GDK_FAIL; - } if (b->tseqbase != oid_nil && flag == FALSE) { GDKerror("BATkey: dense column must be unique.\n"); return GDK_FAIL; @@ -1447,13 +1447,13 @@ BATkey(BAT *b, int flag) return GDK_FAIL; } } - if (flag) - flag |= (1 | b->tkey); - if (b->tkey != flag) + if (b->tkey != (flag != 0)) b->batDirtydesc = TRUE; 
- b->tkey = flag; + b->tkey = flag != 0; if (!flag) b->tdense = 0; + else + b->tnokey[0] = b->tnokey[1] = 0; if (flag && VIEWtparent(b)) { /* if a view is key, then so is the parent if the two * are aligned */ @@ -2003,10 +2003,10 @@ BATmode(BAT *b, int mode) * revsorted The column is reversely sorted (descending). If * also sorted, then all values are equal. * - * The "key" property consists of two bits. The lower bit, when set, - * indicates that all values in the column are distinct. The upper - * bit, when set, indicates that all values must be distinct - * (BOUND2BTRUE). + * In addition there is a property "unique" that, when set, indicates + * that values must be kept unique (and hence that the "key" property + * must be set). This property is only used when changing (adding, + * replacing) values. * * Note that the functions BATtseqbase and BATkey also set more * properties than you might suspect. When setting properties on a @@ -2045,8 +2045,7 @@ BATassertProps(BAT *b) assert(b->ttype >= TYPE_void); assert(b->ttype < GDKatomcnt); assert(b->ttype != TYPE_bat); - /* if BOUND2BTRUE is set, then so must the low order bit */ - assert(!(b->tkey & BOUND2BTRUE) || (b->tkey & 1)); /* tkey != 2 */ + assert(!b->tunique || b->tkey); /* if unique, then key */ assert(isVIEW(b) || b->ttype == TYPE_void || BBPfarms[b->theap.farmid].roles & (1 << b->batRole)); @@ -2133,6 +2132,18 @@ BATassertProps(BAT *b) assert(cmpf(BUNtail(bi, b->tnorevsorted - 1), BUNtail(bi, b->tnorevsorted)) < 0); } + /* if tkey property set, both tnokey values must be 0 */ + assert(!b->tkey || (b->tnokey[0] == 0 && b->tnokey[1] == 0)); + if (!b->tkey && (b->tnokey[0] != 0 || b->tnokey[1] != 0)) { + /* if tkey not set and tnokey indicates a proof of + * non-key-ness, make sure the tnokey values are in + * range and indeed provide a proof */ + assert(b->tnokey[0] != b->tnokey[1]); + assert(b->tnokey[0] < b->batCount); + assert(b->tnokey[1] < b->batCount); + assert(cmpf(BUNtail(bi, b->tnokey[0]), + 
BUNtail(bi, b->tnokey[1])) == 0); + } /* var heaps must have sane sizes */ assert(b->tvheap == NULL || b->tvheap->free <= b->tvheap->size); diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -79,9 +79,7 @@ insert_string_bat(BAT *b, BAT *n, int fo tp = NULL; if ((!GDK_ELIMDOUBLES(b->tvheap) || b->batCount == 0) && !GDK_ELIMDOUBLES(n->tvheap) && - b->tvheap->hashash == n->tvheap->hashash && - /* if needs to be kept unique, take slow path */ - (b->tkey & BOUND2BTRUE) == 0) { + b->tvheap->hashash == n->tvheap->hashash) { if (b->batRole == TRANSIENT || b->tvheap == n->tvheap) { /* If b is in the transient farm (i.e. b will _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list