Changeset: 6c17dc3a1d31 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6c17dc3a1d31
Modified Files:
        clients/Tests/MAL-signatures.stable.out
        clients/Tests/MAL-signatures.stable.out.int128
        gdk/ChangeLog.Dec2016
        gdk/gdk.h
        gdk/gdk_align.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_bbp.c
        gdk/gdk_calc.c
        gdk/gdk_hash.h
        gdk/gdk_join.c
        gdk/gdk_project.c
        gdk/gdk_search.c
        monetdb5/modules/kernel/bat5.c
        monetdb5/modules/kernel/bat5.mal
        monetdb5/modules/mal/tablet.c
Branch: default
Log Message:

Merge with Dec2016 branch.


diffs (truncated from 1027 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -599,7 +599,7 @@ Ready.
 [ "bat",       "setColumn",    "command 
bat.setColumn(b:bat[:any_1],t:str):void ",     "BKCsetColumn;",        "Give a 
logical name to the tail column of a BAT."      ]
 [ "bat",       "setHash",      "command bat.setHash(b:bat[:any_1]):bit ",      
"BKCsetHash;",  "Create a hash structure on the column" ]
 [ "bat",       "setImprints",  "command bat.setImprints(b:bat[:any_1]):bit ",  
"BKCsetImprints;",      "Create an imprints structure on the column"    ]
-[ "bat",       "setKey",       "command 
bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ",      "BKCsetkey;",   "Sets 
the 'key' property of the tail column to 'mode'. In 'key' mode, \n        the 
kernel will silently block insertions that cause a duplicate \n        entries 
in the head column. KNOWN BUG:when 'key' is set to TRUE, this \n\t\tfunction 
does not automatically eliminate duplicates. "  ]
+[ "bat",       "setKey",       "command 
bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ",      "BKCsetkey;",   "Sets 
the 'key' property of the tail column to 'mode'. In 'key' mode,\n        the 
kernel will silently block insertions that cause a duplicate\n        entry in 
the head column."     ]
 [ "bat",       "setName",      "command bat.setName(b:bat[:any_1],s:str):void 
",       "BKCsetName;",  "Give a logical name to a BAT. "        ]
 [ "bat",       "setPersistent",        "command 
bat.setPersistent(b:bat[:any_1]):void ",       "BKCsetPersistent;",    "Make 
the BAT persistent."      ]
 [ "bat",       "setTransient", "command bat.setTransient(b:bat[:any_1]):void 
",        "BKCsetTransient;",     "Make the BAT transient.  Returns \n\tboolean 
which indicates if the\nBAT administration has indeed changed."   ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -705,7 +705,7 @@ Ready.
 [ "bat",       "setColumn",    "command 
bat.setColumn(b:bat[:any_1],t:str):void ",     "BKCsetColumn;",        "Give a 
logical name to the tail column of a BAT."      ]
 [ "bat",       "setHash",      "command bat.setHash(b:bat[:any_1]):bit ",      
"BKCsetHash;",  "Create a hash structure on the column" ]
 [ "bat",       "setImprints",  "command bat.setImprints(b:bat[:any_1]):bit ",  
"BKCsetImprints;",      "Create an imprints structure on the column"    ]
-[ "bat",       "setKey",       "command 
bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ",      "BKCsetkey;",   "Sets 
the 'key' property of the tail column to 'mode'. In 'key' mode, \n        the 
kernel will silently block insertions that cause a duplicate \n        entries 
in the head column. KNOWN BUG:when 'key' is set to TRUE, this \n\t\tfunction 
does not automatically eliminate duplicates. "  ]
+[ "bat",       "setKey",       "command 
bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ",      "BKCsetkey;",   "Sets 
the 'key' property of the tail column to 'mode'. In 'key' mode,\n        the 
kernel will silently block insertions that cause a duplicate\n        entry in 
the head column."     ]
 [ "bat",       "setName",      "command bat.setName(b:bat[:any_1],s:str):void 
",       "BKCsetName;",  "Give a logical name to a BAT. "        ]
 [ "bat",       "setPersistent",        "command 
bat.setPersistent(b:bat[:any_1]):void ",       "BKCsetPersistent;",    "Make 
the BAT persistent."      ]
 [ "bat",       "setTransient", "command bat.setTransient(b:bat[:any_1]):void 
",        "BKCsetTransient;",     "Make the BAT transient.  Returns \n\tboolean 
which indicates if the\nBAT administration has indeed changed."   ]
diff --git a/gdk/ChangeLog.Dec2016 b/gdk/ChangeLog.Dec2016
--- a/gdk/ChangeLog.Dec2016
+++ b/gdk/ChangeLog.Dec2016
@@ -1,6 +1,13 @@
 # ChangeLog file for MonetDB
 # This file is updated with Maddlog
 
+* Thu Dec  1 2016 Sjoerd Mullender <sjo...@acm.org>
+- The tnokey values must now be 0 if it is not known whether all values
+  in a column are distinct.
+- The 2-bit tkey field in the bat descriptor has been split into two
+  single bit fields: tkey and tunique.  The old tkey&BOUND2BTRUE value
+  is now stored in tunique.
+
 * Wed Oct 26 2016 Sjoerd Mullender <sjo...@acm.org>
 - Implemented conversion to str from any type (not just the internal
   types).
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -513,7 +513,6 @@
 #define TRUE           true
 #define FALSE          false
 #endif
-#define BOUND2BTRUE    2       /* TRUE, and bound to be so */
 
 #define IDLENGTH       64      /* maximum BAT id length */
 #define BATMARGIN      1.2     /* extra free margin for new heaps */
@@ -764,7 +763,8 @@ gdk_export int VALisnil(const ValRecord 
  *           // Tail properties
  *           int    ttype;            // Tail type number
  *           str    tident;           // name for tail column
- *           bit    tkey;             // tail values should be unique?
+ *           bit    tkey;             // tail values are unique
+ *           bit    tunique;          // tail values must be kept unique
  *           bit    tnonil;           // tail has no nils
  *           bit    tsorted;          // are tail values currently ordered?
  *           bit    tvarsized;        // for speed: tail type is varsized?
@@ -824,14 +824,15 @@ typedef struct {
        bte shift;              /* log2 of bunwidth */
        unsigned int
         varsized:1,            /* varsized (1) or fixedsized (0) */
-        key:2,                 /* duplicates allowed? */
+        key:1,                 /* no duplicate values present */
+        unique:1,              /* no duplicate values allowed */
         dense:1,               /* OID only: only consecutive values */
         nonil:1,               /* there are no nils in the column */
         nil:1,                 /* there is a nil in the column */
         sorted:1,              /* column is sorted in ascending order */
         revsorted:1;           /* column is sorted in descending order */
        oid align;              /* OID for sync alignment */
-       BUN nokey[2];           /* positions that prove key ==FALSE */
+       BUN nokey[2];           /* positions that prove key==FALSE */
        BUN nosorted;           /* position that proves sorted==FALSE */
        BUN norevsorted;        /* position that proves revsorted==FALSE */
        BUN nodense;            /* position that proves dense==FALSE */
@@ -855,7 +856,8 @@ typedef struct {
 #define GDKLIBRARY_OLDWKB      061031  /* old geom WKB format */
 #define GDKLIBRARY_INSERTED    061032  /* inserted and deleted in BBP.dir */
 #define GDKLIBRARY_HEADED      061033  /* head properties are stored */
-#define GDKLIBRARY             061034
+#define GDKLIBRARY_NOKEY       061034  /* nokey values can't be trusted */
+#define GDKLIBRARY             061035
 
 typedef struct BAT {
        /* static bat properties */
@@ -889,6 +891,7 @@ typedef struct BATiter {
 #define creator_tid    S.tid
 #define ttype          T.type
 #define tkey           T.key
+#define tunique                T.unique
 #define tvarsized      T.varsized
 #define tseqbase       T.seq
 #define tsorted                T.sorted
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -417,8 +417,8 @@ VIEWreset(BAT *b)
                b->theap.parentid = 0;
                b->batRestricted = BAT_WRITE;
 
-               /* reset BOUND2BTRUE */
                b->tkey = BATtkey(v);
+               b->tunique = 0;
 
                /* copy the heaps */
                b->theap = tail;
@@ -442,8 +442,8 @@ VIEWreset(BAT *b)
                b->batCopiedtodisk = 0;
                b->batDirty = 1;
 
-               /* reset BOUND2KEY */
                b->tkey = BATtkey(v);
+               b->tunique = 0;
 
                /* make the BAT empty and insert all again */
                DELTAinit(b);
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -88,6 +88,7 @@ BATcreatedesc(oid hseq, int tt, int heap
 
        bn->ttype = tt;
        bn->tkey = FALSE;
+       bn->tunique = FALSE;
        bn->tnonil = TRUE;
        bn->tnil = FALSE;
        bn->tsorted = bn->trevsorted = ATOMlinear(tt) != 0;
@@ -909,7 +910,7 @@ setcolprops(BAT *b, const void *x)
                /* first value */
                b->tsorted = b->trevsorted = ATOMlinear(b->ttype) != 0;
                b->tnosorted = b->tnorevsorted = 0;
-               b->tkey |= 1;
+               b->tkey = 1;
                b->tnokey[0] = b->tnokey[1] = 0;
                b->tnodense = 0;
                if (b->ttype == TYPE_void) {
@@ -954,7 +955,8 @@ setcolprops(BAT *b, const void *x)
                prv = BUNtail(bi, pos - 1);
                cmp = atom_CMP(prv, x, b->ttype);
 
-               if (b->tkey == 1 && /* assume outside check if BOUND2BTRUE */
+               if (!b->tunique && /* assume outside check if tunique */
+                   b->tkey &&
                    (cmp == 0 || /* definitely not KEY */
                     (b->batCount > 1 && /* can't guarantee KEY if unordered */
                      ((b->tsorted && cmp > 0) ||
@@ -1002,7 +1004,7 @@ BUNappend(BAT *b, const void *t, bit for
        BATcheck(b, "BUNappend", GDK_FAIL);
 
        assert(!isVIEW(b));
-       if ((b->tkey & BOUND2BTRUE) && BUNfnd(b, t) != BUN_NONE) {
+       if (b->tunique && BUNfnd(b, t) != BUN_NONE) {
                return GDK_SUCCEED;
        }
 
@@ -1042,10 +1044,10 @@ BUNappend(BAT *b, const void *t, bit for
 
        IMPSdestroy(b); /* no support for inserts in imprints yet */
        OIDXdestroy(b);
-
-       /* first adapt the hashes; then the user-defined accelerators.
-        * REASON: some accelerator updates (qsignature) use the hashes!
-        */
+       if (b->thash == (Hash *) 1) {
+               /* don't bother first loading the hash to then change it */
+               HASHdestroy(b);
+       }
        if (b->thash) {
                HASHins(b, p, t);
                if (tsize && tsize != b->tvheap->size)
@@ -1094,7 +1096,7 @@ BUNdelete(BAT *b, oid o)
        b->batCount--;
        if (b->batCount <= 1) {
                /* some trivial properties */
-               b->tkey |= 1;
+               b->tkey = 1;
                b->tsorted = b->trevsorted = 1;
                b->tnosorted = b->tnorevsorted = 0;
                if (b->batCount == 0) {
@@ -1185,7 +1187,7 @@ BUNinplace(BAT *b, BUN p, const void *t,
                }
        } else if (b->tnorevsorted >= p)
                b->tnorevsorted = 0;
-       if (((b->ttype != TYPE_void) & b->tkey & !(b->tkey & BOUND2BTRUE)) && b->batCount > 1) {
+       if (((b->ttype != TYPE_void) & b->tkey & !b->tunique) && b->batCount > 1) {
                BATkey(b, FALSE);
        }
        if (b->tnonil)
@@ -1211,7 +1213,7 @@ BUNreplace(BAT *b, oid id, const void *t
        if (id < b->hseqbase || id >= b->hseqbase + BATcount(b))
                return GDK_SUCCEED;
 
-       if ((b->tkey & BOUND2BTRUE) && BUNfnd(b, t) != BUN_NONE) {
+       if (b->tunique && BUNfnd(b, t) != BUN_NONE) {
                return GDK_SUCCEED;
        }
        if (b->ttype == TYPE_void) {
@@ -1236,7 +1238,7 @@ void_inplace(BAT *b, oid id, const void 
                GDKerror("void_inplace: id out of range\n");
                return GDK_FAIL;
        }
-       if ((b->tkey & BOUND2BTRUE) && BUNfnd(b, val) != BUN_NONE)
+       if (b->tunique && BUNfnd(b, val) != BUN_NONE)
                return GDK_SUCCEED;
        if (b->ttype == TYPE_void)
                return GDK_SUCCEED;
@@ -1433,11 +1435,9 @@ BATkey(BAT *b, int flag)
 {
        BATcheck(b, "BATkey", GDK_FAIL);
        assert(b->batCacheid > 0);
+       assert(flag == 0 || flag == 1);
+       assert(!b->tunique || flag);
        if (b->ttype == TYPE_void) {
-               if (b->tseqbase == oid_nil && flag == BOUND2BTRUE) {
-                       GDKerror("BATkey: nil-column cannot be kept unique.\n");
-                       return GDK_FAIL;
-               }
                if (b->tseqbase != oid_nil && flag == FALSE) {
                        GDKerror("BATkey: dense column must be unique.\n");
                        return GDK_FAIL;
@@ -1447,13 +1447,13 @@ BATkey(BAT *b, int flag)
                        return GDK_FAIL;
                }
        }
-       if (flag)
-               flag |= (1 | b->tkey);
-       if (b->tkey != flag)
+       if (b->tkey != (flag != 0))
                b->batDirtydesc = TRUE;
-       b->tkey = flag;
+       b->tkey = flag != 0;
        if (!flag)
                b->tdense = 0;
+       else
+               b->tnokey[0] = b->tnokey[1] = 0;
        if (flag && VIEWtparent(b)) {
                /* if a view is key, then so is the parent if the two
                 * are aligned */
@@ -2003,10 +2003,10 @@ BATmode(BAT *b, int mode)
  * revsorted   The column is reversely sorted (descending).  If
  *             also sorted, then all values are equal.
  *
- * The "key" property consists of two bits.  The lower bit, when set,
- * indicates that all values in the column are distinct.  The upper
- * bit, when set, indicates that all values must be distinct
- * (BOUND2BTRUE).
+ * In addition there is a property "unique" that, when set, indicates
+ * that values must be kept unique (and hence that the "key" property
+ * must be set).  This property is only used when changing (adding,
+ * replacing) values.
  *
  * Note that the functions BATtseqbase and BATkey also set more
  * properties than you might suspect.  When setting properties on a
@@ -2045,8 +2045,7 @@ BATassertProps(BAT *b)
        assert(b->ttype >= TYPE_void);
        assert(b->ttype < GDKatomcnt);
        assert(b->ttype != TYPE_bat);
-       /* if BOUND2BTRUE is set, then so must the low order bit */
-       assert(!(b->tkey & BOUND2BTRUE) || (b->tkey & 1)); /* tkey != 2 */
+       assert(!b->tunique || b->tkey); /* if unique, then key */
        assert(isVIEW(b) ||
               b->ttype == TYPE_void ||
               BBPfarms[b->theap.farmid].roles & (1 << b->batRole));
@@ -2133,6 +2132,18 @@ BATassertProps(BAT *b)
                        assert(cmpf(BUNtail(bi, b->tnorevsorted - 1),
                                    BUNtail(bi, b->tnorevsorted)) < 0);
        }
+       /* if tkey property set, both tnokey values must be 0 */
+       assert(!b->tkey || (b->tnokey[0] == 0 && b->tnokey[1] == 0));
+       if (!b->tkey && (b->tnokey[0] != 0 || b->tnokey[1] != 0)) {
+               /* if tkey not set and tnokey indicates a proof of
+                * non-key-ness, make sure the tnokey values are in
+                * range and indeed provide a proof */
+               assert(b->tnokey[0] != b->tnokey[1]);
+               assert(b->tnokey[0] < b->batCount);
+               assert(b->tnokey[1] < b->batCount);
+               assert(cmpf(BUNtail(bi, b->tnokey[0]),
+                           BUNtail(bi, b->tnokey[1])) == 0);
+       }
        /* var heaps must have sane sizes */
        assert(b->tvheap == NULL || b->tvheap->free <= b->tvheap->size);
 
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -79,9 +79,7 @@ insert_string_bat(BAT *b, BAT *n, int fo
        tp = NULL;
        if ((!GDK_ELIMDOUBLES(b->tvheap) || b->batCount == 0) &&
            !GDK_ELIMDOUBLES(n->tvheap) &&
-           b->tvheap->hashash == n->tvheap->hashash &&
-           /* if needs to be kept unique, take slow path */
-           (b->tkey & BOUND2BTRUE) == 0) {
+           b->tvheap->hashash == n->tvheap->hashash) {
                if (b->batRole == TRANSIENT || b->tvheap == n->tvheap) {
                        /* If b is in the transient farm (i.e. b will
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to