Changeset: 96ee870d1d0e for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=96ee870d1d0e
Modified Files:
        clients/Tests/MAL-signatures.stable.out
        clients/Tests/MAL-signatures.stable.out.int128
        gdk/ChangeLog.Dec2016
        gdk/gdk.h
        gdk/gdk_align.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_calc.c
        gdk/gdk_join.c
        gdk/gdk_project.c
        gdk/gdk_search.c
        monetdb5/modules/kernel/bat5.c
        monetdb5/modules/kernel/bat5.mal
Branch: Dec2016
Log Message:

Split the 2-bit tkey field into a 1-bit tkey and a 1-bit tunique field.
The tunique field holds the old tkey&BOUND2BTRUE value.
Also, in bat.setKey, test for uniqueness when setting the property.


diffs (truncated from 812 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -599,7 +599,7 @@ Ready.
 [ "bat",       "setColumn",    "command bat.setColumn(b:bat[:any_1],t:str):void ",     "BKCsetColumn;",        "Give a logical name to the tail column of a BAT."      ]
 [ "bat",       "setHash",      "command bat.setHash(b:bat[:any_1]):bit ",      "BKCsetHash;",  "Create a hash structure on the column" ]
 [ "bat",       "setImprints",  "command bat.setImprints(b:bat[:any_1]):bit ",  "BKCsetImprints;",      "Create an imprints structure on the column"    ]
-[ "bat",       "setKey",       "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ",      "BKCsetkey;",   "Sets the 'key' property of the tail column to 'mode'. In 'key' mode, \n        the kernel will silently block insertions that cause a duplicate \n        entries in the head column. KNOWN BUG:when 'key' is set to TRUE, this \n\t\tfunction does not automatically eliminate duplicates. "  ]
+[ "bat",       "setKey",       "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ",      "BKCsetkey;",   "Sets the 'key' property of the tail column to 'mode'. In 'key' mode,\n        the kernel will silently block insertions that cause a duplicate\n        entry in the head column."     ]
 [ "bat",       "setName",      "command bat.setName(b:bat[:any_1],s:str):void ",       "BKCsetName;",  "Give a logical name to a BAT. "        ]
 [ "bat",       "setPersistent",        "command bat.setPersistent(b:bat[:any_1]):void ",       "BKCsetPersistent;",    "Make the BAT persistent."      ]
 [ "bat",       "setTransient", "command bat.setTransient(b:bat[:any_1]):void ",        "BKCsetTransient;",     "Make the BAT transient.  Returns \n\tboolean which indicates if the\nBAT administration has indeed changed."   ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -705,7 +705,7 @@ Ready.
 [ "bat",       "setColumn",    "command bat.setColumn(b:bat[:any_1],t:str):void ",     "BKCsetColumn;",        "Give a logical name to the tail column of a BAT."      ]
 [ "bat",       "setHash",      "command bat.setHash(b:bat[:any_1]):bit ",      "BKCsetHash;",  "Create a hash structure on the column" ]
 [ "bat",       "setImprints",  "command bat.setImprints(b:bat[:any_1]):bit ",  "BKCsetImprints;",      "Create an imprints structure on the column"    ]
-[ "bat",       "setKey",       "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ",      "BKCsetkey;",   "Sets the 'key' property of the tail column to 'mode'. In 'key' mode, \n        the kernel will silently block insertions that cause a duplicate \n        entries in the head column. KNOWN BUG:when 'key' is set to TRUE, this \n\t\tfunction does not automatically eliminate duplicates. "  ]
+[ "bat",       "setKey",       "command bat.setKey(b:bat[:any_1],mode:bit):bat[:any_1] ",      "BKCsetkey;",   "Sets the 'key' property of the tail column to 'mode'. In 'key' mode,\n        the kernel will silently block insertions that cause a duplicate\n        entry in the head column."     ]
 [ "bat",       "setName",      "command bat.setName(b:bat[:any_1],s:str):void ",       "BKCsetName;",  "Give a logical name to a BAT. "        ]
 [ "bat",       "setPersistent",        "command bat.setPersistent(b:bat[:any_1]):void ",       "BKCsetPersistent;",    "Make the BAT persistent."      ]
 [ "bat",       "setTransient", "command bat.setTransient(b:bat[:any_1]):void ",        "BKCsetTransient;",     "Make the BAT transient.  Returns \n\tboolean which indicates if the\nBAT administration has indeed changed."   ]
diff --git a/gdk/ChangeLog.Dec2016 b/gdk/ChangeLog.Dec2016
--- a/gdk/ChangeLog.Dec2016
+++ b/gdk/ChangeLog.Dec2016
@@ -4,6 +4,9 @@
 * Thu Dec  1 2016 Sjoerd Mullender <sjo...@acm.org>
 - The tnokey values must now be 0 if it is not known whether all values
   in a column are distinct.
+- The 2-bit tkey field in the bat descriptor has been split into two
+  single bit fields: tkey and tunique.  The old tkey&BOUND2BTRUE value
+  is now stored in tunique.
 
 * Wed Oct 26 2016 Sjoerd Mullender <sjo...@acm.org>
 - Implemented conversion to str from any type (not just the internal
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -513,7 +513,6 @@
 #define TRUE           true
 #define FALSE          false
 #endif
-#define BOUND2BTRUE    2       /* TRUE, and bound to be so */
 
 #define IDLENGTH       64      /* maximum BAT id length */
 #define BATMARGIN      1.2     /* extra free margin for new heaps */
@@ -764,7 +763,8 @@ gdk_export int VALisnil(const ValRecord 
  *           // Tail properties
  *           int    ttype;            // Tail type number
  *           str    tident;           // name for tail column
- *           bit    tkey;             // tail values should be unique?
+ *           bit    tkey;             // tail values are unique
+ *           bit    tunique;          // tail values must be kept unique
  *           bit    tnonil;           // tail has no nils
  *           bit    tsorted;          // are tail values currently ordered?
  *           bit    tvarsized;        // for speed: tail type is varsized?
@@ -824,7 +824,8 @@ typedef struct {
        bte shift;              /* log2 of bunwidth */
        unsigned int
         varsized:1,            /* varsized (1) or fixedsized (0) */
-        key:2,                 /* duplicates allowed? */
+        key:1,                 /* no duplicate values present */
+        unique:1,              /* no duplicate values allowed */
         dense:1,               /* OID only: only consecutive values */
         nonil:1,               /* there are no nils in the column */
         nil:1,                 /* there is a nil in the column */
@@ -890,6 +891,7 @@ typedef struct BATiter {
 #define creator_tid    S.tid
 #define ttype          T.type
 #define tkey           T.key
+#define tunique                T.unique
 #define tvarsized      T.varsized
 #define tseqbase       T.seq
 #define tsorted                T.sorted
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -417,8 +417,8 @@ VIEWreset(BAT *b)
                b->theap.parentid = 0;
                b->batRestricted = BAT_WRITE;
 
-               /* reset BOUND2BTRUE */
                b->tkey = BATtkey(v);
+               b->tunique = 0;
 
                /* copy the heaps */
                b->theap = tail;
@@ -442,8 +442,8 @@ VIEWreset(BAT *b)
                b->batCopiedtodisk = 0;
                b->batDirty = 1;
 
-               /* reset BOUND2KEY */
                b->tkey = BATtkey(v);
+               b->tunique = 0;
 
                /* make the BAT empty and insert all again */
                DELTAinit(b);
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -88,6 +88,7 @@ BATcreatedesc(oid hseq, int tt, int heap
 
        bn->ttype = tt;
        bn->tkey = FALSE;
+       bn->tunique = FALSE;
        bn->tnonil = TRUE;
        bn->tnil = FALSE;
        bn->tsorted = bn->trevsorted = ATOMlinear(tt) != 0;
@@ -909,7 +910,7 @@ setcolprops(BAT *b, const void *x)
                /* first value */
                b->tsorted = b->trevsorted = ATOMlinear(b->ttype) != 0;
                b->tnosorted = b->tnorevsorted = 0;
-               b->tkey |= 1;
+               b->tkey = 1;
                b->tnokey[0] = b->tnokey[1] = 0;
                b->tnodense = 0;
                if (b->ttype == TYPE_void) {
@@ -954,7 +955,8 @@ setcolprops(BAT *b, const void *x)
                prv = BUNtail(bi, pos - 1);
                cmp = atom_CMP(prv, x, b->ttype);
 
-               if (b->tkey == 1 && /* assume outside check if BOUND2BTRUE */
+               if (!b->tunique && /* assume outside check if tunique */
+                   b->tkey &&
                    (cmp == 0 || /* definitely not KEY */
                     (b->batCount > 1 && /* can't guarantee KEY if unordered */
                      ((b->tsorted && cmp > 0) ||
@@ -1002,7 +1004,7 @@ BUNappend(BAT *b, const void *t, bit for
        BATcheck(b, "BUNappend", GDK_FAIL);
 
        assert(!isVIEW(b));
-       if ((b->tkey & BOUND2BTRUE) && BUNfnd(b, t) != BUN_NONE) {
+       if (b->tunique && BUNfnd(b, t) != BUN_NONE) {
                return GDK_SUCCEED;
        }
 
@@ -1094,7 +1096,7 @@ BUNdelete(BAT *b, oid o)
        b->batCount--;
        if (b->batCount <= 1) {
                /* some trivial properties */
-               b->tkey |= 1;
+               b->tkey = 1;
                b->tsorted = b->trevsorted = 1;
                b->tnosorted = b->tnorevsorted = 0;
                if (b->batCount == 0) {
@@ -1185,7 +1187,7 @@ BUNinplace(BAT *b, BUN p, const void *t,
                }
        } else if (b->tnorevsorted >= p)
                b->tnorevsorted = 0;
-       if (((b->ttype != TYPE_void) & b->tkey & !(b->tkey & BOUND2BTRUE)) && b->batCount > 1) {
+       if (((b->ttype != TYPE_void) & b->tkey & !b->tunique) && b->batCount > 1) {
                BATkey(b, FALSE);
        }
        if (b->tnonil)
@@ -1211,7 +1213,7 @@ BUNreplace(BAT *b, oid id, const void *t
        if (id < b->hseqbase || id >= b->hseqbase + BATcount(b))
                return GDK_SUCCEED;
 
-       if ((b->tkey & BOUND2BTRUE) && BUNfnd(b, t) != BUN_NONE) {
+       if (b->tunique && BUNfnd(b, t) != BUN_NONE) {
                return GDK_SUCCEED;
        }
        if (b->ttype == TYPE_void) {
@@ -1236,7 +1238,7 @@ void_inplace(BAT *b, oid id, const void 
                GDKerror("void_inplace: id out of range\n");
                return GDK_FAIL;
        }
-       if ((b->tkey & BOUND2BTRUE) && BUNfnd(b, val) != BUN_NONE)
+       if (b->tunique && BUNfnd(b, val) != BUN_NONE)
                return GDK_SUCCEED;
        if (b->ttype == TYPE_void)
                return GDK_SUCCEED;
@@ -1433,11 +1435,9 @@ BATkey(BAT *b, int flag)
 {
        BATcheck(b, "BATkey", GDK_FAIL);
        assert(b->batCacheid > 0);
+       assert(flag == 0 || flag == 1);
+       assert(!b->tunique || flag);
        if (b->ttype == TYPE_void) {
-               if (b->tseqbase == oid_nil && flag == BOUND2BTRUE) {
-                       GDKerror("BATkey: nil-column cannot be kept unique.\n");
-                       return GDK_FAIL;
-               }
                if (b->tseqbase != oid_nil && flag == FALSE) {
                        GDKerror("BATkey: dense column must be unique.\n");
                        return GDK_FAIL;
@@ -1447,11 +1447,9 @@ BATkey(BAT *b, int flag)
                        return GDK_FAIL;
                }
        }
-       if (flag)
-               flag |= (1 | b->tkey);
-       if (b->tkey != flag)
+       if (b->tkey != (flag != 0))
                b->batDirtydesc = TRUE;
-       b->tkey = flag;
+       b->tkey = flag != 0;
        if (!flag)
                b->tdense = 0;
        else
@@ -2005,10 +2003,10 @@ BATmode(BAT *b, int mode)
  * revsorted   The column is reversely sorted (descending).  If
  *             also sorted, then all values are equal.
  *
- * The "key" property consists of two bits.  The lower bit, when set,
- * indicates that all values in the column are distinct.  The upper
- * bit, when set, indicates that all values must be distinct
- * (BOUND2BTRUE).
+ * In addition there is a property "unique" that, when set, indicates
+ * that values must be kept unique (and hence that the "key" property
+ * must be set).  This property is only used when changing (adding,
+ * replacing) values.
  *
  * Note that the functions BATtseqbase and BATkey also set more
  * properties than you might suspect.  When setting properties on a
@@ -2047,8 +2045,7 @@ BATassertProps(BAT *b)
        assert(b->ttype >= TYPE_void);
        assert(b->ttype < GDKatomcnt);
        assert(b->ttype != TYPE_bat);
-       /* if BOUND2BTRUE is set, then so must the low order bit */
-       assert(!(b->tkey & BOUND2BTRUE) || (b->tkey & 1)); /* tkey != 2 */
+       assert(!b->tunique || b->tkey); /* if unique, then key */
        assert(isVIEW(b) ||
               b->ttype == TYPE_void ||
               BBPfarms[b->theap.farmid].roles & (1 << b->batRole));
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -367,12 +367,12 @@ BATappend(BAT *b, BAT *n, bit force)
        ALIGNapp(b, "BATappend", force, GDK_FAIL);
        BATcompatible(b, n, GDK_FAIL, "BATappend");
 
-       if (b->tkey & BOUND2BTRUE) {
-               /* if b has the BOUND2BTRUE bit set, only insert
-                * values from n that don't already occur in b, and
-                * make sure we don't insert any duplicates either; we
-                * do this by calculating a subset of n that complies
-                * with this */
+       if (b->tunique) {
+               /* if b has the unique bit set, only insert values
+                * from n that don't already occur in b, and make sure
+                * we don't insert any duplicates either; we do this
+                * by calculating a subset of n that complies with
+                * this */
                BAT *d, *u;
 
                d = BATdiff(n, b, NULL, NULL, 1, BUN_NONE);
@@ -395,7 +395,7 @@ BATappend(BAT *b, BAT *n, bit force)
        }
 
        if (BUNlast(b) + BATcount(n) > BUN_MAX) {
-               if (b->tkey & BOUND2BTRUE)
+               if (b->tunique)
                        BBPunfix(n->batCacheid);
                GDKerror("BATappend: combined BATs too large\n");
                return GDK_FAIL;
@@ -430,13 +430,13 @@ BATappend(BAT *b, BAT *n, bit force)
                if (BATtdense(n) && BATcount(b) + b->tseqbase == f) {
                        sz += BATcount(b);
                        BATsetcount(b, sz);
-                       if (b->tkey & BOUND2BTRUE)
+                       if (b->tunique)
                                BBPunfix(n->batCacheid);
                        return GDK_SUCCEED;
                }
                /* we need to materialize the tail */
                if (BATmaterialize(b) != GDK_SUCCEED) {
-                       if (b->tkey & BOUND2BTRUE)
+                       if (b->tunique)
                                BBPunfix(n->batCacheid);
                        return GDK_FAIL;
                }
@@ -463,9 +463,9 @@ BATappend(BAT *b, BAT *n, bit force)
                        }
                        b->tdense = n->tdense;
                        b->tnodense = n->tnodense;
-                       b->tkey |= (n->tkey & TRUE);
-                       /* if BOUND2BTRUE, uniqueness is guaranteed above */
-                       if ((b->tkey & BOUND2BTRUE) == 0) {
+                       /* if tunique, uniqueness is guaranteed above */
+                       b->tkey = n->tkey | b->tunique;
+                       if (!b->tunique) {
                                b->tnokey[0] = n->tnokey[0];
                                b->tnokey[1] = n->tnokey[1];
                        }
@@ -488,9 +488,10 @@ BATappend(BAT *b, BAT *n, bit force)
                                b->trevsorted = FALSE;
                                b->tnorevsorted = 0;
                        }
-                       if (b->tkey == 1 &&
+                       if (!b->tunique && /* uniqueness is guaranteed above */
+                           b->tkey &&
                            (!(BATtordered(b) || BATtrevordered(b)) ||
-                            n->tkey == 0 || xx == 0)) {
+                            !n->tkey || xx == 0)) {
                                BATkey(b, FALSE);
                        }
                        if (b->ttype != TYPE_void && b->tsorted && b->tdense &&
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to