Changeset: 3fbe8f439ac3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/3fbe8f439ac3
Modified Files:
        gdk/gdk.h
        gdk/gdk_atoms.h
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_bbp.c
        gdk/gdk_calc.c
        gdk/gdk_calc_compare.h
        gdk/gdk_heap.c
        gdk/gdk_join.c
        gdk/gdk_qsort.c
        gdk/gdk_search.c
        gdk/gdk_select.c
        gdk/gdk_ssort.c
        gdk/gdk_unique.c
        monetdb5/modules/atoms/str.c
        monetdb5/modules/atoms/str.h
        monetdb5/modules/mal/pcre.c
        monetdb5/modules/mal/txtsim.c
Branch: zero-offset-is-nil
Log Message:

Accept zero offset in var-sized bats as NIL value (without storing NIL).
Note, we do not (yet) save NILs in this way.


diffs (truncated from 970 to 300 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -881,7 +881,10 @@ static inline const void *
 BUNtvar(const BATiter *bi, BUN p)
 {
        assert(bi->type && bi->vh);
-       return bi->vh->base + VarHeapVal(bi->base, p, bi->width);
+       size_t off = VarHeapVal(bi->base, p, bi->width);
+       if (off == 0)
+               return ATOMnilptr(bi->type);
+       return bi->vh->base + off;
 }
 
 __attribute__((__pure__))
@@ -1092,23 +1095,27 @@ BATsettrivprop(BAT *b)
                                        b->tmaxpos = 0;
                                }
                                b->tseqbase = sqbs;
-                       } else if (b->tvheap
-                                  ? ATOMeq(b->ttype,
-                                           b->tvheap->base + VarHeapVal(Tloc(b, 0), 0, b->twidth),
-                                           ATOMnilptr(b->ttype))
-                                  : ATOMeq(b->ttype, Tloc(b, 0),
-                                           ATOMnilptr(b->ttype))) {
-                               /* the only value is NIL */
-                               b->tminpos = BUN_NONE;
-                               b->tmaxpos = BUN_NONE;
-                               b->tnil = true;
-                               b->tnonil = false;
                        } else {
-                               /* the only value is both min and max */
-                               b->tminpos = 0;
-                               b->tmaxpos = 0;
-                               b->tnonil = true;
-                               b->tnil = false;
+                               size_t off;
+                               if (b->tvheap
+                                   ? ((off = VarHeapVal(Tloc(b, 0), 0, b->twidth)) == 0 ||
+                                      ATOMeq(b->ttype,
+                                             b->tvheap->base + off,
+                                             ATOMnilptr(b->ttype)))
+                                   : ATOMeq(b->ttype, Tloc(b, 0),
+                                            ATOMnilptr(b->ttype))) {
+                                       /* the only value is NIL */
+                                       b->tminpos = BUN_NONE;
+                                       b->tmaxpos = BUN_NONE;
+                                       b->tnil = true;
+                                       b->tnonil = false;
+                               } else {
+                                       /* the only value is both min and max */
+                                       b->tminpos = 0;
+                                       b->tmaxpos = 0;
+                                       b->tnil = false;
+                                       b->tnonil = true;
+                               }
                        }
                } else {
                        b->tsorted = false;
@@ -1118,11 +1125,20 @@ BATsettrivprop(BAT *b)
                }
        } else if (b->batCount == 2 && ATOMlinear(b->ttype)) {
                int c;
-               if (b->tvheap)
-                       c = ATOMcmp(b->ttype,
-                                   b->tvheap->base + VarHeapVal(Tloc(b, 0), 0, b->twidth),
-                                   b->tvheap->base + VarHeapVal(Tloc(b, 0), 1, b->twidth));
-               else
+               if (b->tvheap) {
+                       size_t off0 = VarHeapVal(Tloc(b, 0), 0, b->twidth);
+                       size_t off1 = VarHeapVal(Tloc(b, 0), 1, b->twidth);
+                       if (off0 == off1)
+                               c = 0;
+                       else if (off0 == 0)
+                               c = -1;
+                       else if (off1 == 0)
+                               c = 1;
+                       else
+                               c = ATOMcmp(b->ttype,
+                                           b->tvheap->base + off0,
+                                           b->tvheap->base + off1);
+               } else
                        c = ATOMcmp(b->ttype, Tloc(b, 0), Tloc(b, 1));
                b->tsorted = c <= 0;
                b->tnosorted = !b->tsorted;
@@ -1208,7 +1224,7 @@ tfastins_nocheckVAR(BAT *b, BUN p, const
        if (rc != GDK_SUCCEED)
                return rc;
        if (b->twidth < SIZEOF_VAR_T &&
-           (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
+           (b->twidth <= 2 && d != 0 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
                /* doesn't fit in current heap, upgrade it */
                rc = GDKupgradevarheap(b, d, 0, MAX(p, b->batCount));
                if (rc != GDK_SUCCEED)
@@ -1216,10 +1232,14 @@ tfastins_nocheckVAR(BAT *b, BUN p, const
        }
        switch (b->twidth) {
        case 1:
-               ((uint8_t *) b->theap->base)[p] = (uint8_t) (d - GDK_VAROFFSET);
+               if (d != 0)
+                       d -= GDK_VAROFFSET;
+               ((uint8_t *) b->theap->base)[p] = (uint8_t) d;
                break;
        case 2:
-               ((uint16_t *) b->theap->base)[p] = (uint16_t) (d - GDK_VAROFFSET);
+               if (d != 0)
+                       d -= GDK_VAROFFSET;
+               ((uint16_t *) b->theap->base)[p] = (uint16_t) d;
                break;
        case 4:
                ((uint32_t *) b->theap->base)[p] = (uint32_t) d;
diff --git a/gdk/gdk_atoms.h b/gdk/gdk_atoms.h
--- a/gdk/gdk_atoms.h
+++ b/gdk/gdk_atoms.h
@@ -443,11 +443,14 @@ strEq(const char *l, const char *r)
 static inline size_t
 VarHeapVal(const void *b, BUN p, int w)
 {
+       size_t off;
        switch (w) {
        case 1:
-               return (size_t) ((const uint8_t *) b)[p] + GDK_VAROFFSET;
+               off = (size_t) ((const uint8_t *) b)[p];
+               return off == 0 ? 0 : off + GDK_VAROFFSET;
        case 2:
-               return (size_t) ((const uint16_t *) b)[p] + GDK_VAROFFSET;
+               off = (size_t) ((const uint16_t *) b)[p];
+               return off == 0 ? 0 : off + GDK_VAROFFSET;
        case 4:
                return (size_t) ((const uint32_t *) b)[p];
 #if SIZEOF_VAR_T == 8
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1516,7 +1516,9 @@ BUNinplacemulti(BAT *b, const oid *posit
                        val = BUNtmsk(&bi, p);
                } else if (b->tvheap) {
                        size_t off = VarHeapVal(bi.base, p, bi.width);
-                       if (off < bi.vhfree)
+                       if (off == 0)
+                               val = ATOMnilptr(bi.type);
+                       else if (off < bi.vhfree)
                                val = bi.vh->base + off;
                        else
                                val = NULL; /* bad offset */
@@ -1598,10 +1600,14 @@ BUNinplacemulti(BAT *b, const oid *posit
                        _ptr = b->theap->base + p * b->twidth;
                        switch (b->twidth) {
                        case 1:
-                               _d = (var_t) * (uint8_t *) _ptr + GDK_VAROFFSET;
+                               _d = (var_t) * (uint8_t *) _ptr;
+                               if (_d != 0)
+                                       _d += GDK_VAROFFSET;
                                break;
                        case 2:
-                               _d = (var_t) * (uint16_t *) _ptr + GDK_VAROFFSET;
+                               _d = (var_t) * (uint16_t *) _ptr;
+                               if (_d != 0)
+                                       _d += GDK_VAROFFSET;
                                break;
                        case 4:
                                _d = (var_t) * (uint32_t *) _ptr;
@@ -1622,7 +1628,7 @@ BUNinplacemulti(BAT *b, const oid *posit
                        }
                        MT_lock_unset(&b->theaplock);
                        if (b->twidth < SIZEOF_VAR_T &&
-                           (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 << b->tshift))) {
+                           (b->twidth <= 2 && _d != 0 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 << b->tshift))) {
                                /* doesn't fit in current heap, upgrade it */
                                if (GDKupgradevarheap(b, _d, 0, bi.count) != GDK_SUCCEED) {
                                        MT_rwlock_wrunlock(&b->thashlock);
@@ -1641,10 +1647,14 @@ BUNinplacemulti(BAT *b, const oid *posit
                        _ptr = b->theap->base + p * b->twidth;
                        switch (b->twidth) {
                        case 1:
-                               * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET);
+                               if (_d != 0)
+                                       _d -= GDK_VAROFFSET;
+                               * (uint8_t *) _ptr = (uint8_t) _d;
                                break;
                        case 2:
-                               * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET);
+                               if (_d != 0)
+                                       _d -= GDK_VAROFFSET;
+                               * (uint16_t *) _ptr = (uint16_t) _d;
                                break;
                        case 4:
                                * (uint32_t *) _ptr = (uint32_t) _d;
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -55,25 +55,33 @@ unshare_varsized_heap(BAT *b)
                        switch (b->twidth) {
                        case 1:
                                for (BUN i = 0; i < b->batCount; i++) {
-                                       o = (var_t) ((uint8_t *) b->theap->base)[i] + GDK_VAROFFSET;
-                                       if (atomput(b, &o, oh->base + o) == (var_t) -1)
-                                               goto bailout;
-                                       ((uint8_t *) b->theap->base)[i] = (uint8_t) (o - GDK_VAROFFSET);
+                                       o = (var_t) ((uint8_t *) b->theap->base)[i];
+                                       if (o != 0) {
+                                               o += GDK_VAROFFSET;
+                                               if (atomput(b, &o, oh->base + o) == (var_t) -1)
+                                                       goto bailout;
+                                               o -= GDK_VAROFFSET;
+                                       }
+                                       ((uint8_t *) b->theap->base)[i] = (uint8_t) o;
                                }
                                break;
                        case 2:
                                for (BUN i = 0; i < b->batCount; i++) {
-                                       o = (var_t) ((uint16_t *) b->theap->base)[i] + GDK_VAROFFSET;
-                                       if (atomput(b, &o, oh->base + o) == (var_t) -1)
-                                               goto bailout;
-                                       ((uint16_t *) b->theap->base)[i] = (uint16_t) (o - GDK_VAROFFSET);
+                                       o = (var_t) ((uint16_t *) b->theap->base)[i];
+                                       if (o != 0) {
+                                               o += GDK_VAROFFSET;
+                                               if (atomput(b, &o, oh->base + o) == (var_t) -1)
+                                                       goto bailout;
+                                               o -= GDK_VAROFFSET;
+                                       }
+                                       ((uint16_t *) b->theap->base)[i] = (uint16_t) o;
                                }
                                break;
 #if SIZEOF_VAR_T == 8
                        case 4:
                                for (BUN i = 0; i < b->batCount; i++) {
                                        o = (var_t) ((uint32_t *) b->theap->base)[i];
-                                       if (atomput(b, &o, oh->base + o) == (var_t) -1)
+                                       if (o != 0 && atomput(b, &o, oh->base + o) == (var_t) -1)
                                                goto bailout;
                                        ((uint32_t *) b->theap->base)[i] = (uint32_t) o;
                                }
@@ -82,7 +90,7 @@ unshare_varsized_heap(BAT *b)
                        case SIZEOF_VAR_T:
                                for (BUN i = 0; i < b->batCount; i++) {
                                        o = ((var_t *) b->theap->base)[i];
-                                       if (atomput(b, &o, oh->base + o) == (var_t) -1)
+                                       if (o != 0 && atomput(b, &o, oh->base + o) == (var_t) -1)
                                                goto bailout;
                                        ((var_t *) b->theap->base)[i] = o;
                                }
@@ -265,10 +273,14 @@ insert_string_bat(BAT *b, BATiter *ni, s
                        p = canditer_next(ci) - ni->b->hseqbase;
                        switch (ni->width) {
                        case 1:
-                               v = (var_t) tbp[p] + GDK_VAROFFSET;
+                               v = (var_t) tbp[p];
+                               if (v != 0)
+                                       v += GDK_VAROFFSET;
                                break;
                        case 2:
-                               v = (var_t) tsp[p] + GDK_VAROFFSET;
+                               v = (var_t) tsp[p];
+                               if (v != 0)
+                                       v += GDK_VAROFFSET;
                                break;
                        case 4:
                                v = (var_t) tip[p];
@@ -281,17 +293,23 @@ insert_string_bat(BAT *b, BATiter *ni, s
                        default:
                                MT_UNREACHABLE();
                        }
-                       v = (var_t) ((size_t) v + toff);
-                       assert(v >= GDK_VAROFFSET);
-                       assert((size_t) v < b->tvheap->free);
+                       if (v != 0) {
+                               v = (var_t) ((size_t) v + toff);
+                               assert(v > GDK_VAROFFSET);
+                               assert((size_t) v < b->tvheap->free);
+                       }
                        switch (b->twidth) {
                        case 1:
-                               assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
-                               ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
+                               assert(v == 0 || v - GDK_VAROFFSET < ((var_t) 1 << 8));
+                               if (v != 0)
+                                       v -= GDK_VAROFFSET;
+                               ((uint8_t *) b->theap->base)[r++] = (uint8_t) v;
                                break;
                        case 2:
-                               assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
-                               ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
+                               assert(v == 0 || v - GDK_VAROFFSET < ((var_t) 1 << 16));
+                               if (v != 0)
+                                       v -= GDK_VAROFFSET;
+                               ((uint16_t *) b->theap->base)[r++] = (uint16_t) v;
                                break;
                        case 4:
 #if SIZEOF_VAR_T == 8
@@ -339,8 +357,9 @@ insert_string_bat(BAT *b, BATiter *ni, s
                        p = canditer_next(ci) - ni->b->hseqbase;
                        off = VarHeapVal(ni->base, p, ni->width); /* the offset */
                        tp = ni->vh->base + off; /* the string */
-                       if (off < b->tvheap->free &&
-                           strcmp(b->tvheap->base + off, tp) == 0) {
+                       if (off == 0 ||
+                           (off < b->tvheap->free &&
+                            strcmp(b->tvheap->base + off, tp) == 0)) {
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to