Changeset: 3fbe8f439ac3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/3fbe8f439ac3
Modified Files:
gdk/gdk.h
gdk/gdk_atoms.h
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_bbp.c
gdk/gdk_calc.c
gdk/gdk_calc_compare.h
gdk/gdk_heap.c
gdk/gdk_join.c
gdk/gdk_qsort.c
gdk/gdk_search.c
gdk/gdk_select.c
gdk/gdk_ssort.c
gdk/gdk_unique.c
monetdb5/modules/atoms/str.c
monetdb5/modules/atoms/str.h
monetdb5/modules/mal/pcre.c
monetdb5/modules/mal/txtsim.c
Branch: zero-offset-is-nil
Log Message:
Accept a zero offset in var-sized BATs as the NIL value (without storing NIL).
Note: we do not (yet) save NILs in this way.
diffs (truncated from 970 to 300 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -881,7 +881,10 @@ static inline const void *
BUNtvar(const BATiter *bi, BUN p)
{
assert(bi->type && bi->vh);
- return bi->vh->base + VarHeapVal(bi->base, p, bi->width);
+ size_t off = VarHeapVal(bi->base, p, bi->width);
+ if (off == 0)
+ return ATOMnilptr(bi->type);
+ return bi->vh->base + off;
}
__attribute__((__pure__))
@@ -1092,23 +1095,27 @@ BATsettrivprop(BAT *b)
b->tmaxpos = 0;
}
b->tseqbase = sqbs;
- } else if (b->tvheap
- ? ATOMeq(b->ttype,
- b->tvheap->base +
VarHeapVal(Tloc(b, 0), 0, b->twidth),
- ATOMnilptr(b->ttype))
- : ATOMeq(b->ttype, Tloc(b, 0),
- ATOMnilptr(b->ttype))) {
- /* the only value is NIL */
- b->tminpos = BUN_NONE;
- b->tmaxpos = BUN_NONE;
- b->tnil = true;
- b->tnonil = false;
} else {
- /* the only value is both min and max */
- b->tminpos = 0;
- b->tmaxpos = 0;
- b->tnonil = true;
- b->tnil = false;
+ size_t off;
+ if (b->tvheap
+ ? ((off = VarHeapVal(Tloc(b, 0), 0,
b->twidth)) == 0 ||
+ ATOMeq(b->ttype,
+ b->tvheap->base + off,
+ ATOMnilptr(b->ttype)))
+ : ATOMeq(b->ttype, Tloc(b, 0),
+ ATOMnilptr(b->ttype))) {
+ /* the only value is NIL */
+ b->tminpos = BUN_NONE;
+ b->tmaxpos = BUN_NONE;
+ b->tnil = true;
+ b->tnonil = false;
+ } else {
+ /* the only value is both min and max */
+ b->tminpos = 0;
+ b->tmaxpos = 0;
+ b->tnil = false;
+ b->tnonil = true;
+ }
}
} else {
b->tsorted = false;
@@ -1118,11 +1125,20 @@ BATsettrivprop(BAT *b)
}
} else if (b->batCount == 2 && ATOMlinear(b->ttype)) {
int c;
- if (b->tvheap)
- c = ATOMcmp(b->ttype,
- b->tvheap->base + VarHeapVal(Tloc(b, 0), 0,
b->twidth),
- b->tvheap->base + VarHeapVal(Tloc(b, 0), 1,
b->twidth));
- else
+ if (b->tvheap) {
+ size_t off0 = VarHeapVal(Tloc(b, 0), 0, b->twidth);
+ size_t off1 = VarHeapVal(Tloc(b, 0), 1, b->twidth);
+ if (off0 == off1)
+ c = 0;
+ else if (off0 == 0)
+ c = -1;
+ else if (off1 == 0)
+ c = 1;
+ else
+ c = ATOMcmp(b->ttype,
+ b->tvheap->base + off0,
+ b->tvheap->base + off1);
+ } else
c = ATOMcmp(b->ttype, Tloc(b, 0), Tloc(b, 1));
b->tsorted = c <= 0;
b->tnosorted = !b->tsorted;
@@ -1208,7 +1224,7 @@ tfastins_nocheckVAR(BAT *b, BUN p, const
if (rc != GDK_SUCCEED)
return rc;
if (b->twidth < SIZEOF_VAR_T &&
- (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 <<
b->tshift))) {
+ (b->twidth <= 2 && d != 0 ? d - GDK_VAROFFSET : d) >= ((size_t) 1
<< (8 << b->tshift))) {
/* doesn't fit in current heap, upgrade it */
rc = GDKupgradevarheap(b, d, 0, MAX(p, b->batCount));
if (rc != GDK_SUCCEED)
@@ -1216,10 +1232,14 @@ tfastins_nocheckVAR(BAT *b, BUN p, const
}
switch (b->twidth) {
case 1:
- ((uint8_t *) b->theap->base)[p] = (uint8_t) (d - GDK_VAROFFSET);
+ if (d != 0)
+ d -= GDK_VAROFFSET;
+ ((uint8_t *) b->theap->base)[p] = (uint8_t) d;
break;
case 2:
- ((uint16_t *) b->theap->base)[p] = (uint16_t) (d -
GDK_VAROFFSET);
+ if (d != 0)
+ d -= GDK_VAROFFSET;
+ ((uint16_t *) b->theap->base)[p] = (uint16_t) d;
break;
case 4:
((uint32_t *) b->theap->base)[p] = (uint32_t) d;
diff --git a/gdk/gdk_atoms.h b/gdk/gdk_atoms.h
--- a/gdk/gdk_atoms.h
+++ b/gdk/gdk_atoms.h
@@ -443,11 +443,14 @@ strEq(const char *l, const char *r)
static inline size_t
VarHeapVal(const void *b, BUN p, int w)
{
+ size_t off;
switch (w) {
case 1:
- return (size_t) ((const uint8_t *) b)[p] + GDK_VAROFFSET;
+ off = (size_t) ((const uint8_t *) b)[p];
+ return off == 0 ? 0 : off + GDK_VAROFFSET;
case 2:
- return (size_t) ((const uint16_t *) b)[p] + GDK_VAROFFSET;
+ off = (size_t) ((const uint16_t *) b)[p];
+ return off == 0 ? 0 : off + GDK_VAROFFSET;
case 4:
return (size_t) ((const uint32_t *) b)[p];
#if SIZEOF_VAR_T == 8
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1516,7 +1516,9 @@ BUNinplacemulti(BAT *b, const oid *posit
val = BUNtmsk(&bi, p);
} else if (b->tvheap) {
size_t off = VarHeapVal(bi.base, p, bi.width);
- if (off < bi.vhfree)
+ if (off == 0)
+ val = ATOMnilptr(bi.type);
+ else if (off < bi.vhfree)
val = bi.vh->base + off;
else
val = NULL; /* bad offset */
@@ -1598,10 +1600,14 @@ BUNinplacemulti(BAT *b, const oid *posit
_ptr = b->theap->base + p * b->twidth;
switch (b->twidth) {
case 1:
- _d = (var_t) * (uint8_t *) _ptr + GDK_VAROFFSET;
+ _d = (var_t) * (uint8_t *) _ptr;
+ if (_d != 0)
+ _d += GDK_VAROFFSET;
break;
case 2:
- _d = (var_t) * (uint16_t *) _ptr +
GDK_VAROFFSET;
+ _d = (var_t) * (uint16_t *) _ptr;
+ if (_d != 0)
+ _d += GDK_VAROFFSET;
break;
case 4:
_d = (var_t) * (uint32_t *) _ptr;
@@ -1622,7 +1628,7 @@ BUNinplacemulti(BAT *b, const oid *posit
}
MT_lock_unset(&b->theaplock);
if (b->twidth < SIZEOF_VAR_T &&
- (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >=
((size_t) 1 << (8 << b->tshift))) {
+ (b->twidth <= 2 && _d != 0 ? _d - GDK_VAROFFSET :
_d) >= ((size_t) 1 << (8 << b->tshift))) {
/* doesn't fit in current heap, upgrade it */
if (GDKupgradevarheap(b, _d, 0, bi.count) !=
GDK_SUCCEED) {
MT_rwlock_wrunlock(&b->thashlock);
@@ -1641,10 +1647,14 @@ BUNinplacemulti(BAT *b, const oid *posit
_ptr = b->theap->base + p * b->twidth;
switch (b->twidth) {
case 1:
- * (uint8_t *) _ptr = (uint8_t) (_d -
GDK_VAROFFSET);
+ if (_d != 0)
+ _d -= GDK_VAROFFSET;
+ * (uint8_t *) _ptr = (uint8_t) _d;
break;
case 2:
- * (uint16_t *) _ptr = (uint16_t) (_d -
GDK_VAROFFSET);
+ if (_d != 0)
+ _d -= GDK_VAROFFSET;
+ * (uint16_t *) _ptr = (uint16_t) _d;
break;
case 4:
* (uint32_t *) _ptr = (uint32_t) _d;
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -55,25 +55,33 @@ unshare_varsized_heap(BAT *b)
switch (b->twidth) {
case 1:
for (BUN i = 0; i < b->batCount; i++) {
- o = (var_t) ((uint8_t *)
b->theap->base)[i] + GDK_VAROFFSET;
- if (atomput(b, &o, oh->base + o) ==
(var_t) -1)
- goto bailout;
- ((uint8_t *) b->theap->base)[i] =
(uint8_t) (o - GDK_VAROFFSET);
+ o = (var_t) ((uint8_t *)
b->theap->base)[i];
+ if (o != 0) {
+ o += GDK_VAROFFSET;
+ if (atomput(b, &o, oh->base +
o) == (var_t) -1)
+ goto bailout;
+ o -= GDK_VAROFFSET;
+ }
+ ((uint8_t *) b->theap->base)[i] =
(uint8_t) o;
}
break;
case 2:
for (BUN i = 0; i < b->batCount; i++) {
- o = (var_t) ((uint16_t *)
b->theap->base)[i] + GDK_VAROFFSET;
- if (atomput(b, &o, oh->base + o) ==
(var_t) -1)
- goto bailout;
- ((uint16_t *) b->theap->base)[i] =
(uint16_t) (o - GDK_VAROFFSET);
+ o = (var_t) ((uint16_t *)
b->theap->base)[i];
+ if (o != 0) {
+ o += GDK_VAROFFSET;
+ if (atomput(b, &o, oh->base +
o) == (var_t) -1)
+ goto bailout;
+ o -= GDK_VAROFFSET;
+ }
+ ((uint16_t *) b->theap->base)[i] =
(uint16_t) o;
}
break;
#if SIZEOF_VAR_T == 8
case 4:
for (BUN i = 0; i < b->batCount; i++) {
o = (var_t) ((uint32_t *)
b->theap->base)[i];
- if (atomput(b, &o, oh->base + o) ==
(var_t) -1)
+ if (o != 0 && atomput(b, &o, oh->base +
o) == (var_t) -1)
goto bailout;
((uint32_t *) b->theap->base)[i] =
(uint32_t) o;
}
@@ -82,7 +90,7 @@ unshare_varsized_heap(BAT *b)
case SIZEOF_VAR_T:
for (BUN i = 0; i < b->batCount; i++) {
o = ((var_t *) b->theap->base)[i];
- if (atomput(b, &o, oh->base + o) ==
(var_t) -1)
+ if (o != 0 && atomput(b, &o, oh->base +
o) == (var_t) -1)
goto bailout;
((var_t *) b->theap->base)[i] = o;
}
@@ -265,10 +273,14 @@ insert_string_bat(BAT *b, BATiter *ni, s
p = canditer_next(ci) - ni->b->hseqbase;
switch (ni->width) {
case 1:
- v = (var_t) tbp[p] + GDK_VAROFFSET;
+ v = (var_t) tbp[p];
+ if (v != 0)
+ v += GDK_VAROFFSET;
break;
case 2:
- v = (var_t) tsp[p] + GDK_VAROFFSET;
+ v = (var_t) tsp[p];
+ if (v != 0)
+ v += GDK_VAROFFSET;
break;
case 4:
v = (var_t) tip[p];
@@ -281,17 +293,23 @@ insert_string_bat(BAT *b, BATiter *ni, s
default:
MT_UNREACHABLE();
}
- v = (var_t) ((size_t) v + toff);
- assert(v >= GDK_VAROFFSET);
- assert((size_t) v < b->tvheap->free);
+ if (v != 0) {
+ v = (var_t) ((size_t) v + toff);
+ assert(v > GDK_VAROFFSET);
+ assert((size_t) v < b->tvheap->free);
+ }
switch (b->twidth) {
case 1:
- assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
- ((uint8_t *) b->theap->base)[r++] = (uint8_t)
(v - GDK_VAROFFSET);
+ assert(v == 0 || v - GDK_VAROFFSET < ((var_t) 1
<< 8));
+ if (v != 0)
+ v -= GDK_VAROFFSET;
+ ((uint8_t *) b->theap->base)[r++] = (uint8_t) v;
break;
case 2:
- assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
- ((uint16_t *) b->theap->base)[r++] = (uint16_t)
(v - GDK_VAROFFSET);
+ assert(v == 0 || v - GDK_VAROFFSET < ((var_t) 1
<< 16));
+ if (v != 0)
+ v -= GDK_VAROFFSET;
+ ((uint16_t *) b->theap->base)[r++] = (uint16_t)
v;
break;
case 4:
#if SIZEOF_VAR_T == 8
@@ -339,8 +357,9 @@ insert_string_bat(BAT *b, BATiter *ni, s
p = canditer_next(ci) - ni->b->hseqbase;
off = VarHeapVal(ni->base, p, ni->width); /* the offset
*/
tp = ni->vh->base + off; /* the string */
- if (off < b->tvheap->free &&
- strcmp(b->tvheap->base + off, tp) == 0) {
+ if (off == 0 ||
+ (off < b->tvheap->free &&
+ strcmp(b->tvheap->base + off, tp) == 0)) {
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]