Changeset: e6054fc6fb9c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/e6054fc6fb9c
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_align.c
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_bbp.c
gdk/gdk_hash.c
gdk/gdk_logger.c
gdk/gdk_private.h
gdk/gdk_storage.c
gdk/gdk_string.c
sql/storage/bat/bat_logger.c
sql/storage/bat/bat_storage.c
Branch: ustr
Log Message:
Work in progress: initial stab at having a ustr implementation.
diffs (truncated from 1056 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -113,6 +113,7 @@ gdk_return BATclear(BAT *b, bool force);
void BATcommit(BAT *b, BUN size);
BAT *BATconstant(oid hseq, int tt, const void *val, BUN cnt, role_t role);
BAT *BATconvert(BAT *b, BAT *s, int tp, uint8_t scale1, uint8_t scale2, uint8_t precision);
+gdk_return BATconvert2ustr(BAT *b) __attribute__((__warn_unused_result__));
BUN BATcount_no_nil(BAT *b, BAT *s);
gdk_return BATdel(BAT *b, BAT *d) __attribute__((__warn_unused_result__));
BAT *BATdense(oid hseq, oid tseq, BUN cnt) __attribute__((__warn_unused_result__));
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -402,6 +402,7 @@ typedef struct BAT {
bool tsorted:1; /* column is sorted in ascending order */
bool trevsorted:1; /* column is sorted in descending order */
bool tascii:1; /* string column is fully ASCII (7 bit) */
+ bool ustr:1; /* use ustrbat */
BUN tnokey[2]; /* positions that prove key==FALSE */
BUN tnosorted; /* position that proves sorted==FALSE */
BUN tnorevsorted; /* position that proves revsorted==FALSE */
@@ -483,7 +484,7 @@ __attribute__((__pure__))
static inline bool
isVIEW(const BAT *b)
{
- return VIEWtparent(b) != 0 || VIEWvtparent(b) != 0;
+ return VIEWtparent(b) != 0 || (!b->ustr && VIEWvtparent(b) != 0);
}
typedef struct {
@@ -609,7 +610,8 @@ typedef struct BATiter {
vhdirty:1,
copiedtodisk:1,
transient:1,
- ascii:1;
+ ascii:1,
+ ustr:1;
restrict_t restricted:2;
#ifndef NDEBUG
bool locked:1;
@@ -657,6 +659,7 @@ bat_iterator_nolock(BAT *b)
.sorted = b->tsorted,
.revsorted = b->trevsorted,
.ascii = b->tascii,
+ .ustr = b->ustr,
/* only look at heap dirty flag if we own it */
.hdirty = b->theap->parentid == b->batCacheid &&
b->theap->dirty,
/* also, if there is no vheap, it's not dirty */
@@ -771,6 +774,8 @@ gdk_export BAT *BATdense(oid hseq, oid t
__attribute__((__warn_unused_result__));
gdk_export gdk_return BATextend(BAT *b, BUN newcap)
__attribute__((__warn_unused_result__));
+gdk_export gdk_return BATconvert2ustr(BAT *b)
+ __attribute__((__warn_unused_result__));
/* internal */
gdk_export uint8_t ATOMelmshift(int sz)
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -178,6 +178,7 @@ VIEWcreate(oid seq, BAT *b, BUN l, BUN h
bn->tnonil = bi.nonil;
bn->tnil = bi.nil;
bn->tascii = bi.ascii;
+ bn->ustr = bi.ustr;
bn->tnokey[0] = bi.nokey[0];
bn->tnokey[1] = bi.nokey[1];
bn->tnosorted = bi.nosorted;
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -482,48 +482,50 @@ BATclear(BAT *b, bool force)
/* we must dispose of all inserted atoms */
MT_lock_set(&b->theaplock);
- if (force && BATatoms[b->ttype].atomDel == NULL) {
- assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
- /* no stable elements: we do a quick heap clean */
- /* need to clean heap which keeps data even though the
- BUNs got removed. This means reinitialize when
- free > 0
- */
- if (b->tvheap && b->tvheap->free > 0) {
- Heap *th = GDKmalloc(sizeof(Heap));
+ if (!b->ustr) {
+ if (force && BATatoms[b->ttype].atomDel == NULL) {
+ assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
+ /* no stable elements: we do a quick heap clean */
+ /* need to clean heap which keeps data even though the
+ BUNs got removed. This means reinitialize when
+ free > 0
+ */
+ if (b->tvheap && b->tvheap->free > 0) {
+ Heap *th = GDKmalloc(sizeof(Heap));
- if (th == NULL) {
- MT_lock_unset(&b->theaplock);
- return GDK_FAIL;
+ if (th == NULL) {
+ MT_lock_unset(&b->theaplock);
+ return GDK_FAIL;
+ }
+ *th = (Heap) {
+ .farmid = b->tvheap->farmid,
+ .parentid = b->tvheap->parentid,
+ .dirty = true,
+ .hasfile = b->tvheap->hasfile,
+ .refs = ATOMIC_VAR_INIT(1),
+ };
+ strtcpy(th->filename, b->tvheap->filename, sizeof(th->filename));
+ if (ATOMheap(b->ttype, th, 0) != GDK_SUCCEED) {
+ MT_lock_unset(&b->theaplock);
+ return GDK_FAIL;
+ }
+ tvp = b->tvheap->parentid;
+ HEAPdecref(b->tvheap, false);
+ b->tvheap = th;
}
- *th = (Heap) {
- .farmid = b->tvheap->farmid,
- .parentid = b->tvheap->parentid,
- .dirty = true,
- .hasfile = b->tvheap->hasfile,
- .refs = ATOMIC_VAR_INIT(1),
- };
- strtcpy(th->filename, b->tvheap->filename, sizeof(th->filename));
- if (ATOMheap(b->ttype, th, 0) != GDK_SUCCEED) {
- MT_lock_unset(&b->theaplock);
- return GDK_FAIL;
+ } else {
+ /* do heap-delete of all inserted atoms */
+ void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
+
+ /* TYPE_str has no del method, so we shouldn't get here */
+ assert(tatmdel == NULL || b->twidth == sizeof(var_t));
+ if (tatmdel) {
+ BATiter bi = bat_iterator_nolock(b);
+
+ for (p = b->batInserted, q = BATcount(b); p < q; p++)
+ (*tatmdel)(b->tvheap, (var_t*) BUNtloc(&bi,p));
+ b->tvheap->dirty = true;
}
- tvp = b->tvheap->parentid;
- HEAPdecref(b->tvheap, false);
- b->tvheap = th;
- }
- } else {
- /* do heap-delete of all inserted atoms */
- void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
-
- /* TYPE_str has no del method, so we shouldn't get here */
- assert(tatmdel == NULL || b->twidth == sizeof(var_t));
- if (tatmdel) {
- BATiter bi = bat_iterator_nolock(b);
-
- for (p = b->batInserted, q = BATcount(b); p < q; p++)
- (*tatmdel)(b->tvheap, (var_t*) BUNtloc(&bi,p));
- b->tvheap->dirty = true;
}
}
@@ -697,9 +699,9 @@ COLcopy2(BAT *b, int tt, bool writable,
BATcheck(b, NULL);
- /* can't share vheap when persistent */
- assert(!mayshare || role == TRANSIENT);
- if (mayshare && role != TRANSIENT)
+ /* can't share vheap when persistent (unless ustr) */
+ assert(!mayshare || role == TRANSIENT || b->ustr);
+ if (mayshare && role != TRANSIENT && !b->ustr)
mayshare = false;
/* maybe a bit ugly to change the requested bat type?? */
@@ -794,6 +796,10 @@ COLcopy2(BAT *b, int tt, bool writable,
* memcpy (if true) or must do a slower
* individual insert (if false) */
slowcopy = true;
+ } else if (b->ustr) {
+ /* copy is not a ustr bat so we don't want to
+ * carry all unused strings */
+ slowcopy = true;
}
}
@@ -801,6 +807,9 @@ COLcopy2(BAT *b, int tt, bool writable,
if (bn == NULL) {
goto bunins_failed;
}
+ if (b->ustr && mayshare && BATconvert2ustr(bn) != GDK_SUCCEED) {
+ goto bunins_failed;
+ }
if (bn->tvheap != NULL && bn->tvheap->base == NULL && !mayshare) {
/* this combination can happen since the last
* argument of COLnew2 not being zero triggers a
@@ -818,10 +827,12 @@ COLcopy2(BAT *b, int tt, bool writable,
/* case (3): just copy the heaps */
if (bn->tvheap) {
if (mayshare) {
- HEAPincref(bi.vh);
- HEAPdecref(bn->tvheap, true);
- BBPretain(bi.vh->parentid);
- bn->tvheap = bi.vh;
+ if (!bn->ustr) {
+ HEAPincref(bi.vh);
+ HEAPdecref(bn->tvheap, true);
+ BBPretain(bi.vh->parentid);
+ bn->tvheap = bi.vh;
+ }
} else {
if (HEAPextend(bn->tvheap, bi.vhfree, true) != GDK_SUCCEED)
goto bunins_failed;
@@ -980,7 +991,7 @@ BUNappendmulti(BAT *b, const void *value
BATcheck(b, GDK_FAIL);
- assert(!VIEWtparent(b));
+ assert(!VIEWtparent(b) || b->ustr);
if (count == 0)
return GDK_SUCCEED;
@@ -1020,7 +1031,7 @@ BUNappendmulti(BAT *b, const void *value
}
}
- if (unshare_varsized_heap(b) != GDK_SUCCEED) {
+ if (!b->ustr && unshare_varsized_heap(b) != GDK_SUCCEED) {
return GDK_FAIL;
}
@@ -1831,20 +1842,17 @@ void_inplace(BAT *b, oid id, const void
* known and a hash index is available, one should use the inline
* functions to speed-up processing.
*/
-static BUN
-slowfnd(BAT *b, const void *v)
+static inline BUN
+slowfnd(BATiter *bi, const void *v)
{
- BATiter bi = bat_iterator(b);
BUN p, q;
- bool (*atomeq)(const void *, const void *) = ATOMequal(bi.type);
+ bool (*atomeq)(const void *, const void *) = ATOMequal(bi->type);
- BATloop(&bi, p, q) {
- if ((*atomeq)(v, BUNtail(&bi, p))) {
- bat_iterator_end(&bi);
+ BATloop(bi, p, q) {
+ if ((*atomeq)(v, BUNtail(bi, p))) {
return p;
}
}
- bat_iterator_end(&bi);
return BUN_NONE;
}
@@ -1896,12 +1904,11 @@ BUNfnd(BAT *b, const void *v)
if (BATordered(b) || BATordered_rev(b))
return SORTfnd(b, v);
}
+ bi = bat_iterator(b); /* outside of hashlock */
if (BAThash(b) == GDK_SUCCEED) {
- bi = bat_iterator(b); /* outside of hashlock */
MT_rwlock_rdlock(&b->thashlock);
if (b->thash == NULL) {
MT_rwlock_rdunlock(&b->thashlock);
- bat_iterator_end(&bi);
goto hashfnd_failed;
}
switch (ATOMbasetype(bi.type)) {
@@ -1963,7 +1970,9 @@ BUNfnd(BAT *b, const void *v)
hashfnd_failed:
/* can't build hash table, search the slow way */
GDKclrerr();
- return slowfnd(b, v);
+ r = slowfnd(&bi, v);
+ bat_iterator_end(&bi);
+ return r;
}
/*
@@ -2351,7 +2360,7 @@ BATcheckmodes(BAT *b, bool existing)
dirty |= (b->theap->newstorage != m1);
}
- if (b->tvheap) {
+ if (b->tvheap && !b->ustr) {
bool ta = (b->batRestricted == BAT_APPEND) && ATOMappendpriv(b->ttype, b->tvheap);
m3 = HEAPcommitpersistence(b->tvheap, wr || ta, existing);
dirty |= (b->tvheap->newstorage != m3);
@@ -2361,7 +2370,7 @@ BATcheckmodes(BAT *b, bool existing)
if (dirty) {
b->theap->newstorage = m1;
- if (b->tvheap)
+ if (b->tvheap && !b->ustr)
b->tvheap->newstorage = m3;
}
return GDK_SUCCEED;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]