Changeset: e6054fc6fb9c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/e6054fc6fb9c
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_align.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_bbp.c
        gdk/gdk_hash.c
        gdk/gdk_logger.c
        gdk/gdk_private.h
        gdk/gdk_storage.c
        gdk/gdk_string.c
        sql/storage/bat/bat_logger.c
        sql/storage/bat/bat_storage.c
Branch: ustr
Log Message:

Work in progress: initial stab at having a ustr implementation.


diffs (truncated from 1056 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -113,6 +113,7 @@ gdk_return BATclear(BAT *b, bool force);
 void BATcommit(BAT *b, BUN size);
 BAT *BATconstant(oid hseq, int tt, const void *val, BUN cnt, role_t role);
 BAT *BATconvert(BAT *b, BAT *s, int tp, uint8_t scale1, uint8_t scale2, uint8_t precision);
+gdk_return BATconvert2ustr(BAT *b) __attribute__((__warn_unused_result__));
 BUN BATcount_no_nil(BAT *b, BAT *s);
 gdk_return BATdel(BAT *b, BAT *d) __attribute__((__warn_unused_result__));
 BAT *BATdense(oid hseq, oid tseq, BUN cnt) __attribute__((__warn_unused_result__));
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -402,6 +402,7 @@ typedef struct BAT {
        bool tsorted:1;         /* column is sorted in ascending order */
        bool trevsorted:1;      /* column is sorted in descending order */
        bool tascii:1;          /* string column is fully ASCII (7 bit) */
+       bool ustr:1;            /* use ustrbat */
        BUN tnokey[2];          /* positions that prove key==FALSE */
        BUN tnosorted;          /* position that proves sorted==FALSE */
        BUN tnorevsorted;       /* position that proves revsorted==FALSE */
@@ -483,7 +484,7 @@ __attribute__((__pure__))
 static inline bool
 isVIEW(const BAT *b)
 {
-       return VIEWtparent(b) != 0 || VIEWvtparent(b) != 0;
+       return VIEWtparent(b) != 0 || (!b->ustr && VIEWvtparent(b) != 0);
 }
 
 typedef struct {
@@ -609,7 +610,8 @@ typedef struct BATiter {
                vhdirty:1,
                copiedtodisk:1,
                transient:1,
-               ascii:1;
+               ascii:1,
+               ustr:1;
        restrict_t restricted:2;
 #ifndef NDEBUG
        bool locked:1;
@@ -657,6 +659,7 @@ bat_iterator_nolock(BAT *b)
                        .sorted = b->tsorted,
                        .revsorted = b->trevsorted,
                        .ascii = b->tascii,
+                       .ustr = b->ustr,
                        /* only look at heap dirty flag if we own it */
                        .hdirty = b->theap->parentid == b->batCacheid && b->theap->dirty,
                        /* also, if there is no vheap, it's not dirty */
@@ -771,6 +774,8 @@ gdk_export BAT *BATdense(oid hseq, oid t
        __attribute__((__warn_unused_result__));
 gdk_export gdk_return BATextend(BAT *b, BUN newcap)
        __attribute__((__warn_unused_result__));
+gdk_export gdk_return BATconvert2ustr(BAT *b)
+       __attribute__((__warn_unused_result__));
 
 /* internal */
 gdk_export uint8_t ATOMelmshift(int sz)
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -178,6 +178,7 @@ VIEWcreate(oid seq, BAT *b, BUN l, BUN h
        bn->tnonil = bi.nonil;
        bn->tnil = bi.nil;
        bn->tascii = bi.ascii;
+       bn->ustr = bi.ustr;
        bn->tnokey[0] = bi.nokey[0];
        bn->tnokey[1] = bi.nokey[1];
        bn->tnosorted = bi.nosorted;
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -482,48 +482,50 @@ BATclear(BAT *b, bool force)
 
        /* we must dispose of all inserted atoms */
        MT_lock_set(&b->theaplock);
-       if (force && BATatoms[b->ttype].atomDel == NULL) {
-               assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
-               /* no stable elements: we do a quick heap clean */
-               /* need to clean heap which keeps data even though the
-                  BUNs got removed. This means reinitialize when
-                  free > 0
-               */
-               if (b->tvheap && b->tvheap->free > 0) {
-                       Heap *th = GDKmalloc(sizeof(Heap));
+       if (!b->ustr) {
+               if (force && BATatoms[b->ttype].atomDel == NULL) {
+                       assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
+                       /* no stable elements: we do a quick heap clean */
+                       /* need to clean heap which keeps data even though the
+                          BUNs got removed. This means reinitialize when
+                          free > 0
+                       */
+                       if (b->tvheap && b->tvheap->free > 0) {
+                               Heap *th = GDKmalloc(sizeof(Heap));
 
-                       if (th == NULL) {
-                               MT_lock_unset(&b->theaplock);
-                               return GDK_FAIL;
+                               if (th == NULL) {
+                                       MT_lock_unset(&b->theaplock);
+                                       return GDK_FAIL;
+                               }
+                               *th = (Heap) {
+                                       .farmid = b->tvheap->farmid,
+                                       .parentid = b->tvheap->parentid,
+                                       .dirty = true,
+                                       .hasfile = b->tvheap->hasfile,
+                                       .refs = ATOMIC_VAR_INIT(1),
+                               };
+                               strtcpy(th->filename, b->tvheap->filename, sizeof(th->filename));
+                               if (ATOMheap(b->ttype, th, 0) != GDK_SUCCEED) {
+                                       MT_lock_unset(&b->theaplock);
+                                       return GDK_FAIL;
+                               }
+                               tvp = b->tvheap->parentid;
+                               HEAPdecref(b->tvheap, false);
+                               b->tvheap = th;
                        }
-                       *th = (Heap) {
-                               .farmid = b->tvheap->farmid,
-                               .parentid = b->tvheap->parentid,
-                               .dirty = true,
-                               .hasfile = b->tvheap->hasfile,
-                               .refs = ATOMIC_VAR_INIT(1),
-                       };
-                       strtcpy(th->filename, b->tvheap->filename, sizeof(th->filename));
-                       if (ATOMheap(b->ttype, th, 0) != GDK_SUCCEED) {
-                               MT_lock_unset(&b->theaplock);
-                               return GDK_FAIL;
+               } else {
+                       /* do heap-delete of all inserted atoms */
+                       void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
+
+                       /* TYPE_str has no del method, so we shouldn't get here */
+                       assert(tatmdel == NULL || b->twidth == sizeof(var_t));
+                       if (tatmdel) {
+                               BATiter bi = bat_iterator_nolock(b);
+
+                               for (p = b->batInserted, q = BATcount(b); p < q; p++)
+                                       (*tatmdel)(b->tvheap, (var_t*) BUNtloc(&bi,p));
+                               b->tvheap->dirty = true;
                        }
-                       tvp = b->tvheap->parentid;
-                       HEAPdecref(b->tvheap, false);
-                       b->tvheap = th;
-               }
-       } else {
-               /* do heap-delete of all inserted atoms */
-               void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
-
-               /* TYPE_str has no del method, so we shouldn't get here */
-               assert(tatmdel == NULL || b->twidth == sizeof(var_t));
-               if (tatmdel) {
-                       BATiter bi = bat_iterator_nolock(b);
-
-                       for (p = b->batInserted, q = BATcount(b); p < q; p++)
-                               (*tatmdel)(b->tvheap, (var_t*) BUNtloc(&bi,p));
-                       b->tvheap->dirty = true;
                }
        }
 
@@ -697,9 +699,9 @@ COLcopy2(BAT *b, int tt, bool writable, 
 
        BATcheck(b, NULL);
 
-       /* can't share vheap when persistent */
-       assert(!mayshare || role == TRANSIENT);
-       if (mayshare && role != TRANSIENT)
+       /* can't share vheap when persistent (unless ustr) */
+       assert(!mayshare || role == TRANSIENT || b->ustr);
+       if (mayshare && role != TRANSIENT && !b->ustr)
                mayshare = false;
 
        /* maybe a bit ugly to change the requested bat type?? */
@@ -794,6 +796,10 @@ COLcopy2(BAT *b, int tt, bool writable, 
                         * memcpy (if true) or must do a slower
                         * individual insert (if false) */
                        slowcopy = true;
+               } else if (b->ustr) {
+                       /* copy is not a ustr bat so we don't want to
+                        * carry all unused strings */
+                       slowcopy = true;
                }
        }
 
@@ -801,6 +807,9 @@ COLcopy2(BAT *b, int tt, bool writable, 
        if (bn == NULL) {
                goto bunins_failed;
        }
+       if (b->ustr && mayshare && BATconvert2ustr(bn) != GDK_SUCCEED) {
+               goto bunins_failed;
+       }
        if (bn->tvheap != NULL && bn->tvheap->base == NULL && !mayshare) {
                /* this combination can happen since the last
                 * argument of COLnew2 not being zero triggers a
@@ -818,10 +827,12 @@ COLcopy2(BAT *b, int tt, bool writable, 
                /* case (3): just copy the heaps */
                if (bn->tvheap) {
                        if (mayshare) {
-                               HEAPincref(bi.vh);
-                               HEAPdecref(bn->tvheap, true);
-                               BBPretain(bi.vh->parentid);
-                               bn->tvheap = bi.vh;
+                               if (!bn->ustr) {
+                                       HEAPincref(bi.vh);
+                                       HEAPdecref(bn->tvheap, true);
+                                       BBPretain(bi.vh->parentid);
+                                       bn->tvheap = bi.vh;
+                               }
                        } else {
                                if (HEAPextend(bn->tvheap, bi.vhfree, true) != GDK_SUCCEED)
                                        goto bunins_failed;
@@ -980,7 +991,7 @@ BUNappendmulti(BAT *b, const void *value
 
        BATcheck(b, GDK_FAIL);
 
-       assert(!VIEWtparent(b));
+       assert(!VIEWtparent(b) || b->ustr);
 
        if (count == 0)
                return GDK_SUCCEED;
@@ -1020,7 +1031,7 @@ BUNappendmulti(BAT *b, const void *value
                }
        }
 
-       if (unshare_varsized_heap(b) != GDK_SUCCEED) {
+       if (!b->ustr && unshare_varsized_heap(b) != GDK_SUCCEED) {
                return GDK_FAIL;
        }
 
@@ -1831,20 +1842,17 @@ void_inplace(BAT *b, oid id, const void 
  * known and a hash index is available, one should use the inline
  * functions to speed-up processing.
  */
-static BUN
-slowfnd(BAT *b, const void *v)
+static inline BUN
+slowfnd(BATiter *bi, const void *v)
 {
-       BATiter bi = bat_iterator(b);
        BUN p, q;
-       bool (*atomeq)(const void *, const void *) = ATOMequal(bi.type);
+       bool (*atomeq)(const void *, const void *) = ATOMequal(bi->type);
 
-       BATloop(&bi, p, q) {
-               if ((*atomeq)(v, BUNtail(&bi, p))) {
-                       bat_iterator_end(&bi);
+       BATloop(bi, p, q) {
+               if ((*atomeq)(v, BUNtail(bi, p))) {
                        return p;
                }
        }
-       bat_iterator_end(&bi);
        return BUN_NONE;
 }
 
@@ -1896,12 +1904,11 @@ BUNfnd(BAT *b, const void *v)
                if (BATordered(b) || BATordered_rev(b))
                        return SORTfnd(b, v);
        }
+       bi = bat_iterator(b);   /* outside of hashlock */
        if (BAThash(b) == GDK_SUCCEED) {
-               bi = bat_iterator(b); /* outside of hashlock */
                MT_rwlock_rdlock(&b->thashlock);
                if (b->thash == NULL) {
                        MT_rwlock_rdunlock(&b->thashlock);
-                       bat_iterator_end(&bi);
                        goto hashfnd_failed;
                }
                switch (ATOMbasetype(bi.type)) {
@@ -1963,7 +1970,9 @@ BUNfnd(BAT *b, const void *v)
   hashfnd_failed:
        /* can't build hash table, search the slow way */
        GDKclrerr();
-       return slowfnd(b, v);
+       r = slowfnd(&bi, v);
+       bat_iterator_end(&bi);
+       return r;
 }
 
 /*
@@ -2351,7 +2360,7 @@ BATcheckmodes(BAT *b, bool existing)
                dirty |= (b->theap->newstorage != m1);
        }
 
-       if (b->tvheap) {
+       if (b->tvheap && !b->ustr) {
                bool ta = (b->batRestricted == BAT_APPEND) && ATOMappendpriv(b->ttype, b->tvheap);
                m3 = HEAPcommitpersistence(b->tvheap, wr || ta, existing);
                dirty |= (b->tvheap->newstorage != m3);
@@ -2361,7 +2370,7 @@ BATcheckmodes(BAT *b, bool existing)
 
        if (dirty) {
                b->theap->newstorage = m1;
-               if (b->tvheap)
+               if (b->tvheap && !b->ustr)
                        b->tvheap->newstorage = m3;
        }
        return GDK_SUCCEED;
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to