Changeset: 38a40db4507d for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=38a40db4507d
Modified Files:
        gdk/gdk_cand.c
        gdk/gdk_cand.h
        sql/backends/monet5/sql.c
        sql/storage/bat/bat_storage.c
        sql/storage/bat/bat_storage.h
        sql/storage/bat/bat_table.c
Branch: unlock
Log Message:

Optimize the handling of deletes some more: cache the delete count in the storage struct and pass the row count to BATmaskedcands.


diffs (281 lines):

diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c
--- a/gdk/gdk_cand.c
+++ b/gdk/gdk_cand.c
@@ -1298,7 +1298,7 @@ BATnegcands(BUN nr, BAT *odels)
 }
 
 BAT *
-BATmaskedcands(oid hseq, BAT *masked, bool selected)
+BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected)
 {
        const char *nme;
        Heap *msks;
@@ -1326,7 +1326,7 @@ BATmaskedcands(oid hseq, BAT *masked, bo
        strconcat_len(msks->filename, sizeof(msks->filename),
                      nme, ".theap", NULL);
 
-       nmask = (BATcount(masked) + 31) / 32;
+       nmask = (nr + 31) / 32;
        if (HEAPalloc(msks, nmask + (sizeof(ccand_t)/sizeof(uint32_t)), 
sizeof(uint32_t), 0) != GDK_SUCCEED) {
                GDKfree(msks);
                BBPreclaim(bn);
@@ -1344,11 +1344,24 @@ BATmaskedcands(oid hseq, BAT *masked, bo
                memcpy(r, Tloc(masked, 0), nmask * sizeof(uint32_t));
        } else {
                const uint32_t *s = (const uint32_t *) Tloc(masked, 0);
-               for (BUN i = 0; i < nmask; i++)
+               BUN nmask_ = (BATcount(masked) + 31)/32;
+               for (BUN i = 0; i < nmask_; i++)
                        r[i] = ~s[i];
        }
+       if (nr > BATcount(masked)) {
+               BUN rest = BATcount(masked)&31, nmask_ = 
(BATcount(masked)+31)/32, nrest = nr;
+               int v = 0;
+               if (nmask_ > nmask)
+                       nrest = 32-rest;
+
+               for (BUN j = rest; j < nrest; j++)
+                       v |= 1U<<j;
+               r[nmask_ -1] |= v;
+               for (BUN j = nmask_; j < nmask; j++)
+                       r[j] = ~0;
+       }
        /* make sure last word doesn't have any spurious bits set */
-       BUN cnt = BATcount(masked) % 32;
+       BUN cnt = nr % 32;
        if (cnt > 0)
                r[nmask - 1] &= (1U << cnt) - 1;
        cnt = 0;
diff --git a/gdk/gdk_cand.h b/gdk/gdk_cand.h
--- a/gdk/gdk_cand.h
+++ b/gdk/gdk_cand.h
@@ -202,7 +202,7 @@ gdk_export BAT *canditer_sliceval(struct
 gdk_export BAT *canditer_slice2(struct canditer *ci, BUN lo1, BUN hi1, BUN 
lo2, BUN hi2);
 gdk_export BAT *canditer_slice2val(struct canditer *ci, oid lo1, oid hi1, oid 
lo2, oid hi2);
 gdk_export BAT *BATnegcands(BUN nr, BAT *odels);
-gdk_export BAT *BATmaskedcands(oid hseq, BAT *masked, bool selected);
+gdk_export BAT *BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected);
 gdk_export BAT *BATunmask(BAT *b);
 
 gdk_export BAT *BATmergecand(BAT *a, BAT *b);
diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -2309,7 +2309,7 @@ SQLtid(Client cntxt, MalBlkPtr mb, MalSt
                }
                /* true == deleted, need not deleted  */
                if (bn) {
-                       tids = BATmaskedcands(sb, bn, false);
+                       tids = BATmaskedcands(sb, nr, bn, false);
                        BBPunfix(bn->batCacheid);
                        if (tids == NULL) {
                                throw(MAL, "sql.tids", SQLSTATE(45003) 
"TIDdeletes failed");
diff --git a/sql/storage/bat/bat_storage.c b/sql/storage/bat/bat_storage.c
--- a/sql/storage/bat/bat_storage.c
+++ b/sql/storage/bat/bat_storage.c
@@ -184,10 +184,11 @@ cs_real_update_bats( column_storage *cs,
 static size_t
 count_deletes(storage *d)
 {
-       /* needs to be optimized */
+       if (d->cached_cnt)
+               return d->cnt+d->ucnt;
+
+       lng cnt = 0;
        BAT *b = temp_descriptor(d->cs.bid);
-       lng cnt = 0;
-
        if (!d->cs.ucnt) {
                if (BATsum(&cnt, TYPE_lng, b, NULL, true, false, false) != 
GDK_SUCCEED) {
                        bat_destroy(b);
@@ -218,7 +219,9 @@ count_deletes(storage *d)
                }
                bat_destroy(c);
        }
-       return (size_t) cnt;
+       d->cached_cnt = 1;
+       d->cnt = (size_t)cnt;
+       return d->cnt;
 }
 
 static size_t
@@ -765,6 +768,9 @@ dup_dbat(storage *obat, storage *bat, in
                MT_lock_unset(&segs_lock);
                assert(bat->end <= bat->segs->end);
        }
+       bat->cached_cnt = obat->cached_cnt;
+       bat->cnt = obat->cnt + obat->ucnt;
+       bat->ucnt = 0;
        return dup_cs(&obat->cs, &bat->cs, TYPE_msk, is_new, temp);
 }
 
@@ -838,6 +844,7 @@ delta_delete_bat( storage *bat, BAT *i, 
 
        if (i->ttype == TYPE_msk || mask_cand(i))
                i = BATunmask(i);
+       bat->ucnt+=BATcount(i);
        t = BATconstant(i->hseqbase, TYPE_msk, &T, BATcount(i), TRANSIENT);
        int ok = LOG_OK;
 
@@ -855,6 +862,7 @@ delta_delete_val( storage *bat, oid rid,
 {
        /* update pos */
        msk T = TRUE;
+       bat->ucnt++;
        return cs_update_val(&bat->cs, rid, &T, is_new);
 }
 
@@ -1011,8 +1019,8 @@ claim_tab(sql_trans *tr, sql_table *t, s
 
        assert(isNew(t) || isTempTable(t) || s->cs.cleared || BATcount(b) == 
slot);
 
+
        msk deleted = FALSE;
-
        /* general case, write deleted in the central bat (ie others don't see 
these values) and
         * insert rows into the update bats */
        if (!s->cs.cleared && ps != s && !isTempTable(t)) {
@@ -1385,6 +1393,8 @@ create_del(sql_trans *tr, sql_table *t)
                assert(!bat->segs && !bat->end);
                bat->segs = new_segments(0);
                bat->end = 0;
+               bat->cnt = bat->ucnt = 0;
+               bat->cached_cnt = 1;
 
                b = bat_new(TYPE_msk, t->sz, PERSISTENT);
                if(b != NULL) {
@@ -1703,12 +1713,14 @@ clear_del(sql_trans *tr, sql_table *t)
                return;
        t->s->base.wtime = t->base.wtime = tr->wstime;
        storage *s = t->data;
+       s->cnt = 0;
        clear_cs(tr, &s->cs);
 
        if (s->segs)
                destroy_segments(s->segs);
        s->segs = new_segments(0);
        s->end = 0;
+       s->cnt = s->ucnt = 0;
 }
 
 static BUN
@@ -1948,8 +1960,30 @@ tr_merge_delta( sql_trans *tr, sql_delta
 }
 
 static int
+cs_grow( column_storage *cs, BUN nr)
+{
+       if (cs->bid) {
+               BAT *cur = temp_descriptor(cs->bid);
+               if (!cur)
+                       return LOG_ERR;
+               if (BATcount(cur) < nr) {
+                       msk deleted = 0;
+                       /* todo faster inserts */
+                       for(BUN i = BATcount(cur); i<nr; i++) {
+                               if (BUNappend(cur, &deleted, true) != 
GDK_SUCCEED) {
+                                       bat_destroy(cur);
+                                       return LOG_ERR;
+                               }
+                       }
+               }
+       }
+       return LOG_OK;
+}
+
+static int
 tr_update_dbat( sql_trans *tr, storage *ts, storage *fs)
 {
+       int grow = 0;
        if (fs->cs.cleared) {
                destroy_segments(ts->segs);
                MT_lock_set(&segs_lock);
@@ -1957,11 +1991,14 @@ tr_update_dbat( sql_trans *tr, storage *
                MT_lock_unset(&segs_lock);
                ts->end = ts->segs->end;
                assert(ts->segs->head);
+               ts->cnt = 0;
+               ts->ucnt = 0;
        } else {
                assert(ts->segs == fs->segs);
                /* merge segments or cleanup ? */
                segment *segs = ts->segs->head, *seg = segs;
                for (; segs; segs = segs->next) {
+                       grow |= (segs->owner == tr);
                        if (segs->owner == tr || !segs->owner) {
                                /* merge range */
                                segs->owner = NULL;
@@ -1980,6 +2017,10 @@ tr_update_dbat( sql_trans *tr, storage *
                }
                ts->end = ts->segs->end;
        }
+       ts->cnt += fs->ucnt;
+       /* first check if bat needs too grow */
+       if ( 0 && (fs->ucnt || grow))
+               cs_grow(&ts->cs, ts->end);
        int ok = tr_update_cs( tr, &ts->cs, &fs->cs);
        if (ok == LOG_OK && ts->next) {
                ok = destroy_dbat(tr, ts->next);
diff --git a/sql/storage/bat/bat_storage.h b/sql/storage/bat/bat_storage.h
--- a/sql/storage/bat/bat_storage.h
+++ b/sql/storage/bat/bat_storage.h
@@ -36,15 +36,16 @@ typedef struct segment {
 /* container structure too allow sharing this structure */
 typedef struct segments {
        sql_ref r;
-       //BUN start;
-       BUN end;        /* current end */
+       BUN end;                /* current end */
        struct segment *head;
 } segments;
 
 typedef struct storage {
-       column_storage cs;
-       //BUN start;
-       BUN end;
+       column_storage cs;      /* storage on disk */
+       bit cached_cnt;
+       size_t cnt;
+       size_t ucnt;    /* updates (ie deletes) in this transaction */
+       BUN end;                /* end maybe less than the segments indicate */
        segments *segs; /* local used segements */
        struct storage *next;
 } storage;
diff --git a/sql/storage/bat/bat_table.c b/sql/storage/bat/bat_table.c
--- a/sql/storage/bat/bat_table.c
+++ b/sql/storage/bat/bat_table.c
@@ -25,11 +25,22 @@ static BAT *
 
                if (d && store_funcs.count_del(tr, t, 2) > 0) {
                    BAT *nd = COLcopy(d, d->ttype, true, TRANSIENT);
+                       bat_destroy(d);
+                       d = nd;
+                       /*
+                       storage *s = t->data;
+                       if (BATcount(d) < s->segs->head->end) {
+                               msk deleted = 0;
+                               for(BUN i = BATcount(d); i<nr; i++) {
+                                       if (BUNappend(d, &deleted, true) != 
GDK_SUCCEED) {
+                                               bat_destroy(d);
+                                               return NULL;
+                                       }
+                               }
+                       }
+                       */
                        BAT *ui = store_funcs.bind_del(tr, t, RD_UPD_ID);
                        BAT *uv = store_funcs.bind_del(tr, t, RD_UPD_VAL);
-
-                       bat_destroy(d);
-                       d = nd;
                if (!ui || !uv || !d || BATreplace(d, ui, uv, true) != 
GDK_SUCCEED) {
                                bat_destroy(d);
                                bat_destroy(ui);
@@ -42,7 +53,7 @@ static BAT *
                if (!d)
                        return NULL;
                /* true == deleted, need not deleted  */
-               tids = BATmaskedcands(0, d, false);
+               tids = BATmaskedcands(0, nr, d, false);
                bat_destroy(d);
                if(tids == NULL)
                        return NULL;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to