Changeset: 38a40db4507d for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=38a40db4507d
Modified Files:
gdk/gdk_cand.c
gdk/gdk_cand.h
sql/backends/monet5/sql.c
sql/storage/bat/bat_storage.c
sql/storage/bat/bat_storage.h
sql/storage/bat/bat_table.c
Branch: unlock
Log Message:
optimize some more deletes handling
diffs (281 lines):
diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c
--- a/gdk/gdk_cand.c
+++ b/gdk/gdk_cand.c
@@ -1298,7 +1298,7 @@ BATnegcands(BUN nr, BAT *odels)
}
BAT *
-BATmaskedcands(oid hseq, BAT *masked, bool selected)
+BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected)
{
const char *nme;
Heap *msks;
@@ -1326,7 +1326,7 @@ BATmaskedcands(oid hseq, BAT *masked, bo
strconcat_len(msks->filename, sizeof(msks->filename),
nme, ".theap", NULL);
- nmask = (BATcount(masked) + 31) / 32;
+ nmask = (nr + 31) / 32;
if (HEAPalloc(msks, nmask + (sizeof(ccand_t)/sizeof(uint32_t)),
sizeof(uint32_t), 0) != GDK_SUCCEED) {
GDKfree(msks);
BBPreclaim(bn);
@@ -1344,11 +1344,24 @@ BATmaskedcands(oid hseq, BAT *masked, bo
memcpy(r, Tloc(masked, 0), nmask * sizeof(uint32_t));
} else {
const uint32_t *s = (const uint32_t *) Tloc(masked, 0);
- for (BUN i = 0; i < nmask; i++)
+ BUN nmask_ = (BATcount(masked) + 31)/32;
+ for (BUN i = 0; i < nmask_; i++)
r[i] = ~s[i];
}
+ if (nr > BATcount(masked)) {
+ BUN rest = BATcount(masked)&31, nmask_ =
(BATcount(masked)+31)/32, nrest = nr;
+ int v = 0;
+ if (nmask_ > nmask)
+ nrest = 32-rest;
+
+ for (BUN j = rest; j < nrest; j++)
+ v |= 1U<<j;
+ r[nmask_ -1] |= v;
+ for (BUN j = nmask_; j < nmask; j++)
+ r[j] = ~0;
+ }
/* make sure last word doesn't have any spurious bits set */
- BUN cnt = BATcount(masked) % 32;
+ BUN cnt = nr % 32;
if (cnt > 0)
r[nmask - 1] &= (1U << cnt) - 1;
cnt = 0;
diff --git a/gdk/gdk_cand.h b/gdk/gdk_cand.h
--- a/gdk/gdk_cand.h
+++ b/gdk/gdk_cand.h
@@ -202,7 +202,7 @@ gdk_export BAT *canditer_sliceval(struct
gdk_export BAT *canditer_slice2(struct canditer *ci, BUN lo1, BUN hi1, BUN
lo2, BUN hi2);
gdk_export BAT *canditer_slice2val(struct canditer *ci, oid lo1, oid hi1, oid
lo2, oid hi2);
gdk_export BAT *BATnegcands(BUN nr, BAT *odels);
-gdk_export BAT *BATmaskedcands(oid hseq, BAT *masked, bool selected);
+gdk_export BAT *BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected);
gdk_export BAT *BATunmask(BAT *b);
gdk_export BAT *BATmergecand(BAT *a, BAT *b);
diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -2309,7 +2309,7 @@ SQLtid(Client cntxt, MalBlkPtr mb, MalSt
}
/* true == deleted, need not deleted */
if (bn) {
- tids = BATmaskedcands(sb, bn, false);
+ tids = BATmaskedcands(sb, nr, bn, false);
BBPunfix(bn->batCacheid);
if (tids == NULL) {
throw(MAL, "sql.tids", SQLSTATE(45003)
"TIDdeletes failed");
diff --git a/sql/storage/bat/bat_storage.c b/sql/storage/bat/bat_storage.c
--- a/sql/storage/bat/bat_storage.c
+++ b/sql/storage/bat/bat_storage.c
@@ -184,10 +184,11 @@ cs_real_update_bats( column_storage *cs,
static size_t
count_deletes(storage *d)
{
- /* needs to be optimized */
+ if (d->cached_cnt)
+ return d->cnt+d->ucnt;
+
+ lng cnt = 0;
BAT *b = temp_descriptor(d->cs.bid);
- lng cnt = 0;
-
if (!d->cs.ucnt) {
if (BATsum(&cnt, TYPE_lng, b, NULL, true, false, false) !=
GDK_SUCCEED) {
bat_destroy(b);
@@ -218,7 +219,9 @@ count_deletes(storage *d)
}
bat_destroy(c);
}
- return (size_t) cnt;
+ d->cached_cnt = 1;
+ d->cnt = (size_t)cnt;
+ return d->cnt;
}
static size_t
@@ -765,6 +768,9 @@ dup_dbat(storage *obat, storage *bat, in
MT_lock_unset(&segs_lock);
assert(bat->end <= bat->segs->end);
}
+ bat->cached_cnt = obat->cached_cnt;
+ bat->cnt = obat->cnt + obat->ucnt;
+ bat->ucnt = 0;
return dup_cs(&obat->cs, &bat->cs, TYPE_msk, is_new, temp);
}
@@ -838,6 +844,7 @@ delta_delete_bat( storage *bat, BAT *i,
if (i->ttype == TYPE_msk || mask_cand(i))
i = BATunmask(i);
+ bat->ucnt+=BATcount(i);
t = BATconstant(i->hseqbase, TYPE_msk, &T, BATcount(i), TRANSIENT);
int ok = LOG_OK;
@@ -855,6 +862,7 @@ delta_delete_val( storage *bat, oid rid,
{
/* update pos */
msk T = TRUE;
+ bat->ucnt++;
return cs_update_val(&bat->cs, rid, &T, is_new);
}
@@ -1011,8 +1019,8 @@ claim_tab(sql_trans *tr, sql_table *t, s
assert(isNew(t) || isTempTable(t) || s->cs.cleared || BATcount(b) ==
slot);
+
msk deleted = FALSE;
-
/* general case, write deleted in the central bat (ie others don't see
these values) and
* insert rows into the update bats */
if (!s->cs.cleared && ps != s && !isTempTable(t)) {
@@ -1385,6 +1393,8 @@ create_del(sql_trans *tr, sql_table *t)
assert(!bat->segs && !bat->end);
bat->segs = new_segments(0);
bat->end = 0;
+ bat->cnt = bat->ucnt = 0;
+ bat->cached_cnt = 1;
b = bat_new(TYPE_msk, t->sz, PERSISTENT);
if(b != NULL) {
@@ -1703,12 +1713,14 @@ clear_del(sql_trans *tr, sql_table *t)
return;
t->s->base.wtime = t->base.wtime = tr->wstime;
storage *s = t->data;
+ s->cnt = 0;
clear_cs(tr, &s->cs);
if (s->segs)
destroy_segments(s->segs);
s->segs = new_segments(0);
s->end = 0;
+ s->cnt = s->ucnt = 0;
}
static BUN
@@ -1948,8 +1960,30 @@ tr_merge_delta( sql_trans *tr, sql_delta
}
static int
+cs_grow( column_storage *cs, BUN nr)
+{
+ if (cs->bid) {
+ BAT *cur = temp_descriptor(cs->bid);
+ if (!cur)
+ return LOG_ERR;
+ if (BATcount(cur) < nr) {
+ msk deleted = 0;
+ /* todo faster inserts */
+ for(BUN i = BATcount(cur); i<nr; i++) {
+ if (BUNappend(cur, &deleted, true) !=
GDK_SUCCEED) {
+ bat_destroy(cur);
+ return LOG_ERR;
+ }
+ }
+ }
+ }
+ return LOG_OK;
+}
+
+static int
tr_update_dbat( sql_trans *tr, storage *ts, storage *fs)
{
+ int grow = 0;
if (fs->cs.cleared) {
destroy_segments(ts->segs);
MT_lock_set(&segs_lock);
@@ -1957,11 +1991,14 @@ tr_update_dbat( sql_trans *tr, storage *
MT_lock_unset(&segs_lock);
ts->end = ts->segs->end;
assert(ts->segs->head);
+ ts->cnt = 0;
+ ts->ucnt = 0;
} else {
assert(ts->segs == fs->segs);
/* merge segments or cleanup ? */
segment *segs = ts->segs->head, *seg = segs;
for (; segs; segs = segs->next) {
+ grow |= (segs->owner == tr);
if (segs->owner == tr || !segs->owner) {
/* merge range */
segs->owner = NULL;
@@ -1980,6 +2017,10 @@ tr_update_dbat( sql_trans *tr, storage *
}
ts->end = ts->segs->end;
}
+ ts->cnt += fs->ucnt;
+ /* first check if bat needs to grow */
+ if ( 0 && (fs->ucnt || grow))
+ cs_grow(&ts->cs, ts->end);
int ok = tr_update_cs( tr, &ts->cs, &fs->cs);
if (ok == LOG_OK && ts->next) {
ok = destroy_dbat(tr, ts->next);
diff --git a/sql/storage/bat/bat_storage.h b/sql/storage/bat/bat_storage.h
--- a/sql/storage/bat/bat_storage.h
+++ b/sql/storage/bat/bat_storage.h
@@ -36,15 +36,16 @@ typedef struct segment {
/* container structure too allow sharing this structure */
typedef struct segments {
sql_ref r;
- //BUN start;
- BUN end; /* current end */
+ BUN end; /* current end */
struct segment *head;
} segments;
typedef struct storage {
- column_storage cs;
- //BUN start;
- BUN end;
+ column_storage cs; /* storage on disk */
+ bit cached_cnt;
+ size_t cnt;
+ size_t ucnt; /* updates (ie deletes) in this transaction */
+ BUN end; /* end may be less than the segments indicate */
segments *segs; /* local used segements */
struct storage *next;
} storage;
diff --git a/sql/storage/bat/bat_table.c b/sql/storage/bat/bat_table.c
--- a/sql/storage/bat/bat_table.c
+++ b/sql/storage/bat/bat_table.c
@@ -25,11 +25,22 @@ static BAT *
if (d && store_funcs.count_del(tr, t, 2) > 0) {
BAT *nd = COLcopy(d, d->ttype, true, TRANSIENT);
+ bat_destroy(d);
+ d = nd;
+ /*
+ storage *s = t->data;
+ if (BATcount(d) < s->segs->head->end) {
+ msk deleted = 0;
+ for(BUN i = BATcount(d); i<nr; i++) {
+ if (BUNappend(d, &deleted, true) !=
GDK_SUCCEED) {
+ bat_destroy(d);
+ return NULL;
+ }
+ }
+ }
+ */
BAT *ui = store_funcs.bind_del(tr, t, RD_UPD_ID);
BAT *uv = store_funcs.bind_del(tr, t, RD_UPD_VAL);
-
- bat_destroy(d);
- d = nd;
if (!ui || !uv || !d || BATreplace(d, ui, uv, true) !=
GDK_SUCCEED) {
bat_destroy(d);
bat_destroy(ui);
@@ -42,7 +53,7 @@ static BAT *
if (!d)
return NULL;
/* true == deleted, need not deleted */
- tids = BATmaskedcands(0, d, false);
+ tids = BATmaskedcands(0, nr, d, false);
bat_destroy(d);
if(tids == NULL)
return NULL;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list