Changeset: 9cdaaa7ad00c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/9cdaaa7ad00c
Branch: pushcands
Log Message:
Merged with default
diffs (truncated from 2504 to 300 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -742,8 +742,9 @@ typedef struct {
#define assert_shift_width(shift,width) assert(((shift) == 0 && (width) == 0)
|| ((unsigned)1<<(shift)) == (unsigned)(width))
#define GDKLIBRARY_MINMAX_POS 061042U /* first in Nov2019: no min/max
position; no BBPinfo value */
+#define GDKLIBRARY_TAILN 061043U /* first after Oct2020: str offset
heaps names don't take width into account */
/* if the version number is updated, also fix snapshot_bats() in bat_logger.c
*/
-#define GDKLIBRARY 061043U /* first after Oct2020 */
+#define GDKLIBRARY 061044U /* first after Oct2020 */
typedef struct BAT {
/* static bat properties */
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -329,7 +329,7 @@ VIEWreset(BAT *b)
.farmid = BBPselectfarm(b->batRole, b->ttype, offheap),
};
ATOMIC_INIT(&tail->refs, 1);
- strconcat_len(tail->filename, sizeof(tail->filename), nme, ".tail",
NULL);
+ settailname(tail, nme, b->ttype, b->twidth);
if (b->ttype && HEAPalloc(tail, cnt, Tsize(b), Tsize(b)) !=
GDK_SUCCEED) {
GDKfree(tail);
BBPunfix(v->batCacheid);
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -179,6 +179,55 @@ BATsetdims(BAT *b)
b->tvarsized = b->ttype == TYPE_void || BATatoms[b->ttype].atomPut !=
NULL;
}
+const char *
+gettailname(const BAT *b)
+{
+ if (b->ttype != TYPE_str)
+ return "tail";
+ switch (b->twidth) {
+ case 1:
+ return "tail1";
+ case 2:
+ return "tail2";
+#if SIZEOF_VAR_T == 8
+ case 4:
+ return "tail4";
+#endif
+ default:
+ return "tail";
+ }
+}
+
+void
+settailname(Heap *restrict tail, const char *restrict physnme, int tt, int
width)
+{
+ strconcat_len(tail->filename, sizeof(tail->filename), physnme,
+ ".tail", NULL);
+ if (tt == TYPE_str) {
+ switch (width) {
+ case 1:
+ strconcat_len(tail->filename,
+ sizeof(tail->filename), physnme,
+ ".tail1", NULL);
+ break;
+ case 2:
+ strconcat_len(tail->filename,
+ sizeof(tail->filename), physnme,
+ ".tail2", NULL);
+ break;
+#if SIZEOF_VAR_T == 8
+ case 4:
+ strconcat_len(tail->filename,
+ sizeof(tail->filename), physnme,
+ ".tail4", NULL);
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+}
+
/*
* @- BAT allocation
* Allocate BUN heap and variable-size atomheaps (see e.g. strHeap).
@@ -223,15 +272,13 @@ COLnew(oid hseq, int tt, BUN cap, role_t
if (ATOMstorage(tt) == TYPE_msk)
cap /= 8; /* 8 values per byte */
+ else if (tt == TYPE_str)
+ settailname(bn->theap, BBP_physical(bn->batCacheid), tt,
bn->twidth);
/* alloc the main heaps */
if (tt && HEAPalloc(bn->theap, cap, bn->twidth, ATOMsize(bn->ttype)) !=
GDK_SUCCEED) {
goto bailout;
}
- if (bn->theap->storage == STORE_MMAP) {
- bn->twidth = ATOMsize(bn->ttype);
- bn->tshift = ATOMelmshift(Tsize(bn));
- }
if (bn->tvheap && ATOMheap(tt, bn->tvheap, cap) != GDK_SUCCEED) {
goto bailout;
@@ -680,8 +727,9 @@ BATdestroy(BAT *b)
static void
heapmove(Heap *dst, Heap *src)
{
- HEAPfree(dst, false);
- /* copy all fields of src except filename and refs */
+ HEAPfree(dst, strcmp(dst->filename, src->filename) != 0);
+ /* copy all fields of src except refs */
+ strcpy_len(dst->filename, src->filename, sizeof(dst->filename));
dst->free = src->free;
dst->size = src->size;
dst->base = src->base;
@@ -808,9 +856,8 @@ COLcopy(BAT *b, int tt, bool writable, r
.farmid = BBPselectfarm(role, b->ttype,
varheap),
.parentid = bn->batCacheid,
};
- strconcat_len(bthp.filename, sizeof(bthp.filename),
- BBP_physical(bn->batCacheid),
- ".tail", NULL);
+ settailname(&bthp, BBP_physical(bn->batCacheid),
+ bn->ttype, bn->twidth);
strconcat_len(thp.filename, sizeof(thp.filename),
BBP_physical(bn->batCacheid),
".theap", NULL);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -66,7 +66,7 @@ insert_string_bat(BAT *b, BAT *n, struct
#if SIZEOF_VAR_T == 8
unsigned int tiv; /* tail value-as-int */
#endif
- var_t v; /* value */
+ var_t v = GDK_VAROFFSET; /* value */
size_t off; /* offset within n's string heap */
BUN cnt = ci->ncand;
BUN oldcnt = BATcount(b);
@@ -80,9 +80,9 @@ insert_string_bat(BAT *b, BAT *n, struct
return GDK_SUCCEED;
ni = bat_iterator(n);
tp = NULL;
- if ((!GDK_ELIMDOUBLES(b->tvheap) || oldcnt == 0) &&
- !GDK_ELIMDOUBLES(n->tvheap) &&
- b->tvheap->hashash == n->tvheap->hashash) {
+ if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
+ !GDK_ELIMDOUBLES(n->tvheap) &&
+ b->tvheap->hashash == n->tvheap->hashash)) {
if (b->batRole == TRANSIENT || b->tvheap == n->tvheap) {
/* If b is in the transient farm (i.e. b will
* never become persistent), we try some
@@ -128,6 +128,12 @@ insert_string_bat(BAT *b, BAT *n, struct
MT_lock_unset(&n->theaplock);
b->batDirtydesc = true;
toff = 0;
+ v = n->twidth == 1 ? GDK_VAROFFSET + 1 :
+ n->twidth == 2 ? GDK_VAROFFSET + (1 <<
9) :
+#if SIZEOF_VAR_T == 8
+ n->twidth != 4 ? (var_t) 1 << 33 :
+#endif
+ (var_t) 1 << 17;
} else if (b->tvheap->parentid == n->tvheap->parentid &&
ci->tpe == cand_dense) {
toff = 0;
@@ -135,6 +141,25 @@ insert_string_bat(BAT *b, BAT *n, struct
unshare_varsized_heap(b) != GDK_SUCCEED) {
return GDK_FAIL;
}
+ } else if (oldcnt == 0) {
+ v = n->twidth == 1 ? GDK_VAROFFSET + 1 :
+ n->twidth == 2 ? GDK_VAROFFSET + (1 << 9) :
+#if SIZEOF_VAR_T == 8
+ n->twidth != 4 ? (var_t) 1 << 33 :
+#endif
+ (var_t) 1 << 17;
+ if (b->tvheap->size < n->tvheap->free) {
+ Heap *h = HEAPgrow(b->tvheap, n->tvheap->free);
+ if (h == NULL)
+ return GDK_FAIL;
+ MT_lock_set(&b->theaplock);
+ HEAPdecref(b->tvheap, false);
+ b->tvheap = h;
+ MT_lock_unset(&b->theaplock);
+ }
+ memcpy(b->tvheap->base, n->tvheap->base,
n->tvheap->free);
+ b->tvheap->free = n->tvheap->free;
+ toff = 0;
}
if (toff == ~(size_t) 0 && cnt > 1024 && b->tvheap->free >=
n->tvheap->free) {
/* If b and n aren't sharing their string
@@ -196,9 +221,8 @@ insert_string_bat(BAT *b, BAT *n, struct
return GDK_FAIL;
/* make sure there is (vertical) space in the offset heap, we
- * may have to widen the heap later */
- if (GDKupgradevarheap(b, (var_t) b->tvheap->size, BATcount(b) + cnt,
- false) != GDK_SUCCEED)
+ * may also widen if v was set to some limit above */
+ if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ?
b->batCapacity : oldcnt + cnt, false) != GDK_SUCCEED)
return GDK_FAIL;
if (toff == 0 && n->twidth == b->twidth && ci->tpe == cand_dense) {
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -494,8 +494,7 @@ heapinit(BAT *b, const char *buf, int *h
b->theap->free = (size_t) free;
b->theap->size = (size_t) size;
b->theap->base = NULL;
- strconcat_len(b->theap->filename, sizeof(b->theap->filename),
- filename, ".tail", NULL);
+ settailname(b->theap, filename, t, width);
b->theap->storage = (storage_t) storage;
b->theap->newstorage = (storage_t) storage;
b->theap->farmid = BBPselectfarm(PERSISTENT, b->ttype, offheap);
@@ -729,8 +728,9 @@ BBPreadEntries(FILE *fp, unsigned bbpver
/* check that the necessary files for all BATs exist and are large
* enough */
static gdk_return
-BBPcheckbats(void)
+BBPcheckbats(unsigned bbpversion)
{
+ (void) bbpversion;
for (bat bid = 1; bid < (bat) ATOMIC_GET(&BBPsize); bid++) {
struct stat statb;
BAT *b;
@@ -744,11 +744,34 @@ BBPcheckbats(void)
/* no files needed */
continue;
}
- path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid),
"tail");
+ path = GDKfilepath(0, BATDIR, b->theap->filename, NULL);
if (path == NULL)
return GDK_FAIL;
- if (MT_stat(path, &statb) < 0) {
- GDKsyserror("BBPcheckbats: cannot stat file %s
(expected size %zu)\n",
+#ifdef GDKLIBRARY_TAILN
+ /* if bbpversion > GDKLIBRARY_TAILN, the offset heap can
+ * exist with either name .tail1 (etc) or .tail, if <=
+ * GDKLIBRARY_TAILN, only with .tail */
+ char tailsave = 0;
+ size_t taillen = 0;
+ if (b->ttype == TYPE_str &&
+ b->twidth < SIZEOF_VAR_T) {
+ /* old version: .tail, not .tail1, .tail2, .tail4 */
+ taillen = strlen(path) - 1;
+ tailsave = path[taillen];
+ path[taillen] = 0;
+ }
+#endif
+ if (MT_stat(path, &statb) < 0
+#ifdef GDKLIBRARY_TAILN
+ && bbpversion > GDKLIBRARY_TAILN
+ && b->ttype == TYPE_str
+ && b->twidth < SIZEOF_VAR_T
+ && (path[taillen] = tailsave) != 0
+ && MT_stat(path, &statb) < 0
+#endif
+ ) {
+
+ GDKsyserror("cannot stat file %s (expected size %zu)\n",
path, b->theap->free);
GDKfree(path);
return GDK_FAIL;
@@ -764,7 +787,7 @@ BBPcheckbats(void)
if (path == NULL)
return GDK_FAIL;
if (MT_stat(path, &statb) < 0) {
- GDKsyserror("BBPcheckbats: cannot stat file
%s\n",
+ GDKsyserror("cannot stat file %s\n",
path);
GDKfree(path);
return GDK_FAIL;
@@ -805,6 +828,7 @@ BBPheader(FILE *fp, int *lineno)
return 0;
}
if (bbpversion != GDKLIBRARY &&
+ bbpversion != GDKLIBRARY_TAILN &&
bbpversion != GDKLIBRARY_MINMAX_POS) {
TRC_CRITICAL(GDK, "incompatible BBP version: expected 0%o, got
0%o. "
"This database was probably created by a %s
version of MonetDB.",
@@ -958,6 +982,30 @@ BBPaddfarm(const char *dirname, uint32_t
return GDK_FAIL;
}
+#ifdef GDKLIBRARY_TAILN
+static gdk_return
+movestrbats(void)
+{
+ for (bat bid = 1, nbat = (bat) ATOMIC_GET(&BBPsize); bid < nbat; bid++)
{
+ BAT *b = BBP_desc(bid);
+ if (b == NULL) {
+ /* not a valid BAT */
+ continue;
+ }
+ if (b->ttype != TYPE_str || b->twidth == SIZEOF_VAR_T)
+ continue;
+ char *oldpath = GDKfilepath(0, BATDIR,
BBP_physical(b->batCacheid), "tail");
+ char *newpath = GDKfilepath(0, BATDIR, b->theap->filename,
NULL);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list