Changeset: cc031ce310c7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cc031ce310c7
Modified Files:
gdk/gdk_align.c
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_bbp.c
gdk/gdk_imprints.c
gdk/gdk_logger.c
gdk/gdk_search.c
gdk/gdk_search.h
monetdb5/mal/mal_authorize.c
monetdb5/mal/mal_debugger.c
monetdb5/mal/mal_resource.c
monetdb5/mal/mal_runtime.c
monetdb5/modules/kernel/bat5.c
monetdb5/modules/kernel/status.c
monetdb5/modules/mal/tokenizer.c
sql/backends/monet5/sql.c
sql/storage/bat/bat_table.c
Branch: Jul2015
Log Message:
Avoid unlinking hash and imprints files at every change.
We now remember that there may be imprints or hashes available on disk
by setting the pointer to 1; a pointer value of 0 (NULL) means that there
definitely is nothing available on disk.
diffs (truncated from 618 to 300 lines):
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -785,10 +785,7 @@ VIEWdestroy(BAT *b)
assert(isVIEW(b));
/* remove any leftover private hash structures */
- if (b->H->hash)
- HASHremove(BATmirror(b));
- if (b->T->hash)
- HASHremove(b);
+ HASHdestroy(b);
IMPSdestroy(b);
VIEWunlink(b);
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1763,7 +1763,7 @@ BUNfnd(BAT *b, const void *v)
return r;
if (BATtvoid(b))
return BUNfndVOID(b, v);
- if (!b->T->hash) {
+ if (!BATcheckhash(b)) {
if (BATtordered(b) || BATtrevordered(b))
return SORTfnd(b, v);
}
@@ -1894,6 +1894,8 @@ BUNlocate(BAT *b, const void *x, const v
* strategy: create a hash on both columns, and select
* the column with the best distribution
*/
+ BATcheckhash(b);
+ BATcheckhash(BATmirror(b));
if ((b->T->hash && b->H->hash == NULL) || !dohash(b->H))
usemirror();
if (b->H->hash == NULL && (v = VIEWcreate_(b, b, TRUE)) !=
NULL) {
@@ -2116,8 +2118,8 @@ BATvmsize(BAT *b, int dirty)
dirty = 0;
return (!dirty || b->H->heap.dirty ? HEAPvmsize(&b->H->heap) : 0) +
(!dirty || b->T->heap.dirty ? HEAPvmsize(&b->T->heap) : 0) +
- ((!dirty || b->H->heap.dirty) && b->H->hash ?
HEAPvmsize(b->H->hash->heap) : 0) +
- ((!dirty || b->T->heap.dirty) && b->T->hash ?
HEAPvmsize(b->T->hash->heap) : 0) +
+ ((!dirty || b->H->heap.dirty) && b->H->hash && b->H->hash !=
(Hash *) 1 ? HEAPvmsize(b->H->hash->heap) : 0) +
+ ((!dirty || b->T->heap.dirty) && b->T->hash && b->T->hash !=
(Hash *) 1 ? HEAPvmsize(b->T->hash->heap) : 0) +
(b->H->vheap && (!dirty || b->H->vheap->dirty) ?
HEAPvmsize(b->H->vheap) : 0) +
(b->T->vheap && (!dirty || b->T->vheap->dirty) ?
HEAPvmsize(b->T->vheap) : 0);
}
@@ -2132,8 +2134,8 @@ BATmemsize(BAT *b, int dirty)
return (!dirty || b->batDirtydesc ? sizeof(BATstore) : 0) +
(!dirty || b->H->heap.dirty ? HEAPmemsize(&b->H->heap) : 0) +
(!dirty || b->T->heap.dirty ? HEAPmemsize(&b->T->heap) : 0) +
- ((!dirty || b->H->heap.dirty) && b->H->hash ?
HEAPmemsize(b->H->hash->heap) : 0) +
- ((!dirty || b->T->heap.dirty) && b->T->hash ?
HEAPmemsize(b->T->hash->heap) : 0) +
+ ((!dirty || b->H->heap.dirty) && b->H->hash && b->H->hash !=
(Hash *) 1 ? HEAPmemsize(b->H->hash->heap) : 0) +
+ ((!dirty || b->T->heap.dirty) && b->T->hash && b->T->hash !=
(Hash *) 1 ? HEAPmemsize(b->T->hash->heap) : 0) +
(b->H->vheap && (!dirty || b->H->vheap->dirty) ?
HEAPmemsize(b->H->vheap) : 0) +
(b->T->vheap && (!dirty || b->T->vheap->dirty) ?
HEAPmemsize(b->T->vheap) : 0);
}
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -413,6 +413,9 @@ BATins(BAT *b, BAT *n, bit force)
return GDK_FAIL;
}
+ BATcheckhash(b);
+ BATcheckhash(BATmirror(b));
+
if (b->htype != TYPE_void &&
(b->ttype == TYPE_void ||
(!b->H->hash && b->T->hash &&
@@ -698,7 +701,7 @@ BATappend(BAT *b, BAT *n, bit force)
if (b->H->hash)
HASHremove(BATmirror(b));
- if (b->T->hash && (2 * b->T->hash->mask) < (BATcount(b) + sz)) {
+ if (BATcheckhash(b) && (2 * b->T->hash->mask) < (BATcount(b) + sz)) {
HASHremove(b);
}
if (b->T->hash != NULL ||
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -1569,14 +1569,14 @@ BBPdump(void)
HEAPvmsize(&b->H->heap),
HEAPmemsize(b->H->vheap),
HEAPvmsize(b->H->vheap),
- b->H->hash && b->H->hash != (Hash *) -1 ?
HEAPmemsize(b->H->hash->heap) : 0,
- b->H->hash && b->H->hash != (Hash *) -1 ?
HEAPvmsize(b->H->hash->heap) : 0,
+ b->H->hash && b->H->hash != (Hash *) -1 && b->H->hash
!= (Hash *) 1 ? HEAPmemsize(b->H->hash->heap) : 0,
+ b->H->hash && b->H->hash != (Hash *) -1 && b->H->hash
!= (Hash *) 1 ? HEAPvmsize(b->H->hash->heap) : 0,
HEAPmemsize(&b->T->heap),
HEAPvmsize(&b->T->heap),
HEAPmemsize(b->T->vheap),
HEAPvmsize(b->T->vheap),
- b->T->hash && b->T->hash != (Hash *) -1 ?
HEAPmemsize(b->T->hash->heap) : 0,
- b->T->hash && b->T->hash != (Hash *) -1 ?
HEAPvmsize(b->T->hash->heap) : 0);
+ b->T->hash && b->T->hash != (Hash *) -1 && b->T->hash
!= (Hash *) 1 ? HEAPmemsize(b->T->hash->heap) : 0,
+ b->T->hash && b->T->hash != (Hash *) -1 && b->T->hash
!= (Hash *) 1 ? HEAPvmsize(b->T->hash->heap) : 0);
if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
cmem += HEAPmemsize(&b->H->heap);
cvm += HEAPvmsize(&b->H->heap);
@@ -1595,7 +1595,7 @@ BBPdump(void)
vm += HEAPvmsize(b->H->vheap);
}
}
- if (b->H->hash && b->H->hash != (Hash *) -1) {
+ if (b->H->hash && b->H->hash != (Hash *) -1 && b->H->hash !=
(Hash *) 1) {
if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
cmem += HEAPmemsize(b->H->hash->heap);
cvm += HEAPvmsize(b->H->hash->heap);
@@ -1620,7 +1620,7 @@ BBPdump(void)
vm += HEAPvmsize(b->T->vheap);
}
}
- if (b->T->hash && b->T->hash != (Hash *) -1) {
+ if (b->T->hash && b->T->hash != (Hash *) -1 && b->T->hash !=
(Hash *) 1) {
if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
cmem += HEAPmemsize(b->T->hash->heap);
cvm += HEAPvmsize(b->T->hash->heap);
@@ -3873,18 +3873,35 @@ BBPdiskscan(const char *parent)
} else if (strncmp(p + 1, "theap", 5) == 0) {
BAT *b = getdesc(bid);
delete = (b == NULL || !b->T->vheap ||
b->batCopiedtodisk == 0);
+ } else if (strncmp(p + 1, "hhash", 5) == 0) {
+#ifdef PERSISTENTHASH
+ BAT *b = getdesc(bid);
+ delete = b == NULL;
+ if (!delete)
+ b->H->hash = (Hash *) 1;
+#else
+ delete = TRUE;
+#endif
} else if (strncmp(p + 1, "hhash", 5) == 0 ||
strncmp(p + 1, "thash", 5) == 0) {
#ifdef PERSISTENTHASH
BAT *b = getdesc(bid);
delete = b == NULL;
+ if (!delete)
+ b->T->hash = (Hash *) 1;
#else
delete = TRUE;
#endif
- } else if (strncmp(p + 1, "himprints", 9) == 0 ||
- strncmp(p + 1, "timprints", 9) == 0) {
+ } else if (strncmp(p + 1, "himprints", 9) == 0) {
BAT *b = getdesc(bid);
delete = b == NULL;
+ if (!delete)
+ b->H->imprints = (Imprints *) 1;
+ } else if (strncmp(p + 1, "timprints", 9) == 0) {
+ BAT *b = getdesc(bid);
+ delete = b == NULL;
+ if (!delete)
+ b->T->imprints = (Imprints *) 1;
} else if (strncmp(p + 1, "priv", 4) != 0 &&
strncmp(p + 1, "new", 3) != 0 &&
strncmp(p + 1, "head", 4) != 0 &&
diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -556,7 +556,13 @@ do {
\
/* Check whether we have imprints on b (and return true if we do). It
* may be that the imprints were made persistent, but we hadn't seen
* that yet, so check the file system. This also returns true if b is
- * a view and there are imprints on b's parent. */
+ * a view and there are imprints on b's parent.
+ *
+ * Note that the b->T->imprints pointer can be NULL, meaning there are
+ * no imprints; (Imprints *) 1, meaning there are no imprints loaded, but
+ * they may exist on disk; or a valid pointer to loaded imprints.
+ * These values are maintained here, in the IMPSdestroy and IMPSfree
+ * functions, and in BBPdiskscan during initialization. */
int
BATcheckimprints(BAT *b)
{
@@ -568,12 +574,13 @@ BATcheckimprints(BAT *b)
}
MT_lock_set(&GDKimprintsLock(abs(b->batCacheid)), "BATcheckimprints");
- if (b->T->imprints == NULL) {
+ if (b->T->imprints == (Imprints *) 1) {
Imprints *imprints;
Heap *hp;
str nme = BBP_physical(b->batCacheid);
const char *ext = b->batCacheid > 0 ? "timprints" : "himprints";
+ b->T->imprints = NULL;
if ((hp = GDKzalloc(sizeof(Heap))) != NULL &&
(hp->farmid = BBPselectfarm(b->batRole, b->ttype,
imprintsheap)) >= 0 &&
(hp->filename = GDKmalloc(strlen(nme) + 12)) != NULL) {
@@ -664,11 +671,17 @@ BATimprints(BAT *b)
if (BATcheckimprints(b))
return GDK_SUCCEED;
+ assert(b->T->imprints == NULL);
if (VIEWtparent(b)) {
bat p = VIEWtparent(b);
o = b;
b = BATmirror(BATdescriptor(p));
+ if (BATcheckimprints(b)) {
+ BBPunfix(b->batCacheid);
+ return GDK_SUCCEED;
+ }
+ assert(b->T->imprints == NULL);
}
if (b->batFirst > 0) {
/* no imprints if batFirst is not 0
@@ -690,7 +703,7 @@ BATimprints(BAT *b)
"created imprints\n", BATgetId(b),
BATcount(b), b->T->heap.filename);
- imprints = (Imprints *) GDKzalloc(sizeof(Imprints));
+ imprints = GDKzalloc(sizeof(Imprints));
if (imprints == NULL) {
GDKerror("#BATimprints: memory allocation error.\n");
MT_lock_unset(&GDKimprintsLock(abs(b->batCacheid)),
@@ -948,7 +961,7 @@ lng
IMPSimprintsize(BAT *b)
{
lng sz = 0;
- if (b->T->imprints) {
+ if (b->T->imprints && b->T->imprints != (Imprints *) 1) {
sz = b->T->imprints->impcnt * b->T->imprints->bits / 8;
sz += b->T->imprints->dictcnt * sizeof(cchdc_t);
}
@@ -988,28 +1001,31 @@ void
IMPSdestroy(BAT *b)
{
if (b) {
- if (b->T->imprints != NULL && !VIEWtparent(b))
- IMPSremove(b);
- else
+ if (b->T->imprints == (Imprints *) 1) {
+ b->T->imprints = NULL;
GDKunlink(BBPselectfarm(b->batRole, b->ttype,
imprintsheap),
BATDIR,
BBP_physical(b->batCacheid),
"timprints");
+ } else if (b->T->imprints != NULL && !VIEWtparent(b))
+ IMPSremove(b);
- if (b->H->imprints != NULL && !VIEWhparent(b))
- IMPSremove(BATmirror(b));
- else
+ if (b->H->imprints == (Imprints *) 1) {
+ b->H->imprints = NULL;
GDKunlink(BBPselectfarm(b->batRole, b->htype,
imprintsheap),
BATDIR,
BBP_physical(b->batCacheid),
"himprints");
+ } else if (b->H->imprints != NULL && !VIEWhparent(b))
+ IMPSremove(BATmirror(b));
}
return;
}
/* free the memory associated with the imprints, do not remove the
- * heap files */
+ * heap files; indicate that imprints are available on disk by setting
+ * the imprints pointer to 1 */
void
IMPSfree(BAT *b)
{
@@ -1017,8 +1033,9 @@ IMPSfree(BAT *b)
if (b) {
MT_lock_set(&GDKimprintsLock(abs(b->batCacheid)), "IMPSdelete");
- if ((imprints = b->T->imprints) != NULL) {
- b->T->imprints = NULL;
+ imprints = b->T->imprints;
+ if (imprints != NULL && imprints != (Imprints *) 1) {
+ b->T->imprints = (Imprints *) 1;
if (!VIEWtparent(b)) {
HEAPfree(imprints->imprints, 0);
GDKfree(imprints->imprints);
diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c
--- a/gdk/gdk_logger.c
+++ b/gdk/gdk_logger.c
@@ -109,7 +109,7 @@ log_find(BAT *b, BAT *d, int val)
assert(b->ttype == TYPE_int);
assert(d->ttype == TYPE_oid);
- if (b->T->hash || BAThash(b, 0) == GDK_SUCCEED) {
+ if (BAThash(b, 0) == GDK_SUCCEED) {
HASHloop_int(cni, cni.b->T->hash, p, &val) {
oid pos = p;
if (BUNfnd(d, &pos) == BUN_NONE)
@@ -2531,7 +2531,7 @@ logger_find_bat(logger *lg, const char *
BATiter cni = bat_iterator(lg->catalog_nme);
BUN p;
- if (lg->catalog_nme->T->hash || BAThash(lg->catalog_nme, 0) ==
GDK_SUCCEED) {
+ if (BAThash(lg->catalog_nme, 0) == GDK_SUCCEED) {
HASHloop_str(cni, cni.b->T->hash, p, name) {
oid pos = p;
if (BUNfnd(lg->dcatalog, &pos) == BUN_NONE)
diff --git a/gdk/gdk_search.c b/gdk/gdk_search.c
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list