Changeset: cc031ce310c7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cc031ce310c7
Modified Files:
        gdk/gdk_align.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_bbp.c
        gdk/gdk_imprints.c
        gdk/gdk_logger.c
        gdk/gdk_search.c
        gdk/gdk_search.h
        monetdb5/mal/mal_authorize.c
        monetdb5/mal/mal_debugger.c
        monetdb5/mal/mal_resource.c
        monetdb5/mal/mal_runtime.c
        monetdb5/modules/kernel/bat5.c
        monetdb5/modules/kernel/status.c
        monetdb5/modules/mal/tokenizer.c
        sql/backends/monet5/sql.c
        sql/storage/bat/bat_table.c
Branch: Jul2015
Log Message:

Avoid unlinking hash and imprints files at every change.
We now remember that imprints or hashes may be available on disk by
setting the pointer to 1; a pointer value of 0 (NULL) means there is
definitely nothing available on disk.


diffs (truncated from 618 to 300 lines):

diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -785,10 +785,7 @@ VIEWdestroy(BAT *b)
        assert(isVIEW(b));
 
        /* remove any leftover private hash structures */
-       if (b->H->hash)
-               HASHremove(BATmirror(b));
-       if (b->T->hash)
-               HASHremove(b);
+       HASHdestroy(b);
        IMPSdestroy(b);
        VIEWunlink(b);
 
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1763,7 +1763,7 @@ BUNfnd(BAT *b, const void *v)
                return r;
        if (BATtvoid(b))
                return BUNfndVOID(b, v);
-       if (!b->T->hash) {
+       if (!BATcheckhash(b)) {
                if (BATtordered(b) || BATtrevordered(b))
                        return SORTfnd(b, v);
        }
@@ -1894,6 +1894,8 @@ BUNlocate(BAT *b, const void *x, const v
                 * strategy: create a hash on both columns, and select
                 * the column with the best distribution
                 */
+               BATcheckhash(b);
+               BATcheckhash(BATmirror(b));
                if ((b->T->hash && b->H->hash == NULL) || !dohash(b->H))
                        usemirror();
                if (b->H->hash == NULL && (v = VIEWcreate_(b, b, TRUE)) != 
NULL) {
@@ -2116,8 +2118,8 @@ BATvmsize(BAT *b, int dirty)
                dirty = 0;
        return (!dirty || b->H->heap.dirty ? HEAPvmsize(&b->H->heap) : 0) +
                (!dirty || b->T->heap.dirty ? HEAPvmsize(&b->T->heap) : 0) +
-               ((!dirty || b->H->heap.dirty) && b->H->hash ? 
HEAPvmsize(b->H->hash->heap) : 0) +
-               ((!dirty || b->T->heap.dirty) && b->T->hash ? 
HEAPvmsize(b->T->hash->heap) : 0) +
+               ((!dirty || b->H->heap.dirty) && b->H->hash && b->H->hash != 
(Hash *) 1 ? HEAPvmsize(b->H->hash->heap) : 0) +
+               ((!dirty || b->T->heap.dirty) && b->T->hash && b->T->hash != 
(Hash *) 1 ? HEAPvmsize(b->T->hash->heap) : 0) +
                (b->H->vheap && (!dirty || b->H->vheap->dirty) ? 
HEAPvmsize(b->H->vheap) : 0) +
                (b->T->vheap && (!dirty || b->T->vheap->dirty) ? 
HEAPvmsize(b->T->vheap) : 0);
 }
@@ -2132,8 +2134,8 @@ BATmemsize(BAT *b, int dirty)
        return (!dirty || b->batDirtydesc ? sizeof(BATstore) : 0) +
                (!dirty || b->H->heap.dirty ? HEAPmemsize(&b->H->heap) : 0) +
                (!dirty || b->T->heap.dirty ? HEAPmemsize(&b->T->heap) : 0) +
-               ((!dirty || b->H->heap.dirty) && b->H->hash ? 
HEAPmemsize(b->H->hash->heap) : 0) +
-               ((!dirty || b->T->heap.dirty) && b->T->hash ? 
HEAPmemsize(b->T->hash->heap) : 0) +
+               ((!dirty || b->H->heap.dirty) && b->H->hash && b->H->hash != 
(Hash *) 1 ? HEAPmemsize(b->H->hash->heap) : 0) +
+               ((!dirty || b->T->heap.dirty) && b->T->hash && b->T->hash != 
(Hash *) 1 ? HEAPmemsize(b->T->hash->heap) : 0) +
                (b->H->vheap && (!dirty || b->H->vheap->dirty) ? 
HEAPmemsize(b->H->vheap) : 0) +
                (b->T->vheap && (!dirty || b->T->vheap->dirty) ? 
HEAPmemsize(b->T->vheap) : 0);
 }
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -413,6 +413,9 @@ BATins(BAT *b, BAT *n, bit force)
                return GDK_FAIL;
        }
 
+       BATcheckhash(b);
+       BATcheckhash(BATmirror(b));
+
        if (b->htype != TYPE_void &&
            (b->ttype == TYPE_void ||
             (!b->H->hash && b->T->hash &&
@@ -698,7 +701,7 @@ BATappend(BAT *b, BAT *n, bit force)
        if (b->H->hash)
                HASHremove(BATmirror(b));
 
-       if (b->T->hash && (2 * b->T->hash->mask) < (BATcount(b) + sz)) {
+       if (BATcheckhash(b) && (2 * b->T->hash->mask) < (BATcount(b) + sz)) {
                HASHremove(b);
        }
        if (b->T->hash != NULL ||
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -1569,14 +1569,14 @@ BBPdump(void)
                        HEAPvmsize(&b->H->heap),
                        HEAPmemsize(b->H->vheap),
                        HEAPvmsize(b->H->vheap),
-                       b->H->hash && b->H->hash != (Hash *) -1 ? 
HEAPmemsize(b->H->hash->heap) : 0,
-                       b->H->hash && b->H->hash != (Hash *) -1 ? 
HEAPvmsize(b->H->hash->heap) : 0,
+                       b->H->hash && b->H->hash != (Hash *) -1 && b->H->hash 
!= (Hash *) 1 ? HEAPmemsize(b->H->hash->heap) : 0,
+                       b->H->hash && b->H->hash != (Hash *) -1 && b->H->hash 
!= (Hash *) 1 ? HEAPvmsize(b->H->hash->heap) : 0,
                        HEAPmemsize(&b->T->heap),
                        HEAPvmsize(&b->T->heap),
                        HEAPmemsize(b->T->vheap),
                        HEAPvmsize(b->T->vheap),
-                       b->T->hash && b->T->hash != (Hash *) -1 ? 
HEAPmemsize(b->T->hash->heap) : 0,
-                       b->T->hash && b->T->hash != (Hash *) -1 ? 
HEAPvmsize(b->T->hash->heap) : 0);
+                       b->T->hash && b->T->hash != (Hash *) -1 && b->T->hash 
!= (Hash *) 1 ? HEAPmemsize(b->T->hash->heap) : 0,
+                       b->T->hash && b->T->hash != (Hash *) -1 && b->T->hash 
!= (Hash *) 1 ? HEAPvmsize(b->T->hash->heap) : 0);
                if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
                        cmem += HEAPmemsize(&b->H->heap);
                        cvm += HEAPvmsize(&b->H->heap);
@@ -1595,7 +1595,7 @@ BBPdump(void)
                                vm += HEAPvmsize(b->H->vheap);
                        }
                }
-               if (b->H->hash && b->H->hash != (Hash *) -1) {
+               if (b->H->hash && b->H->hash != (Hash *) -1 && b->H->hash != 
(Hash *) 1) {
                        if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
                                cmem += HEAPmemsize(b->H->hash->heap);
                                cvm += HEAPvmsize(b->H->hash->heap);
@@ -1620,7 +1620,7 @@ BBPdump(void)
                                vm += HEAPvmsize(b->T->vheap);
                        }
                }
-               if (b->T->hash && b->T->hash != (Hash *) -1) {
+               if (b->T->hash && b->T->hash != (Hash *) -1 && b->T->hash != 
(Hash *) 1) {
                        if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
                                cmem += HEAPmemsize(b->T->hash->heap);
                                cvm += HEAPvmsize(b->T->hash->heap);
@@ -3873,18 +3873,35 @@ BBPdiskscan(const char *parent)
                } else if (strncmp(p + 1, "theap", 5) == 0) {
                        BAT *b = getdesc(bid);
                        delete = (b == NULL || !b->T->vheap || 
b->batCopiedtodisk == 0);
+               } else if (strncmp(p + 1, "hhash", 5) == 0) {
+#ifdef PERSISTENTHASH
+                       BAT *b = getdesc(bid);
+                       delete = b == NULL;
+                       if (!delete)
+                               b->H->hash = (Hash *) 1;
+#else
+                       delete = TRUE;
+#endif
                } else if (strncmp(p + 1, "hhash", 5) == 0 ||
                           strncmp(p + 1, "thash", 5) == 0) {
 #ifdef PERSISTENTHASH
                        BAT *b = getdesc(bid);
                        delete = b == NULL;
+                       if (!delete)
+                               b->T->hash = (Hash *) 1;
 #else
                        delete = TRUE;
 #endif
-               } else if (strncmp(p + 1, "himprints", 9) == 0 ||
-                          strncmp(p + 1, "timprints", 9) == 0) {
+               } else if (strncmp(p + 1, "himprints", 9) == 0) {
                        BAT *b = getdesc(bid);
                        delete = b == NULL;
+                       if (!delete)
+                               b->H->imprints = (Imprints *) 1;
+               } else if (strncmp(p + 1, "timprints", 9) == 0) {
+                       BAT *b = getdesc(bid);
+                       delete = b == NULL;
+                       if (!delete)
+                               b->T->imprints = (Imprints *) 1;
                } else if (strncmp(p + 1, "priv", 4) != 0 &&
                           strncmp(p + 1, "new", 3) != 0 &&
                           strncmp(p + 1, "head", 4) != 0 &&
diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -556,7 +556,13 @@ do {                                                       
                \
 /* Check whether we have imprints on b (and return true if we do).  It
  * may be that the imprints were made persistent, but we hadn't seen
  * that yet, so check the file system.  This also returns true if b is
- * a view and there are imprints on b's parent. */
+ * a view and there are imprints on b's parent.
+ *
+ * Note that the b->T->imprints pointer can be NULL, meaning there are
+ * no imprints; (Imprints *) 1, meaning there are no imprints loaded, but
+ * they may exist on disk; or a valid pointer to loaded imprints.
+ * These values are maintained here, in the IMPSdestroy and IMPSfree
+ * functions, and in BBPdiskscan during initialization. */
 int
 BATcheckimprints(BAT *b)
 {
@@ -568,12 +574,13 @@ BATcheckimprints(BAT *b)
        }
 
        MT_lock_set(&GDKimprintsLock(abs(b->batCacheid)), "BATcheckimprints");
-       if (b->T->imprints == NULL) {
+       if (b->T->imprints == (Imprints *) 1) {
                Imprints *imprints;
                Heap *hp;
                str nme = BBP_physical(b->batCacheid);
                const char *ext = b->batCacheid > 0 ? "timprints" : "himprints";
 
+               b->T->imprints = NULL;
                if ((hp = GDKzalloc(sizeof(Heap))) != NULL &&
                    (hp->farmid = BBPselectfarm(b->batRole, b->ttype, 
imprintsheap)) >= 0 &&
                    (hp->filename = GDKmalloc(strlen(nme) + 12)) != NULL) {
@@ -664,11 +671,17 @@ BATimprints(BAT *b)
 
        if (BATcheckimprints(b))
                return GDK_SUCCEED;
+       assert(b->T->imprints == NULL);
 
        if (VIEWtparent(b)) {
                bat p = VIEWtparent(b);
                o = b;
                b = BATmirror(BATdescriptor(p));
+               if (BATcheckimprints(b)) {
+                       BBPunfix(b->batCacheid);
+                       return GDK_SUCCEED;
+               }
+               assert(b->T->imprints == NULL);
        }
        if (b->batFirst > 0) {
                /* no imprints if batFirst is not 0
@@ -690,7 +703,7 @@ BATimprints(BAT *b)
                                  "created imprints\n", BATgetId(b),
                                  BATcount(b), b->T->heap.filename);
 
-               imprints = (Imprints *) GDKzalloc(sizeof(Imprints));
+               imprints = GDKzalloc(sizeof(Imprints));
                if (imprints == NULL) {
                        GDKerror("#BATimprints: memory allocation error.\n");
                        MT_lock_unset(&GDKimprintsLock(abs(b->batCacheid)),
@@ -948,7 +961,7 @@ lng
 IMPSimprintsize(BAT *b)
 {
        lng sz = 0;
-       if (b->T->imprints) {
+       if (b->T->imprints && b->T->imprints != (Imprints *) 1) {
                sz = b->T->imprints->impcnt * b->T->imprints->bits / 8;
                sz += b->T->imprints->dictcnt * sizeof(cchdc_t);
        }
@@ -988,28 +1001,31 @@ void
 IMPSdestroy(BAT *b)
 {
        if (b) {
-               if (b->T->imprints != NULL && !VIEWtparent(b))
-                       IMPSremove(b);
-               else
+               if (b->T->imprints == (Imprints *) 1) {
+                       b->T->imprints = NULL;
                        GDKunlink(BBPselectfarm(b->batRole, b->ttype, 
imprintsheap),
                                  BATDIR,
                                  BBP_physical(b->batCacheid),
                                  "timprints");
+               } else if (b->T->imprints != NULL && !VIEWtparent(b))
+                       IMPSremove(b);
 
-               if (b->H->imprints != NULL && !VIEWhparent(b))
-                       IMPSremove(BATmirror(b));
-               else
+               if (b->H->imprints == (Imprints *) 1) {
+                       b->H->imprints = NULL;
                        GDKunlink(BBPselectfarm(b->batRole, b->htype, 
imprintsheap),
                                  BATDIR,
                                  BBP_physical(b->batCacheid),
                                  "himprints");
+               } else if (b->H->imprints != NULL && !VIEWhparent(b))
+                       IMPSremove(BATmirror(b));
        }
 
        return;
 }
 
 /* free the memory associated with the imprints, do not remove the
- * heap files */
+ * heap files; indicate that imprints are available on disk by setting
+ * the imprints pointer to 1 */
 void
 IMPSfree(BAT *b)
 {
@@ -1017,8 +1033,9 @@ IMPSfree(BAT *b)
 
        if (b) {
                MT_lock_set(&GDKimprintsLock(abs(b->batCacheid)), "IMPSdelete");
-               if ((imprints = b->T->imprints) != NULL) {
-                       b->T->imprints = NULL;
+               imprints = b->T->imprints;
+               if (imprints != NULL && imprints != (Imprints *) 1) {
+                       b->T->imprints = (Imprints *) 1;
                        if (!VIEWtparent(b)) {
                                HEAPfree(imprints->imprints, 0);
                                GDKfree(imprints->imprints);
diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c
--- a/gdk/gdk_logger.c
+++ b/gdk/gdk_logger.c
@@ -109,7 +109,7 @@ log_find(BAT *b, BAT *d, int val)
 
        assert(b->ttype == TYPE_int);
        assert(d->ttype == TYPE_oid);
-       if (b->T->hash || BAThash(b, 0) == GDK_SUCCEED) {
+       if (BAThash(b, 0) == GDK_SUCCEED) {
                HASHloop_int(cni, cni.b->T->hash, p, &val) {
                        oid pos = p;
                        if (BUNfnd(d, &pos) == BUN_NONE)
@@ -2531,7 +2531,7 @@ logger_find_bat(logger *lg, const char *
        BATiter cni = bat_iterator(lg->catalog_nme);
        BUN p;
 
-       if (lg->catalog_nme->T->hash || BAThash(lg->catalog_nme, 0) == 
GDK_SUCCEED) {
+       if (BAThash(lg->catalog_nme, 0) == GDK_SUCCEED) {
                HASHloop_str(cni, cni.b->T->hash, p, name) {
                        oid pos = p;
                        if (BUNfnd(lg->dcatalog, &pos) == BUN_NONE)
diff --git a/gdk/gdk_search.c b/gdk/gdk_search.c
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to