Changeset: fe2f21823085 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=fe2f21823085
Modified Files:
gdk/gdk.h
gdk/gdk_private.h
gdk/gdk_search.c
gdk/gdk_select.c
sql/test/orderidx/Tests/simpletable.stable.out
Branch: leftmart
Log Message:
Persist order index.
diffs (213 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -896,7 +896,7 @@ typedef struct {
PROPrec *props; /* list of dynamic properties stored in the bat
descriptor */
} COLrec;
-#define ORDERIDXOFF 1
+#define ORDERIDXOFF 2
/* assert that atom width is power of 2, i.e., width == 1<<shift */
#define assert_shift_width(shift,width) assert(((shift) == 0 && (width) == 0)
|| ((unsigned)1<<(shift)) == (unsigned)(width))
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -56,6 +56,8 @@ struct BATstore {
__attribute__((__visibility__("hidden")));
__hidden gdk_return BATcheckmodes(BAT *b, int persistent)
__attribute__((__visibility__("hidden")));
+__hidden int BATcheckorderidx(BAT *b)
+ __attribute__((__visibility__("hidden")));
__hidden BATstore *BATcreatedesc(int ht, int tt, int heapnames, int role)
__attribute__((__visibility__("hidden")));
__hidden void BATdelete(BAT *b)
diff --git a/gdk/gdk_search.c b/gdk/gdk_search.c
--- a/gdk/gdk_search.c
+++ b/gdk/gdk_search.c
@@ -692,7 +692,7 @@ SORTfndwhich(BAT *b, const void *v, enum
if (b == NULL ||
(!b->tsorted && !b->trevsorted &&
- (!use_orderidx || b->torderidx == NULL)))
+ (!use_orderidx || !BATcheckorderidx(b))))
return BUN_NONE;
lo = BUNfirst(b);
@@ -947,6 +947,62 @@ ORDERfndlast(BAT *b, const void *v)
return SORTfndwhich(b, v, FIND_LAST, 1);
}
+#define ORDERIDX_VERSION ((oid) 1)
+
+/* Report whether BAT b has an order index on its tail, attaching a
+ * previously persisted one from disk when none is attached yet.
+ *
+ * Returns 1 if b->torderidx is set on exit (it was already set, or a
+ * persisted index was found and loaded), 0 otherwise.  Failures while
+ * probing or loading are not reported to the caller: the GDK error
+ * state is cleared and 0 is returned.
+ *
+ * A file on disk is accepted only if all of the following hold:
+ *  - its first oid equals ((oid) 1 << 24) | ORDERIDX_VERSION
+ *    (NOTE(review): bit 24 is or-ed in by GDKmergeidx only after the
+ *    heap was saved, so it presumably marks a completely written
+ *    file -- confirm);
+ *  - its second oid equals the current BATcount(b);
+ *  - it is at least (ORDERIDXOFF + count) * SIZEOF_OID bytes long;
+ *  - HEAPload succeeds on it.
+ * A file that exists but fails any of these checks is unlinked.
+ *
+ * The whole check runs under GDKhashLock(abs(b->batCacheid)). */
+int
+BATcheckorderidx(BAT *b)
+{
+	int ret;
+	lng t;
+
+	/* t ends up holding the time spent waiting for the lock; it is
+	 * only used in the debug message at the end */
+	t = GDKusec();
+	MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BATcheckorderidx");
+	t = GDKusec() - t;
+	if (b->torderidx == NULL) {
+		Heap *hp;
+		const char *nme = BBP_physical(b->batCacheid);
+		const char *ext = b->batCacheid > 0 ? "torderidx" : "horderidx";
+		/* room for nme, the '.', ext and the terminating NUL; the
+		 * former fixed "+ 10" was one byte short for a 9-character
+		 * extension, so the unbounded sprintf wrote past the end */
+		size_t fnlen = strlen(nme) + strlen(ext) + 2;
+		int fd;
+
+		if ((hp = GDKzalloc(sizeof(*hp))) != NULL &&
+		    (hp->farmid = BBPselectfarm(b->batRole, b->ttype, orderidxheap)) >= 0 &&
+		    (hp->filename = GDKmalloc(fnlen)) != NULL) {
+			snprintf(hp->filename, fnlen, "%s.%s", nme, ext);
+
+			/* check whether a persisted orderidx can be found */
+			if ((fd = GDKfdlocate(hp->farmid, nme, "rb+", ext)) >= 0) {
+				struct stat st;
+				oid hdata[ORDERIDXOFF];
+
+				/* hp->size and hp->free are set as a side
+				 * effect of the size test, before HEAPload
+				 * is called */
+				if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
+				    hdata[0] == (((oid) 1 << 24) | ORDERIDX_VERSION) &&
+				    hdata[1] == (oid) BATcount(b) &&
+				    fstat(fd, &st) == 0 &&
+				    st.st_size >= (off_t) (hp->size = hp->free = (ORDERIDXOFF + hdata[1]) * SIZEOF_OID) &&
+				    HEAPload(hp, nme, ext, 0) == GDK_SUCCEED) {
+					close(fd);
+					b->torderidx = hp;
+					ALGODEBUG fprintf(stderr, "#BATcheckorderidx: reusing persisted orderidx %d\n", b->batCacheid);
+					MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BATcheckorderidx");
+					return 1;
+				}
+				close(fd);
+				/* unlink unusable file */
+				GDKunlink(hp->farmid, BATDIR, nme, ext);
+			}
+			GDKfree(hp->filename);
+		}
+		/* hp may be NULL here if the first allocation failed */
+		GDKfree(hp);
+		GDKclrerr();	/* we're not currently interested in errors */
+	}
+	ret = b->T->orderidx != NULL;
+	MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BATcheckorderidx");
+	ALGODEBUG if (ret) fprintf(stderr, "#BATcheckorderidx: already has orderidx %d, waited " LLFMT " usec\n", b->batCacheid, t);
+	return ret;
+}
+
gdk_return
GDKmergeidx(BAT *b, BAT**a, int n_ar)
{
@@ -954,23 +1010,32 @@ GDKmergeidx(BAT *b, BAT**a, int n_ar)
int i;
size_t nmelen;
oid *restrict mv;
+ const char *nme = BBP_physical(b->batCacheid);
- nmelen = strlen(BBP_physical(b->batCacheid)) + 10;
+ if (BATcheckorderidx(b))
+ return GDK_SUCCEED;
+ MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "GDKmergeidx");
+ if (b->torderidx) {
+ MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "GDKmergeidx");
+ return GDK_SUCCEED;
+ }
+ nmelen = strlen(nme) + 10;
if ((m = GDKzalloc(sizeof(Heap))) == NULL ||
(m->farmid = BBPselectfarm(b->batRole, b->ttype, orderidxheap)) < 0
||
(m->filename = GDKmalloc(nmelen)) == NULL ||
- snprintf(m->filename, nmelen, "%s.torderidx",
- BBP_physical(b->batCacheid)) < 0 ||
+ snprintf(m->filename, nmelen, "%s.torderidx", nme) < 0 ||
HEAPalloc(m, BATcount(b) + ORDERIDXOFF, SIZEOF_OID) != GDK_SUCCEED)
{
if (m)
GDKfree(m->filename);
GDKfree(m);
+ MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "GDKmergeidx");
return GDK_FAIL;
}
m->free = (BATcount(b) + ORDERIDXOFF) * SIZEOF_OID;
mv = (oid *) m->base;
- *mv++ = 0;
+ *mv++ = ORDERIDX_VERSION;
+ *mv++ = (oid) BATcount(b);
if (n_ar == 1) {
/* One oid order bat, nothing to merge */
@@ -1018,6 +1083,7 @@ GDKmergeidx(BAT *b, BAT**a, int n_ar)
assert(0);
HEAPfree(m, 1);
GDKfree(m);
+ MT_lock_unset(&GDKhashLock(abs(b->batCacheid)),
"GDKmergeidx");
return GDK_FAIL;
}
@@ -1033,6 +1099,7 @@ bailout:
GDKfree(q);
HEAPfree(m, 1);
GDKfree(m);
+ MT_lock_unset(&GDKhashLock(abs(b->batCacheid)),
"GDKmergeidx");
return GDK_FAIL;
}
for (i = 0; i < n_ar; i++) {
@@ -1122,13 +1189,30 @@ bailout:
}
}
- if (HEAPsave(m, BBP_physical(b->batCacheid), "torderidx") !=
GDK_SUCCEED) {
- HEAPfree(m, 1);
- GDKfree(m);
- return GDK_FAIL;
+ if ((BBP_status(b->batCacheid) & BBPEXISTING) &&
+ b->batInserted == b->batCount &&
+ HEAPsave(m, nme, "torderidx") == GDK_SUCCEED &&
+ (i = GDKfdlocate(m->farmid, nme, "rb+", "torderidx")) >= 0) {
+ ALGODEBUG fprintf(stderr, "#GDKmergeidx: persisting orderidx
%d\n", b->batCacheid);
+ ((oid *) m->base)[0] |= (oid) 1 << 24;
+ if (write(i, m->base, SIZEOF_OID) < 0)
+ perror("write orderidx");
+ if (!(GDKdebug & FORCEMITOMASK)) {
+#if defined(NATIVE_WIN32)
+ _commit(i);
+#elif defined(HAVE_FDATASYNC)
+ fdatasync(i);
+#elif defined(HAVE_FSYNC)
+ fsync(i);
+#endif
+ }
+ close(i);
+ } else {
+ ALGODEBUG fprintf(stderr, "#GDKmergeidx: NOT persisting
orderidx %d\n", b->batCacheid);
}
b->batDirtydesc = TRUE;
b->torderidx = m;
+ MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "GDKmergeidx");
return GDK_SUCCEED;
}
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1429,8 +1429,8 @@ BATsubselect(BAT *b, BAT *s, const void
* TODO: we do not support anti-select with order index */
if (!anti &&
!(b->tsorted || b->trevsorted) &&
- (b->torderidx ||
- (VIEWtparent(b) && BBPquickdesc(abs(VIEWtparent(b)),
0)->torderidx)))
+ (BATcheckorderidx(b) ||
+ (VIEWtparent(b) &&
BATcheckorderidx(BBPquickdesc(abs(VIEWtparent(b)), 0)))))
{
BAT *view = NULL;
if (VIEWtparent(b)) {
diff --git a/sql/test/orderidx/Tests/simpletable.stable.out
b/sql/test/orderidx/Tests/simpletable.stable.out
--- a/sql/test/orderidx/Tests/simpletable.stable.out
+++ b/sql/test/orderidx/Tests/simpletable.stable.out
@@ -90,7 +90,7 @@ Ready.
% schema, table, column, type, mode, count, hashes, phash,
imprints, sorted, orderidx # name
% clob, clob, clob, clob, clob, bigint, bigint, boolean,
bigint, boolean, bigint # type
% 3, 5, 1, 3, 8, 1, 1, 5, 1, 5,
2 # length
-[ "sys", "xtmp1", "i", "int", "readonly", 4, 0,
false, 0, false, 40 ]
+[ "sys", "xtmp1", "i", "int", "readonly", 4, 0,
false, 0, false, 48 ]
#select * from xtmp1 where i <0;
% sys.xtmp1 # table_name
% i # name
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list