Update of /cvsroot/monetdb/pathfinder/runtime
In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv15416/runtime
Modified Files:
Tag: Aug2009_NFI
pathfinder.mx
Log Message:
optimizations for NFI XIRAF use case -- thanks a great bunch Jan R.!!
- ds_link (already in Stable) optimized for 1-node case
- indices now contain all data (but still not used automatically, nor based on
Lefteris' new indexing schemes)
most prominently though is: subexpression result caching
- caching hints in pragmas
- query enclosed in (# pf:session id:msec ) { query } or (# pf:session-use
id:msec ) { query }
+ queries in the same session use the same working set (documents opened only
once)
+ same working set allows to cache results
+ pf:session-use only uses cache, cannot add to it
- but, multiple pf:session-use can run concurrently; whereas pf:session is
exclusive
- inside a query, an arbitrary number of expressions can be marked up for
caching/reuse
+ (# pf:cache id ) { subexpr }
+ subexpr may not be enclosed by a for-loop
U pathfinder.mx
Index: pathfinder.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pathfinder.mx,v
retrieving revision 1.462
retrieving revision 1.462.6.1
diff -u -d -r1.462 -r1.462.6.1
--- pathfinder.mx 1 Jul 2009 12:34:40 -0000 1.462
+++ pathfinder.mx 28 Sep 2009 23:20:26 -0000 1.462.6.1
@@ -154,7 +154,7 @@
# - rid_size BAT[void,int] RID / #descendants,
# - rid_level BAT[void,chr] RID / distance from root,
# - rid_prop BAT[void,oid] RID / property-ID,
-# - rid_kind BAT[void,chr] RID / node kind (determins
prop_* column)
+# - rid_kind BAT[void,chr] RID / node kind (determines
prop_* column)
# - rid_nid BAT[void,oid] RID / stable NID
# - frag_root BAT[oid,oid] document-ID / root NID
#
@@ -274,6 +274,12 @@
# re-checked in ATTR_OWN to filter out deleted attributes.
#
# ATTR_OWN_SHARED is stored as a runtime index structure in RT_ATTR_OWN.
+#
+# cache bats used for caching node sequences:
+# - CACHE_ID[void,str] name
+# - CACHE_VAL[void,bat] bat-of-bats (iter,pos,..)
+# - CACHE_LRU[void,lng] last time used (usec)
+# - CACHE_SIZE[void,lng] size in bytes of VAL
@- ws definition
@@ -345,28 +351,33 @@
@:w...@1(CONT_LOCKED, 43, void, lock,void, void, oid_nil)@
@:w...@1(XRPC_PARTICIPANTS, 44, void, str, void, void, oid_nil)@
-@:w...@1(_MAP_PID, 45, void, bat, void, oid, PRE_BASE)@
-@:w...@1(_RID_SIZE, 46, void, bat, void, int, PRE_BASE)@
-@:w...@1(_RID_LEVEL, 47, void, bat, void, chr, PRE_BASE)@
-@:w...@1(_RID_PROP, 48, void, bat, void, oid, PRE_BASE)@
-@:w...@1(_RID_KIND, 49, void, bat, void, chr, PRE_BASE)@
-@:w...@1(_RID_NID, 50, void, bat, void, oid, PRE_BASE)@
-@:w...@1(_NID_RID, 51, void, bat, void, oid, PRE_BASE)@
-@:w...@1(_FRAG_ROOT, 52, void, bat, oid, oid, oid_nil)@
-@:w...@1(_QN_HISTOGRAM, 53, void, bat, void, str, PRE_BASE)@
-@:w...@1(_QN_PREFIX_URI_LOC,54, void, bat, void, str, PRE_BASE)@
-@:w...@1(_QN_URI_LOC, 55, void, bat, void, str, PRE_BASE)@
-@:w...@1(_QN_PREFIX, 56, void, bat, void, str, PRE_BASE)@
-@:w...@1(_QN_URI, 57, void, bat, void, str, PRE_BASE)@
-@:w...@1(_QN_LOC, 58, void, bat, void, str, PRE_BASE)@
-@:w...@1(_PROP_TEXT, 59, void, bat, void, str, PRE_BASE)@
-@:w...@1(_PROP_COM, 60, void, bat, void, str, PRE_BASE)@
-@:w...@1(_PROP_INS, 61, void, bat, void, str, PRE_BASE)@
-@:w...@1(_PROP_TGT, 62, void, bat, void, str, PRE_BASE)@
-@:w...@1(_PROP_VAL, 63, void, bat, void, str, PRE_BASE)@
-@:w...@1(_ATTR_OWN, 64, void, bat, void, oid, PRE_BASE)@
-@:w...@1(_ATTR_QN, 65, void, bat, void, oid, PRE_BASE)@
-@:w...@1(_ATTR_PROP, 66, void, bat, void, oid, PRE_BASE)@
+@:w...@1(CACHE_ID, 45, void, str, void, void, oid_nil)@
+@:w...@1(CACHE_VAL, 46, void, bat, void, void, oid_nil)@
+@:w...@1(CACHE_LRU, 47, void, lng, void, void, oid_nil)@
+@:w...@1(CACHE_SIZE, 48, void, lng, void, void, oid_nil)@
+
+@:w...@1(_MAP_PID, 49, void, bat, void, oid, PRE_BASE)@
+@:w...@1(_RID_SIZE, 50, void, bat, void, int, PRE_BASE)@
+@:w...@1(_RID_LEVEL, 51, void, bat, void, chr, PRE_BASE)@
+@:w...@1(_RID_PROP, 52, void, bat, void, oid, PRE_BASE)@
+@:w...@1(_RID_KIND, 53, void, bat, void, chr, PRE_BASE)@
+@:w...@1(_RID_NID, 54, void, bat, void, oid, PRE_BASE)@
+@:w...@1(_NID_RID, 55, void, bat, void, oid, PRE_BASE)@
+@:w...@1(_FRAG_ROOT, 56, void, bat, oid, oid, oid_nil)@
+@:w...@1(_QN_HISTOGRAM, 57, void, bat, void, str, PRE_BASE)@
+@:w...@1(_QN_PREFIX_URI_LOC,58, void, bat, void, str, PRE_BASE)@
+@:w...@1(_QN_URI_LOC, 59, void, bat, void, str, PRE_BASE)@
+@:w...@1(_QN_PREFIX, 60, void, bat, void, str, PRE_BASE)@
+@:w...@1(_QN_URI, 61, void, bat, void, str, PRE_BASE)@
+@:w...@1(_QN_LOC, 62, void, bat, void, str, PRE_BASE)@
+@:w...@1(_PROP_TEXT, 63, void, bat, void, str, PRE_BASE)@
+@:w...@1(_PROP_COM, 64, void, bat, void, str, PRE_BASE)@
+@:w...@1(_PROP_INS, 65, void, bat, void, str, PRE_BASE)@
+@:w...@1(_PROP_TGT, 66, void, bat, void, str, PRE_BASE)@
+@:w...@1(_PROP_VAL, 67, void, bat, void, str, PRE_BASE)@
+@:w...@1(_ATTR_OWN, 68, void, bat, void, oid, PRE_BASE)@
+@:w...@1(_ATTR_QN, 69, void, bat, void, oid, PRE_BASE)@
+@:w...@1(_ATTR_PROP, 70, void, bat, void, oid, PRE_BASE)@
@-
The bottom segment are the master copies of the top segment.
The middle segment is private to the query.
@@ -376,7 +387,7 @@
BEWARE: the logger version number below needs to be incremented whenever the
working set schema is changed.
@= ws_decl
-@:w...@1_decl(WS_SIZE, 67)@
+@:w...@1_decl(WS_SIZE, 71)@
@:w...@1_decl(QNAME, 2)@
@:w...@1_decl(BOOL, 3)@
@:w...@1_decl(INT, 4)@
@@ -528,6 +539,7 @@
xrpc_statuses.inplace(idx, status);
xrpc_wsbats.inplace(idx, xrpc_statuses); # use xrpc_statuses as a dummy
bat, replacing the ws (that gets freed)
xrpc_locks.inplace(idx, lock_nil);
+ if (xrpc_qids.find(idx).search(":") < 0) xrpc_qids.inplace(idx, "");
lock_destroy(wslock);
}
@@ -537,10 +549,10 @@
var idx := reverse(xrpc_qids).find(xrpc_qid);
if (isnil(errmsg)) {
xrpc_statuses.inplace(idx, "wait");
- lock_unset(xrpc_locks.find(idx));
} else {
- _ws_xrpc_abort(idx, "abort");
+ xrpc_statuses.inplace(idx, "abort");
}
+ lock_unset(xrpc_locks.find(idx));
}
# background check to time-out waiting 2PC transactions, and prune them fully
after an hour
@@ -1442,6 +1454,43 @@
return bat(void,tpe).seqbase(PRE_BASE); # a constant bat
}
+PROC ws_cache_expr(bat ws, str id) : bit {
+ return not(ws.fetch(CACHE_ID).texist(id));
+}
+
+PROC ws_cache_get(bat ws, str id) : bit {
+ var idx := reverse(ws.fetch(CACHE_ID)).find(id);
+ inplace(ws.fetch(CACHE_LRU), idx, usec(), true);
+ return ws.fetch(CACHE_VAL).fetch(idx);
+}
+
+PROC ws_cache_put(bat ws, str id, bat val) : void {
+ ws.fetch(CACHE_ID).append(id);
+ ws.fetch(CACHE_VAL).append(val);
+ ws.fetch(CACHE_SIZE).append(sum([batsize](val)));
+ ws.fetch(CACHE_LRU).append(0LL);
+#print(ws.fetch(CACHE_ID), ws.fetch(CACHE_VAL), ws.fetch(CACHE_SIZE),
ws.fetch(CACHE_LRU));
+}
+
+PROC ws_cache_end(bat ws) : void {
+ if (count(ws.fetch(CACHE_SIZE)) = 0) return;
+ var totsize, now := usec(), prune := false;
+ while ((totsize := sum(ws.fetch(CACHE_SIZE))) > xquery_cache_lim) {
+ var lru := min(ws.fetch(CACHE_LRU));
+ var idx := reverse(ws.fetch(CACHE_LRU)).find(lru);
+ ws.fetch(CACHE_LRU).inplace(idx,LNG_MAX, true);
+ ws.fetch(CACHE_SIZE).inplace(idx,0LL, true);
+ prune := true;
+ }
+ if (prune) {
+ var pivot := ws.fetch(CACHE_SIZE).uselect(1LL,lng_nil).hmark(0...@0);
+ ws.inplace(oid(CACHE_ID),
pivot.leftfetchjoin(ws.fetch(CACHE_ID)).access(BAT_APPEND), true);
+ ws.inplace(oid(CACHE_VAL),
pivot.leftfetchjoin(ws.fetch(CACHE_VAL)).access(BAT_APPEND), true);
+ ws.inplace(oid(CACHE_LRU),
pivot.leftfetchjoin(ws.fetch(CACHE_LRU)).access(BAT_APPEND), true);
+ ws.inplace(oid(CACHE_SIZE),
pivot.leftfetchjoin(ws.fetch(CACHE_SIZE)).access(BAT_APPEND), true);
+ }
+#print(ws.fetch(CACHE_ID), ws.fetch(CACHE_VAL), ws.fetch(CACHE_SIZE),
ws.fetch(CACHE_LRU));
+}
# check the working-set cache for multi-request XRPC queries (repeatable reads)
@@ -1451,7 +1500,6 @@
# <0 = create new ws and cache it at idx
PROC _ws_xrpcget() : int
{
-return 0;
var xrpc := 0;
xrpc_seqnr := (xrpc_querynr :+= 1LL);
xrpc_coord := false;
@@ -1473,21 +1521,31 @@
var ws;
if (xrpc > 0) {
ws := xrpc_wsbats.find(idx);
- xrpc_statuses.inplace(idx,"exec");
+ if (xrpc_mode.search("cache") >= 0) { # extend lifetime of session
+ var timeout := 0LL;
+ if (bit(xrpc_timeout)) timeout := usec() + *(1000LL, xrpc_timeout);
+ xrpc_timeouts.inplace(idx, timeout);
+ }
+ if (xrpc_mode != "use-cache-repeatable") {
+ xrpc_statuses.inplace(idx, "exec");
+ return ws;
+ }
+ ws := [access]([copy](ws),BAT_WRITE); # use a copy, no locking required
} else {
- var id := and(lng(newoid(1)), 2147483647LL);
ws := [ws_new](mirror(ws_tpe), ws_tpe, ws_htp, ws_ttp, ws_seq);
- wsid := <<(id, 32) + (lng(update) << 30) + lng(ws);
- ws.access(BAT_READ).rename(str(wsid));
if (xrpc < 0) {
- var wslock := lock_create();
+ var wslock := lock_create();
xrpc_qids.append(xrpc_qid);
xrpc_timeouts.append(usec() + *(1000LL, xrpc_timeout));
xrpc_wsbats.append(ws);
xrpc_locks.append(wslock);
xrpc_statuses.append("exec");
+ xrpc_mode := "cache-repeatable";
}
}
+ var id := and(lng(newoid(1)), 2147483647LL);
+ var wsid := <<(id, 32) + (lng(update) << 30) + lng(ws);
+ ws.access(BAT_READ).rename(str(wsid));
return ws;
}
@@ -1495,9 +1553,10 @@
{
ws_log_wsid := ws_id(ws);
if (not(isnil(err))) ws_log(ws, err);
- if (xrpc_qid = "") {
+ if ((xrpc_qid = "") or (xrpc_mode = "use-cache-repeatable")) {
ws_destroy(ws);
} else {
+ ws_cache_end(ws);
lock_set(xrpc_lock);
CATCH(_ws_xrpc_end(xrpc_qid, err));
lock_unset(xrpc_lock);
@@ -1509,14 +1568,22 @@
PROC ws_create(int update) : BAT[void,bat]
{
+ if (bit(and(update,2)) and (xrpc_mode.search("cache") >= 0)) {
+ ERROR("ws_create: update queries should not be done in a cached
session");
+ }
+
# NOTE: use pre-query MIL variables xrpc_qid/xrpc_timeout to possibly
re-use an existing ws
lock_set(xrpc_lock);
- var ws, wsid, xrpc, idx, err := CATCH(ws := _ws_new(xrpc := _ws_xrpcget(),
idx := oid(abs(xrpc)), update), wsid := ws_id(ws));
+ var wslock := lock_nil, ws, xrpc, idx, err := CATCH(ws := _ws_new(xrpc :=
_ws_xrpcget(), idx := oid(abs(xrpc)), update));
+ if (xrpc != 0) wslock := xrpc_locks.find(idx);
lock_unset(xrpc_lock);
if (not(isnil(err))) ERROR(err);
- # each xrpc request must lock its ws
- if (xrpc != 0) lock_set(xrpc_locks.find(idx));
+ # each xrpc request must lock its ws; read-only use directly unlocks again
+ if (not(isnil(wslock))) {
+ lock_set(wslock);
+ if (xrpc_mode = "use-cache-repeatable") lock_unset(wslock);
+ }
if (xrpc > 0) return ws;
# only instantiates the default views of the ws-bats (not the master bats)
@@ -1552,6 +1619,7 @@
mirror(ws_rid).leftfetchjoin(ws).[insert](0...@0,
[fetch](ws_rid.leftfetchjoin(ws), 0));
mirror(ws_mem).leftfetchjoin(ws).[insert](0...@0,
[fetch](ws_mem.leftfetchjoin(ws), 0));
+ var wsid := ws_id(ws);
pflock_begin(wsid);
if (bit(and(update,2))) {
@@ -2213,7 +2281,8 @@
var vxt := vxm.leftfetchjoin(prp); vxm := nil;
vxt := vxt.leftfetchjoin(prp_txt);
vxt := [xquery_hash](vxt);
- vxm := vx_reduce(vxt, vxp);
+ #vxm := vx_reduce(vxt, vxp); for explicit usage, a full index is more
useful
+ vxm := reverse(vxt).leftfetchjoin(vxp);
vxp := nil; vxt := nil;
# add the newly shredded attributes. The new attributes are >att and have
an attr_own > pre
@@ -4188,7 +4257,8 @@
uri_lifetime.insert(reverse(b.[string](0,[-](i,1))).leftfetchjoin([lng](b.[string]([+](i,1)))));
}
# start the query cache
-xquery_start_query_cache(lng(monet_environment.find("xquery_procMB")) * 1024LL
* 1024LL);
+const xquery_cache_lim := lng(monet_environment.find("xquery_procMB")) *
1024LL * 1024LL;
+xquery_start_query_cache(xquery_cache_lim);
cleantmpdir(msec() / 1000LL); # delete tmp files in
DBFARM/DBNAME/tmp
collection_cleanup(_collection_cleanup()); # silently clean up the repository
@@ -4665,8 +4735,9 @@
PFerrbuf = ctx->errbuf;
PFerrbuf[0] = 0;
err = PFcompile_MonetDB(xquery, url, prologue, &del, epilogue,
options,
- *(ctx->genType) /* Using the content of
the MIL variable genType
- defined in the prologue
is yet another hack
+ *(ctx->genType), *(ctx->xrpc_qid),
*(ctx->xrpc_mode), *(ctx->xrpc_timeout)
+ /* Using the content of
the MIL variables genType and
+ xrpc_* defined in the
prologue is yet another hack
to make the
serialization information visible
to the algebra backend
that does not use prologue
and epilogue
structures. */);
@@ -5860,7 +5931,7 @@
int nsbuf = 0, loaded_modules = 0, len;
char *ns = (char*)&nsbuf, *nsend = ns, *locend, *loc = NULL, *q, *p =
query;
char val[1024], url1[1024], url2[1024];
- char *err = NULL;
+ char *err = NULL, *mode = NULL, *sid = NULL;
if (ctx->mode&XQ_DEBUG) {
/* for debugging purposes, we simulate a full MIL on the log; even if
parts are cached */
@@ -5889,7 +5960,7 @@
}
}
}
-
+
/* parse one ore more import module statements */
while(*p && err == NULL) {
p = xquery_parse_space(p);
@@ -5949,6 +6020,41 @@
}
}
+ /* detect pf:session pragma enclosing the entire query */
+ if (p[0] == '(' && p[1] == '#') {
+ str off, lim;
+ p = xquery_parse_space(p+2);
+ if (strncmp(p, "pf:session", 10) == 0) {
+ p += 10;
+ mode = "use-cache-repeatable";
+ if (strncmp(p, "-use", 4) == 0) {
+ p += 4;
+ } else {
+ mode += 4;
+ }
+ off = xquery_parse_space(p);
+ p = lim = xquery_parse_ident(off);
+ if (*lim == ':') {
+ *(ctx->xrpc_timeout) = strtol(lim+1,&p,10);
+ }
+ p = xquery_parse_space(p);
+ if (p[0] == '#' && p[1] == ')') {
+ p = xquery_parse_space(p+2);
+ if (p[0] == '{') {
+ str q, end = NULL;
+ for(q=p=xquery_parse_space(p+1); *q; q++) if (*q == '}')
end = q;
+ if (end) {
+ /* cut off the enclosing pragma, record the session id
*/
+ sid = off; query = p;
+ *lim = *end = 0;
+ }
+ }
+ }
+ }
+ }
+ @:setvar(xrpc_qid, sid?sid:"")@
+ @:setvar(xrpc_mode, (sid && mode)?mode:"none")@
+
if (loaded_modules == 0) {
err = xquery_too_complex; /* must at least load one module to be able
to just execute a function */
} else if (err == NULL) {
@@ -6089,7 +6195,7 @@
}
}
@= setvar
- if (@2 && strcmp(*(ctx->@1), @2)) {
+ if ((@2) != NULL && strcmp(*(ctx->@1), @2)) {
char* newval = GDKstrdup(@2);
if (newval == NULL) return "setvar(@1): malloc failure";
GDKfree(*(ctx->@1));
------------------------------------------------------------------------------
Come build with us! The BlackBerry® Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay
ahead of the curve. Join us from November 9-12, 2009. Register now!
http://p.sf.net/sfu/devconf
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins