Update of /cvsroot/monetdb/pathfinder/runtime
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv32645/runtime
Modified Files:
pathfinder.mx pf_support.mx serialize.mx shredder.mx
Log Message:
propagated changes of Wednesday Jun 06 2007 - Tuesday Jun 12 2007
from the XQuery_0-18 branch to the development trunk
Index: pathfinder.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pathfinder.mx,v
retrieving revision 1.369
retrieving revision 1.370
diff -u -d -r1.369 -r1.370
--- pathfinder.mx 6 Jun 2007 20:19:29 -0000 1.369
+++ pathfinder.mx 12 Jun 2007 17:35:59 -0000 1.370
@@ -72,6 +72,8 @@
const XQUERY_STATUS_READY := 3;
var xquery_status := XQUERY_STATUS_INITIALIZING;
+proc tj_is_indexed(str name) : bit { return false; } # dummy function in case
we have no tijah
+
var CATCH_module_pftiah := CATCH(module("pftijah")); # load pftijah only if
available
if (not(isnil(CATCH_module_pftiah))) {
if (trim(CATCH_module_pftiah) != "!ERROR: moduleClient: module(pftijah)
load error.") {
@@ -2159,18 +2161,16 @@
# first shred (readonly) automatically creates persistent sorted index
var cnt := bat(str(lng(coll_oid)) + "_qn_histogram");
unq := reverse(ord_uselect([*](cnt,[log]([dbl](cnt))), dbl_nil,
dbl(count(knd))));
- } else if (maintain) {
+ } else {
# incremental readonly shred with existing indices: deactivate them
var coll_shortlock := reverse(runtime).fetch(RT_LOCK_FREELIST);
var coll_longlock := reverse(runtime).fetch(RT_NID_FREELIST);
-
runtime.delete().insert(coll_shortlock,empty_bat).insert(coll_longlock,empty_bat);
+ runtime.delete().insert(runtime(coll_shortlock,coll_longlock));
var qn_nid := commitbat + "_qn_nid";
var vx_hsh_nid := commitbat + "_vx_hsh_nid";
commitbat := "old_" + commitbat;
CATCH({ bat(qn_nid).persists(false).rename(commitbat + "_qn_nid");
bat(vx_hsh_nid).persists(false).rename(commitbat +
"_vx_hsh_nid"); }); # delete idx in commit
- } else {
- commitbat := str_nil; # incremental readonly shred, but no index to
delete
}
if (int(unq) != int(empty_bat)) {
# compute a (partial) [qn,nid] inverted list, and insert in into the
index
@@ -2245,11 +2245,11 @@
del := reverse(ws.fetch(QN_NID_DEL).fetch(cont)).join(qn_ids);
}
}
- if (valid and bit(count(ins) + count(del))) {
+ if (not(valid)) ERROR("index_lookup: qn_nid not indexed");
+ if (bit(count(ins) + count(del))) {
# avoid doing this when ins/del are empty: res maybe a view on idx
(readonly case)
res :=
res.access(BAT_WRITE).insert(ins).deleteBuns(del).access(BAT_READ);
}
- if (not(valid)) ERROR("index_lookup: qn_nid not indexed");
# SCJ must catch error and use sequential post-filter instead.
return ws_docfilter(ws, sort(res).hmark(oid_nil), cont);
@@ -2435,6 +2435,8 @@
{
var loc := str_nil, uri := ""; # nil loc/uri tells vx_lookup to look for
*all* ID/IDREF attributes
+ if (count(id_iter) = 0) return bat(oid,oid);
+
# get root nids, which identify the XML fragment in which we must look
var id_root := get_root(ws, id_item, id_kind, id_cont).mposjoin(id_cont,
ws.fetch(PRE_NID));
@@ -3095,26 +3097,28 @@
}
PROC ws_opendoc(BAT[void,BAT] ws, BAT[void,str] idx_names) : BAT[oid,oid]
{
- return ws_opendoc(ws,idx_names, bat(void,str).seqbase(0@0),
-                   bat(void,oid).seqbase(0@0),
-                   bat(void,oid).seqbase(0@0), true);
+ return ws_opendoc(ws,idx_names.seqbase(0@0), bat(void,str).seqbase(0@0),
+                   bat(void,oid).seqbase(0@0),
+                   bat(void,oid).seqbase(0@0), true);
}
# pf:collection(), get 'collection root' as a single startpoint to query an
entire collection
PROC ws_collection_root(BAT[void,BAT] ws, BAT[void,str] colnames) :
BAT[oid,oid]
{
+ var colnames_unq :=
colnames.tdiff(ws.fetch(CONT_NAME)).tunique().mirror(); # [str,str]
lock_set(pf_short);
- var colname_coll, err := CATCH({ colname_coll :=
reverse(colnames).mirror().leftjoin(reverse(collection_name));
-
colname_pins.insert(colname_coll.kunique().project(ws_id(ws))); });
+ var colnames_coll, err := CATCH({ colnames_coll :=
colnames_unq.leftjoin(reverse(collection_name));
+
colname_pins.insert(colnames_coll.project(ws_id(ws))); });
lock_unset(pf_short);
- var notfound := colname_coll.kdiff(reverse(colnames));
+ var notfound := reverse(colnames_coll).tdiff(colnames);
if (count(notfound) > 0) ERROR("pf:collection() %s not found (%d such
errors).\n", notfound.fetch(0), count(notfound));
- colname_coll@batloop() {
+ colnames_coll@batloop() {
var docBAT := [bat]([+](str(int($t)), ws_dsk).reverse().mirror()); #
get master bats
ws_opencoll(ws, docBAT, $h, $t); # collections are loaded one-by-one
}
- return
reverse(colname_coll.leftjoin(reverse(ws.fetch(CONT_COLL)))).project(0@0); # [CONT,PRE]
+ var ret :=
reverse(colnames.leftjoin(reverse(ws.fetch(CONT_NAME)))).project(0@0); # [CONT,PRE]
+ return ret;
}
# fn:collection(), get document nodes of all documents in a collection
@@ -3379,6 +3383,7 @@
lng wsid) : oid
{
var verbose := >=(wsid, WS_MAXID);
+ var updatable := >(pageFree,0LL);
var err := str_nil;
var coll_shortlock := reverse(runtime).fetch(RT_LOCK_FREELIST);
var pre := [EMAIL PROTECTED];
@@ -3411,11 +3416,13 @@
}
if (isnil(coll_oid))
coll_oid := docid_base; # new collection got oid of first doc in it
+ else
+ updatable := (ttype(docBAT.fetch(MAP_PID)) = oid);
# finish the shred (set doc_oids in FRAG_ROOT, and maintain the nsloc
index)
var protect := not(isnil(coll_shortlock));
if (protect) lock_set(coll_shortlock); # never lock a collection inside
the short lock
- err := CATCH(__shred_into_docBAT(docBAT, colname, coll_oid, docid_base,
runtime, pre, att, >(pageFree,0LL), wsid));
+ err := CATCH(__shred_into_docBAT(docBAT, colname, coll_oid, docid_base,
runtime, pre, att, updatable, wsid));
if (protect) lock_unset(coll_shortlock);
if (not(isnil(err))) ERROR(err);
@@ -3517,10 +3524,9 @@
[logger_add_bat](pf_logger, [bat](newcoll), newcoll);
lock_unset(pf_wal);
}
-
# checkpoint the new bats
if (pf_commit_docmgt)
- if (pf_checkpoint(commitBAT)) {
+ if (pf_checkpoint(commitBAT) or cleanup) {
# remove the in-memory undo log; and trim collection
lock_set(pf_short);
err := CATCH(commitBAT := _shred_doc_cleanup(wsid, cleanup));
@@ -3574,11 +3580,7 @@
count(reverse(del).uselect(oid_nil)));
}
del := kunique(del);
-
- # check if a document-id = its collection-id
- # this detects del-doc()s on single-doc collections
- var chk := mirror(del).leftjoin(doc_collection);
- ret := [=](chk.hmark(0@0),chk.tmark(0@0)).texist(true);
+ ret := true;
} else if (cachedOnly) {
del := del.uselect(timestamp_nil,timestamp_nil);
}
Index: serialize.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/serialize.mx,v
retrieving revision 1.96
retrieving revision 1.97
diff -u -d -r1.96 -r1.97
--- serialize.mx 20 May 2007 00:25:04 -0000 1.96
+++ serialize.mx 12 Jun 2007 17:36:00 -0000 1.97
@@ -1539,7 +1539,8 @@
res = GDK_SUCCEED;
} else { /* the new container implementation */
getWsValue(p, ctx, docIndex, PRE_SIZE, start_oid);
- oid sz = *(int*) p;
+ /* oid sz = *(int*) p; */
+ oid sz = (*(int*) p) & ~(1<<31);
if (emitNodesInRange (ctx, start_oid,start_oid+sz, NULL,
docIndex)) {
if (ctx->driverFun->handle_endDocument(ctx)) {
res = GDK_SUCCEED;
Index: pf_support.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pf_support.mx,v
retrieving revision 1.246
retrieving revision 1.247
diff -u -d -r1.246 -r1.247
--- pf_support.mx 6 Jun 2007 21:28:44 -0000 1.246
+++ pf_support.mx 12 Jun 2007 17:35:59 -0000 1.247
@@ -2582,6 +2582,12 @@
ERROR("updating transient container.\n");
}
+ # we currently cannot update collections that have a pftijah text index
+ var conflict :=
[tj_is_indexed](affected_conts.leftjoin(ws.fetch(CONT_NAME)).reverse().mirror()).uselect(true);
+ if (count(conflict) > 0) {
+ ERROR("cannot update text-indexed collection %s (%s such errors).\n",
reverse(conflict).fetch(0), count(confict));
+ }
+
# check that all containers are updatable (i.e. that none are read-only)
if ([ttype](affected_conts.join(ws.fetch(MAP_PID))).uselect(void).count() >
0) {
ERROR("updating read-only document.\n");
Index: shredder.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/shredder.mx,v
retrieving revision 1.128
retrieving revision 1.129
diff -u -d -r1.128 -r1.129
--- shredder.mx 6 Jun 2007 20:19:31 -0000 1.128
+++ shredder.mx 12 Jun 2007 17:36:00 -0000 1.129
@@ -408,7 +408,10 @@
BAT *b = sb->bat;
b->batBuns->free = n*BUNsize(b);
BATsetcount(b, n);
- b->tsorted = 0;
+ if (b->ttype) {
+ b->tsorted = 0;
+ b->tdense = 0;
+ }
}
@@ -1065,8 +1068,13 @@
const xmlChar *c)
{
shredCtxStruct *shredCtx = (shredCtxStruct*) xmlCtx;
+ xmlParserCtxtPtr pctx = ((shredCtxStruct*) xmlCtx)->xmlCtx;
node_t node;
+ if ( pctx->inSubset ) {
+ /* handle a libxml2 peculiarity, comments in DTD are added to the doc
*/
+ return;
+ }
if (!handle_xml_chars(shredCtx)) {
BAILOUT(shredCtx);
}
@@ -1198,8 +1206,13 @@
* subset for this doc
*/
ctx->myDoc = xmlNewDoc(ctx->version);
- ctx->myDoc->extSubset = dtd;
}
+ if ( !ctx->myDoc->extSubset ) {
+ ctx->myDoc->extSubset = dtd;
+ } else {
+ stream_printf(GDKout, "!WARNING: double external
subset(\"%s\") may cause problems.\n", SystemID);
+ xmlFreeDtd(dtd);
+ }
if (!handle_externalSubset(shredCtx, dtd))
stream_printf(GDKout, "!WARNING: xmlParseDTD(\"%s\") failed,
skipping ID/IDREF information.\n", SystemID);
// xmlFreeDtd(dtd); should be done by freeer of myDoc
@@ -1760,9 +1773,13 @@
* whether it might still hold...)
*/
if (shredCtx->dstBAT[PRE_SIZE].bat->tkey == TRUE) {
+ shredCtx->dstBAT[PRE_SIZE].bat->tsorted = 0;
+ shredCtx->dstBAT[PRE_SIZE].bat->tdense = 0;
BATkey(BATmirror(shredCtx->dstBAT[PRE_SIZE].bat), FALSE);
}
if (shredCtx->dstBAT[PRE_KIND].bat->tkey == TRUE) {
+ shredCtx->dstBAT[PRE_KIND].bat->tsorted = 0;
+ shredCtx->dstBAT[PRE_KIND].bat->tdense = 0;
BATkey(BATmirror(shredCtx->dstBAT[PRE_KIND].bat), FALSE);
}
}
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins