Update of /cvsroot/monetdb/pathfinder/runtime
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv6543/runtime
Modified Files:
Tag: XQuery_0-22
pathfinder.mx pf_support.mx
Log Message:
Peter implemented a more efficient version of ws_docfilter by doing
most of the work in C.
This also fixes bug [ 1925123 ] XQ: insert fails after many queries.
U pathfinder.mx
Index: pathfinder.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pathfinder.mx,v
retrieving revision 1.399.2.7
retrieving revision 1.399.2.8
diff -u -d -r1.399.2.7 -r1.399.2.8
--- pathfinder.mx 20 Mar 2008 15:51:21 -0000 1.399.2.7
+++ pathfinder.mx 31 Mar 2008 15:59:40 -0000 1.399.2.8
@@ -2405,22 +2405,19 @@
return id_pre;
}
-# routine that filters out pre-s that belong to deleted collection documents
-# also makes sure that documents from which surviving pre-s stem are added to
OPEN_DOCID ws entry
+# routine that converts NIDs to PREs and filters out pre-s that belong to
deleted collection documents
+# also makes sure that returned documents get an OPEN_DOCID ws entry
PROC ws_docfilter(bat[void,bat] ws,
bat[any::1,oid] any_pre,
oid cont) : bat[any::1,oid]
{
- var cont_coll := ws.fetch(CONT_COLL);
+ var map_pid := ws.fetch(MAP_PID).fetch(cont);
+ var nid_rid := ws.fetch(NID_RID).fetch(cont);
+ var pre_size := ws.fetch(PRE_SIZE).fetch(cont);
+ var frag_root := ws.fetch(FRAG_ROOT).fetch(cont);
var open_cont := ws.fetch(OPEN_CONT);
var open_docid:= ws.fetch(OPEN_DOCID);
var open_name := ws.fetch(OPEN_NAME);
- var pre_size := ws.fetch(PRE_SIZE).fetch(cont);
- var pre_level := ws.fetch(PRE_LEVEL).fetch(cont);
- var pre_nid := ws.fetch(PRE_NID).fetch(cont);
- var nid_rid := ws.fetch(NID_RID).fetch(cont);
- var map_pid := ws.fetch(MAP_PID).fetch(cont);
- var frag_root := ws.fetch(FRAG_ROOT).fetch(cont);
# first, ensure sorted PREs
var isolate := (ttype(map_pid) = oid);
@@ -2434,67 +2431,35 @@
any_pre := reverse(reverse(any_nid).leftfetchjoin(any_pre.hmark([EMAIL
PROTECTED])));
}
- var one := any_pre.ord_select([EMAIL PROTECTED]); # never returned by
index, except special 'give-up' case
- if (count(any_pre) = count(one)) return any_pre;
- var cur := count(one); # ignore bogus nodes
- var tot := count(any_pre);
- var res := bat(one.htype(), oid, tot);
- if (one.htype() = void) {
- res := res.seqbase(one.seqbase());
- }
- res := res.insert(one); # final result (copied excluding deleted docs)
- var opt := true;
- var anc := bat(void,oid).append([EMAIL PROTECTED]); # start
opportunistically with first (potential) doc [EMAIL PROTECTED]
-
- while(cur < tot) {
- var doc_bat := bat(oid,bat);
- [EMAIL PROTECTED]() {
- # get all any_pre tuples in this document. any_pre is tsorted,
ord_select => SLICE VIEW.
- var new_bat := any_pre.slice(cur,tot).ord_select(oid_nil,
oid(lng($t) + pre_size.fetch($t)));
-
- # fill bat-of-bats, note that it is limited to max
IDX_MAGICAL_CONST bats
- var cnt := count(new_bat);
- if (cnt > 0) doc_bat.insert(reverse(frag_root).find($t), new_bat);
- cur :+= cnt; # skips document (nice if any_pre has >>
IDX_MAGICAL_CONST nodes from it!)
- }
+ # get the list of new documents and their root nodes of which we have
received nodes
+ var docpre_new := any_pre.docsused(frag_root, nid_rid, map_pid,
pre_size).tdiff(open_docid);
- # only if we saw a yet unopened document, check its continued existence
- var doc_new := doc_bat.kdiff(reverse(open_docid));
- if (count(doc_new) > 0) {
- # have to access the meta tables to check whether the docs still
exists
- lock_set(pf_short);
- var doc_nme, err := CATCH(doc_nme :=
mirror(doc_bat).leftjoin(doc_name));
- lock_unset(pf_short);
- if (not(isnil(err))) ERROR(err);
+ if (count(docpre_new) > 0) {
+ # lock to access the meta tables for checking whether the new docs
still exists
+ lock_set(pf_short);
+ var err := CATCH(docpre_new := docpre_new.outerjoin(doc_name));
+ lock_unset(pf_short);
+ if (not(isnil(err))) ERROR(err);
- # add new documents as open in working set
- doc_nme := doc_nme.ord_select(str_nil,str_nil);
- doc_new := mirror(doc_new).leftjoin(doc_nme);
- open_name.append(doc_new.tmark([EMAIL PROTECTED]));
- open_cont.append(doc_new.tmark([EMAIL PROTECTED]).project(cont));
- open_docid.append(doc_new.hmark([EMAIL PROTECTED]));
+ # remove results that correspond to already deleted documents (if any)
+ var docpre_nme := docpre_new.ord_select(str_nil,str_nil);
+ if (count(docpre_nme) < count(docpre_new)) {
+ var del := bat(void,oid,count(any_pre));
+ var pre := any_pre.tmark(oid_nil);
- if (count(doc_nme) < count(doc_bat)) { # some documents did not
exist!
- doc_bat := mirror(doc_nme).leftjoin(doc_bat);
- opt := false;
+ docpre_new.kdiff(docpre_nme)@batloop() {
+ del.append(pre.ord_select($h,oid(lng($h) +
pre_size.fetch($h))));
}
+ any_pre := any_pre.tdiff(del);
}
- # if in the first ancestor batch, we found everything: no res
construction is needed
- if (opt and (cur = tot)) return any_pre; # most code should get here
-
- # add all 'ok' any_pre tuples to the result
- [insert](const res, doc_bat);
- if (count(res) > 0) opt := false;
-
- # give *multiple* next tuples to ancestor (nice if we have nodes from
many docs)
- anc := any_pre.slice(cur,cur+IDX_MAGICAL_CONST).tmark([EMAIL
PROTECTED]);
- anc := ll_ancestor(mirror(anc), anc, pre_size, pre_level);
- anc := {min}(anc).tunique().mirror().leftfetchjoin(pre_nid); # reduce
to unique doc nodes
+ # register the newly opened documents
+ open_name.append(docpre_nme.tmark([EMAIL PROTECTED]));
+ open_cont.append(docpre_nme.tmark([EMAIL PROTECTED]).project(cont));
+ open_docid.append(docpre_nme.hmark([EMAIL PROTECTED]));
}
- return res.access(BAT_READ);
+ return assert_order(any_pre);
}
-
# find node pre numbers by ID (/NID) or IDREF.
# ID/IDREF lookup uses the value index (vx), using the same method as
attribute lookup
#
U pf_support.mx
Index: pf_support.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pf_support.mx,v
retrieving revision 1.277.6.7
retrieving revision 1.277.6.8
diff -u -d -r1.277.6.7 -r1.277.6.8
--- pf_support.mx 25 Feb 2008 17:55:18 -0000 1.277.6.7
+++ pf_support.mx 31 Mar 2008 15:59:44 -0000 1.277.6.8
@@ -408,6 +408,9 @@
.COMMAND ws_bat(lng wsid) : BAT[void,bat] = CMDws_bat;
"retrieve BAT ws that belongs to wsid; note that you must be sure that it is
still alive"
+.COMMAND docsused(BAT[any,oid] any_pre, BAT[oid,oid] frag_root, BAT[void,oid]
nid_rid,
+ BAT[void,oid] map_pid, BAT[void,int] pre_size) : BAT[oid,oid]
= CMDdocsused;
+ "find out the list of [rootpre,docid] in frag_root[docid,rootnid] that have
nodes in any_pre (nids are swizzled on demand)"
.PRELUDE = pf_support_prelude;
.EPILOGUE = pf_support_epilogue;
@@ -7519,6 +7522,57 @@
return GDK_FAIL;
}
+#define SWIZZLE(idx) ((map[idx >> REMAP_PAGE_BITS] << REMAP_PAGE_BITS) | (idx
& REMAP_PAGE_MASK))
+
+int
+CMDdocsused(BAT** res, BAT* input, BAT* frag_root, BAT* nid_rid, BAT* map_pid,
BAT* pre_size) {
+ oid* docid = (oid*) Hloc(frag_root, BUNfirst(frag_root));
+ oid* root = (oid*) Tloc(frag_root, BUNfirst(frag_root));
+ int* size = (int*) Tloc(pre_size, BUNfirst(pre_size));
+ BATiter inputi = bat_iterator(input);
+ oid* map = (oid*) Tloc(map_pid, BUNfirst(map_pid));
+ oid* rid = (oid*) Tloc(nid_rid, BUNfirst(nid_rid));
+ size_t i=0,j=0, n=BATcount(input), m=BATcount(frag_root);
+
+ *res = BATnew(TYPE_oid, TYPE_oid, 100);
+ if (*res == NULL)
+ return GDK_FAIL;
+ BATseqbase(*res, 0);
+
+ while (i<n) {
+ oid delta, docpre, cur = * (oid *) BUNtail(inputi, i);
+
+ /* skip all documents before cur */
+ if (map_pid->ttype == TYPE_void) {
+ for (delta = (m-j) >> 4; delta > 40; delta >>= 4)
+ while (j+delta < m && cur > (docpre=root[j+delta]) +
size[docpre])
+ j += delta;
+ while (j < m && cur > (docpre=root[j]) + size[docpre])
+ j++;
+ } else {
+ /* we need swizzling */
+ for (delta = (m-j) >> 4; delta > 40; delta >>= 4)
+ while (j+delta < m && cur >
(docpre=SWIZZLE(rid[root[j+delta]])) + size[docpre])
+ j += delta;
+ while (j < m && cur > (docpre=SWIZZLE(rid[root[j]])) +
size[docpre])
+ j++;
+ }
+ assert(j < m);
+
+ /* insert docid */
+ BUNins(*res, &docpre, &docid[j], FALSE);
+
+ /* skip all nodes of this document */
+ docpre = docpre + size[docpre];
+ for (delta = (n-i) >> 4; delta > 40; delta >>= 4)
+ while (i+delta < n && * (oid *) BUNtail(inputi, i+delta) <= docpre)
+ i += delta;
+ while (i < n && * (oid *) BUNtail(inputi, i) <= docpre)
+ i++;
+ }
+ return GDK_SUCCEED;
+}
+
#include "serialize.h"
int
-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins