Update of /cvsroot/monetdb/pathfinder/runtime
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv6543/runtime

Modified Files:
      Tag: XQuery_0-22
        pathfinder.mx pf_support.mx 
Log Message:
Peter implemented a more efficient version of ws_docfilter by doing
most of the work in C.
This also fixes bug [ 1925123 ] XQ: insert fails after many queries.


U pathfinder.mx
Index: pathfinder.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pathfinder.mx,v
retrieving revision 1.399.2.7
retrieving revision 1.399.2.8
diff -u -d -r1.399.2.7 -r1.399.2.8
--- pathfinder.mx       20 Mar 2008 15:51:21 -0000      1.399.2.7
+++ pathfinder.mx       31 Mar 2008 15:59:40 -0000      1.399.2.8
@@ -2405,22 +2405,19 @@
     return id_pre;
 }
 
-# routine that filters out pre-s that belong to deleted collection documents 
-# also makes sure that documents from which surviving pre-s stem are added to 
OPEN_DOCID ws entry
+# routine that converts NIDs to PREs and filters out pre-s that belong to 
deleted collection documents
+# also makes sure that returned documents get an OPEN_DOCID ws entry
 PROC ws_docfilter(bat[void,bat] ws,
                   bat[any::1,oid] any_pre,
                   oid cont) : bat[any::1,oid]
 {
-    var cont_coll := ws.fetch(CONT_COLL);
+    var map_pid   := ws.fetch(MAP_PID).fetch(cont);
+    var nid_rid   := ws.fetch(NID_RID).fetch(cont);
+    var pre_size  := ws.fetch(PRE_SIZE).fetch(cont);
+    var frag_root := ws.fetch(FRAG_ROOT).fetch(cont);
     var open_cont := ws.fetch(OPEN_CONT);
     var open_docid:= ws.fetch(OPEN_DOCID);
     var open_name := ws.fetch(OPEN_NAME);
-    var pre_size  := ws.fetch(PRE_SIZE).fetch(cont);
-    var pre_level := ws.fetch(PRE_LEVEL).fetch(cont);
-    var pre_nid   := ws.fetch(PRE_NID).fetch(cont);
-    var nid_rid   := ws.fetch(NID_RID).fetch(cont);
-    var map_pid   := ws.fetch(MAP_PID).fetch(cont);
-    var frag_root := ws.fetch(FRAG_ROOT).fetch(cont);
 
     # first, ensure sorted PREs
     var isolate := (ttype(map_pid) = oid);
@@ -2434,67 +2431,35 @@
         any_pre := reverse(reverse(any_nid).leftfetchjoin(any_pre.hmark([EMAIL 
PROTECTED])));
     }
 
-    var one := any_pre.ord_select([EMAIL PROTECTED]); # never returned by 
index, except special 'give-up' case
-    if (count(any_pre) = count(one)) return any_pre;
-    var cur := count(one); # ignore bogus nodes 
-    var tot := count(any_pre);
-    var res := bat(one.htype(), oid, tot);
-    if (one.htype() = void) {
-        res := res.seqbase(one.seqbase());
-    }
-        res := res.insert(one); # final result (copied excluding deleted docs)
-    var opt := true;
-    var anc := bat(void,oid).append([EMAIL PROTECTED]); # start 
opportunistically with first (potential) doc [EMAIL PROTECTED]
-
-    while(cur < tot) {
-        var doc_bat := bat(oid,bat); 
-        [EMAIL PROTECTED]() {
-            # get all any_pre tuples in this document. any_pre is tsorted, 
ord_select => SLICE VIEW.
-            var new_bat := any_pre.slice(cur,tot).ord_select(oid_nil, 
oid(lng($t) + pre_size.fetch($t))); 
-
-            # fill bat-of-bats, note that it is limited to max 
IDX_MAGICAL_CONST bats
-            var cnt := count(new_bat); 
-            if (cnt > 0) doc_bat.insert(reverse(frag_root).find($t), new_bat); 
-            cur :+= cnt; # skips document (nice if any_pre has >> 
IDX_MAGICAL_CONST nodes from it!)
-        }
+    # get the list of new documents and their root nodes of which we have 
received nodes
+    var docpre_new := any_pre.docsused(frag_root, nid_rid, map_pid, 
pre_size).tdiff(open_docid);
 
-        # only if we saw a yet unopened document, check its continued existence
-        var doc_new := doc_bat.kdiff(reverse(open_docid));
-        if (count(doc_new) > 0) {
-            # have to access the meta tables to check whether the docs still 
exists
-            lock_set(pf_short);
-            var doc_nme, err := CATCH(doc_nme := 
mirror(doc_bat).leftjoin(doc_name));
-            lock_unset(pf_short);
-            if (not(isnil(err))) ERROR(err);
+    if (count(docpre_new) > 0) {
+        # lock to access the meta tables for checking whether the new docs 
still exists
+        lock_set(pf_short);
+        var err := CATCH(docpre_new := docpre_new.outerjoin(doc_name));
+        lock_unset(pf_short);
+        if (not(isnil(err))) ERROR(err);
 
-            # add new documents as open in working set 
-            doc_nme := doc_nme.ord_select(str_nil,str_nil);
-            doc_new := mirror(doc_new).leftjoin(doc_nme);
-            open_name.append(doc_new.tmark([EMAIL PROTECTED]));
-            open_cont.append(doc_new.tmark([EMAIL PROTECTED]).project(cont));
-            open_docid.append(doc_new.hmark([EMAIL PROTECTED]));
+        # remove results that correspond to already deleted documents (if any)
+        var docpre_nme := docpre_new.ord_select(str_nil,str_nil);
+        if (count(docpre_nme) < count(docpre_new)) {
+            var del := bat(void,oid,count(any_pre));
+            var pre := any_pre.tmark(oid_nil);
 
-            if (count(doc_nme) < count(doc_bat)) { # some documents did not 
exist!
-                doc_bat := mirror(doc_nme).leftjoin(doc_bat);
-                opt := false;
+            docpre_new.kdiff(docpre_nme)@batloop() {
+                del.append(pre.ord_select($h,oid(lng($h) + 
pre_size.fetch($h))));
             }
+            any_pre := any_pre.tdiff(del);
         }
-        # if in the first ancestor batch, we found everything: no res 
construction is needed
-        if (opt and (cur = tot)) return any_pre; # most code should get here
-
-        # add all 'ok' any_pre tuples to the result
-        [insert](const res, doc_bat); 
-        if (count(res) > 0) opt := false;
-
-        # give *multiple* next tuples to ancestor (nice if we have nodes from 
many docs)
-        anc := any_pre.slice(cur,cur+IDX_MAGICAL_CONST).tmark([EMAIL 
PROTECTED]); 
-        anc := ll_ancestor(mirror(anc), anc, pre_size, pre_level);
-        anc := {min}(anc).tunique().mirror().leftfetchjoin(pre_nid); # reduce 
to unique doc nodes
+        # register the newly opened documents
+        open_name.append(docpre_nme.tmark([EMAIL PROTECTED]));
+        open_cont.append(docpre_nme.tmark([EMAIL PROTECTED]).project(cont));
+        open_docid.append(docpre_nme.hmark([EMAIL PROTECTED]));
     }
-    return res.access(BAT_READ);
+    return assert_order(any_pre);
 }
 
-
 # find node pre numbers by ID (/NID) or IDREF. 
 # ID/IDREF lookup uses the value index (vx), using the same method as 
attribute lookup
 #

U pf_support.mx
Index: pf_support.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pf_support.mx,v
retrieving revision 1.277.6.7
retrieving revision 1.277.6.8
diff -u -d -r1.277.6.7 -r1.277.6.8
--- pf_support.mx       25 Feb 2008 17:55:18 -0000      1.277.6.7
+++ pf_support.mx       31 Mar 2008 15:59:44 -0000      1.277.6.8
@@ -408,6 +408,9 @@
 .COMMAND ws_bat(lng wsid) : BAT[void,bat] = CMDws_bat; 
  "retrieve BAT ws that belongs to wsid; note that you must be sure that it is 
still alive"
 
+.COMMAND docsused(BAT[any,oid] any_pre, BAT[oid,oid] frag_root, BAT[void,oid] 
nid_rid,
+                 BAT[void,oid] map_pid, BAT[void,int] pre_size) : BAT[oid,oid] 
= CMDdocsused;
+ "find out the list of [rootpre,docid] in frag_root[docid,rootnid] that have 
nodes in any_pre (nids are swizzled on demand)"
 
 .PRELUDE = pf_support_prelude;
 .EPILOGUE = pf_support_epilogue;
@@ -7519,6 +7522,57 @@
     return GDK_FAIL;
 }
 
+#define SWIZZLE(idx) ((map[idx >> REMAP_PAGE_BITS] << REMAP_PAGE_BITS) | (idx 
& REMAP_PAGE_MASK))
+
+int
+CMDdocsused(BAT** res, BAT* input, BAT* frag_root, BAT* nid_rid, BAT* map_pid, 
BAT* pre_size) {
+    oid* docid = (oid*) Hloc(frag_root, BUNfirst(frag_root));
+    oid* root = (oid*) Tloc(frag_root, BUNfirst(frag_root));
+    int* size = (int*) Tloc(pre_size, BUNfirst(pre_size));
+    BATiter inputi = bat_iterator(input);
+    oid* map = (oid*) Tloc(map_pid, BUNfirst(map_pid));
+    oid* rid = (oid*) Tloc(nid_rid, BUNfirst(nid_rid));
+    size_t i=0,j=0, n=BATcount(input), m=BATcount(frag_root);
+
+    *res = BATnew(TYPE_oid, TYPE_oid, 100);
+    if (*res == NULL)
+        return GDK_FAIL;
+    BATseqbase(*res, 0);
+
+    while (i<n) {
+        oid delta, docpre, cur = * (oid *) BUNtail(inputi, i);
+
+        /* skip all documents before cur */
+        if (map_pid->ttype == TYPE_void) {
+            for (delta = (m-j) >> 4; delta > 40; delta >>= 4)
+                while (j+delta < m && cur > (docpre=root[j+delta]) + 
size[docpre])
+                    j += delta;
+            while (j < m && cur > (docpre=root[j]) + size[docpre])
+                j++;
+        } else {
+            /* we need swizzling */
+            for (delta = (m-j) >> 4; delta > 40; delta >>= 4)
+                while (j+delta < m && cur > 
(docpre=SWIZZLE(rid[root[j+delta]])) + size[docpre])
+                    j += delta;
+            while (j < m && cur > (docpre=SWIZZLE(rid[root[j]])) + 
size[docpre])
+                j++;
+        }
+        assert(j < m);
+
+        /* insert docid */
+        BUNins(*res, &docpre, &docid[j], FALSE);
+
+        /* skip all nodes of this document */
+        docpre = docpre + size[docpre];
+        for (delta = (n-i) >> 4; delta > 40; delta >>= 4)
+            while (i+delta < n && * (oid *) BUNtail(inputi, i+delta) <= docpre)
+                i += delta;
+        while (i < n && * (oid *) BUNtail(inputi, i) <= docpre)
+            i++;
+    }
+    return GDK_SUCCEED;
+}
+
 #include "serialize.h"
 
 int


-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to