Update of /cvsroot/monetdb/pathfinder/runtime
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv32077/runtime

Modified Files:
        ll_staircasejoin.mx pf_support.mx 
Log Message:

Finally, the last remaining XPath location step has been loop-lifted: 
preceding-siblings.

The implementation falls back to applying a loop-lifted parent-step, first
--- using unique temporary iter-numbers, one for each item, we keep the 
loop-lifted parent-step from pruning, and hence, get excatly one parent-node
for each context-node (item) ---
then, a slightly modified loop-lifted child-step is used to retried all children
of the just calculated parents up to the respective original context nodes.

For now, we only have kind-test push-down (like with following-sibling,
preceding & following), but no ns/loc-test push-down, yet.
While the child step should support this readily, I was not yet able to
locate and eliminate some strange bug in the calculation of candidates,
that occurs with one of our preceding-sibling tests.

Special thanks go to Jan R. for proving the initial idea of falling-back
to parent-step plus "context-node-limited" child-step.


Index: ll_staircasejoin.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/ll_staircasejoin.mx,v
retrieving revision 1.57
retrieving revision 1.58
diff -u -d -r1.57 -r1.58
--- ll_staircasejoin.mx 13 May 2007 11:39:21 -0000      1.57
+++ ll_staircasejoin.mx 9 Aug 2007 21:49:39 -0000       1.58
@@ -74,21 +74,20 @@
 
 /* DESCENDANT STEP */
 @:ll_proto(descendant)@
-@:ll_head(descendant)@
+@:ll_head(descendant,FALSE)@
     self = FALSE;
     res_size = BATcount(iter_bat);     /* FIXME: estimate size! */
 @:ll_main(descendant, (size[pre] & (1<<31)))@
 
 /* DESCENDANT-OR-SELF STEP */
-@:ll_head(descendant_or_self)@
+@:ll_head(descendant_or_self,FALSE)@
     self = TRUE;
     res_size = BATcount(iter_bat);     /* FIXME: estimate size! */
 @:ll_main(descendant, (size[pre] & (1<<31)))@
 
 /* CHILD STEP */
 @:ll_proto(child)@
-@:ll_head(child)@
-    trivial_cases = FALSE;
+@:ll_head(child,TRUE)@
     res_size = BATcount(iter_bat);     /* FIXME: estimate size! */
 @:ll_main(child, FALSE)@
 
@@ -96,12 +95,12 @@
 
 @= ll_proto
 static int
[EMAIL PROTECTED] (     BAT **res, BAT *iter_bat, BAT *ctx_bat, BAT *cand_bat, 
int *size,
[EMAIL PROTECTED] (     BAT **res, BAT *iter_bat, BAT *ctx_bat, BAT *end_ctx, 
BAT *cand_bat, int *size,
        oid min_iter, oid max_iter, bit no_iter_order, bit self, chr* kind, chr 
kind_test);
 @
 @= ll_head
 int
[EMAIL PROTECTED] (BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *pre_size, 
BAT *cand_bat,
[EMAIL PROTECTED] (BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *end_ctx, BAT 
*pre_size, BAT *cand_bat,
         bit *_one_iter, bit *_one_ctx,
         oid *_min_iter, oid *_max_iter, bit *_no_iter_order, chr *_kind_test)
 {
@@ -114,7 +113,7 @@
     oid max_iter = *_max_iter;
     bit no_iter_order = *_no_iter_order;
     bit self = FALSE;
-    bit trivial_cases = TRUE;
+    bit child_step = @2;
     chr *kind = NULL, kind_test = *_kind_test;
     BAT *res = NULL;
     BAT *del = NULL;
@@ -125,6 +124,7 @@
 
     BATcheck(iter_bat, name);
     BATcheck(ctx_bat, name);
+    BATcheck(end_ctx, name);
     BATcheck(pre_size, name);
     BATcheck(cand_bat, name);
 
@@ -148,6 +148,19 @@
         GDKerror("%s: iter_bat must be ordered on tail.\n", name);
         @:ll_return(GDK_FAIL)@
     }
+    if (BATcount(end_ctx) == 0) {
+        end_ctx = NULL; /* empty; not used. */
+    } else {
+        if (!child_step) {
+            GDKerror("%s: end_ctx must be empty.\n", name);
+            @:ll_return(GDK_FAIL)@
+        } else 
+        if ((end_ctx->hseqbase != ctx_bat->hseqbase) || (BATcount(end_ctx) != 
BATcount(ctx_bat)))
+        {
+            GDKerror("%s: end_ctx must be head-aligned with iter_bat & 
ctx_bat, i.e., have equal head seqbases and length.\n", name);
+            @:ll_return(GDK_FAIL)@
+        }
+    }
 
     if (!BAThdense(pre_size))
     {
@@ -214,7 +227,7 @@
      * @1: step name
      */
     
-    if ( trivial_cases && one_ctx ) {
+    if ( one_ctx && !child_step ) {
 
         /* ------------------------ trivial cases ------------------------- */
 
@@ -306,7 +319,7 @@
         res->batBuns->free = dst - res->batBuns->base;
         BATsetcount(res, 
(res->batBuns->free+Bunbase(res)-BUNfirst(res))/BUNsize(res));
     } else
-    /* !(trivial_cases && one_ctx) */ {
+    /* ( !one_ctx || child_step ) */ {
 
         /* ------------------------ general cases ------------------------- */
 
@@ -318,7 +331,7 @@
             @:ll_return(GDK_FAIL)@
         }
 
-        if ([EMAIL PROTECTED](&res, iter_bat, ctx_bat, cand_bat, size, 
+        if ([EMAIL PROTECTED](&res, iter_bat, ctx_bat, end_ctx, cand_bat, 
size, 
                   min_iter, max_iter, no_iter_order, self, kind, kind_test) == 
GDK_FAIL )
         {
             @:ll_return(GDK_FAIL)@
@@ -406,19 +419,20 @@
 struct stack_item_C {
     oid eocs;           /* end of ctx scope (pre + size) */
     oid next_child;     /* preorder rank of the next child node */
-    BUN first;          /* first iter row of the actual ctx node */
-    BUN last;           /* last iter row of the actual ctx node */
+    size_t first;       /* first iter row of the actual ctx node */
+    size_t last;        /* last iter row of the actual ctx node */
 };
 
 static int
-ll_child ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *cand_bat, int* size,
+ll_child ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *end_ctx, BAT 
*cand_bat, int* size,
              oid min_iter, oid max_iter, bit no_iter_order, bit self, chr 
*kind, chr kind_test)
 {
     BAT *res = *result;
     si_C *stack = 0;
-    int stack_top = 0, ctx_bunsize = 0, iter_bunsize = 0;
+    int stack_top = 0, ctx_bunsize = 0;
     oid pre = 0, ctx = 0, cur_ctx = 0;
-    BUN cur_bun = 0, iter_bun = 0, ctx_bun = 0, ctx_last = 0;
+    size_t cur_idx = 0, iter_idx = 0;
+    BUN ctx_bun = 0, ctx_last = 0;
     BUN cand_cur = cand_bat?BUNtloc(cand_bat,BUNfirst(cand_bat)):NULL;
     BUN cand_lst = 
cand_bat?BUNtloc(cand_bat,BUNlast(cand_bat)-BUNsize(cand_bat)):NULL;
 
@@ -436,24 +450,23 @@
     }
     stack_top = 0;
 
+    iter_idx = BUNindex(iter_bat,BUNfirst(iter_bat));
+    assert(!end_ctx || iter_idx == BUNindex(end_ctx,BUNfirst(end_ctx)));
 
-    iter_bunsize = BUNsize(iter_bat);
-    ctx_bunsize = BUNsize(ctx_bat);
-
-    iter_bun = BUNfirst(iter_bat);
     ctx_bun = BUNfirst(ctx_bat);
     ctx_last = BUNlast(ctx_bat);
+    ctx_bunsize = BUNsize(ctx_bat);
 
 @= getnextctx
-    iter_bun += iter_bunsize;
+    iter_idx++;
     ctx_bun += ctx_bunsize;
 @
 @= pushctx
     si_C new_stack_item;
     new_stack_item.eocs = ctx + size[ctx];
     new_stack_item.next_child = ctx + 1;
-    new_stack_item.first = iter_bun;
-    new_stack_item.last = iter_bun;
+    new_stack_item.first = iter_idx;
+    new_stack_item.last = iter_idx;
     stack[stack_top++] = new_stack_item;
     cur_ctx = ctx;
 @
@@ -487,7 +500,7 @@
         /* only a new iter has to be added
            to the list of active iters */
         else if (cur_ctx == ctx) {
-            stack[stack_top-1].last = iter_bun;
+            stack[stack_top-1].last = iter_idx;
             @:getnextctx@
         }
         /* evaluates the ctx node on top of the stack
@@ -570,7 +583,7 @@
         oid prev_iter = oid_nil;
 @
 @= skip_duplicates_body
-            oid next_iter = *(oid*)BUNtail(iter_bat,cur_bun);
+            oid next_iter = *(oid*)BUNtail(iter_bat,BUNptr(iter_bat,cur_idx));
             if (next_iter != prev_iter) {
                 /* (Even) if the same context node (item)
                  * occurs multiple times per iter,
@@ -588,14 +601,14 @@
             }
 @
 @= no_duplicates_body
-            oid next_iter = *(oid*)BUNtail(iter_bat,cur_bun);
+            oid next_iter = *(oid*)BUNtail(iter_bat,BUNptr(iter_bat,cur_idx));
             *(oid*)dst              = next_iter;
             *(oid*)(dst+SIZEOF_OID) = pre;
             dst += bunsize;
 @
 @= inner_loop_child_body
         @1
-        size_t grow = ((stack[stack_top-1].last - stack[stack_top-1].first) / 
iter_bunsize) + 1;
+        size_t grow = (stack[stack_top-1].last - stack[stack_top-1].first) + 1;
         BUN dst = NULL;
         int bunsize = BUNsize(res);
 
@@ -605,10 +618,20 @@
            a match */
         dst = BUNlast(res);
         assert(!res->hloc);
-        for (cur_bun = stack[stack_top-1].first;
-             cur_bun <= stack[stack_top-1].last;
-             cur_bun += iter_bunsize) {
-            @2
+        if (end_ctx) {
+            for (cur_idx = stack[stack_top-1].first;
+                 cur_idx <= stack[stack_top-1].last;
+                 cur_idx++) {
+                if (pre < *(oid*)BUNtail(end_ctx,BUNptr(end_ctx,cur_idx))) {
+                    @2
+                }
+            }
+        } else {
+            for (cur_idx = stack[stack_top-1].first;
+                 cur_idx <= stack[stack_top-1].last;
+                 cur_idx++) {
+                @2
+            }
         }
         res->batBuns->free = dst - res->batBuns->base;
         BATsetcount(res, 
(res->batBuns->free+Bunbase(res)-BUNfirst(res))/bunsize);
@@ -642,7 +665,7 @@
 #define onstack_get(b) (onstack[(b)>>OST_shift] &   (((OST)1)<<((b)&OST_mask)))
 
 static int
-ll_descendant ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *cand_bat, int* 
size,
+ll_descendant ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *end_ctx, BAT 
*cand_bat, int* size,
                 oid min_iter, oid max_iter, bit no_iter_order, bit self, chr* 
kind, chr kind_test)
 {
     BAT *res = *result;
@@ -659,6 +682,9 @@
     int OST_bits  = OST_bytes * 8;
     int OST_mask  = OST_bits - 1;
     int OST_shift = 0;
+
+    /* not used, here; keep compilers happy */
+    (void)end_ctx;
     
     if (cand_bat) {
         bs_cnd  = BUNsize(cand_bat);

Index: pf_support.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pf_support.mx,v
retrieving revision 1.261
retrieving revision 1.262
diff -u -d -r1.261 -r1.262
--- pf_support.mx       8 Aug 2007 14:02:17 -0000       1.261
+++ pf_support.mx       9 Aug 2007 21:49:39 -0000       1.262
@@ -347,6 +347,7 @@
 @= ll_cmd
 .COMMAND [EMAIL PROTECTED](BAT[oid,oid] iter,
                    BAT[oid,oid] ctx,
+                   BAT[oid,oid] end_ctx,
                    BAT[oid,int] pre_size,
                    BAT[void,any] cands,
                    bit one_iter, bit one_ctx,
@@ -355,6 +356,8 @@
 "PARAMETERS:
 BAT[void,oid] iter (grouping relation; sorted on tail within each ctx group)
 BAT[void,oid] ctx (context set; sorted on tail)
+BAT[void,oid] end_ctx (context end for 'early-exit' child step in 
preceding-sibling implementation;
+                       head-aligned with iter & ctx or empty (=> no 
'early-exit'))
 BAT[void,int] pre_size (from the working set)
 BAT[oid,oid]  cands (sorted list of result candidate OIDs in the tail)
 bit           one_iter (only one iter?)
@@ -385,8 +388,8 @@
 @:lev_cmd(parent,parent)@
 @(
 @:lev_cmd(fs,following-sibling)@
-@)
 @:lev_cmd(ps,preceding-sibling)@
+@)
 
 @= lev_cmd
 .COMMAND [EMAIL PROTECTED](BAT[void,chr] pre_level,
@@ -1444,14 +1447,12 @@
 
 @(
 
@:step(following_sibling,following-sibling,lev_fs,@:level_intro@,@:level_code@,,)@
-@)
 
@:step(preceding_sibling,preceding-sibling,lev_ps,@:level_intro@,@:level_code@,,)@
-
-@:ll_sibling_impl(following)@
-@(
-@:ll_sibling_impl(preceding)@
 @)
 
+@:ll_sibling_impl_following@
+@:ll_sibling_impl_preceding@
+
 @(
 @:step(following,following,following_void,,@:sizes_code@, 
@:foll_prec_code@,@:doc_pre@)@
 @:step(preceding,preceding,preceding_void,,@:sizes_code@, 
@:foll_prec_code@,@:doc_pre@)@
@@ -1509,8 +1510,47 @@
                     res.chk_order();
                 }
 @= wrap
+PROC [EMAIL PROTECTED](BAT[oid,oid] iter, BAT[oid,oid] ctx, BAT[oid,int] 
pre_size, BAT[void,any] cands, bit one_iter, bit one_ctx, oid min_iter, oid 
max_iter, bit no_iter_order, chr kind_test) : BAT[oid,oid]
+{
+        var end_ctx := bat(void,oid,0).seqbase([EMAIL 
PROTECTED]).access(BAT_READ);
+       return [EMAIL PROTECTED](iter, ctx, end_ctx, pre_size, cands, one_iter, 
one_ctx, min_iter, max_iter, no_iter_order, kind_test);
+}
+ADDHELP("[EMAIL PROTECTED]", "manegold", "Aug 2007",
+"PARAMETERS:\n\
+BAT[void,oid] iter (grouping relation; sorted on tail within each ctx group)\n\
+BAT[void,oid] ctx (context set; sorted on tail)\n\
+BAT[void,int] pre_size (from the working set)\n\
+BAT[oid,oid]  cands (sorted list of result candidate OIDs in the tail)\n\
+bit           one_iter (only one iter?)\n\
+bit           one_ctx  (only one ctx node, i.e., the same for all iters?)\n\
+oid           min_iter,max_iter (smallest and largest iter id)\n\
+bit           no_iter_order (descendant & descendant_or_self, only: \n\
+               result will be ordered on item, but not sub-ordered on iter)\n\
+DESCRIPTION:\n\
+returns all nodes on the @1 axis of the ctx-nodes duplicate free for each 
group.",
+"pf_support");
+
 PROC @1 (BAT[oid,oid] iter, BAT[oid,oid] item, oid cont, BAT[oid,bat] ws, int 
order, BAT[void,any] cands, chr kind_test) : BAT[void,bat]
 {
+        var end_ctx := bat(void,oid,0).seqbase([EMAIL 
PROTECTED]).access(BAT_READ);
+       return @1(iter, item, end_ctx, cont, ws, order, cands, kind_test);
+}
+ADDHELP("@1", "tsheyar", "Sep 2004",
+"PARAMETERS:\n\
+BAT[void,oid] iter (grouping relation)\n\
+BAT[void,oid] item (context set)\n\
+oid cont (the current container of the ws)\n\
+BAT[void,bat] ws (working set)\n\
+int order (input & output order properties:\n\
+           bit 0: input is sorted on iter(0) or item(1)\n\
+           bit 1: output must be sorted on iter(0) or item(1))\n\
+BAT[oid,oid] cands (sorted list of result candidate OIDs in the tail)\n\
+DESCRIPTION:\n\
+returns all nodes on the @1 axis of the ctx-nodes duplicate free for each 
group.",
+"pf_support");
+
+PROC @1 (BAT[oid,oid] iter, BAT[oid,oid] item, BAT[oid,oid] end_ctx, oid cont, 
BAT[oid,bat] ws, int order, BAT[void,any] cands, chr kind_test) : BAT[void,bat]
+{
        var result := nil;
        var one_iter := FALSE;
        var one_item := FALSE;
@@ -1578,7 +1618,7 @@
 
        # the actual location step
        if ( isnil(result) ) {
-               var res := [EMAIL PROTECTED] (iter, item, pre_size, cands, 
one_iter, one_item, 
+               var res := [EMAIL PROTECTED] (iter, item, end_ctx, pre_size, 
cands, one_iter, one_item, 
                                   min_iter, max_iter, (and(order,2) = 0), 
kind_test);
                                
                 @:resolve_unique_iters()@
@@ -1597,6 +1637,8 @@
 "PARAMETERS:\n\
 BAT[void,oid] iter (grouping relation)\n\
 BAT[void,oid] item (context set)\n\
+BAT[void,oid] end_ctx (context end for 'early-exit' child step in 
preceding-sibling implementation;\n\
+                       head-aligned with iter & item or empty (=> no 
'early-exit'))\n\
 oid cont (the current container of the ws)\n\
 BAT[void,bat] ws (working set)\n\
 int order (input & output order properties:\n\
@@ -1612,10 +1654,9 @@
        if ( (and(order,1) = 0) and not(ordered(reverse(item)))) {
                var ord := item.tsort();
                    ord := ord.CTrefine(iter).mark([EMAIL PROTECTED]).reverse();
-               iter := ord.leftfetchjoin(iter);
-               item := ord.leftfetchjoin(item);
-               iter := iter.chk_order();
-               item := item.chk_order();
+               iter := ord.leftfetchjoin(iter).chk_order();
+               item := ord.leftfetchjoin(item).chk_order();
+               order := or(order,1);
        }
 @
 @= post_sort_output
@@ -1739,8 +1780,8 @@
 }
 @
 
[EMAIL PROTECTED] ll_sibling_impl
-PROC @1_sibling(BAT[void,oid] iter, BAT[void,oid] item, oid cont, 
BAT[void,bat] ws, int order, chr kind_test) : BAT[void,bat]
[EMAIL PROTECTED] ll_sibling_impl_following
+PROC following_sibling(BAT[void,oid] iter, BAT[void,oid] item, oid cont, 
BAT[void,bat] ws, int order, chr kind_test) : BAT[void,bat]
 {
     var pre_levels := ws.fetch(PRE_LEVEL).fetch(cont);
     var pre_sizes := ws.fetch(PRE_SIZE).fetch(cont);
@@ -1756,7 +1797,7 @@
     item := item.chk_order();
     @:pre_sort_input@
 
-    var res := [EMAIL PROTECTED](iter.chk_order(), item.chk_order(), 
pre_sizes, pre_levels, pre_kinds, kind_test);
+    var res := ll_following_sibling(iter.chk_order(), item.chk_order(), 
pre_sizes, pre_levels, pre_kinds, kind_test);
     var result := new(void,bat,2).seqbase([EMAIL 
PROTECTED]).append(hmark(res,[EMAIL PROTECTED])).append(tmark(res,[EMAIL 
PROTECTED])).access(BAT_READ);
 
     @:post_sort_output@
@@ -1765,6 +1806,50 @@
 }
 @
 
[EMAIL PROTECTED] ll_sibling_impl_preceding
+#PROC preceding_sibling(BAT[void,oid] iter, BAT[void,oid] item, oid cont, 
BAT[void,bat] ws, int order, BAT[void,any] cands, chr kind_test) : BAT[void,bat]
+PROC preceding_sibling(BAT[void,oid] iter, BAT[void,oid] item, oid cont, 
BAT[void,bat] ws, int order, chr kind_test) : BAT[void,bat]
+{
+    var cands;
+    if (isnil(kind_test)) {
+        cands := ws.fetch(PRE_SIZE).fetch(cont).mirror();
+    } else {
+        cands := ws.fetch(PRE_KIND).fetch(cont);
+    }
+    var itr, par;
+    var pre_levels := ws.fetch(PRE_LEVEL).fetch(cont);
+    var pre_sizes := ws.fetch(PRE_SIZE).fetch(cont);
+
+    iter := iter.chk_order();
+    item := item.chk_order();
+    @:pre_sort_input@
+
+    # use one iter per item to get parent of each item
+    itr := iter.mirror();
+    par := ll_parent(itr, item, pre_sizes, pre_levels).chk_order();
+    if (count(par) < count(itr)) {
+        # nodes that don't have a parent also don't have (preceding) siblings
+        itr  := par.hmark([EMAIL PROTECTED]);
+        par  := par.tmark([EMAIL PROTECTED]);
+        iter := itr.leftfetchjoin(iter);
+        item := itr.leftfetchjoin(item);
+    }
+
+    # ensure that intermediate [iter,par,item] result is ordered on 
[par(asc),iter(asc),item(desc)]
+    if (not(ordered(reverse(par)))) {
+            var ord := 
par.tsort().CTrefine(iter).CTrefine_rev(item).hmark([EMAIL PROTECTED]);
+            par  := ord.leftfetchjoin(par ).chk_order();
+            iter := ord.leftfetchjoin(iter).chk_order();
+            item := ord.leftfetchjoin(item).chk_order();
+    }
+    # no further sorting required in child()
+    order := or(order,1);
+
+    # child-step from parents with original items as context-ends yields the 
desired result
+    return child(iter, par, item, cont, ws, order, cands, kind_test);
+}
+@
+
 @= ll_prec_foll_impl
 PROC @1(BAT[void,oid] iter, BAT[void,oid] item, oid cont, BAT[void,bat] ws, 
int order, chr kind_test) : BAT[void,bat]
 {
@@ -1804,7 +1889,7 @@
 @:loop_lifted_scj_step1x(preceding)@
 
 @:loop_lifted_scj_step1x(following_sibling)@
-@:loop_lifted_scj_step1(preceding_sibling)@
+@:loop_lifted_scj_step1x(preceding_sibling)@
 @
 #==================================================================
 # expansions of the loop lifted scj


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to