Update of /cvsroot/monetdb/pathfinder/runtime
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv32077/runtime
Modified Files:
ll_staircasejoin.mx pf_support.mx
Log Message:
Finally, the last remaining XPath location step has been loop-lifted:
preceding-siblings.
The implementation falls back to applying a loop-lifted parent-step, first
--- using unique temporary iter-numbers, one for each item, we keep the
loop-lifted parent-step from pruning, and hence, get excatly one parent-node
for each context-node (item) ---
then, a slightly modified loop-lifted child-step is used to retried all children
of the just calculated parents up to the respective original context nodes.
For now, we only have kind-test push-down (like with following-sibling,
preceding & following), but no ns/loc-test push-down, yet.
While the child step should support this readily, I was not yet able to
locate and eliminate some strange bug in the calculation of candidates,
that occurs with one of our preceding-sibling tests.
Special thanks go to Jan R. for proving the initial idea of falling-back
to parent-step plus "context-node-limited" child-step.
Index: ll_staircasejoin.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/ll_staircasejoin.mx,v
retrieving revision 1.57
retrieving revision 1.58
diff -u -d -r1.57 -r1.58
--- ll_staircasejoin.mx 13 May 2007 11:39:21 -0000 1.57
+++ ll_staircasejoin.mx 9 Aug 2007 21:49:39 -0000 1.58
@@ -74,21 +74,20 @@
/* DESCENDANT STEP */
@:ll_proto(descendant)@
-@:ll_head(descendant)@
+@:ll_head(descendant,FALSE)@
self = FALSE;
res_size = BATcount(iter_bat); /* FIXME: estimate size! */
@:ll_main(descendant, (size[pre] & (1<<31)))@
/* DESCENDANT-OR-SELF STEP */
-@:ll_head(descendant_or_self)@
+@:ll_head(descendant_or_self,FALSE)@
self = TRUE;
res_size = BATcount(iter_bat); /* FIXME: estimate size! */
@:ll_main(descendant, (size[pre] & (1<<31)))@
/* CHILD STEP */
@:ll_proto(child)@
-@:ll_head(child)@
- trivial_cases = FALSE;
+@:ll_head(child,TRUE)@
res_size = BATcount(iter_bat); /* FIXME: estimate size! */
@:ll_main(child, FALSE)@
@@ -96,12 +95,12 @@
@= ll_proto
static int
[EMAIL PROTECTED] ( BAT **res, BAT *iter_bat, BAT *ctx_bat, BAT *cand_bat,
int *size,
[EMAIL PROTECTED] ( BAT **res, BAT *iter_bat, BAT *ctx_bat, BAT *end_ctx,
BAT *cand_bat, int *size,
oid min_iter, oid max_iter, bit no_iter_order, bit self, chr* kind, chr
kind_test);
@
@= ll_head
int
[EMAIL PROTECTED] (BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *pre_size,
BAT *cand_bat,
[EMAIL PROTECTED] (BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *end_ctx, BAT
*pre_size, BAT *cand_bat,
bit *_one_iter, bit *_one_ctx,
oid *_min_iter, oid *_max_iter, bit *_no_iter_order, chr *_kind_test)
{
@@ -114,7 +113,7 @@
oid max_iter = *_max_iter;
bit no_iter_order = *_no_iter_order;
bit self = FALSE;
- bit trivial_cases = TRUE;
+ bit child_step = @2;
chr *kind = NULL, kind_test = *_kind_test;
BAT *res = NULL;
BAT *del = NULL;
@@ -125,6 +124,7 @@
BATcheck(iter_bat, name);
BATcheck(ctx_bat, name);
+ BATcheck(end_ctx, name);
BATcheck(pre_size, name);
BATcheck(cand_bat, name);
@@ -148,6 +148,19 @@
GDKerror("%s: iter_bat must be ordered on tail.\n", name);
@:ll_return(GDK_FAIL)@
}
+ if (BATcount(end_ctx) == 0) {
+ end_ctx = NULL; /* empty; not used. */
+ } else {
+ if (!child_step) {
+ GDKerror("%s: end_ctx must be empty.\n", name);
+ @:ll_return(GDK_FAIL)@
+ } else
+ if ((end_ctx->hseqbase != ctx_bat->hseqbase) || (BATcount(end_ctx) !=
BATcount(ctx_bat)))
+ {
+ GDKerror("%s: end_ctx must be head-aligned with iter_bat &
ctx_bat, i.e., have equal head seqbases and length.\n", name);
+ @:ll_return(GDK_FAIL)@
+ }
+ }
if (!BAThdense(pre_size))
{
@@ -214,7 +227,7 @@
* @1: step name
*/
- if ( trivial_cases && one_ctx ) {
+ if ( one_ctx && !child_step ) {
/* ------------------------ trivial cases ------------------------- */
@@ -306,7 +319,7 @@
res->batBuns->free = dst - res->batBuns->base;
BATsetcount(res,
(res->batBuns->free+Bunbase(res)-BUNfirst(res))/BUNsize(res));
} else
- /* !(trivial_cases && one_ctx) */ {
+ /* ( !one_ctx || child_step ) */ {
/* ------------------------ general cases ------------------------- */
@@ -318,7 +331,7 @@
@:ll_return(GDK_FAIL)@
}
- if ([EMAIL PROTECTED](&res, iter_bat, ctx_bat, cand_bat, size,
+ if ([EMAIL PROTECTED](&res, iter_bat, ctx_bat, end_ctx, cand_bat,
size,
min_iter, max_iter, no_iter_order, self, kind, kind_test) ==
GDK_FAIL )
{
@:ll_return(GDK_FAIL)@
@@ -406,19 +419,20 @@
struct stack_item_C {
oid eocs; /* end of ctx scope (pre + size) */
oid next_child; /* preorder rank of the next child node */
- BUN first; /* first iter row of the actual ctx node */
- BUN last; /* last iter row of the actual ctx node */
+ size_t first; /* first iter row of the actual ctx node */
+ size_t last; /* last iter row of the actual ctx node */
};
static int
-ll_child ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *cand_bat, int* size,
+ll_child ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *end_ctx, BAT
*cand_bat, int* size,
oid min_iter, oid max_iter, bit no_iter_order, bit self, chr
*kind, chr kind_test)
{
BAT *res = *result;
si_C *stack = 0;
- int stack_top = 0, ctx_bunsize = 0, iter_bunsize = 0;
+ int stack_top = 0, ctx_bunsize = 0;
oid pre = 0, ctx = 0, cur_ctx = 0;
- BUN cur_bun = 0, iter_bun = 0, ctx_bun = 0, ctx_last = 0;
+ size_t cur_idx = 0, iter_idx = 0;
+ BUN ctx_bun = 0, ctx_last = 0;
BUN cand_cur = cand_bat?BUNtloc(cand_bat,BUNfirst(cand_bat)):NULL;
BUN cand_lst =
cand_bat?BUNtloc(cand_bat,BUNlast(cand_bat)-BUNsize(cand_bat)):NULL;
@@ -436,24 +450,23 @@
}
stack_top = 0;
+ iter_idx = BUNindex(iter_bat,BUNfirst(iter_bat));
+ assert(!end_ctx || iter_idx == BUNindex(end_ctx,BUNfirst(end_ctx)));
- iter_bunsize = BUNsize(iter_bat);
- ctx_bunsize = BUNsize(ctx_bat);
-
- iter_bun = BUNfirst(iter_bat);
ctx_bun = BUNfirst(ctx_bat);
ctx_last = BUNlast(ctx_bat);
+ ctx_bunsize = BUNsize(ctx_bat);
@= getnextctx
- iter_bun += iter_bunsize;
+ iter_idx++;
ctx_bun += ctx_bunsize;
@
@= pushctx
si_C new_stack_item;
new_stack_item.eocs = ctx + size[ctx];
new_stack_item.next_child = ctx + 1;
- new_stack_item.first = iter_bun;
- new_stack_item.last = iter_bun;
+ new_stack_item.first = iter_idx;
+ new_stack_item.last = iter_idx;
stack[stack_top++] = new_stack_item;
cur_ctx = ctx;
@
@@ -487,7 +500,7 @@
/* only a new iter has to be added
to the list of active iters */
else if (cur_ctx == ctx) {
- stack[stack_top-1].last = iter_bun;
+ stack[stack_top-1].last = iter_idx;
@:getnextctx@
}
/* evaluates the ctx node on top of the stack
@@ -570,7 +583,7 @@
oid prev_iter = oid_nil;
@
@= skip_duplicates_body
- oid next_iter = *(oid*)BUNtail(iter_bat,cur_bun);
+ oid next_iter = *(oid*)BUNtail(iter_bat,BUNptr(iter_bat,cur_idx));
if (next_iter != prev_iter) {
/* (Even) if the same context node (item)
* occurs multiple times per iter,
@@ -588,14 +601,14 @@
}
@
@= no_duplicates_body
- oid next_iter = *(oid*)BUNtail(iter_bat,cur_bun);
+ oid next_iter = *(oid*)BUNtail(iter_bat,BUNptr(iter_bat,cur_idx));
*(oid*)dst = next_iter;
*(oid*)(dst+SIZEOF_OID) = pre;
dst += bunsize;
@
@= inner_loop_child_body
@1
- size_t grow = ((stack[stack_top-1].last - stack[stack_top-1].first) /
iter_bunsize) + 1;
+ size_t grow = (stack[stack_top-1].last - stack[stack_top-1].first) + 1;
BUN dst = NULL;
int bunsize = BUNsize(res);
@@ -605,10 +618,20 @@
a match */
dst = BUNlast(res);
assert(!res->hloc);
- for (cur_bun = stack[stack_top-1].first;
- cur_bun <= stack[stack_top-1].last;
- cur_bun += iter_bunsize) {
- @2
+ if (end_ctx) {
+ for (cur_idx = stack[stack_top-1].first;
+ cur_idx <= stack[stack_top-1].last;
+ cur_idx++) {
+ if (pre < *(oid*)BUNtail(end_ctx,BUNptr(end_ctx,cur_idx))) {
+ @2
+ }
+ }
+ } else {
+ for (cur_idx = stack[stack_top-1].first;
+ cur_idx <= stack[stack_top-1].last;
+ cur_idx++) {
+ @2
+ }
}
res->batBuns->free = dst - res->batBuns->base;
BATsetcount(res,
(res->batBuns->free+Bunbase(res)-BUNfirst(res))/bunsize);
@@ -642,7 +665,7 @@
#define onstack_get(b) (onstack[(b)>>OST_shift] & (((OST)1)<<((b)&OST_mask)))
static int
-ll_descendant ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *cand_bat, int*
size,
+ll_descendant ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *end_ctx, BAT
*cand_bat, int* size,
oid min_iter, oid max_iter, bit no_iter_order, bit self, chr*
kind, chr kind_test)
{
BAT *res = *result;
@@ -659,6 +682,9 @@
int OST_bits = OST_bytes * 8;
int OST_mask = OST_bits - 1;
int OST_shift = 0;
+
+ /* not used, here; keep compilers happy */
+ (void)end_ctx;
if (cand_bat) {
bs_cnd = BUNsize(cand_bat);
Index: pf_support.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pf_support.mx,v
retrieving revision 1.261
retrieving revision 1.262
diff -u -d -r1.261 -r1.262
--- pf_support.mx 8 Aug 2007 14:02:17 -0000 1.261
+++ pf_support.mx 9 Aug 2007 21:49:39 -0000 1.262
@@ -347,6 +347,7 @@
@= ll_cmd
.COMMAND [EMAIL PROTECTED](BAT[oid,oid] iter,
BAT[oid,oid] ctx,
+ BAT[oid,oid] end_ctx,
BAT[oid,int] pre_size,
BAT[void,any] cands,
bit one_iter, bit one_ctx,
@@ -355,6 +356,8 @@
"PARAMETERS:
BAT[void,oid] iter (grouping relation; sorted on tail within each ctx group)
BAT[void,oid] ctx (context set; sorted on tail)
+BAT[void,oid] end_ctx (context end for 'early-exit' child step in
preceding-sibling implementation;
+ head-aligned with iter & ctx or empty (=> no
'early-exit'))
BAT[void,int] pre_size (from the working set)
BAT[oid,oid] cands (sorted list of result candidate OIDs in the tail)
bit one_iter (only one iter?)
@@ -385,8 +388,8 @@
@:lev_cmd(parent,parent)@
@(
@:lev_cmd(fs,following-sibling)@
-@)
@:lev_cmd(ps,preceding-sibling)@
+@)
@= lev_cmd
.COMMAND [EMAIL PROTECTED](BAT[void,chr] pre_level,
@@ -1444,14 +1447,12 @@
@(
@:step(following_sibling,following-sibling,lev_fs,@:level_intro@,@:level_code@,,)@
-@)
@:step(preceding_sibling,preceding-sibling,lev_ps,@:level_intro@,@:level_code@,,)@
-
-@:ll_sibling_impl(following)@
-@(
-@:ll_sibling_impl(preceding)@
@)
+@:ll_sibling_impl_following@
+@:ll_sibling_impl_preceding@
+
@(
@:step(following,following,following_void,,@:sizes_code@,
@:foll_prec_code@,@:doc_pre@)@
@:step(preceding,preceding,preceding_void,,@:sizes_code@,
@:foll_prec_code@,@:doc_pre@)@
@@ -1509,8 +1510,47 @@
res.chk_order();
}
@= wrap
+PROC [EMAIL PROTECTED](BAT[oid,oid] iter, BAT[oid,oid] ctx, BAT[oid,int]
pre_size, BAT[void,any] cands, bit one_iter, bit one_ctx, oid min_iter, oid
max_iter, bit no_iter_order, chr kind_test) : BAT[oid,oid]
+{
+ var end_ctx := bat(void,oid,0).seqbase([EMAIL
PROTECTED]).access(BAT_READ);
+ return [EMAIL PROTECTED](iter, ctx, end_ctx, pre_size, cands, one_iter,
one_ctx, min_iter, max_iter, no_iter_order, kind_test);
+}
+ADDHELP("[EMAIL PROTECTED]", "manegold", "Aug 2007",
+"PARAMETERS:\n\
+BAT[void,oid] iter (grouping relation; sorted on tail within each ctx group)\n\
+BAT[void,oid] ctx (context set; sorted on tail)\n\
+BAT[void,int] pre_size (from the working set)\n\
+BAT[oid,oid] cands (sorted list of result candidate OIDs in the tail)\n\
+bit one_iter (only one iter?)\n\
+bit one_ctx (only one ctx node, i.e., the same for all iters?)\n\
+oid min_iter,max_iter (smallest and largest iter id)\n\
+bit no_iter_order (descendant & descendant_or_self, only: \n\
+ result will be ordered on item, but not sub-ordered on iter)\n\
+DESCRIPTION:\n\
+returns all nodes on the @1 axis of the ctx-nodes duplicate free for each
group.",
+"pf_support");
+
PROC @1 (BAT[oid,oid] iter, BAT[oid,oid] item, oid cont, BAT[oid,bat] ws, int
order, BAT[void,any] cands, chr kind_test) : BAT[void,bat]
{
+ var end_ctx := bat(void,oid,0).seqbase([EMAIL
PROTECTED]).access(BAT_READ);
+ return @1(iter, item, end_ctx, cont, ws, order, cands, kind_test);
+}
+ADDHELP("@1", "tsheyar", "Sep 2004",
+"PARAMETERS:\n\
+BAT[void,oid] iter (grouping relation)\n\
+BAT[void,oid] item (context set)\n\
+oid cont (the current container of the ws)\n\
+BAT[void,bat] ws (working set)\n\
+int order (input & output order properties:\n\
+ bit 0: input is sorted on iter(0) or item(1)\n\
+ bit 1: output must be sorted on iter(0) or item(1))\n\
+BAT[oid,oid] cands (sorted list of result candidate OIDs in the tail)\n\
+DESCRIPTION:\n\
+returns all nodes on the @1 axis of the ctx-nodes duplicate free for each
group.",
+"pf_support");
+
+PROC @1 (BAT[oid,oid] iter, BAT[oid,oid] item, BAT[oid,oid] end_ctx, oid cont,
BAT[oid,bat] ws, int order, BAT[void,any] cands, chr kind_test) : BAT[void,bat]
+{
var result := nil;
var one_iter := FALSE;
var one_item := FALSE;
@@ -1578,7 +1618,7 @@
# the actual location step
if ( isnil(result) ) {
- var res := [EMAIL PROTECTED] (iter, item, pre_size, cands,
one_iter, one_item,
+ var res := [EMAIL PROTECTED] (iter, item, end_ctx, pre_size,
cands, one_iter, one_item,
min_iter, max_iter, (and(order,2) = 0),
kind_test);
@:resolve_unique_iters()@
@@ -1597,6 +1637,8 @@
"PARAMETERS:\n\
BAT[void,oid] iter (grouping relation)\n\
BAT[void,oid] item (context set)\n\
+BAT[void,oid] end_ctx (context end for 'early-exit' child step in
preceding-sibling implementation;\n\
+ head-aligned with iter & item or empty (=> no
'early-exit'))\n\
oid cont (the current container of the ws)\n\
BAT[void,bat] ws (working set)\n\
int order (input & output order properties:\n\
@@ -1612,10 +1654,9 @@
if ( (and(order,1) = 0) and not(ordered(reverse(item)))) {
var ord := item.tsort();
ord := ord.CTrefine(iter).mark([EMAIL PROTECTED]).reverse();
- iter := ord.leftfetchjoin(iter);
- item := ord.leftfetchjoin(item);
- iter := iter.chk_order();
- item := item.chk_order();
+ iter := ord.leftfetchjoin(iter).chk_order();
+ item := ord.leftfetchjoin(item).chk_order();
+ order := or(order,1);
}
@
@= post_sort_output
@@ -1739,8 +1780,8 @@
}
@
[EMAIL PROTECTED] ll_sibling_impl
-PROC @1_sibling(BAT[void,oid] iter, BAT[void,oid] item, oid cont,
BAT[void,bat] ws, int order, chr kind_test) : BAT[void,bat]
[EMAIL PROTECTED] ll_sibling_impl_following
+PROC following_sibling(BAT[void,oid] iter, BAT[void,oid] item, oid cont,
BAT[void,bat] ws, int order, chr kind_test) : BAT[void,bat]
{
var pre_levels := ws.fetch(PRE_LEVEL).fetch(cont);
var pre_sizes := ws.fetch(PRE_SIZE).fetch(cont);
@@ -1756,7 +1797,7 @@
item := item.chk_order();
@:pre_sort_input@
- var res := [EMAIL PROTECTED](iter.chk_order(), item.chk_order(),
pre_sizes, pre_levels, pre_kinds, kind_test);
+ var res := ll_following_sibling(iter.chk_order(), item.chk_order(),
pre_sizes, pre_levels, pre_kinds, kind_test);
var result := new(void,bat,2).seqbase([EMAIL
PROTECTED]).append(hmark(res,[EMAIL PROTECTED])).append(tmark(res,[EMAIL
PROTECTED])).access(BAT_READ);
@:post_sort_output@
@@ -1765,6 +1806,50 @@
}
@
[EMAIL PROTECTED] ll_sibling_impl_preceding
+#PROC preceding_sibling(BAT[void,oid] iter, BAT[void,oid] item, oid cont,
BAT[void,bat] ws, int order, BAT[void,any] cands, chr kind_test) : BAT[void,bat]
+PROC preceding_sibling(BAT[void,oid] iter, BAT[void,oid] item, oid cont,
BAT[void,bat] ws, int order, chr kind_test) : BAT[void,bat]
+{
+ var cands;
+ if (isnil(kind_test)) {
+ cands := ws.fetch(PRE_SIZE).fetch(cont).mirror();
+ } else {
+ cands := ws.fetch(PRE_KIND).fetch(cont);
+ }
+ var itr, par;
+ var pre_levels := ws.fetch(PRE_LEVEL).fetch(cont);
+ var pre_sizes := ws.fetch(PRE_SIZE).fetch(cont);
+
+ iter := iter.chk_order();
+ item := item.chk_order();
+ @:pre_sort_input@
+
+ # use one iter per item to get parent of each item
+ itr := iter.mirror();
+ par := ll_parent(itr, item, pre_sizes, pre_levels).chk_order();
+ if (count(par) < count(itr)) {
+ # nodes that don't have a parent also don't have (preceding) siblings
+ itr := par.hmark([EMAIL PROTECTED]);
+ par := par.tmark([EMAIL PROTECTED]);
+ iter := itr.leftfetchjoin(iter);
+ item := itr.leftfetchjoin(item);
+ }
+
+ # ensure that intermediate [iter,par,item] result is ordered on
[par(asc),iter(asc),item(desc)]
+ if (not(ordered(reverse(par)))) {
+ var ord :=
par.tsort().CTrefine(iter).CTrefine_rev(item).hmark([EMAIL PROTECTED]);
+ par := ord.leftfetchjoin(par ).chk_order();
+ iter := ord.leftfetchjoin(iter).chk_order();
+ item := ord.leftfetchjoin(item).chk_order();
+ }
+ # no further sorting required in child()
+ order := or(order,1);
+
+ # child-step from parents with original items as context-ends yields the
desired result
+ return child(iter, par, item, cont, ws, order, cands, kind_test);
+}
+@
+
@= ll_prec_foll_impl
PROC @1(BAT[void,oid] iter, BAT[void,oid] item, oid cont, BAT[void,bat] ws,
int order, chr kind_test) : BAT[void,bat]
{
@@ -1804,7 +1889,7 @@
@:loop_lifted_scj_step1x(preceding)@
@:loop_lifted_scj_step1x(following_sibling)@
-@:loop_lifted_scj_step1(preceding_sibling)@
+@:loop_lifted_scj_step1x(preceding_sibling)@
@
#==================================================================
# expansions of the loop lifted scj
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins