Update of /cvsroot/monetdb/pathfinder/runtime
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv24797
Modified Files:
Tag: PF_ROX
ws_step_down.mx ll_staircasejoin.mx
Log Message:
for ws_child() and ll_child() (in case it receives a candidate list):
improved ignoring/skipping of ctx/left nodes
that obviously do not find a match among the cand/right nodes
(improves performance of ws_child() and ll_child() by up to factor 4)
Index: ws_step_down.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/Attic/ws_step_down.mx,v
retrieving revision 1.1.2.6
retrieving revision 1.1.2.7
diff -u -d -r1.1.2.6 -r1.1.2.7
--- ws_step_down.mx 4 Mar 2008 15:06:45 -0000 1.1.2.6
+++ ws_step_down.mx 9 Mar 2008 23:49:55 -0000 1.1.2.7
@@ -21,9 +21,9 @@
@' 2000-2005 University of Konstanz and (C) 2005-2008 Technische
@' Universitaet Muenchen, respectively. All Rights Reserved.
[EMAIL PROTECTED] ws_step_down.mx
[EMAIL PROTECTED] ws_step_down
@a Stefan Manegold
[EMAIL PROTECTED] loop-lifted staircasejoin
[EMAIL PROTECTED] step-joins for child, descendant, & descendant-or-self axes
@c
@@ -98,7 +98,7 @@
bit child_step = @2;
#endif
BAT *res = NULL;
- BAT *del = NULL;
+ BAT *del1 = NULL, *del2 = NULL;
size_t res_size = cutoff;
int *size;
size_t estimate = 0;
@@ -152,16 +152,23 @@
{
GDKerror("%s: head of cand_bat must be dense.\n", name);
@:ws_return(GDK_FAIL)@
- }
- if (!(BATtordered(cand_bat) & 1)) {
+ } else if (!(BATtordered(cand_bat) & 1)) {
GDKerror("%s: cand_bat must be ordered on tail.\n", name);
@:ws_return(GDK_FAIL)@
} else if (cand_bat->ttype == TYPE_void) {
/* rare case: an oid candidate-list that is void-dense. materialize
it */
- del = BATnew(TYPE_void, TYPE_oid, BATcount(cand_bat));
- if (del == NULL || (cand_bat->tseqbase != oid_nil && BATins(del,
cand_bat, FALSE) == NULL))
+ del1 = BATnew(TYPE_void, TYPE_oid, BATcount(cand_bat));
+ if (del1 == NULL || (cand_bat->tseqbase != oid_nil && BATins(del1,
cand_bat, FALSE) == NULL))
@:ws_return(GDK_FAIL)@
- cand_bat = del;
+ cand_bat = del1;
+ }
+
+ if (ctx_bat->ttype == TYPE_void) {
+ /* rare case: an oid context-list that is void-dense. materialize it
*/
+ del2 = BATnew(TYPE_void, TYPE_oid, BATcount(ctx_bat));
+ if (del2 == NULL || (ctx_bat->tseqbase != oid_nil && BATins(del2,
ctx_bat, FALSE) == NULL))
+ @:ws_return(GDK_FAIL)@
+ ctx_bat = del2;
}
size = ((int*) Tloc(pre_size, BUNfirst(pre_size))) -
(int)pre_size->hseqbase;
@@ -396,7 +403,8 @@
@
@= ws_return
-{ if (del) BBPreclaim(del);
+{ if (del1) BBPreclaim(del1);
+ if (del2) BBPreclaim(del2);
if (res && (@1 == GDK_FAIL))
BBP_unfix_reclaim(res);
return @1; }
@@ -448,14 +456,15 @@
ws_child ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *cand_bat, int* size,
oid min_iter, oid max_iter, bit no_iter_order, bit self, size_t
cutoff, size_t* estimate)
{
- BATiter ctx_bati = bat_iterator(ctx_bat);
BATiter iter_bati = bat_iterator(iter_bat);
BAT *res = *result;
si_C *stack = 0;
int stack_top = 0;
- oid pre = 0, ctx = 0, cur_ctx = 0;
+ oid pre = 0, ctx = 0, cur_ctx = 0, cand = 0;
size_t cur_idx = 0, iter_idx = 0;
- BUN ctx_bun = 0, ctx_last = 0;
+ oid *ctx_fst = (oid*)Tloc(ctx_bat,BUNfirst(ctx_bat));
+ oid *ctx_cur = ctx_fst;
+ oid *ctx_lst = (oid*)Tloc(ctx_bat,BUNlast(ctx_bat));
oid *cand_fst = (oid*)Tloc(cand_bat,BUNfirst(cand_bat));
oid *cand_cur = cand_fst;
oid *cand_lst = (oid*)Tloc(cand_bat,BUNlast(cand_bat)-1);
@@ -485,12 +494,9 @@
iter_idx = BUNfirst(iter_bat);
- ctx_bun = BUNfirst(ctx_bat);
- ctx_last = BUNlast(ctx_bat);
-
@= getnextctx
iter_idx++;
- ctx_bun++;
+ ctx_cur++;
@
@= pushctx
si_C new_stack_item;
@@ -519,12 +525,46 @@
starting from the ctx (on the top of the stack) until
the next ctx node (or the end of the stack top
ctx node scope) is reached */
- while (ctx_bun < ctx_last && cand_cur <= cand_lst && free > 0) {
- ctx = *(oid*)BUNtail(ctx_bati,ctx_bun);
+ while (ctx_cur < ctx_lst && cand_cur <= cand_lst && free > 0) {
+ ctx = *ctx_cur;
+ cand = *cand_cur;
+
+ /* if the stack is empty,
+ we can ignore/skip "obviously" non-matching ctx- & cand-nodes
+ by advancing the both cursors in a merge-like fashion ... */
+ if (!stack_top && (cand < ctx || ctx + size[ctx] < cand)) {
+ while (ctx_cur < ctx_lst && cand_cur <= cand_lst && (*cand_cur <
*ctx_cur || *ctx_cur + size[*ctx_cur] < *cand_cur)) {
+ ctx = *ctx_cur;
+ cand = *cand_cur;
+ if (cand < ctx) {
+ /* poor man's binary search / exploiting forward scan */
+ while (cand_cur+1048576 <= cand_lst && *(cand_cur+1048576)
< ctx)
+ cand_cur += 1048576; /* this avoids mmapped I/O */
+ while (cand_cur+32768 <= cand_lst && *(cand_cur+32768) <
ctx)
+ cand_cur += 32768;
+ while (cand_cur+1024 <= cand_lst && *(cand_cur+1024) < ctx)
+ cand_cur += 1024;
+ while (cand_cur+32 <= cand_lst && *(cand_cur+32) < ctx)
+ cand_cur += 32;
+ while (cand_cur <= cand_lst && *cand_cur < ctx)
+ cand_cur ++;
+ } else /* ctx + size[ctx] < cand */ {
+ while (ctx_cur < ctx_lst && *ctx_cur + size[*ctx_cur] <
cand) {
+ @:getnextctx@
+ }
+ }
+ }
+ }
+ /* ... otherwise, we can at least ignore ctx nodes that end before
next cand */
+ else if (ctx + size[ctx] < cand) {
+ while (ctx_cur < ctx_lst && *ctx_cur + size[*ctx_cur] < cand) {
+ @:getnextctx@
+ }
+ }
/* if the stack is empty the next ctx node
has to be pushed on the stack
and the next ctx node is called*/
- if (!stack_top) {
+ else if (!stack_top) {
@:pushctx@
@:getnextctx@
}
@@ -656,8 +696,7 @@
GDKfree(stack);
if (free == 0) {
- BUN ctx_fst = BUNfirst(ctx_bat);
- *estimate = (size_t)(((dbl)(ctx_last - ctx_fst) / (dbl)(ctx_bun -
ctx_fst)) * (dbl)cutoff);
+ *estimate = (size_t)(((dbl)(ctx_lst - ctx_fst) / (dbl)(ctx_cur -
ctx_fst)) * (dbl)cutoff);
}
*result = res;
return GDK_SUCCEED;
Index: ll_staircasejoin.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/ll_staircasejoin.mx,v
retrieving revision 1.61.4.2
retrieving revision 1.61.4.3
diff -u -d -r1.61.4.2 -r1.61.4.3
--- ll_staircasejoin.mx 20 Feb 2008 13:25:30 -0000 1.61.4.2
+++ ll_staircasejoin.mx 9 Mar 2008 23:49:55 -0000 1.61.4.3
@@ -120,7 +120,7 @@
bit child_step = @2;
chr *kind = NULL, kind_test = *_kind_test;
BAT *res = NULL;
- BAT *del = NULL;
+ BAT *del1 = NULL, *del2 = NULL;
size_t res_size = cutoff;
int *size;
size_t estimate = 0;
@@ -201,10 +201,18 @@
@:ll_return(GDK_FAIL)@
} else if (cand_bat->ttype == TYPE_void) {
/* rare case: an oid candidate-list that is void-dense. materialize
it */
- del = BATnew(TYPE_void, TYPE_oid, BATcount(cand_bat));
- if (del == NULL || (cand_bat->tseqbase != oid_nil && BATins(del,
cand_bat, FALSE) == NULL))
+ del1 = BATnew(TYPE_void, TYPE_oid, BATcount(cand_bat));
+ if (del1 == NULL || (cand_bat->tseqbase != oid_nil && BATins(del1,
cand_bat, FALSE) == NULL))
@:ll_return(GDK_FAIL)@
- cand_bat = del;
+ cand_bat = del1;
+ }
+
+ if (ctx_bat->ttype == TYPE_void) {
+ /* rare case: an oid context-list that is void-dense. materialize it
*/
+ del2 = BATnew(TYPE_void, TYPE_oid, BATcount(ctx_bat));
+ if (del2 == NULL || (ctx_bat->tseqbase != oid_nil && BATins(del2,
ctx_bat, FALSE) == NULL))
+ @:ll_return(GDK_FAIL)@
+ ctx_bat = del2;
}
size = ((int*) Tloc(pre_size, BUNfirst(pre_size))) -
(int)pre_size->hseqbase;
@@ -416,7 +424,8 @@
@
@= ll_return
-{ if (del) BBPreclaim(del);
+{ if (del1) BBPreclaim(del1);
+ if (del2) BBPreclaim(del2);
if (res && (@1 == GDK_FAIL))
BBP_unfix_reclaim(res);
return @1; }
@@ -468,14 +477,16 @@
ll_child ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *end_ctx, BAT
*cand_bat, int* size,
oid min_iter, oid max_iter, bit no_iter_order, bit self, chr
*kind, chr kind_test, size_t cutoff, size_t* estimate)
{
- BATiter ctx_bati = bat_iterator(ctx_bat), end_ctxi = bat_iterator(end_ctx);
+ BATiter end_ctxi = bat_iterator(end_ctx);
BATiter iter_bati = bat_iterator(iter_bat);
BAT *res = *result;
si_C *stack = 0;
int stack_top = 0;
- oid pre = 0, ctx = 0, cur_ctx = 0;
+ oid pre = 0, ctx = 0, cur_ctx = 0, cand = 0;
size_t cur_idx = 0, iter_idx = 0;
- BUN ctx_bun = 0, ctx_last = 0;
+ oid *ctx_fst = (oid*)Tloc(ctx_bat,BUNfirst(ctx_bat));
+ oid *ctx_cur = ctx_fst;
+ oid *ctx_lst = (oid*)Tloc(ctx_bat,BUNlast(ctx_bat));
oid *cand_cur = (oid*)(cand_bat?Tloc(cand_bat,BUNfirst(cand_bat)):NULL);
oid *cand_lst = (oid*)(cand_bat?Tloc(cand_bat,BUNlast(cand_bat)-1):NULL);
size_t free, decr;
@@ -505,12 +516,9 @@
iter_idx = BUNfirst(iter_bat);
assert(!end_ctx || iter_idx == BUNfirst(end_ctx));
- ctx_bun = BUNfirst(ctx_bat);
- ctx_last = BUNlast(ctx_bat);
-
@= getnextctx
iter_idx++;
- ctx_bun++;
+ ctx_cur++;
@
@= pushctx
si_C new_stack_item;
@@ -539,12 +547,46 @@
starting from the ctx (on the top of the stack) until
the next ctx node (or the end of the stack top
ctx node scope) is reached */
- while (ctx_bun < ctx_last && cand_cur <= cand_lst && free > 0) {
- ctx = *(oid*)BUNtail(ctx_bati,ctx_bun);
+ while (ctx_cur < ctx_lst && cand_cur <= cand_lst && free > 0) {
+ ctx = *ctx_cur;
+ cand = cand_cur ? *cand_cur : oid_nil;
+
+ /* if the stack is empty,
+ we can ignore/skip "obviously" non-matching ctx- & cand-nodes
+ by advancing the both cursors in a merge-like fashion ... */
+ if (cand_bat && !stack_top && (cand < ctx || ctx + size[ctx] < cand)) {
+ while (ctx_cur < ctx_lst && cand_cur <= cand_lst && (*cand_cur <
*ctx_cur || *ctx_cur + size[*ctx_cur] < *cand_cur)) {
+ ctx = *ctx_cur;
+ cand = *cand_cur;
+ if (cand < ctx) {
+ /* poor man's binary search / exploiting forward scan */
+ while (cand_cur+1048576 <= cand_lst && *(cand_cur+1048576)
< ctx)
+ cand_cur += 1048576; /* this avoids mmapped I/O */
+ while (cand_cur+32768 <= cand_lst && *(cand_cur+32768) <
ctx)
+ cand_cur += 32768;
+ while (cand_cur+1024 <= cand_lst && *(cand_cur+1024) < ctx)
+ cand_cur += 1024;
+ while (cand_cur+32 <= cand_lst && *(cand_cur+32) < ctx)
+ cand_cur += 32;
+ while (cand_cur <= cand_lst && *cand_cur < ctx)
+ cand_cur ++;
+ } else /* ctx + size[ctx] < cand */ {
+ while (ctx_cur < ctx_lst && *ctx_cur + size[*ctx_cur] <
cand) {
+ @:getnextctx@
+ }
+ }
+ }
+ }
+ /* ... otherwise, we can at least ignore ctx nodes that end before
next cand */
+ else if (cand_bat && ctx + size[ctx] < cand) {
+ while (ctx_cur < ctx_lst && *ctx_cur + size[*ctx_cur] < cand) {
+ @:getnextctx@
+ }
+ }
/* if the stack is empty the next ctx node
has to be pushed on the stack
and the next ctx node is called*/
- if (!stack_top) {
+ else if (!stack_top) {
@:pushctx@
@:getnextctx@
}
@@ -690,8 +732,7 @@
GDKfree(stack);
if (free == 0) {
- BUN ctx_fst = BUNfirst(ctx_bat);
- *estimate = (size_t)(((dbl)(ctx_last - ctx_fst) / (dbl)(ctx_bun -
ctx_fst)) * (dbl)cutoff);
+ *estimate = (size_t)(((dbl)(ctx_lst - ctx_fst) / (dbl)(ctx_cur -
ctx_fst)) * (dbl)cutoff);
}
*result = res;
return GDK_SUCCEED;
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins