Update of /cvsroot/monetdb/pathfinder/runtime
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv32270/runtime

Modified Files:
      Tag: PF_ROX
        pf_support.mx ws_step_down.mx 
Log Message:

implemented result-size limit ("cutoff") for downward step-joins:
when provided with a limit ("cutoff"), the downward step-joins
(ws_child(), ws_descendant(),ws_descendant_or_self())
will produce a result that is no larger than "cutoff";
in addition to the actual result, the downward step-joins
now return an estimate how large the result might have been,
in case all (left-)input node had been consumed;
in case no limit was given (cutoff == int(nil) or cutoff < 0),
or on case the actual result is smaller than the cutoff,
the estimate is he exact result size.

TODO:
implement cutoff also for staircase-joins


Index: ws_step_down.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/Attic/ws_step_down.mx,v
retrieving revision 1.1.2.2
retrieving revision 1.1.2.3
diff -u -d -r1.1.2.2 -r1.1.2.3
--- ws_step_down.mx     19 Feb 2008 12:28:08 -0000      1.1.2.2
+++ ws_step_down.mx     20 Feb 2008 00:41:47 -0000      1.1.2.3
@@ -53,19 +53,22 @@
 @:ws_proto(descendant)@
 @:ws_head(descendant,FALSE)@
     self = FALSE;
-    res_size = BATcount(iter_bat);     /* FIXME: estimate size! */
+    if (res_size == 0)
+        res_size = BATcount(iter_bat); /* FIXME: estimate size! */
 @:ws_main(descendant, (size[pre] & (1<<31)))@
 
 /* DESCENDANT-OR-SELF STEP */
 @:ws_head(descendant_or_self,FALSE)@
     self = TRUE;
-    res_size = BATcount(iter_bat);     /* FIXME: estimate size! */
+    if (res_size == 0)
+        res_size = BATcount(iter_bat); /* FIXME: estimate size! */
 @:ws_main(descendant, (size[pre] & (1<<31)))@
 
 /* CHILD STEP */
 @:ws_proto(child)@
 @:ws_head(child,TRUE)@
-    res_size = BATcount(iter_bat);     /* FIXME: estimate size! */
+    if (res_size == 0)
+        res_size = BATcount(iter_bat); /* FIXME: estimate size! */
 @:ws_main(child, FALSE)@
 
 /* Templates / Wrappers */
@@ -73,13 +76,13 @@
 @= ws_proto
 static int
 [EMAIL PROTECTED] (    BAT **res, BAT *iter_bat, BAT *ctx_bat, BAT *cand_bat, 
int *size,
-       oid min_iter, oid max_iter, bit no_iter_order, bit self);
+       oid min_iter, oid max_iter, bit no_iter_order, bit self, size_t cutoff, 
size_t *estimate);
 @
 @= ws_head
 int
 [EMAIL PROTECTED] (BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *pre_size, 
BAT *cand_bat,
         bit *_one_iter, bit *_one_ctx,
-        oid *_min_iter, oid *_max_iter, bit *_no_iter_order)
+        oid *_min_iter, oid *_max_iter, bit *_no_iter_order, int *_cutoff)
 {
     /* --------------------------- declarations ---------------------------- */
 
@@ -89,12 +92,17 @@
     oid min_iter = *_min_iter;
     oid max_iter = *_max_iter;
     bit no_iter_order = *_no_iter_order;
+    size_t cutoff = ((*_cutoff < 0) ? 0 : (size_t)*_cutoff);
     bit self = FALSE;
+#if 0
     bit child_step = @2;
+#endif
     BAT *res = NULL;
     BAT *del = NULL;
-    size_t res_size = 0;
+    size_t res_size = cutoff;
     int *size;
+    size_t estimate = 0;
+    int _estimate = 0;
 
     /* --------------------------- checks ---------------------------------- */
 
@@ -157,6 +165,8 @@
     }
     size = ((int*) Tloc(pre_size, BUNfirst(pre_size))) - 
(int)pre_size->hseqbase;
 
+    *result = NULL;
+
     /* --------------------------- empty result ---------------------------- */
 
     if (BATcount(iter_bat) == 0 || BATcount(pre_size) == 0 || 
BATcount(cand_bat) == 0)
@@ -171,7 +181,10 @@
         res->tdense = TRUE;
         BATseqbase (BATmirror(res), (oid)0); /* does not really matter */
         BATset(res, TRUE);
-        *result = res;
+        _estimate = 0;
+        *result = BATnew(TYPE_int, TYPE_bat, 1);
+       bunfastins (*result, (void*) &_estimate, (void*) &res->batCacheid);
+       BBPunfix(res->batCacheid);
         @:ws_return(GDK_SUCCEED)@
     }
 @
@@ -182,6 +195,8 @@
      * @1: step name
      */
     
+#if 0
+  /* NOTE: THIS CODE IS NOT YET CORRECT / COMPLETE ! */
     if ( one_ctx && !child_step ) {
 
         /* ------------------------ trivial cases ------------------------- */
@@ -194,18 +209,22 @@
         size_t max_size = 0;
 
         assert((size[fst_ctx] & (1<<31)) == 0);
-        max_size = res_size = (size_t)(size[fst_ctx] + (int)self);
-        if ( BATcount(cand_bat) < res_size ) {
-            res_size = BATcount(cand_bat);
+        if (cutoff == 0) {
+            max_size = res_size = (size_t)(size[fst_ctx] + (int)self);
+            if ( BATcount(cand_bat) < res_size ) {
+                res_size = BATcount(cand_bat);
+            }
+            res_size *= BATcount(iter_bat);
         }
-        res_size *= BATcount(iter_bat);
         res = BATnew(TYPE_oid, TYPE_oid, res_size);
         if (res == NULL) 
         { 
             GDKerror("%s: could not allocate a result BAT[oid,void] of size " 
SZFMT ".\n", name, res_size);
             @:ws_return(GDK_FAIL)@
         }
-        res_size /= BATcount(iter_bat);
+        if (cutoff == 0) {
+            res_size /= BATcount(iter_bat);
+        }
         fst_ctx = fst_ctx + (oid)(1 - (int)self);
         lst_ctx = fst_ctx + (oid)max_size;
         sdst = hdst = (oid*)Hloc(res, BUNlast(res));
@@ -217,38 +236,53 @@
         oid left = oid_nil, right = oid_nil;
         oid fst_right = *(oid*)BUNhead(cand_bati, _fst_cand);
        oid lst_right = cand_bat->hseqbase + BATcount(cand_bat);
+       size_t free, decr;
             
+        if (cutoff > 0) {
+            free = cutoff;
+            decr = 1; 
+        } else {
+            free = 1;
+            decr = 0; 
+        }
         /* TODO: check for keyness of cand_bat's tail => will make 
(*(lst_cand-1) < lst_ctx) optimization impossible */
         if ( one_iter ) {
             left = ctx_bat->hseqbase;
             if (*(lst_cand-1) < lst_ctx) {
-                for (right = fst_right; right < lst_right; right++) {
+                for (right = fst_right; right < lst_right && free > 0; 
right++) {
                     *hdst++ = left;
                     *tdst++ = right;
+                    free -= decr;
                 }
             } else {
-                for (right = fst_right, cand = fst_cand ; cand < lst_cand && 
*cand < lst_ctx; cand++, right++) {
+                for (right = fst_right, cand = fst_cand ; cand < lst_cand && 
*cand < lst_ctx && free > 0; cand++, right++) {
                     *hdst++ = left;
                     *tdst++ = right;
+                    free -= decr;
                 }
             }
+            if (free == 0) {
+                estimate = (size_t)(((dbl)(lst_right - fst_right) / 
(dbl)(right - fst_right)) * (dbl)cutoff);
+            }
         } else 
         {   /* multiple iters */
             if (iter_bat->tkey) {
                 oid lst_left = ctx_bat->hseqbase + BATcount(ctx_bat);
                 
                 if (*(lst_cand-1) < lst_ctx) {
-                    for (right = fst_right; right < lst_right; right++) {
-                        for (left = ctx_bat->hseqbase; left < lst_left; 
left++) {
+                    for (right = fst_right; right < lst_right && free > 0; 
right++) {
+                        for (left = ctx_bat->hseqbase; left < lst_left && free 
> 0; left++) {
                             *hdst++ = left;
                             *tdst++ = right;
+                            free -= decr;
                         }
                     }
                 } else {
-                    for (right = fst_right, cand = fst_cand ; cand < lst_cand 
&& *cand < lst_ctx; cand++, right++) {
-                        for (left = ctx_bat->hseqbase; left < lst_left; 
left++) {
+                    for (right = fst_right, cand = fst_cand ; cand < lst_cand 
&& *cand < lst_ctx && free > 0; cand++, right++) {
+                        for (left = ctx_bat->hseqbase; left < lst_left && free 
> 0; left++) {
                             *hdst++ = left;
                             *tdst++ = right;
+                            free -= decr;
                         }
                     }
                 }
@@ -258,26 +292,28 @@
                 BUN lst_iter = BUNlast(iter_bat);
 
                if (*(lst_cand-1) < lst_ctx) {
-                    for (right = fst_right; right < lst_right; right++) {
+                    for (right = fst_right; right < lst_right && free > 0; 
right++) {
                         oid prev_iter = oid_nil;
-                        for (left = ctx_bat->hseqbase, iter = fst_iter ; iter 
< lst_iter; iter++, left++) {
+                        for (left = ctx_bat->hseqbase, iter = fst_iter ; iter 
< lst_iter && free > 0; iter++, left++) {
                             oid cur_iter = *(oid*)BUNtail(iter_bati,iter);
                             if (cur_iter != prev_iter) {
                                 *hdst++ = left;
                                 *tdst++ = right;
                                 prev_iter = cur_iter;
+                                free -= decr;
                             }
                         }
                     }
                } else {
-                    for (right = fst_right, cand = fst_cand ; cand < lst_cand 
&& *cand < lst_ctx; cand++, right++) {
+                    for (right = fst_right, cand = fst_cand ; cand < lst_cand 
&& *cand < lst_ctx && free > 0; cand++, right++) {
                         oid prev_iter = oid_nil;
-                        for (left = ctx_bat->hseqbase, iter = fst_iter ; iter 
< lst_iter; iter++, left++) {
+                        for (left = ctx_bat->hseqbase, iter = fst_iter ; iter 
< lst_iter && free > 0; iter++, left++) {
                             oid cur_iter = *(oid*)BUNtail(iter_bati,iter);
                             if (cur_iter != prev_iter) {
                                 *hdst++ = left;
                                 *tdst++ = right;
                                 prev_iter = cur_iter;
+                                free -= decr;
                             }
                         }
                     }
@@ -286,6 +322,9 @@
         }
         BATsetcount(res, hdst-sdst);
     } else
+  /* NOTE: THIS CODE IS NOT YET CORRECT / COMPLETE ! */
+#endif
+
     /* ( !one_ctx || child_step ) */ {
 
         /* ------------------------ general cases ------------------------- */
@@ -299,7 +338,7 @@
         }
 
         if ([EMAIL PROTECTED](&res, iter_bat, ctx_bat, cand_bat, size, 
-                  min_iter, max_iter, no_iter_order, self) == GDK_FAIL )
+                  min_iter, max_iter, no_iter_order, self, cutoff, &estimate) 
== GDK_FAIL )
         {
             @:ws_return(GDK_FAIL)@
         }
@@ -336,9 +375,23 @@
     BATkey(BATmirror(res),(res->tdense||one_iter)); /* might be TRUE in some 
more cases... */
     BATset(res, TRUE);
 }
-    *result = res;
+    if (estimate == 0)
+        estimate = BATcount(res);
+    if (estimate < (size_t) GDK_int_max)
+        _estimate = (int) estimate;
+    else
+        _estimate = GDK_int_max;
+    *result = BATnew(TYPE_int, TYPE_bat, 1);
+    bunfastins (*result, (void*) &_estimate, (void*) &res->batCacheid);
+    BBPunfix(res->batCacheid);
 
     @:ws_return(GDK_SUCCEED)@
+
+bunins_failed:
+    if (*result)
+        BBP_unfix_reclaim(*result);
+    *result = NULL;
+    @:ws_return(GDK_FAIL)@
 }
 @
 
@@ -393,7 +446,7 @@
 
 static int
 ws_child ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *cand_bat, int* size,
-             oid min_iter, oid max_iter, bit no_iter_order, bit self)
+             oid min_iter, oid max_iter, bit no_iter_order, bit self, size_t 
cutoff, size_t* estimate)
 {
     BATiter ctx_bati = bat_iterator(ctx_bat);
     BATiter iter_bati = bat_iterator(iter_bat);
@@ -406,6 +459,15 @@
     oid *cand_fst = (oid*)Tloc(cand_bat,BUNfirst(cand_bat));
     oid *cand_cur = cand_fst;
     oid *cand_lst = (oid*)Tloc(cand_bat,BUNlast(cand_bat)-1);
+    size_t free, decr;
+        
+    if (cutoff > 0) {
+        free = cutoff;
+        decr = 1; 
+    } else {
+        free = 1;
+        decr = 0; 
+    }
 
     /* not used, here; keep compilers happy */
     (void)min_iter;
@@ -457,7 +519,7 @@
        starting from the ctx (on the top of the stack) until
        the next ctx node (or the end of the stack top
        ctx node scope) is reached */
-    while (ctx_bun < ctx_last && cand_cur <= cand_lst) {
+    while (ctx_bun < ctx_last && cand_cur <= cand_lst && free > 0) {
         ctx = *(oid*)BUNtail(ctx_bati,ctx_bun);
         /* if the stack is empty the next ctx node
            has to be pushed on the stack 
@@ -494,7 +556,7 @@
     /* need to process the ctx nodes, which are still on the stack 
        - only need to evaluate the inner loop and pop, because 
          there are no more new ctx nodes */
-    while (stack_top) {
+    while (stack_top && free > 0) {
         @:inner_loop_child(stack[stack_top-1].eocs,@1,@2)@
         @:popctx@
     }
@@ -510,7 +572,7 @@
 
         for (; pre <= @1 && pre < *fst_pre; pre += (size[pre]&GDK_int_max) + 
1) { 
         }
-        for (;              pre <= lst_pre; pre += (size[pre]&GDK_int_max) + 1)
+        for (; free > 0  && pre <= lst_pre; pre += (size[pre]&GDK_int_max) + 1)
           if ((size[pre] & (1<<31)) == 0) {
 
             /* poor man's binary search / exploiting forward scan */
@@ -553,11 +615,13 @@
                 *hdst++ = left;
                 *tdst++ = right;
                 prev_iter = next_iter;
+                free -= decr;
             }
 @
 @= no_duplicates_body
             *hdst++ = left;
             *tdst++ = right;
+            free -= decr;
 @
 @= inner_loop_child_body
         @1
@@ -572,7 +636,7 @@
         hdst = (oid*)Hloc(res, BUNlast(res));
         tdst = (oid*)Tloc(res, BUNlast(res));
         for (cur_idx = stack[stack_top-1].first, left = iter_bat->hseqbase + 
(stack[stack_top-1].first - BUNfirst(iter_bat));
-             cur_idx <= stack[stack_top-1].last;
+             cur_idx <= stack[stack_top-1].last && free > 0;
              cur_idx++, left++) {
             @2
         }
@@ -581,6 +645,10 @@
 @c
 
     GDKfree(stack);
+    if (free == 0) {
+        BUN ctx_fst = BUNfirst(ctx_bat);
+        *estimate = (size_t)(((dbl)(ctx_last - ctx_fst) / (dbl)(ctx_bun - 
ctx_fst)) * (dbl)cutoff);
+    }
     *result = res;    
     return GDK_SUCCEED;
 bunins_failed:
@@ -609,7 +677,7 @@
 
 static int
 ws_descendant ( BAT **result, BAT *iter_bat, BAT *ctx_bat, BAT *cand_bat, int* 
size,
-                oid min_iter, oid max_iter, bit no_iter_order, bit self)
+                oid min_iter, oid max_iter, bit no_iter_order, bit self, 
size_t cutoff, size_t *estimate)
 {
     BATiter ctx_bati = bat_iterator(ctx_bat), iter_bati = 
bat_iterator(iter_bat);
     BAT *res = *result;
@@ -626,6 +694,15 @@
     int OST_bits  = OST_bytes * 8;
     int OST_mask  = OST_bits - 1;
     int OST_shift = 0;
+    size_t free, decr;
+        
+    if (cutoff > 0) {
+        free = cutoff;
+        decr = 1; 
+    } else {
+        free = 1;
+        decr = 0; 
+    }
 
     cnd_fst = cnd_ptr = (oid*)Tloc(cand_bat, BUNfirst(cand_bat));
     cnd     = *cnd_ptr;
@@ -705,14 +782,14 @@
        starting from the ctx (on the top of the stack) until
        the next ctx node (or the end of the stack top
        ctx node scope) is reached */
-    while (ctx_bun < ctx_last && cnd <= lst_cnd) {
+    while (ctx_bun < ctx_last && cnd <= lst_cnd && free > 0) {
         oid cur_ctx = ctx;
         /* scan over all iters for the current ctx node;
            only a new (non-active) iters have to be added
            to the list of active iters */
         pre = ctx;
         @:skip_cands_before_pre@
-        while (ctx_bun < ctx_last) {
+        while (ctx_bun < ctx_last && free > 0) {
             ctx = *(oid*)BUNtail(ctx_bati,ctx_bun);
             iter_idx = *(oid*)BUNtail(iter_bati,iter_bun) - min_iter;
             left = *(oid*)BUNhead(iter_bati,iter_bun);
@@ -729,6 +806,7 @@
                     if (pre == cnd) {
                         right = cand_bat->hseqbase + (cnd_ptr - cnd_fst);
                         bunfastins(res, &left, &right);
+                        free -= decr;
                     }
                 }
             }
@@ -737,7 +815,7 @@
         }
         @:sort_iters_on_stack@
         pre++;
-        if (ctx_bun < ctx_last) {
+        if (ctx_bun < ctx_last && free > 0) {
             if (ctx <= stack[stack_top-1].eocs) {
                 /* find all results between the current ctx node
                    and the next (descendant) ctx node */
@@ -746,10 +824,10 @@
             } else {
                 /* successively finish all active scopes
                    that do not contain the next ctx node */
-                while (stack_top && stack[stack_top-1].eocs <= ctx) {
+                while (stack_top && stack[stack_top-1].eocs <= ctx && free > 
0) {
                     @:finish_scope_descendant@
                 }
-                if (ctx <= stack[stack_top-1].eocs) {
+                if (ctx <= stack[stack_top-1].eocs && free > 0) {
                     @:inner_loop_descendant(ctx)@
                 }
             }
@@ -758,7 +836,7 @@
     /* need to process the ctx nodes, which are still on the stack 
        - only need to evaluate the inner loop and pop, because 
          there are no more new ctx nodes */
-    while (stack_top) {
+    while (stack_top && free > 0) {
         @:finish_scope_descendant@
     }
 
@@ -767,7 +845,7 @@
     /* find all results in the current scope */
     @:inner_loop_descendant(eocs)@
     /* back to enclosing scope: remove all iters that are done */
-    while (stack_top && stack[stack_top-1].eocs <= eocs) {
+    while (stack_top && stack[stack_top-1].eocs <= eocs && free > 0) {
         stack_top--;
         onstack_clr(stack[stack_top].iter_idx);
     }
@@ -796,10 +874,11 @@
         oid *fst_pre = cnd_ptr;
         oid *lst_pre = (oid*)Tloc(cand_bat, BUNlast(cand_bat));
                         
-        for (right = cand_bat->hseqbase + (cnd_ptr - cnd_fst), cur_pre = 
fst_pre ; cur_pre < lst_pre && *cur_pre <= lst_ctx; cur_pre++, right++) {
-            for (j = 0 ; j < stack_top ; j++ ) {
+        for (right = cand_bat->hseqbase + (cnd_ptr - cnd_fst), cur_pre = 
fst_pre ; cur_pre < lst_pre && *cur_pre <= lst_ctx && free > 0; cur_pre++, 
right++) {
+            for (j = 0 ; j < stack_top && free > 0 ; j++ ) {
                 *hdst++ = iters_on_stack[j];
                 *tdst++ = right;
+                free -= decr;
             }
         }
         BATsetcount(res, hdst-sdst);
@@ -819,6 +898,10 @@
     GDKfree(onstack);
     GDKfree(iters_on_stack);
     
+    if (free == 0) {
+        BUN ctx_fst = BUNfirst(ctx_bat);
+        *estimate = (size_t)(((dbl)(ctx_last - ctx_fst) / (dbl)(ctx_bun - 
ctx_fst)) * (dbl)cutoff);
+    }
     *result = res;    
     return GDK_SUCCEED;
 bunins_failed:

Index: pf_support.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pf_support.mx,v
retrieving revision 1.277.4.3
retrieving revision 1.277.4.4
diff -u -d -r1.277.4.3 -r1.277.4.4
--- pf_support.mx       19 Feb 2008 12:28:07 -0000      1.277.4.3
+++ pf_support.mx       20 Feb 2008 00:41:46 -0000      1.277.4.4
@@ -329,7 +329,8 @@
                    BAT[oid,oid] r_pre,
                    bit one_iter, bit one_ctx,
                    oid min_iter, oid max_iter,
-                   bit no_iter_order): BAT[oid,oid] = [EMAIL PROTECTED];
+                   bit no_iter_order,
+                   int cutoff): BAT[int,BAT] = [EMAIL PROTECTED];
 "PARAMETERS:
 BAT[void,oid] l_prune (grouping relation; sorted on tail within each ctx group)
 BAT[void,oid] l_pre (context set; sorted on tail)
@@ -340,8 +341,11 @@
 oid           min_iter,max_iter (smallest and largest iter id)
 bit           no_iter_order (descendant & descendant_or_self, only: 
                result will be ordered on item, but not sub-ordered on iter)
+int           cutoff (result size limit; int(nil) or <=0  =>  no limit)
 DESCRIPTION:
-returns all pairs of positions/IDs ([l_pre.head, r_pre.head]) such that
+returns an [int,BAT]-BAT with a single BUN;
+head: estimated result size, in case result was cut off; actual result size, 
otherwise;
+tail: [oid,oid]-BAT with all pairs of positions/IDs ([l_pre.head, r_pre.head]) 
such that
 (l_pre.tail STEP r_pre.tail) holds and {[l_prune.tail, r_pre.tail]} is 
distinct."
 @
 @m


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to