Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv26677
Modified Files:
pftijah.mx
Log Message:
-enable pftijah debugging output
-some more checks in containment-joins
-add another operator tj_pre2nid_noprop that translates pre IDs to nid IDs
without score propagation
U pftijah.mx
Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.211
retrieving revision 1.212
diff -u -d -r1.211 -r1.212
--- pftijah.mx 24 Feb 2009 14:52:57 -0000 1.211
+++ pftijah.mx 25 Feb 2009 14:43:19 -0000 1.212
@@ -2079,14 +2079,25 @@
##
# Translate all pre identifiers to nid identifiers
-# Returns a bat [nid, any].
+# Returns a bat [nid, score].
##
-PROC tj_pre2nid(bat[oid,any] pre_score) : bat[oid,any] :=
+PROC tj_pre2nid_prop(bat[oid,any] pre_score) : bat[oid,any] :=
{
# todo: if join turns out to be expensive, we can slice out the part belonging to
# the respective tagname. pre2nid is only used, if result region has a single tag-name.
- return pre_score.reverse().join(bat("tj_" + ftindex + "_Tags").reverse()).reverse().sort();
+ return bat("tj_" + ftindex + "_Tags").leftjoin(pre_score).chk_order();
+}
+##
+# Translate all pre identifiers to nid identifiers, pre remains in tail
+# Returns a bat [nid, pre].
+##
+PROC tj_pre2nid_noprop(bat[oid,any] pre_score) : bat[oid,any] :=
+{
+ # todo: if join turns out to be expensive, we can slice out the part belonging to
+ # the respective tagname. pre2nid is only used, if result region has a single tag-name.
+
+ return bat("tj_" + ftindex + "_Tags").leftjoin(pre_score.mirror()).chk_order();
}
#####################################################################
@@ -2268,20 +2279,20 @@
@:getTermDocCnt_nid(unnest)@
@= getTermDocCnt_nid
PROC _gettermdocc...@1_nid(BAT[oid,oid] e_pre, BAT[void,int] e_size,
BAT[void,oid] t_pre) : BAT[oid,int] := {
-
- # get doc - term relation
- var e_tPre := treemergejo...@1_nid(e_pre, e_size, t_pre);
- return e_tPre.reverse().histogram().sort();
+
+ # get doc - term relation
+ var e_tPre := treemergejo...@1_nid(e_pre, e_size, t_pre);
+ return e_tPre.reverse().histogram().sort();
}
@mil
@:getTermDocCnt_pre(nest)@
@:getTermDocCnt_pre(unnest)@
@= getTermDocCnt_pre
PROC _gettermdocc...@1_pre(BAT[oid,any] e_pre, BAT[void,int] e_size,
BAT[void,oid] t_pre) : BAT[oid,int] := {
-
- # get doc - term relation
- var e_tPre := treemergejo...@1_pre(e_pre, e_size, t_pre.reverse());
- return e_tPre.reverse().histogram().sort();
+
+ # get doc - term relation
+ var e_tPre := treemergejo...@1_pre(e_pre, e_size, t_pre.reverse());
+ return e_tPre.reverse().histogram().sort();
}
@mil
@@ -5216,6 +5227,7 @@
GDKerror("%s: could not allocate result BAT.\n", name);
return(GDK_FAIL);
}
+ free = BATcapacity(res);
size = ((int*) Tloc(nid_size, BUNfirst(nid_size))) -
(int)nid_size->hseqbase;
D_cur = BUNfirst(Dstart);
@@ -5250,6 +5262,10 @@
new_stack_item.ctx = *(oid*)BUNhead(Ai, A_cur);
new_stack_item.eocs = *(oid*)BUNtail(Ai, A_cur) +
size[*(oid*)BUNhead(Ai, A_cur)];
stack[stack_top++] = new_stack_item;
+ if (stack_top >= max_stack_size) {
+ GDKerror("%s: ancestor stack grew beyond stack-size.\n", name);
+ return(GDK_FAIL);
+ }
}
A_cur++;
}
@@ -5261,7 +5277,10 @@
{
BATsetcount(res, hdst - sdst);
BUN sz = BATgrows(res);
- if (BATextend(res, sz) == NULL)
+ while (sz < ((hdst - sdst) + stack_top))
+ sz = 2 * sz;
+ res = BATextend(res, sz);
+ if (res == NULL)
{
GDKerror("%s: could not extend result BAT.\n", name);
return(GDK_FAIL);
@@ -5356,6 +5375,7 @@
GDKerror("%s: could not allocate result BAT.\n", name);
return(GDK_FAIL);
}
+ free = BATcapacity(res);
size = ((int*) Tloc(nid_size, BUNfirst(nid_size))) -
(int)nid_size->hseqbase;
D_cur = BUNfirst(Dstart);
@@ -5368,7 +5388,7 @@
tdst = (oid*)Tloc(res, BUNlast(res));
/* -------------------------------- main
---------------------------------------- */
-
+
while(D_cur < D_last) {
D_cur_pre = *(oid*)BUNtail(Di, D_cur);
@@ -5390,6 +5410,10 @@
new_stack_item.ctx = *(oid*)BUNhead(Ai, A_cur);
new_stack_item.eocs = *(oid*)BUNtail(Ai, A_cur) +
size[*(oid*)BUNhead(Ai, A_cur)];
stack[stack_top++] = new_stack_item;
+ if (stack_top >= max_stack_size) {
+ GDKerror("%s: ancestor stack grew beyond stack-size.\n", name);
+ return(GDK_FAIL);
+ }
}
A_cur++;
}
@@ -5399,9 +5423,12 @@
/* it has to end after Dend. */
if (free < stack_top)
{
- BATsetcount(res, hdst - sdst);
+ BATsetcount(res, hdst - sdst);
BUN sz = BATgrows(res);
- if (BATextend(res, sz) == NULL)
+ while (sz < ((hdst - sdst) + stack_top))
+ sz = 2 * sz;
+ res = BATextend(res, sz);
+ if (res == NULL)
{
GDKerror("%s: could not extend result BAT.\n", name);
return(GDK_FAIL);
@@ -5493,6 +5520,7 @@
GDKerror("%s: could not allocate result BAT.\n", name);
return(GDK_FAIL);
}
+ free = BATcapacity(res);
size = ((int*) Tloc(pre_size, BUNfirst(pre_size))) -
(int)pre_size->hseqbase;
D_cur = BUNfirst(Dstart);
@@ -5527,6 +5555,10 @@
new_stack_item.ctx = A_cur_pre;
new_stack_item.eocs = A_cur_pre + size[A_cur_pre];
stack[stack_top++] = new_stack_item;
+ if (stack_top >= max_stack_size) {
+ GDKerror("%s: ancestor stack grew beyond stack-size.\n", name);
+ return(GDK_FAIL);
+ }
}
A_cur++;
}
@@ -5538,7 +5570,10 @@
{
BATsetcount(res, hdst - sdst);
BUN sz = BATgrows(res);
- if (BATextend(res, sz) == NULL)
+ while (sz < ((hdst - sdst) + stack_top))
+ sz = 2 * sz;
+ res = BATextend(res, sz);
+ if (res == NULL)
{
GDKerror("%s: could not extend result BAT.\n", name);
return(GDK_FAIL);
@@ -5618,6 +5653,7 @@
GDKerror("%s: could not allocate result BAT.\n", name);
return(GDK_FAIL);
}
+ free = BATcapacity(res);
size = ((int*) Tloc(nid_size, BUNfirst(nid_size))) -
(int)nid_size->hseqbase;
D_cur = BUNfirst(Dstart);
@@ -5721,6 +5757,7 @@
GDKerror("%s: could not allocate result BAT.\n", name);
return(GDK_FAIL);
}
+ free = BATcapacity(res);
size = ((int*) Tloc(nid_size, BUNfirst(nid_size))) -
(int)nid_size->hseqbase;
D_cur = BUNfirst(Dstart);
@@ -5825,6 +5862,7 @@
GDKerror("%s: could not allocate result BAT.\n", name);
return(GDK_FAIL);
}
+ free = BATcapacity(res);
size = ((int*) Tloc(pre_size, BUNfirst(pre_size))) -
(int)pre_size->hseqbase;
D_cur = BUNfirst(Dstart);
------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins