Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv17168
Modified Files:
nexi.c nexi.l pftijah.mx pftijah_tokenize.l
Log Message:
- Make the tj_firstpre BAT type [oid,oid] instead of [oid,int] to solve the testcoll2
64-bit problem in a better way.
- synchronize for pre-release cleanup with Henning.
Index: pftijah_tokenize.l
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah_tokenize.l,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- pftijah_tokenize.l 1 Mar 2007 11:26:31 -0000 1.14
+++ pftijah_tokenize.l 30 May 2007 14:01:53 -0000 1.15
@@ -37,7 +37,7 @@
static char *c, *e;
-/* This hack is to prevent problems with flex 2.5.33 and lower on Debian and
+/* This fix is to prevent problems with flex 2.5.33 and lower on Debian and
* Gentoo systems. When flex.2.5.4 and higher is obligatory this define
* may be removed
*/
Index: nexi.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi.c,v
retrieving revision 1.62
retrieving revision 1.63
diff -u -d -r1.62 -r1.63
--- nexi.c 29 May 2007 15:09:02 -0000 1.62
+++ nexi.c 30 May 2007 14:01:52 -0000 1.63
@@ -189,14 +189,6 @@
struct_RF *rel_feedback;
/* structure initialization */
- /*
- [EMAIL PROTECTED] (2006-11-24):
- bad hack (2*) because of this function writes behind the allocated
space
- and code ist not understood
-
- [EMAIL PROTECTED] (2006-12-15):
- This bug should be fixed now, with the cleanup of nexi_rewriter.c
- */
txt_retr_model = GDKmalloc(MAX_QUERIES*sizeof(struct_RMT));
img_retr_model = GDKmalloc(MAX_QUERIES*sizeof(struct_RMI));
rel_feedback = GDKmalloc(MAX_QUERIES*sizeof(struct_RF));
Index: nexi.l
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi.l,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- nexi.l 21 Mar 2007 07:35:28 -0000 1.14
+++ nexi.l 30 May 2007 14:01:53 -0000 1.15
@@ -82,7 +82,7 @@
#define WRITE_COMMAND(COMMAND) tnl_append(&parserCtx->command_preLIST,COMMAND)
#define WRITE_TOKEN(TOKEN) tsl_append(&parserCtx->token_preLIST,TOKEN)
-/* This hack is to prevent problems with flex 2.5.33 and lower on Debian and
+/* This define is to prevent problems with flex 2.5.33 and lower on Debian and
* Gentoo systems. When flex.2.5.4 and higher is obligatory this define
* may be removed
*/
Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.127
retrieving revision 1.128
diff -u -d -r1.127 -r1.128
--- pftijah.mx 29 May 2007 15:09:03 -0000 1.127
+++ pftijah.mx 30 May 2007 14:01:53 -0000 1.128
@@ -72,7 +72,7 @@
.COMMAND pf2tijah_node(
BAT[oid,str] doc_name,
- BAT[oid,int] doc_firstpre,
+ BAT[oid,oid] doc_firstpre,
BAT[oid,oid] pfpre,
BAT[oid,oid] item,
BAT[oid,int] kind,
@@ -159,18 +159,18 @@
var tj_coll_lockbat := new(str,lock); # locks for all active collections
# Comparisons
-const GREATER := 10;
-const LESS := 11;
-const EQUAL := 12;
-const GEQ := 13;
-const LEQ := 14;
+# const GREATER := 10;
+# const LESS := 11;
+# const EQUAL := 12;
+# const GEQ := 13;
+# const LEQ := 14;
# Modifiers
-const NORMAL := 71;
-const PLUS := 72;
-const MINUS := 73;
-const MUST := 74;
-const MUST_NOT := 75;
+# const NORMAL := 71;
+# const PLUS := 72;
+# const MINUS := 73;
+# const MUST := 74;
+# const MUST_NOT := 75;
const ENTITY_NUM := 10000;
@@ -498,7 +498,7 @@
new(oid,str).persists(true).bbpname(_tj_TagBat(ftiName));
}
new(void,str).seqbase(0@0).persists(true).bbpname("tj_" +
ftiName + "_doc_name");
- new(void,int).seqbase(0@0).persists(true).bbpname("tj_" +
ftiName + "_doc_firstpre");
+ new(void,oid).seqbase(0@0).persists(true).bbpname("tj_" +
ftiName + "_doc_firstpre");
new(str,str).persists(true).bbpname("tj_" + ftiName + "_param");
new(void,oid).seqbase(0@0).persists(true).bbpname("tj_" +
ftiName + "_tid1");
@@ -964,25 +964,6 @@
}
}
-ADDHELP("tj_finalize_collection", "flokstra & rode", "Jan 2007",
-"PARAMETERS:\n\
-- str ftiName: the name of the collection.\n
-DESCRIPTION:\n\
-Obsolete function which prepares the collection for usage of new documents\n\
-added to the collection. This is now done automagically",
-"pftijah");
-PROC tj_finalize_collection(str ftiName) : void
-{
- var coll_lock := tj_get_collection_lock(ftiName);
- lock_set(coll_lock);
- var err := CATCH({
- print("Warning: This function is obsolete.");
- #_tj_finalize_collection(ftiName);
- });
- lock_unset(coll_lock);
- if (not(isnil(err))) ERROR(err);
-}
-
# internal finalize function
PROC _tj_finalize_collection(str ftiName, BAT[str,bat] collBat) : void
{
@@ -1154,7 +1135,6 @@
}
}
-
#
#
#
@@ -2193,8 +2173,8 @@
### Background probability:
# For each term: collection term frequency tc(tm_i, col):
- #var col_term_frq := tid_pre.reverse().histogram();
- #small hack to get LMs running again...
+ # var col_term_frq := tid_pre.reverse().histogram();
+ # small fix to get LMs running again...
var col_term_frq := collTermCount(qenv.find(QENV_FTIBGNAME),
terms.reverse().project(int(0)));
# Collection size len(col): int
@@ -2297,7 +2277,6 @@
##
# Returns the collection frequency of the term with the indicated tid
##
-# INCOMPLETE-TT function should have a ftiName parameter
PROC col_freq(oid tid, BAT[oid,str] qenv) : dbl :=
{
var start_time := time();
@@ -2311,7 +2290,6 @@
##
# For each region in left, count the number of regions in right it contains.
##
-# INCOMPLETE-TT function should have a ftiName parameter
PROC reg_freq(bat[oid,dbl] left, bat[oid,dbl] right, int size_type,
BAT[oid,str] qenv) : bat[oid,dbl] :=
{
if ( trace ) tj_trace( "START reg_freq");
@@ -2350,7 +2328,6 @@
# Calculate the term size of the region: how many terms does it contain?
-# INCOMPLETE-TT function should have a ftiName parameter
PROC size_term( bat[oid,any] region, BAT[oid,str] qenv ) : bat[oid,dbl] :=
{
if ( trace ) tj_trace( "BEGIN size_term" );
@@ -2465,7 +2442,6 @@
}
-# INCOMPLETE-TT function should have a ftiName parameter
PROC p_containing_wsumd(bat[oid,dbl] left, bat[oid,dbl] right, int
size_type,BAT[oid,str] qenv) : bat[oid,dbl] :=
{
if ( trace ) tj_trace( "BEGIN p_containing_wsumd" );
@@ -2676,51 +2652,6 @@
}
-################################################################################
-# VALUE SELECTION
-################################################################################
-
-PROC near_val(bat region, int command, str value) : bat :=
-{
- print( "Sorry, near_val is not yet supported" );
- return region;
-
- var r_value;
- var reg_res := new(oid,dbl,ENTITY_NUM);
- var reg_tmp := new(oid,oid,ENTITY_NUM);
-
- var entity_pre := new(oid,oid,WORD_NUM);
- entity_pre := load(col_name+"entity_pre");
-
- # region: [region-id, score]
- # region.mirror(): [region-id, region-id]
- # entity_pre: [region-id, region pre-id]
- # reg_tmp: [region-id, region pre-id]
- reg_tmp := region.mirror().join(entity_pre);
-
- entity_pre := nil;
-
- r_value := int(value);
-
- if (command = GREATER)
- reg_tmp := gt(reg_tmp, r_value);
- else if (command = LESS)
- reg_tmp := lt(reg_tmp, r_value);
- else if (command = EQUAL)
- reg_tmp := eq(reg_tmp, r_value);
- else if (command = GEQ)
- reg_tmp := gte(reg_tmp, r_value);
- else if (command = LEQ)
- reg_tmp := lte(reg_tmp, r_value);
-
- reg_res := reg_tmp.mirror().join(region);
-
- return reg_res;
-}
-#ADDHELP("near_val", "vojkan", "March 16, 2005",
-#"Selects regions that satisfy the inequality (integer) test.",
-#"TIJAH");
-
########################################################################################################
# Procedures for executing value comparison:
# - eq selects the regions with equal text content
@@ -2780,7 +2711,6 @@
# PRIOR
################################################################################
-# INCOMPLETE-TT function should have a ftiName parameter
PROC prior_ls(bat region, int size_type, BAT[oid,str] qenv) : bat :=
{
@@ -3271,8 +3201,7 @@
return GDK_FAIL;
}
- // 64bit ERROR?? doc_start = *(oid*)BUNtail(doc_firstpre,r);
- doc_start = (oid)*(int*)BUNtail(doc_firstpre,r);
+ doc_start = *(oid*)BUNtail(doc_firstpre,r);
oid tj_nextIndex = tj_docIndex + 1;
if ( BATcount(doc_firstpre) > tj_nextIndex ) {
r = BUNfnd(doc_firstpre,&tj_nextIndex);
@@ -3280,7 +3209,7 @@
stream_printf(GDKout,"Cannot do range for
tijah-firstpre @ %d.\n",tj_docIndex);
return GDK_FAIL;
}
- doc_end = (oid)*(int*)BUNtail(doc_firstpre,r) - 1;
+ doc_end = *(oid*)BUNtail(doc_firstpre,r) - 1;
} else {
doc_end = oid_nil;
}
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins