Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv32196

Modified Files:
        nexi.c nexi.h nexi_generate_mil.c pftijah.mx 
Log Message:
Overlap removal function + corresponding TijahOptions attribute + tests added.
Removes overlapping elements from result lists, i.e., makes sure a result list
never contains both a paragraph and its containing section. This way, the same
information is never presented twice to the user. Useful for INEX and other
XML IR tasks.



Index: nexi_generate_mil.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi_generate_mil.c,v
retrieving revision 1.35
retrieving revision 1.36
diff -u -d -r1.35 -r1.36
--- nexi_generate_mil.c 15 Jun 2007 14:41:34 -0000      1.35
+++ nexi_generate_mil.c 14 Aug 2007 13:37:22 -0000      1.36
@@ -1058,6 +1058,9 @@
       MILPRINTF(MILOUT, "R%d := collect;\n",com_num);
       MILPRINTF(MILOUT, "collect := nil;\n");
     }
+   if(txt_retr_model->rmoverlap){
+    MILPRINTF(MILOUT, "R%d := rm_overlap(R%d%s);\n", com_num, com_num,parserCtx->flastPfx);
+    }
     MILPRINTF(MILOUT, "R%d := R%d.tsort_rev();\n", com_num, com_num);
 #if 0
     MILPRINTF(MILOUT, "if ( retNum >= 0 ) { R%d := R%d.slice(0, retNum - 1); }\n", com_num, com_num);

Index: nexi.h
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi.h,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -d -r1.17 -r1.18
--- nexi.h      4 Apr 2007 13:52:05 -0000       1.17
+++ nexi.h      14 Aug 2007 13:37:21 -0000      1.18
@@ -269,6 +269,7 @@
   int param3;
   int prior_type;
   int prior_size;
+  bool rmoverlap;
   char context[TERM_LENGTH];
   float extra;
   struct_RMT *next;

Index: nexi.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi.c,v
retrieving revision 1.69
retrieving revision 1.70
diff -u -d -r1.69 -r1.70
--- nexi.c      1 Aug 2007 09:52:15 -0000       1.69
+++ nexi.c      14 Aug 2007 13:37:21 -0000      1.70
@@ -217,6 +217,7 @@
     //txt_retr_model->prior_type  = LENGTH_PRIOR;    
     //txt_retr_model->prior_size  = 0;
     txt_retr_model->prior_size  = 2;
+    txt_retr_model->rmoverlap = FALSE;
     strcpy(txt_retr_model->context, "");
     txt_retr_model->extra       = 0.0;
     txt_retr_model->next        = NULL;
@@ -465,7 +466,13 @@
                 txt_retr_model->prior_type  = NO_PRIOR;
             }
             
-        } else if (strcmp(optName, "returnNumber") == 0) {
+        }  else if (strcmp(optName, "rmoverlap") == 0) {
+           if (strcasecmp(optVal, "TRUE") == 0) {
+                txt_retr_model->rmoverlap = TRUE;
+           } else {
+               txt_retr_model->rmoverlap=FALSE;
+           }
+       } else if (strcmp(optName, "returnNumber") == 0) {
            // ignore, is handled by milprint_summer
         } else if (strcmp(optName, "term-proximity") == 0) {
                 qenv_prox_val = (char*)strdup(optVal);

Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.151
retrieving revision 1.152
diff -u -d -r1.151 -r1.152
--- pftijah.mx  10 Aug 2007 00:26:41 -0000      1.151
+++ pftijah.mx  14 Aug 2007 13:37:22 -0000      1.152
@@ -2690,6 +2690,36 @@
 #"Adds element prior based on element length to the region score.",
 #"TIJAH");
 
+################################################################################
+# OVERLAP REMOVAL
+################################################################################
+
+PROC rm_overlap(bat ctx,str ind) : bat{
+ # Returns the subset of scored result set ctx left after overlap removal; ind suffixes the size-BAT name.
+ ctx := ctx.sort();
+ var size := bat("tj_"+qenv.find(QENV_FTINAME)+"_size"+ind);
+ var AD:=anc_desc(ctx,ctx,size);
+ var maxD := AD.join(ctx).sort().{max}();
+ var maxA := AD.reverse().sort().join(ctx).{max}();
+ # presumably: AD = ancestor/descendant pairs in ctx; maxD/maxA = max descendant/ancestor score (TODO confirm)
+ AD:=nil;
+
+ # candidates: ctx nodes with no entry in maxD (no descendants in ctx), plus
+ # nodes whose score is strictly greater than maxD (beats every descendant)
+ var candidates := ctx.kdiff(maxD).access(BAT_APPEND);
+ candidates.insert(ctx.[>](maxD).select(true).mirror().join(ctx));
+
+ # keep candidates with no entry in maxA (no ancestor in ctx) or score >= maxA
+ var res := candidates.kdiff(maxA).access(BAT_APPEND);
+ res.insert(candidates.[>=](maxA).select(true).mirror().join(candidates));
+
+ candidates := nil;
+ maxA:=nil;maxD:=nil;
+ return res;
+}
+#ADDHELP("rm_overlap","thijs", "August, 2007",
+#"Removes overlapping elements from result list.",
+#"TIJAH");
 
 
 
################################################################################


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to