Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv24527/modules/pftijah

Modified Files:
        pftijah.mx 
Log Message:
- implement delayed finalization.  Activated by the "delay-finalize" option

tijah:create-ft-index(<TijahOptions delay-finalize="100000" ...........

The number is the number of pre-nodes added to the ft-index. When less
than this number are added finalize does not modify the inverted indices.
When a user does a query on a non-finalized ft-index the index will
be finalized automatically before execution of the query.

This fix should help increase the performance of pftijah in cases where a
lot of small docs are added seperately to the index like in the streettivo
and some Wiki scripts.




Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.137
retrieving revision 1.138
diff -u -d -r1.137 -r1.138
--- pftijah.mx  12 Jun 2007 17:35:58 -0000      1.137
+++ pftijah.mx  13 Jun 2007 11:57:52 -0000      1.138
@@ -27,10 +27,7 @@
 @m
 .MODULE pftijah;
 
-.COMMAND run_tijah_command(str s) : bit = CMDtijah_command;
- "INCOMPLETE"
-
-.COMMAND run_tijah_query(BAT[str,str] opt, bit use_startnodes, BAT[void,oid] 
nodes, str q) : BAT[oid,dbl] = CMDtijah_query;
+.COMMAND _run_tijah_query(BAT[str,str] opt, bit use_startnodes, BAT[void,oid] 
nodes, str q) : BAT[oid,dbl] = CMDtijah_query;
  "INCOMPLETE"
 
 .COMMAND tj_normalizeTerm(str, str) : str = CMDtj_normalizeTerm;
@@ -317,6 +314,25 @@
 
 # var GQENV := create_qenv(dflt_ft_index,dflt_bg_index,dflt_score_base);
 
+PROC run_tijah_query(str ftiName, BAT[str,str] opt, bit use_startnodes, 
BAT[void,oid] nodes, str q) : BAT[oid,dbl] :=
+{
+       if ( verbose ) printf("#TJ:run_tijah_query(\"%s\",..) 
called.\n",ftiName);
+       var parambat := bat("tj_" + ftiName + "_param");
+       var delfin   := lng(parambat.find("delay_finalize"));
+        if ( delfin > lng(0) ) {
+           if ( verbose ) printf("#TJ:run_tijah_query(\"%s\",..) checking 
delayed finalize.\n",ftiName);
+            var finlast  := lng(parambat.find("_last_finalizedPre"));
+            var prelast  := lng(parambat.find("_last_tijahPre"));
+           if ( not(prelast=finlast) ) {
+               if ( verbose ) printf("#TJ:run_tijah_query(\"%s\",..) 
performing delayed finalize (%d != %d).\n",ftiName,int(finlast),int(prelast));
+                var collBat := _tj_collection(ftiName);
+                _tj_finalize_collection(ftiName, collBat, TRUE);
+                _tj_commit(collBat); 
+           }
+       }
+       return run_tijah_query(opt,use_startnodes,nodes,q);
+}
+
 PROC tj_get_ft_index(BAT[str,str] tj_options, bit chk_exists) : str :=
 {
     var res := dflt_ft_index;
@@ -542,10 +558,11 @@
       #
       # now read the param file
       #
-      var stemmer    := "nostemming";
-      var tokenizer  := "flex";
-      var tagfilter  := "";
-      var fragsize   := "0";
+      var stemmer        := "nostemming";
+      var tokenizer      := "flex";
+      var tagfilter      := "";
+      var fragsize       := "0";
+      var delay_finalize := "0";
   
       [EMAIL PROTECTED]() {
          if ( verbose ) 
printf("#TJ:tj_init_collection():param[%s]=\"%s\"\n",$h,$t);
@@ -561,6 +578,9 @@
               tagfilter := $t;
          } else if ( $h = "ft-index" ) {
              # ignore this one here
+         } else if ( $h = "delay-finalize" ) {
+             # the number of pre nodes to delay a finalize
+             delay_finalize := $t;
          } else {
              ERROR("# tj_init_collection() unknown parameter [%s].\n",$h);
          }
@@ -579,6 +599,7 @@
       bat("tj_" + ftiName + "_param").insert("status","building");
       bat("tj_" + ftiName + "_param").insert("_last_tijahPre","1");
       bat("tj_" + ftiName + "_param").insert("_last_finalizedPre","0");
+      bat("tj_" + ftiName + "_param").insert("delay_finalize",delay_finalize);
       #
       # now modify the global fti pfc dependency administration. We may ignore
       # the return value because all dependencies for this collection are new.
@@ -887,7 +908,7 @@
       bat(_tj_TagBat(ftiName)).access(BAT_APPEND);
       var collBat := _tj_collection(ftiName);
       _tj_add2collection(ftiName, collBat, uri_loc, uri_name, store);
-      _tj_finalize_collection(ftiName, collBat);
+      _tj_finalize_collection(ftiName, collBat, FALSE);
       _tj_commit(collBat); 
       if ( timing ) {
          var ms := (usec()-t_start)/1000;
@@ -923,7 +944,7 @@
       [EMAIL PROTECTED]() {
         _tj_add2collection(ftiName, collBat, $h, $t, store);
       }
-      _tj_finalize_collection(ftiName, collBat);
+      _tj_finalize_collection(ftiName, collBat, FALSE);
       if ( timing ) {
          var ms := (usec()-t_start)/1000;
          printf("#C[%s]:tj_add2collection(BAT): + aggregate time = 
%lld.%03llds.\n",ftiName,/(ms,1000),%(ms,1000));
@@ -997,10 +1018,25 @@
 }
 
 # internal finalize function
-PROC _tj_finalize_collection(str ftiName, BAT[str,bat] collBat) : void
+PROC _tj_finalize_collection(str ftiName, BAT[str,bat] collBat, bit fforce) : 
void
 {
     var t_start := usec();
     if ( verbose ) printf("#TJ:_tj_finalize_collection(\"%s\") 
called.\n",ftiName);
+    var parambat := bat("tj_" + ftiName + "_param");
+    if ( not(fforce) ) {
+        var delfin   := lng(parambat.find("delay_finalize"));
+        if ( delfin > lng(0) ) {
+            var finlast  := lng(parambat.find("_last_finalizedPre"));
+            var prelast  := lng(parambat.find("_last_tijahPre"));
+            var fdelta   := prelast - finlast;
+           if ( (prelast - finlast) < delfin ) {
+                  if ( false ) printf("#TJ:_tj_finalize_collection(\"%s\") 
delaying finalization (%d < %d).\n",ftiName,int(fdelta),int(delfin));
+                 return;
+           } else {
+                  if ( false ) printf("#TJ:_tj_finalize_collection(\"%s\") 
finalization treshhold reached (%d > %d).\n",ftiName,int(fdelta),int(delfin));
+           }
+        }
+    }
     #
     var mod_frags := _tj_chk_modified_fragments(ftiName, collBat); 
     # set all fragments except the last one to BAT_READ
@@ -1025,7 +1061,7 @@
     _buildIRindex(ftiName, collBat);
     #
     _tj_set_parameter(collBat, "status", "finalized");
-    var lst_fpre := bat("tj_" + ftiName + "_param").find("_last_tijahPre");
+    var lst_fpre := parambat.find("_last_tijahPre");
     _tj_set_parameter(collBat, "_last_finalizedPre", lst_fpre);
     #
     var gterm_sort := bat(_tj_TermBat(ftiName)).reverse().sort().reverse();
@@ -3325,12 +3361,6 @@
        return GDK_SUCCEED;
 }
 
-int CMDtijah_command(bit* res, str command) {
-       if ( 0 ) stream_printf(GDKout,"# tijah_command(%s)\n",(char*)command);
-       *res = executeMIL(command); /* OK */
-       return GDK_SUCCEED;
-}
-
 extern char* tijahParse(BAT* optbat, char* startNodes_name, char* query, 
char** errBUFF);
 
 static int nexiTmpCounter = 0;


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to