Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv28119/modules/pftijah

Modified Files:
      Tag: M5XQ
        pftijah.mx 
Log Message:
propagated changes of Wednesday Sep 23 2009 - Thursday Sep 24 2009
from the development trunk to the M5XQ branch

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2009/09/23 - hrode: modules/pftijah/pftijah.mx,1.237
create global term frequency BAT on collection level at indexing time
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.226.2.9
retrieving revision 1.226.2.10
diff -u -d -r1.226.2.9 -r1.226.2.10
--- pftijah.mx  23 Sep 2009 07:26:59 -0000      1.226.2.9
+++ pftijah.mx  24 Sep 2009 07:52:30 -0000      1.226.2.10
@@ -623,11 +623,13 @@
          if ( rebuild ) {
              # INCOMPLETE, not throwing them away is much faster!!!
               bat(_tj_TermBat(ftiName)).delete();
+              bat("tj_" + ftiName + "_termfreq").delete();
               bat(_tj_TagBat(ftiName)).delete();
               bat(_tj_RTagBat(ftiName)).delete();
          } else {
              if ( VOID_TTBAT) {
                   new(void,str).seqbase(0@0).persists(true).bbpname(_tj_TermBat(ftiName));
+                  new(void,int).seqbase(0@0).persists(true).bbpname("tj_" + ftiName + "_termfreq");
                   new(void,str).seqbase(0@0).persists(true).bbpname(_tj_TagBat(ftiName));
              } else {
                   new(oid,str).persists(true).bbpname(_tj_TermBat(ftiName));
@@ -648,7 +650,7 @@
                var bn := "tj_" + ftiName + "_tid";
                _tj_safe_remove(bn);
                 extra_del_bat.append(bn);
-               bn := "tj_" + ftiName + "_size" + str(frag_offset);
+               bn := "tj_" + ftiName + "_size";
                _tj_safe_remove(bn);
                 extra_del_bat.append(bn);
                frag_offset :+= 1;
@@ -927,6 +929,7 @@
     var err := CATCH({
         if ( not(GLOBAL_TTBAT) ) {
           _tj_safe_remove(_tj_TermBat(ftiName));
+          _tj_safe_remove("tj_" + ftiName + "_termfreq");
           _tj_safe_remove(_tj_TagBat(ftiName));
           _tj_safe_remove(_tj_RTagBat(ftiName));
         }
@@ -965,6 +968,7 @@
         var tjCollBat := new(void,str).seqbase(0@0);
 
         tjCollBat.append(_tj_TermBat(ftiName));
+        tjCollBat.append("tj_" + ftiName + "_termdict");
         tjCollBat.append(_tj_TagBat(ftiName));
         tjCollBat.append(_tj_RTagBat(ftiName));
         tjCollBat.append("tj_pfc_fti_dep");
@@ -1015,6 +1019,7 @@
             ERROR("_tj_collection():%s: pftijah index structure changed, reindex collection!!",curversion);
        }
        tjCollBat.insert("termdict", bat(_tj_TermBat(ftiName)));
+       tjCollBat.insert("termfreq", bat("tj_" + ftiName + "_termfreq"));
        tjCollBat.insert("tagdict", bat(_tj_TagBat(ftiName)));
        tjCollBat.insert("rtags", bat(_tj_RTagBat(ftiName)));
        tjCollBat.insert("doc_name", bat("tj_" + ftiName + "_doc_name"));
@@ -1243,6 +1248,7 @@
     _tj_set_parameter(collBat, "_last_finalizedPre", lst_fpre);
     #
     collBat.find("submitBats").append(_tj_TermBat(ftiName));
+    collBat.find("submitBats").append("tj_" + ftiName + "_termfreq");
     collBat.find("submitBats").append(_tj_TagBat(ftiName));
     collBat.find("submitBats").append(_tj_RTagBat(ftiName));
     
@@ -1455,6 +1461,9 @@
           submitBats.append(_tj_RTagBat(ftiName));
           submitBats.append("tj_" + ftiName + "_conceptdict");
       }
+      # create global term frequency table
+      new(void,int).seqbase(0@0).persists(true).bbpname("tj_" + ftiName + "_termfreq");
+      submitBats.append("tj_" + ftiName + "_termfreq");
 
       # create param BAT
       new(str,str).persists(true).bbpname("tj_" + ftiName + "_param");
@@ -1606,6 +1615,7 @@
        tjCollBat.insert("tagdict", bat(_tj_TagBat(ftiName)));
        tjCollBat.insert("rtags", bat(_tj_RTagBat(ftiName)));
        tjCollBat.insert("conceptdict", bat("tj_" + ftiName + "_conceptdict"));
+       tjCollBat.insert("termfreq", bat("tj_" + ftiName + "_termfreq"));
        tjCollBat.insert("param", parbat);
 
        # only load the top collection fragment
@@ -1845,6 +1855,7 @@
        
        var replaceBats := commitBats.find("replaceBats");
         var submitBats := commitBats.find("submitBats");
+        collBat.find("termdict").access(BAT_WRITE);
 
        # incremental index merge
        if (collBat.exist("_tags")) 
@@ -1874,6 +1885,10 @@
                 # handle terms
                 tmp := tids.kdiff(collBat.find("pfpre"));
                 tmp := tmp.reverse().ssort();
+               var tf := collBat.find("termfreq");
+                var termfreq := {count}(tmp, collBat.find("termdict"), false);
+                tf [:+=] termfreq.slice(0,tf.count() - 1);
+               tf.append(termfreq.slice(tf.count(), termfreq.count() - 1));
                i := mergeindex(tmp, collBat.find("_termIndex"),
                                          collBat.find("_terms"),
                                          collBat.find("termdict").count_wrd() + 1);       
@@ -1885,7 +1900,7 @@
                 replaceBats.insert("_Terms", "tj_" + ftindex + "_Terms");
                submitBats.append("tj_" + ftindex + "_TermIndex");
                submitBats.append("tj_" + ftindex + "_Terms");
-               
+               submitBats.append(tf.bbpname());
         }
         else # create new index
         {       
@@ -1910,6 +1925,10 @@
                # handle terms
                 tmp := tids.kdiff(collBat.find("pfpre"));
                 tmp := tmp.reverse().ssort();
+               var tf := collBat.find("termfreq");
+                var termfreq := {count}(tmp, collBat.find("termdict"), false);
+                tf [:+=] termfreq.slice(0,tf.count() - 1);
+               tf.append(termfreq.slice(tf.count(), termfreq.count() - 1));
                var termindex := tmp.hmark(0@0).offsetindex(collBat.find("termdict").count_wrd() + 1);
                var terms := tmp.tmark(0@0);
                tmp := nil;
@@ -1920,6 +1939,7 @@
                terms := nil;
                submitBats.append("tj_" + ftiName + "_TermIndex");
                submitBats.append("tj_" + ftiName + "_Terms");
+               submitBats.append(tf.bbpname());
         }
         
         # always create concept table from scratch
@@ -1942,6 +1962,7 @@
        submitBats.append("tj_" + ftiName + "_ConceptIndex");
        submitBats.append("tj_" + ftiName + "_Concepts");
        submitBats.append("tj_" + ftiName + "_ConceptScore");
+        collBat.find("termdict").access(BAT_READ);
       if ( verbose ) printf(HASH +"TJ _tj_build_inverted_index_frag() finished.\n");
 }
 
@@ -4945,6 +4966,7 @@
        
        var replaceBats := collBat.find("replaceBats");
         var submitBats := collBat.find("submitBats");
+        collBat.find("termdict").access(BAT_WRITE);
 
        # incremental index merge
        if (isnil(CATCH(bat("tj_" + ftiName + "_TermIndex").count_wrd()))) 
@@ -4975,6 +4997,10 @@
                 tmp := tids.kdiff(elements);
                 elements := nil;
                 tmp := tmp.reverse().ssort();
+               var tf := collBat.find("termfreq");
+                var termfreq := {count}(tmp, collBat.find("termdict"), false);
+                tf [:+=] termfreq.slice(0,tf.count() - 1);
+               tf.append(termfreq.slice(tf.count(), termfreq.count() - 1));
                i := mergeindex(tmp, collBat.find("_termIndex"),
                                          collBat.find("_terms"),
                                          collBat.find("termdict").count_wrd() + 1);       
@@ -4986,7 +5012,7 @@
                 replaceBats.insert("_terms", "tj_" + ftiName + "_Terms");
                submitBats.append("tj_" + ftiName + "_TermIndex");
                submitBats.append("tj_" + ftiName + "_Terms");
-               
+               submitBats.append(tf.bbpname());
         }
         else # create new index
         {       
@@ -5012,6 +5038,10 @@
                 tmp := tids.kdiff(elements);
                 elements := nil;
                 tmp := tmp.reverse().ssort();
+               var tf := collBat.find("termfreq");
+                var termfreq := {count}(tmp, collBat.find("termdict"), false);
+                tf [:+=] termfreq.slice(0,tf.count() - 1);
+               tf.append(termfreq.slice(tf.count(), termfreq.count() - 1));
                var termindex := tmp.hmark(0@0).offsetindex(bat(_tj_TermBat(ftiName)).count_wrd() + 1);
                var terms := tmp.tmark(0@0);
                tmp := nil;
@@ -5022,6 +5052,7 @@
                terms := nil;
                submitBats.append("tj_" + ftiName + "_TermIndex");
                submitBats.append("tj_" + ftiName + "_Terms");
+               submitBats.append(tf.bbpname());
         }
         
         # always create concept table from scratch
@@ -5044,6 +5075,7 @@
        submitBats.append("tj_" + ftiName + "_ConceptIndex");
        submitBats.append("tj_" + ftiName + "_Concepts");
        submitBats.append("tj_" + ftiName + "_ConceptScore");
+        collBat.find("termdict").access(BAT_READ);
     if ( verbose ) printf(HASH +"TJ:_buildIRindex(\"%s\") finished.\n",ftiName);
 }
 


------------------------------------------------------------------------------
Come build with us! The BlackBerry® Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay 
ahead of the curve. Join us from November 9-12, 2009. Register now!
http://p.sf.net/sfu/devconf
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to