Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv9539

Modified Files:
        pftijah.mx serialize_pftijah.mx 
Log Message:
- implement recursive tag check and storage



Index: serialize_pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/serialize_pftijah.mx,v
retrieving revision 1.51
retrieving revision 1.52
diff -u -d -r1.51 -r1.52
--- serialize_pftijah.mx        15 Jun 2007 07:00:42 -0000      1.51
+++ serialize_pftijah.mx        15 Jun 2007 09:32:43 -0000      1.52
@@ -197,6 +197,14 @@
        oid  n_globalTag;
        BAT* b_globalTag;       /* global tag dictionary*/
        BAT* hm_globalTag;      /* hashed mirrorred global tag dictionary*/
+
+       int   tagswitch_sz;     /* number of entries in the tagswitch buffer below */
+       char* tagswitch;        /* the recursive tag detector switch */
+                               /* 0 means: tag is not in use */
+                               /* 1 means: tag is in use */
+                               /* 2 means: tag is recursive */
+       BAT* b_globalRTag;      /* recursive tag dictionary*/
+
        BAT* b_docName;         /* BAT to store docnames in collection */
        BAT* b_docFirstPre;     /* First tijah-pre-nr of document */
        BAT* b_collParam;       /* Collection Parameters BAT */
@@ -242,14 +250,21 @@
             GDKerror("tj_pushTag: MAXTAGDEPTH exceeded.\n");
             return -1;
        }
-       tjctx->tagOidStack[tjctx->tagStackPtr]     = (int)tagoid;
+       if ( tjctx->tagswitch[(int)tagoid] != 2 ) {
+           tjctx->tagswitch[(int)tagoid]++;
+       }
+       tjctx->tagOidStack[tjctx->tagStackPtr] = (int)tagoid;
        tjctx->tagStartStack[tjctx->tagStackPtr++] = start;
        return 1;
 }
 
 INLINE static oid
 tj_popTag(tjCtx* tjctx) {
-       return tjctx->tagStartStack[--tjctx->tagStackPtr];
+       --tjctx->tagStackPtr;
+       if ( tjctx->tagswitch[tjctx->tagOidStack[tjctx->tagStackPtr]] != 2 ) {
+           tjctx->tagswitch[tjctx->tagOidStack[tjctx->tagStackPtr]]--;
+       }
+       return tjctx->tagStartStack[tjctx->tagStackPtr];
 }
 
 /************************************************
@@ -259,6 +274,7 @@
 INLINE static oid
 tj_tagOid(tjCtx* tjctx, str t) {
 #ifdef USE_TERMDB
+    /* INCOMPLETE: the tag check still has to be added on this path too */
     return tdb_lookupTag(tjctx->tdb,t);
 #else
     BUN bun;
@@ -268,6 +284,14 @@
         return *(oid*)BUNtail(tjctx->hm_globalTag,bun);
     } else {
        if ( BUNins(tjctx->b_globalTag, &tjctx->n_globalTag, (str)t, FALSE) ) {
+           /* check if the recursive tagswitcher still is large enough */
+           if ( (int)tjctx->n_globalTag >= tjctx->tagswitch_sz ) {
+                int old = tjctx->tagswitch_sz;
+               tjctx->tagswitch_sz = 2*(int)tjctx->n_globalTag;
+               tjctx->tagswitch    = 
GDKrealloc(tjctx->tagswitch,tjctx->tagswitch_sz );
+               for(int i=old; i<tjctx->tagswitch_sz; i++)
+                   tjctx->tagswitch[i] = 0;
+            }
            return tjctx->n_globalTag++;
         } else {
            GDKerror("INSERT OF \"%s\" in globalTag fails.\n");
@@ -551,6 +575,14 @@
        res->n_globalTerm = (oid)BATcount(res->b_globalTerm);
        if ( !(res->b_globalTag   = getBAT(tjCtx_BAT,"_globalTags")) ) return 
NULL;
        res->n_globalTag = (oid)BATcount(res->b_globalTag);
+       if ( !(res->b_globalRTag   = getBAT(tjCtx_BAT,"_globalRTags")) ) return 
NULL;
+       res->tagswitch_sz = (int)res->n_globalTag;
+       if ( res->tagswitch_sz < 128 )
+               res->tagswitch_sz = 128;
+       res->tagswitch_sz *= 2;
+       res->tagswitch = GDKmalloc( res->tagswitch_sz );
+       for(int i=0; i<res->tagswitch_sz; i++)
+           res->tagswitch[i] = 0;
        /*
         *
         */
@@ -665,6 +697,7 @@
                 return NULL;
             }
        }
+
        if ( ! (res->hm_globalTerm->hhash && res->hm_globalTag->hhash) )
           stream_printf(GDKout,"# WARNING: MISSING HASH ON TAG/TERM 
DICTIONARY\n");
        return res;
@@ -674,6 +707,18 @@
 freeTijahContext(tjCtx* tjctx) {
         if ( !setPreSize(tjctx,tjctx->tijahPre) )
                return GDK_FAIL;
+       /* first store all newly detected recursive tag oids */
+       for(int i=0; i<tjctx->tagswitch_sz; i++) {
+           if ( tjctx->tagswitch[i] == 2) {
+               oid store = (oid)i;
+               if ( !BUNfnd(tjctx->b_globalRTag,&store) ) {
+                   if ( !BUNins(tjctx->b_globalRTag, &store,&store, 0)) {
+                       GDKerror("error writing recursive tagbat");
+                       return GDK_FAIL;
+                   }
+               }
+            }
+        }
 #ifdef USE_TERMDB
        if ( ! tdb_close(tjctx->tdb) )
                return GDK_FAIL;
@@ -694,12 +739,14 @@
        }
        BBPunfix(BBPcacheid(tjctx->b_globalTerm));
        BBPunfix(BBPcacheid(tjctx->b_globalTag));
+       BBPunfix(BBPcacheid(tjctx->b_globalRTag));
        BBPunfix(BBPcacheid(tjctx->b_docName));
        BBPunfix(BBPcacheid(tjctx->b_docFirstPre));
        BBPunfix(BBPcacheid(tjctx->b_collParam));
        BBPunfix(BBPcacheid(tjctx->b_collPre));
        BBPunfix(BBPcacheid(tjctx->b_collSize));
        BBPunfix(BBPcacheid(tjctx->b_collPfPre));
+       GDKfree(tjctx->tagswitch);
        if ( tjctx->stemCtx->clear && !tjctx->stemCtx->clear(tjctx->stemCtx) )
                return GDK_FAIL;
        free(tjctx);
@@ -1110,6 +1157,7 @@
                stream_printf(GDKout,"C[%s]:SIZES\n",tjctx->name);
                stream_printf(GDKout,"C[%s]:size( b_globalTerm ) = 
%d\n",tjctx->name, BATcount(tjctx->b_globalTerm));
                stream_printf(GDKout,"C[%s]:size( b_globalTag )  = 
%d\n",tjctx->name, BATcount(tjctx->b_globalTag));
+               stream_printf(GDKout,"C[%s]:size( b_globalRTag )  = 
%d\n",tjctx->name, BATcount(tjctx->b_globalRTag));
                stream_printf(GDKout,"C[%s]:size( b_collPre )    = 
%d\n",tjctx->name, BATcount(tjctx->b_collPre));
                stream_printf(GDKout,"C[%s]:size( b_collSize )   = 
%d\n",tjctx->name, BATcount(tjctx->b_collSize));
                stream_printf(GDKout,"C[%s]:size( b_collPfPre )  = 
%d\n",tjctx->name, BATcount(tjctx->b_collPfPre));

Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.145
retrieving revision 1.146
diff -u -d -r1.145 -r1.146
--- pftijah.mx  15 Jun 2007 09:01:41 -0000      1.145
+++ pftijah.mx  15 Jun 2007 09:32:43 -0000      1.146
@@ -377,6 +377,14 @@
        return "tj_" + ftiName + "_tags";
 }
 
+PROC _tj_RTagBat(str ftiName) : str :=
+{
+    if ( GLOBAL_TTBAT )
+        return "tj_globalRTags";
+    else
+       return "tj_" + ftiName + "_rtags";
+}
+
 PROC tj_init_global(BAT[str,str] param, bit doLock) : void :=
 {
     if (doLock) lock_set(tj_adm_lock);
@@ -395,6 +403,7 @@
       if ( GLOBAL_TTBAT ) {
           new(oid,str).persists(true).bbpname(_tj_TermBat(""));
           new(oid,str).persists(true).bbpname(_tj_TagBat(""));
+          new(oid,oid).persists(true).bbpname(_tj_RTagBat(""));
       }
       new(oid,str).persists(true).bbpname("tj_collName");
       new(str,str).persists(true).bbpname("tj_pfc_fti_dep");
@@ -403,6 +412,7 @@
       if ( GLOBAL_TTBAT ) {
           globals.append(_tj_TermBat(""));
           globals.append(_tj_TagBat(""));
+          globals.append(_tj_RTagBat(""));
       }
       globals.append("tj_collName");
       globals.append("tj_pfc_fti_dep");
@@ -435,6 +445,7 @@
       if ( GLOBAL_TTBAT ) {
           bat(_tj_TermBat("")).persists(false);
           bat(_tj_TagBat("")).persists(false);
+          bat(_tj_RTagBat("")).persists(false);
       }
       bat("tj_collName").persists(false);
       bat("tj_pfc_fti_dep").persists(false);
@@ -443,6 +454,7 @@
       if ( GLOBAL_TTBAT ) {
           globals.append(_tj_TermBat(""));
           globals.append(_tj_TagBat(""));
+          globals.append(_tj_RTagBat(""));
       }
       globals.append("tj_collName");
       globals.append("tj_pfc_fti_dep");
@@ -553,9 +565,11 @@
              # INCOMPLETE, not throwing them away is much faster!!!
               bat(_tj_TermBat(ftiName)).delete();
               bat(_tj_TagBat(ftiName)).delete();
+              bat(_tj_RTagBat(ftiName)).delete();
          } else {
               new(oid,str).persists(true).bbpname(_tj_TermBat(ftiName));
               new(oid,str).persists(true).bbpname(_tj_TagBat(ftiName));
+              new(oid,oid).persists(true).bbpname(_tj_RTagBat(ftiName));
          }
       }
       extra_del_bat := new(void,str).seqbase([EMAIL PROTECTED]);
@@ -829,6 +843,7 @@
         if ( not(GLOBAL_TTBAT) ) {
           bat(_tj_TermBat(ftiName)).persists(false);
           bat(_tj_TagBat(ftiName)).persists(false);
+          bat(_tj_RTagBat(ftiName)).persists(false);
         }
        bat("tj_" + ftiName + "_doc_name").persists(false);
        bat("tj_" + ftiName + "_doc_firstpre").persists(false);
@@ -862,6 +877,7 @@
 
         tjCollBat.append(_tj_TermBat(ftiName));
         tjCollBat.append(_tj_TagBat(ftiName));
+        tjCollBat.append(_tj_RTagBat(ftiName));
         tjCollBat.append("tj_pfc_fti_dep");
         tjCollBat.append("tj_pfc_fti_dep_star");
         tjCollBat.append("tj_collName");
@@ -905,6 +921,7 @@
        }
        tjCollBat.insert("_globalTerms", bat(_tj_TermBat(ftiName)));
        tjCollBat.insert("_globalTags", bat(_tj_TagBat(ftiName)));
+       tjCollBat.insert("_globalRTags", bat(_tj_RTagBat(ftiName)));
        tjCollBat.insert("_doc_name", bat("tj_" + ftiName + "_doc_name"));
        tjCollBat.insert("_doc_firstpre", bat("tj_" + ftiName + 
"_doc_firstpre"));
        tjCollBat.insert("_param", parbat);
@@ -963,6 +980,7 @@
       var t_start := usec();
       bat(_tj_TermBat(ftiName)).access(BAT_APPEND);
       bat(_tj_TagBat(ftiName)).access(BAT_APPEND);
+      bat(_tj_RTagBat(ftiName)).access(BAT_APPEND);
       var collBat := _tj_collection(ftiName);
       _tj_add2collection(ftiName, collBat, uri_loc, uri_name, store);
       _tj_finalize_collection(ftiName, collBat, FALSE);
@@ -996,6 +1014,7 @@
       var t_start := usec();
       bat(_tj_TermBat(ftiName)).access(BAT_APPEND);
       bat(_tj_TagBat(ftiName)).access(BAT_APPEND);
+      bat(_tj_RTagBat(ftiName)).access(BAT_APPEND);
       var collBat;
       collBat := _tj_collection(ftiName);
       [EMAIL PROTECTED]() {
@@ -1530,6 +1549,10 @@
     
     # filter out the top document nodes which have no 'tag'
     pfpre := pfpre.kdiff(firstpre.reverse());
+
+    # set the recursive tag flag to "true" because all tags are selected
+    modify_qenv(qenv,QENV_RECURSIVE_TAGS,"1");
+
     return pfpre.project( dbl(qenv.find(QENV_SCOREBASE) ) );
 }
 
@@ -1546,6 +1569,12 @@
     var tids := bat(_tj_TagBat(qenv.find(QENV_FTINAME))).select(name);
     if (tids.count() = 0) return new(oid,dbl);
     var tid := tids.reverse().fetch(0);
+
+    # set the recursive tag flag to "true" if this tag is in the recursive tag BAT
+    if ( bat(_tj_RTagBat(qenv.find(QENV_FTINAME))).exist(tid) ) {
+        modify_qenv(qenv,QENV_RECURSIVE_TAGS,"1");
+    }
+    modify_qenv(qenv,QENV_RECURSIVE_TAGS,"1"); # REMOVE
     var result := indexfetchjoin( new(void,oid).append(tid).seqbase(oid(0)),
                                  bat("tj_" + qenv.find(QENV_FTINAME) + 
"_TagIndex"),
                                  bat("tj_" + qenv.find(QENV_FTINAME) + 
"_Tags") );


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to