Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv9539
Modified Files:
pftijah.mx serialize_pftijah.mx
Log Message:
- implement recursive tag check and storage
Index: serialize_pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/serialize_pftijah.mx,v
retrieving revision 1.51
retrieving revision 1.52
diff -u -d -r1.51 -r1.52
--- serialize_pftijah.mx 15 Jun 2007 07:00:42 -0000 1.51
+++ serialize_pftijah.mx 15 Jun 2007 09:32:43 -0000 1.52
@@ -197,6 +197,14 @@
oid n_globalTag;
BAT* b_globalTag; /* global tag dictionary*/
BAT* hm_globalTag; /* hashed mirrorred global tag dictionary*/
+
+ int tagswitch_sz; /* the size of the next buff */
+ char* tagswitch; /* the recursive tag detector switch */
+ /* 0 means: tag is not in use */
+ /* 1 means: tag is in use */
+ /* 2 means: tag is recursive */
+ BAT* b_globalRTag; /* recursive tag dictionary*/
+
BAT* b_docName; /* BAT to store docnames in collection */
BAT* b_docFirstPre; /* First tijah-pre-nr of document */
BAT* b_collParam; /* Collection Parameters BAT */
@@ -242,14 +250,21 @@
GDKerror("tj_pushTag: MAXTAGDEPTH exceeded.\n");
return -1;
}
- tjctx->tagOidStack[tjctx->tagStackPtr] = (int)tagoid;
+ if ( tjctx->tagswitch[(int)tagoid] != 2 ) {
+ tjctx->tagswitch[(int)tagoid]++;
+ }
+ tjctx->tagOidStack[tjctx->tagStackPtr] = (int)tagoid;
tjctx->tagStartStack[tjctx->tagStackPtr++] = start;
return 1;
}
INLINE static oid
tj_popTag(tjCtx* tjctx) {
- return tjctx->tagStartStack[--tjctx->tagStackPtr];
+ --tjctx->tagStackPtr;
+ if ( tjctx->tagswitch[tjctx->tagOidStack[tjctx->tagStackPtr]] != 2 ) {
+ tjctx->tagswitch[tjctx->tagOidStack[tjctx->tagStackPtr]]--;
+ }
+ return tjctx->tagStartStack[tjctx->tagStackPtr];
}
/************************************************
@@ -259,6 +274,7 @@
INLINE static oid
tj_tagOid(tjCtx* tjctx, str t) {
#ifdef USE_TERMDB
+ /* incomplete, also tag check here */
return tdb_lookupTag(tjctx->tdb,t);
#else
BUN bun;
@@ -268,6 +284,14 @@
return *(oid*)BUNtail(tjctx->hm_globalTag,bun);
} else {
if ( BUNins(tjctx->b_globalTag, &tjctx->n_globalTag, (str)t, FALSE) ) {
+ /* check if the recursive tagswitcher still is large enough */
+ if ( (int)tjctx->n_globalTag >= tjctx->tagswitch_sz ) {
+ int old = tjctx->tagswitch_sz;
+ tjctx->tagswitch_sz = 2*(int)tjctx->n_globalTag;
+ tjctx->tagswitch =
GDKrealloc(tjctx->tagswitch,tjctx->tagswitch_sz );
+ for(int i=old; i<tjctx->tagswitch_sz; i++)
+ tjctx->tagswitch[i] = 0;
+ }
return tjctx->n_globalTag++;
} else {
GDKerror("INSERT OF \"%s\" in globalTag fails.\n");
@@ -551,6 +575,14 @@
res->n_globalTerm = (oid)BATcount(res->b_globalTerm);
if ( !(res->b_globalTag = getBAT(tjCtx_BAT,"_globalTags")) ) return
NULL;
res->n_globalTag = (oid)BATcount(res->b_globalTag);
+ if ( !(res->b_globalRTag = getBAT(tjCtx_BAT,"_globalRTags")) ) return
NULL;
+ res->tagswitch_sz = (int)res->n_globalTag;
+ if ( res->tagswitch_sz < 128 )
+ res->tagswitch_sz = 128;
+ res->tagswitch_sz *= 2;
+ res->tagswitch = GDKmalloc( res->tagswitch_sz );
+ for(int i=0; i<res->tagswitch_sz; i++)
+ res->tagswitch[i] = 0;
/*
*
*/
@@ -665,6 +697,7 @@
return NULL;
}
}
+
if ( ! (res->hm_globalTerm->hhash && res->hm_globalTag->hhash) )
stream_printf(GDKout,"# WARNING: MISSING HASH ON TAG/TERM
DICTIONARY\n");
return res;
@@ -674,6 +707,18 @@
freeTijahContext(tjCtx* tjctx) {
if ( !setPreSize(tjctx,tjctx->tijahPre) )
return GDK_FAIL;
+ /* first store all new detected recursive tag oids */
+ for(int i=0; i<tjctx->tagswitch_sz; i++) {
+ if ( tjctx->tagswitch[i] == 2) {
+ oid store = (oid)i;
+ if ( !BUNfnd(tjctx->b_globalRTag,&store) ) {
+ if ( !BUNins(tjctx->b_globalRTag, &store,&store, 0)) {
+ GDKerror("error writing recursive tagbat");
+ return GDK_FAIL;
+ }
+ }
+ }
+ }
#ifdef USE_TERMDB
if ( ! tdb_close(tjctx->tdb) )
return GDK_FAIL;
@@ -694,12 +739,14 @@
}
BBPunfix(BBPcacheid(tjctx->b_globalTerm));
BBPunfix(BBPcacheid(tjctx->b_globalTag));
+ BBPunfix(BBPcacheid(tjctx->b_globalRTag));
BBPunfix(BBPcacheid(tjctx->b_docName));
BBPunfix(BBPcacheid(tjctx->b_docFirstPre));
BBPunfix(BBPcacheid(tjctx->b_collParam));
BBPunfix(BBPcacheid(tjctx->b_collPre));
BBPunfix(BBPcacheid(tjctx->b_collSize));
BBPunfix(BBPcacheid(tjctx->b_collPfPre));
+ GDKfree(tjctx->tagswitch);
if ( tjctx->stemCtx->clear && !tjctx->stemCtx->clear(tjctx->stemCtx) )
return GDK_FAIL;
free(tjctx);
@@ -1110,6 +1157,7 @@
stream_printf(GDKout,"C[%s]:SIZES\n",tjctx->name);
stream_printf(GDKout,"C[%s]:size( b_globalTerm ) =
%d\n",tjctx->name, BATcount(tjctx->b_globalTerm));
stream_printf(GDKout,"C[%s]:size( b_globalTag ) =
%d\n",tjctx->name, BATcount(tjctx->b_globalTag));
+ stream_printf(GDKout,"C[%s]:size( b_globalRTag ) =
%d\n",tjctx->name, BATcount(tjctx->b_globalRTag));
stream_printf(GDKout,"C[%s]:size( b_collPre ) =
%d\n",tjctx->name, BATcount(tjctx->b_collPre));
stream_printf(GDKout,"C[%s]:size( b_collSize ) =
%d\n",tjctx->name, BATcount(tjctx->b_collSize));
stream_printf(GDKout,"C[%s]:size( b_collPfPre ) =
%d\n",tjctx->name, BATcount(tjctx->b_collPfPre));
Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.145
retrieving revision 1.146
diff -u -d -r1.145 -r1.146
--- pftijah.mx 15 Jun 2007 09:01:41 -0000 1.145
+++ pftijah.mx 15 Jun 2007 09:32:43 -0000 1.146
@@ -377,6 +377,14 @@
return "tj_" + ftiName + "_tags";
}
+PROC _tj_RTagBat(str ftiName) : str :=
+{
+ if ( GLOBAL_TTBAT )
+ return "tj_globalRTags";
+ else
+ return "tj_" + ftiName + "_rtags";
+}
+
PROC tj_init_global(BAT[str,str] param, bit doLock) : void :=
{
if (doLock) lock_set(tj_adm_lock);
@@ -395,6 +403,7 @@
if ( GLOBAL_TTBAT ) {
new(oid,str).persists(true).bbpname(_tj_TermBat(""));
new(oid,str).persists(true).bbpname(_tj_TagBat(""));
+ new(oid,oid).persists(true).bbpname(_tj_RTagBat(""));
}
new(oid,str).persists(true).bbpname("tj_collName");
new(str,str).persists(true).bbpname("tj_pfc_fti_dep");
@@ -403,6 +412,7 @@
if ( GLOBAL_TTBAT ) {
globals.append(_tj_TermBat(""));
globals.append(_tj_TagBat(""));
+ globals.append(_tj_RTagBat(""));
}
globals.append("tj_collName");
globals.append("tj_pfc_fti_dep");
@@ -435,6 +445,7 @@
if ( GLOBAL_TTBAT ) {
bat(_tj_TermBat("")).persists(false);
bat(_tj_TagBat("")).persists(false);
+ bat(_tj_RTagBat("")).persists(false);
}
bat("tj_collName").persists(false);
bat("tj_pfc_fti_dep").persists(false);
@@ -443,6 +454,7 @@
if ( GLOBAL_TTBAT ) {
globals.append(_tj_TermBat(""));
globals.append(_tj_TagBat(""));
+ globals.append(_tj_RTagBat(""));
}
globals.append("tj_collName");
globals.append("tj_pfc_fti_dep");
@@ -553,9 +565,11 @@
# INCOMPLETE, not throwing them away is much faster!!!
bat(_tj_TermBat(ftiName)).delete();
bat(_tj_TagBat(ftiName)).delete();
+ bat(_tj_RTagBat(ftiName)).delete();
} else {
new(oid,str).persists(true).bbpname(_tj_TermBat(ftiName));
new(oid,str).persists(true).bbpname(_tj_TagBat(ftiName));
+ new(oid,oid).persists(true).bbpname(_tj_RTagBat(ftiName));
}
}
extra_del_bat := new(void,str).seqbase([EMAIL PROTECTED]);
@@ -829,6 +843,7 @@
if ( not(GLOBAL_TTBAT) ) {
bat(_tj_TermBat(ftiName)).persists(false);
bat(_tj_TagBat(ftiName)).persists(false);
+ bat(_tj_RTagBat(ftiName)).persists(false);
}
bat("tj_" + ftiName + "_doc_name").persists(false);
bat("tj_" + ftiName + "_doc_firstpre").persists(false);
@@ -862,6 +877,7 @@
tjCollBat.append(_tj_TermBat(ftiName));
tjCollBat.append(_tj_TagBat(ftiName));
+ tjCollBat.append(_tj_RTagBat(ftiName));
tjCollBat.append("tj_pfc_fti_dep");
tjCollBat.append("tj_pfc_fti_dep_star");
tjCollBat.append("tj_collName");
@@ -905,6 +921,7 @@
}
tjCollBat.insert("_globalTerms", bat(_tj_TermBat(ftiName)));
tjCollBat.insert("_globalTags", bat(_tj_TagBat(ftiName)));
+ tjCollBat.insert("_globalRTags", bat(_tj_RTagBat(ftiName)));
tjCollBat.insert("_doc_name", bat("tj_" + ftiName + "_doc_name"));
tjCollBat.insert("_doc_firstpre", bat("tj_" + ftiName +
"_doc_firstpre"));
tjCollBat.insert("_param", parbat);
@@ -963,6 +980,7 @@
var t_start := usec();
bat(_tj_TermBat(ftiName)).access(BAT_APPEND);
bat(_tj_TagBat(ftiName)).access(BAT_APPEND);
+ bat(_tj_RTagBat(ftiName)).access(BAT_APPEND);
var collBat := _tj_collection(ftiName);
_tj_add2collection(ftiName, collBat, uri_loc, uri_name, store);
_tj_finalize_collection(ftiName, collBat, FALSE);
@@ -996,6 +1014,7 @@
var t_start := usec();
bat(_tj_TermBat(ftiName)).access(BAT_APPEND);
bat(_tj_TagBat(ftiName)).access(BAT_APPEND);
+ bat(_tj_RTagBat(ftiName)).access(BAT_APPEND);
var collBat;
collBat := _tj_collection(ftiName);
[EMAIL PROTECTED]() {
@@ -1530,6 +1549,10 @@
# filter out the top document nodes which have no 'tag'
pfpre := pfpre.kdiff(firstpre.reverse());
+
+ # set the recursive tag flag on "true" because all tags are selected
+ modify_qenv(qenv,QENV_RECURSIVE_TAGS,"1");
+
return pfpre.project( dbl(qenv.find(QENV_SCOREBASE) ) );
}
@@ -1546,6 +1569,12 @@
var tids := bat(_tj_TagBat(qenv.find(QENV_FTINAME))).select(name);
if (tids.count() = 0) return new(oid,dbl);
var tid := tids.reverse().fetch(0);
+
+ # set the recursive tag flag to "true" only if the selected tag is stored in the recursive tag BAT
+ if ( bat(_tj_RTagBat(qenv.find(QENV_FTINAME))).exist(tid) ) {
+ modify_qenv(qenv,QENV_RECURSIVE_TAGS,"1");
+ }
+ modify_qenv(qenv,QENV_RECURSIVE_TAGS,"1"); # REMOVE
var result := indexfetchjoin( new(void,oid).append(tid).seqbase(oid(0)),
bat("tj_" + qenv.find(QENV_FTINAME) +
"_TagIndex"),
bat("tj_" + qenv.find(QENV_FTINAME) +
"_Tags") );
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins