Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv12882/modules/pftijah
Modified Files:
nexi.c nexi_generate_mil.c pftijah.mx serialize_pftijah.mx
Log Message:
propagated changes of Wednesday Feb 21 2007 - Thursday Feb 22 2007
from the XQuery_0-16 branch to the development trunk
Index: serialize_pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/serialize_pftijah.mx,v
retrieving revision 1.39
retrieving revision 1.40
diff -u -d -r1.39 -r1.40
--- serialize_pftijah.mx 20 Feb 2007 12:06:53 -0000 1.39
+++ serialize_pftijah.mx 22 Feb 2007 11:34:42 -0000 1.40
@@ -513,17 +513,15 @@
}
static BAT*
-getBAT(BAT* batbat, int i) {
+getBAT(BAT* batbat, str bname) {
BUN bun;
- oid index = (oid)i;
-
- if ( !(bun=BUNfnd(batbat,&index)) ) {
- stream_printf(GDKerr,"getBAT:BUNfnd(%d) failed.\n",i);
+ if ( !(bun=BUNfnd(batbat,bname)) ) {
+ stream_printf(GDKerr,"getBAT:BUNfnd(%s) failed.\n",bname);
return NULL;
}
- BAT* res = BATdescriptor( *(bat*)bun);
+ BAT* res = BATdescriptor( *(bat*)BUNtail(batbat,bun));
if ( !res )
- stream_printf(GDKerr,"getBAT:BATdescriptor() for %d failed.\n",i);
+ stream_printf(GDKerr,"getBAT:BATdescriptor() for %s
failed.\n",bname);
return res;
}
@@ -570,7 +568,7 @@
*/
/* set parameter bat first */
- if ( !(res->b_collParam = getBAT(tjCtx_BAT,4)) ) return NULL;
+ if ( !(res->b_collParam = getBAT(tjCtx_BAT,"_param")) ) return NULL;
res->preExpansion = 1;
str str_preExpansion = readCollParam(res,"preExpansion");
@@ -627,17 +625,17 @@
if ( !(res->tdb = tdb_open("termDB")) )
return NULL;
#endif
- if ( !(res->b_globalTerm = getBAT(tjCtx_BAT,0)) ) return NULL;
+ if ( !(res->b_globalTerm = getBAT(tjCtx_BAT,"_globalTerms")) ) return
NULL;
res->n_globalTerm = (oid)BATcount(res->b_globalTerm);
- if ( !(res->b_globalTag = getBAT(tjCtx_BAT,1)) ) return NULL;
+ if ( !(res->b_globalTag = getBAT(tjCtx_BAT,"_globalTags")) ) return
NULL;
res->n_globalTag = (oid)BATcount(res->b_globalTag);
/*
*
*/
- if ( !(res->b_docName = getBAT(tjCtx_BAT,2)) ) return NULL;
- if ( !(res->b_docFirstPre = getBAT(tjCtx_BAT,3)) ) return NULL;
- if ( !(res->b_collPre = getBAT(tjCtx_BAT,5)) ) return NULL;
- if ( !(res->b_collSize = getBAT(tjCtx_BAT,6)) ) return NULL;
+ if ( !(res->b_docName = getBAT(tjCtx_BAT,"_doc_name")) ) return
NULL;
+ if ( !(res->b_docFirstPre = getBAT(tjCtx_BAT,"_doc_firstpre")) ) return
NULL;
+ if ( !(res->b_collPre = getBAT(tjCtx_BAT,"_tid")) ) return NULL;
+ if ( !(res->b_collSize = getBAT(tjCtx_BAT,"_size")) ) return NULL;
if ( (res->tijahPre = getPreSize(res)) == oid_nil )
return NULL;;
/* check here for new fragmentation */
@@ -647,7 +645,7 @@
if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]: loadTijahContext()
[b_collPre|b_collSize] too big(%d), create new
fragment\n",res->name,BATcount(res->b_collPre));
#endif
BAT* fragments;
- if ( !(fragments = getBAT(tjCtx_BAT,8)) )
+ if ( !(fragments = getBAT(tjCtx_BAT,"_fragments")) )
return NULL;
int newFragments = (int)BATcount(fragments) + 1;
/* */
@@ -673,7 +671,7 @@
return NULL;
if ( dbat_init("b_collSize", &res->dbat_collSize, res->b_collSize) < -1
)
return NULL;
- if ( !(res->b_collPfPre = getBAT(tjCtx_BAT,7)) ) return NULL;
+ if ( !(res->b_collPfPre = getBAT(tjCtx_BAT,"_pfpre")) ) return NULL;
if ( !loadSelectionTagList(res,selTagList) ) return NULL;
/* */
res->tagStackPtr = 0;
Index: nexi_generate_mil.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi_generate_mil.c,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -d -r1.24 -r1.25
--- nexi_generate_mil.c 25 Jan 2007 00:50:25 -0000 1.24
+++ nexi_generate_mil.c 22 Feb 2007 11:34:42 -0000 1.25
@@ -1026,6 +1026,10 @@
if ( TDEBUG(5) ) {
MILPRINTF(MILOUT,"printf(\"# tijah-mil-exec: computed
R%d.\\n\");\n",com_num);
}
+ if ( TDEBUG(98) ) {
+ MILPRINTF(MILOUT,"printf(\"# tijah-mil-exec: contents of R%d
is:\\n\");\n",com_num);
+ MILPRINTF(MILOUT,"R%d.print();\n",com_num);
+ }
POP_COMMAND();
com_sp++;
Index: nexi.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi.c,v
retrieving revision 1.47
retrieving revision 1.48
diff -u -d -r1.47 -r1.48
--- nexi.c 19 Jan 2007 13:41:06 -0000 1.47
+++ nexi.c 22 Feb 2007 11:34:42 -0000 1.48
@@ -79,6 +79,7 @@
#include "pftijah.h"
#include "nexi.h"
#include "pftijah.h"
+#include "pftijah_util.h"
#define LOGFILE GDKout
#define LOGPRINTF if ( 0 ) stream_printf
@@ -97,14 +98,6 @@
parserCtx->collection = "PFX";
parserCtx->queryText = query;
parserCtx->errBUFF[0] = 0;
- parserCtx->useFragments = 0;
- if ( 1 /* not fragmented */ ) {
- parserCtx->ffPfx = ""; /* "_frag"*/;
- parserCtx->flastPfx = ", str(1)"; /* */;
- } else {
- parserCtx->ffPfx = "_frag"; /* "_frag"*/;
- parserCtx->flastPfx = ""; /* "_frag"*/;
- }
parserCtx->milFILEname = NULL;
/* initialize the lists */
if ( ! (
@@ -273,24 +266,9 @@
} else if ( strcmp(optName,"collection") == 0 ) {
parserCtx->collection = optVal;
} else if ( strcmp(optName,"fragments") == 0 ) {
- if ( (strcmp(optVal,"true")==0) ||
- (strcmp(optVal,"TRUE")==0) ||
- (strcmp(optVal,"on")==0) ||
- (strcmp(optVal,"ON")==0)
- ) {
- if (TDEBUG(1)) stream_printf(GDKout,"# old_main: setting
fragmentation ON.\n");
- parserCtx->useFragments = 1;
- parserCtx->ffPfx = "_frag";
- parserCtx->flastPfx = "";
- } else {
- if (TDEBUG(1)) stream_printf(GDKout,"# old_main: setting
fragmentation OFF.\n");
- parserCtx->useFragments = 0;
- parserCtx->ffPfx = "";
- parserCtx->flastPfx = ", str(1)";
- }
+ if (TDEBUG(1)) stream_printf(GDKout,"# old_main: ignoring
fragmentation setting.\n");
} else if ( strcmp(optName,"background_collection") == 0 ) {
strcpy(background_collection, optVal);
-
} else if ( strcmp(optName,"returnNumber") == 0 ||
strcmp(optName,"retNum") == 0 ||
strcmp(optName,"top") == 0 ) {
@@ -470,8 +448,25 @@
stream_printf(GDKout,"TijahOptions: should handle:
%s=%s\n",optName,optVal);
}
}
-
-
+ /*
+ * Now find out if the collection is fragmented or not.
+ */
+ BAT* fb =
pftu_lookup_bat(pftu_batname1("tj_%s_fragments",(char*)parserCtx->collection,0));
+ if ( ! fb ) {
+ stream_printf(GDKerr,"Error: cannot find fragments bat for
collection \"%s\".\n",parserCtx->collection);
+ return 0;
+ }
+ if ( BATcount(fb) > 1 ) {
+ if (TDEBUG(1)) stream_printf(GDKout,"# old_main: setting
fragmentation ON.\n");
+ parserCtx->useFragments = 1;
+ parserCtx->ffPfx = "_frag";
+ parserCtx->flastPfx = "";
+ } else {
+ if (TDEBUG(1)) stream_printf(GDKout,"# old_main: setting
fragmentation OFF.\n");
+ parserCtx->useFragments = 0;
+ parserCtx->ffPfx = "";
+ parserCtx->flastPfx = ", str(1)";
+ }
// Some special cases for NLLR, since NLLR only works with COARSE2 at the
moment
if ( txt_retr_model->model == MODEL_NLLR ) {
// Switch to COARSE2 algebra for NLLR
Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.96
retrieving revision 1.97
diff -u -d -r1.96 -r1.97
--- pftijah.mx 21 Feb 2007 14:25:54 -0000 1.96
+++ pftijah.mx 22 Feb 2007 11:34:42 -0000 1.97
@@ -38,10 +38,10 @@
.COMMAND tj_normalizeTerm(str, str) : str = CMDtj_normalizeTerm;
"INCOMPLETE"
-.COMMAND _tj_throw2collection(BAT[oid,bat],BAT[oid,bat],str,str) : void =
CMDtj_throw2collection;
+.COMMAND _tj_throw2collection(BAT[str,bat],BAT[oid,bat],str,str) : void =
CMDtj_throw2collection;
"INCOMPLETE"
-.COMMAND _tj_throw2collection_index(BAT[oid,bat],str) : void =
CMDtj_throw2collection_index;
+.COMMAND _tj_throw2collection_index(BAT[str,bat],str) : void =
CMDtj_throw2collection_index;
"INCOMPLETE"
.COMMAND tijah_tokenize(str) : str = CMDtijah_tokenize;
@@ -255,6 +255,11 @@
new(oid,str).persists(true).bbpname("tj_globalTerms");
new(oid,str).persists(true).bbpname("tj_globalTags");
new(oid,str).persists(true).bbpname("tj_collName");
+ var globals := new(void,str).seqbase(0@0);
+ globals.append("tj_globalTerms");
+ globals.append("tj_globalTags");
+ globals.append("tj_collName");
+ subcommit(globals);
});
if (doLock) lock_unset(tj_adm_lock);
if (not(isnil(err))) ERROR(err);
@@ -282,6 +287,11 @@
bat("tj_globalTerms").persists(false);
bat("tj_globalTags").persists(false);
bat("tj_collName").persists(false);
+ var globals := new(void,str).seqbase(0@0);
+ globals.append("tj_globalTerms");
+ globals.append("tj_globalTags");
+ globals.append("tj_collName");
+ subcommit(globals);
});
lock_unset(tj_adm_lock);
if (not(isnil(err))) ERROR(err);
@@ -309,7 +319,6 @@
return collection_lock;
}
-
ADDHELP("tj_init_collection", "flokstra & rode", "Jan 2007",
"PARAMETERS:\n\
- str collName: the name of the collection
@@ -403,6 +412,7 @@
bat("tj_" + collName + "_param").insert("_last_finalizedPre","0");
#
});
+ subcommit(_tj_collection_str(collName));
lock_unset(coll_lock);
if (not(isnil(err))) ERROR(err);
}
@@ -464,16 +474,46 @@
bat("tj_" + collName + "_TagIndex").persists(false);
bat("tj_" + collName + "_Tags").persists(false);
}
+ subcommit(_tj_collection_str(collName));
});
lock_unset(coll_lock);
if (not(isnil(err))) ERROR(err);
}
+
+# internal method which return all batnames of a collection in a
+# a [void,str] bat
+PROC _tj_collection_str(str collName) : BAT[void,bat]
+{
+ var tjCollBat := new(void,str).seqbase(0@0);
+
+ tjCollBat.append("tj_globalTerms");
+ tjCollBat.append("tj_globalTags");
+ tjCollBat.append("tj_collName");
+ tjCollBat.append("tj_" + collName + "_param");
+ tjCollBat.append("tj_" + collName + "_doc_name");
+ tjCollBat.append("tj_" + collName + "_doc_firstpre");
+ tjCollBat.append("tj_" + collName + "_pfpre");
+ tjCollBat.append("tj_" + collName + "_fragments");
+ bat("tj_" + collName + "_fragments")@batloop()
+ {
+ tjCollBat.append("tj_" + collName + "_tid" + str(int($t)));
+ tjCollBat.append("tj_" + collName + "_size"+ str(int($t)));
+ }
+ if (view_bbp_name().reverse().exist("tj_" + collName + "_TermIndex")) {
+ tjCollBat.append("tj_" + collName + "_Terms");
+ tjCollBat.append("tj_" + collName + "_Tags");
+ tjCollBat.append("tj_" + collName + "_TermIndex");
+ tjCollBat.append("tj_" + collName + "_TagIndex");
+ }
+ return tjCollBat;
+}
+
# internal method which return all relevant data about a collection in a
# a single [void,bat] bat
-PROC _tj_collection(str collName) : BAT[void,bat]
+PROC _tj_collection(str collName) : BAT[str, bat]
{
- var tjCollBat := new(void,bat).seqbase(0@0);
+ var tjCollBat := new(str,bat);
var parbat := bat("tj_" + collName + "_param");
var curversion;
@@ -485,20 +525,44 @@
if ( curversion < "1.0" ) {
ERROR("_tj_collection(): pftijah index structure changed, reindex
collection!!");
}
- tjCollBat.append(bat("tj_globalTerms"));
- tjCollBat.append(bat("tj_globalTags"));
- tjCollBat.append(bat("tj_" + collName + "_doc_name"));
- tjCollBat.append(bat("tj_" + collName + "_doc_firstpre"));
- tjCollBat.append(parbat);
+ tjCollBat.insert("_globalTerms", bat("tj_globalTerms"));
+ tjCollBat.insert("_globalTags", bat("tj_globalTags"));
+ tjCollBat.insert("_doc_name", bat("tj_" + collName + "_doc_name"));
+ tjCollBat.insert("_doc_firstpre", bat("tj_" + collName +
"_doc_firstpre"));
+ tjCollBat.insert("_param", parbat);
# only load the top [pre|term|size] fragments
var fpfx := str(bat("tj_" + collName + "_fragments").count());
- tjCollBat.append(bat("tj_" + collName + "_tid"+fpfx));
- tjCollBat.append(bat("tj_" + collName + "_size"+fpfx));
- tjCollBat.append(bat("tj_" + collName + "_pfpre"));
- tjCollBat.append(bat("tj_" + collName + "_fragments"));
+ tjCollBat.insert("_tid", bat("tj_" + collName + "_tid"+fpfx));
+ tjCollBat.insert("_size", bat("tj_" + collName + "_size"+fpfx));
+ tjCollBat.insert("_pfpre", bat("tj_" + collName + "_pfpre"));
+ tjCollBat.insert("_fragments", bat("tj_" + collName + "_fragments"));
+
+ if (view_bbp_name().reverse().exist("tj_" + collName + "_TermIndex")) {
+ tjCollBat.insert("_Terms", bat("tj_" + collName + "_Terms"));
+ tjCollBat.insert("_Tags", bat("tj_" + collName + "_Tags"));
+ tjCollBat.insert("_TermIndex", bat("tj_" + collName +
"_TermIndex"));
+ tjCollBat.insert("_TagIndex", bat("tj_" + collName + "_TagIndex"));
+ }
+ tjCollBat.insert("submitBats", new(void,str).seqbase(0@0));
+ tjCollBat.insert("replaceBats", new(str,str));
+
return tjCollBat;
}
+PROC _tj_commit(BAT[str,bat] collBat) : void
+{
+ var replaceBats := collBat.find("replaceBats");
+ replaceBats@batloop() {
+ bat($t).persists(false).rename("del_" + $t);
+ collBat.find($h).persists(true).bbpname($t);
+ }
+
+ var submitBats := collBat.find("submitBats");
+ submitBats.append([+](const "del_", replaceBats.tmark(0@0)));
+
+ subcommit(submitBats);
+}
+
ADDHELP("tj_add2collection", "flokstra & rode", "Jan 2007",
"PARAMETERS:\n\
- str collName: the name of the collection.\n
@@ -520,8 +584,10 @@
var t_start := usec();
bat("tj_globalTerms").access(BAT_WRITE);
bat("tj_globalTags").access(BAT_WRITE);
-
_tj_add2collection(collName,_tj_collection(collName),uri_loc,uri_name,store);
- _tj_finalize_collection(collName);
+ var collBat := _tj_collection(collName);
+ _tj_add2collection(collName, collBat, uri_loc, uri_name, store);
+ _tj_finalize_collection(collName, collBat);
+ _tj_commit(collBat);
if ( timing ) {
var ms := (usec()-t_start)/1000;
printf("#C[%s]:tj_add2collection(): + aggregate time =
%lld.%03llds.\n",collName,/(ms,1000),%(ms,1000));
@@ -551,15 +617,16 @@
var t_start := usec();
bat("tj_globalTerms").access(BAT_WRITE);
bat("tj_globalTags").access(BAT_WRITE);
- # var cb := _tj_collection(collName);
+ var collBat := _tj_collection(collName);
[EMAIL PROTECTED]() {
- _tj_add2collection(collName,_tj_collection(collName),$h,$t,store);
+ _tj_add2collection(collName, collBat, $h, $t, store);
}
- _tj_finalize_collection(collName);
+ _tj_finalize_collection(collName, collBat);
if ( timing ) {
var ms := (usec()-t_start)/1000;
printf("#C[%s]:tj_add2collection(BAT): + aggregate time =
%lld.%03llds.\n",collName,/(ms,1000),%(ms,1000));
}
+ _tj_commit(collBat);
});
lock_unset(coll_lock);
if (not(isnil(err))) ERROR(err);
@@ -571,17 +638,17 @@
}
# main internal add2collection() function.
-PROC _tj_add2collection(str collName, BAT[void,bat] collBats, str uri_loc, str
uri_name, bit store) : void
+PROC _tj_add2collection(str collName, BAT[str,bat] collBat, str uri_loc, str
uri_name, bit store) : void
{
var ms;
var t_start := usec();
if ( verbose ) printf("#TJ: _tj_add2collection(\"%s\") start.\n",collName);
- var selTagList := bat("tj_" + collName + "_param").find("tagFilter");
+ var selTagList := _tj_get_parameter(collBat, "tagFilter");
var i_start;
if ( store ) {
- var pf_collection := tj_get_parameter(collName,"pf_collection");
+ var pf_collection := _tj_get_parameter(collBat,"pf_collection");
if ( isnil(uri_name) ) {
uri_name := uri_loc;
} else if ( uri_name = "" ) {
@@ -602,24 +669,18 @@
}
ws_opendoc(ws, bat(void,str,1).append(uri_name));
i_start := usec();
- _tj_throw2collection(collBats,ws,uri_name,selTagList);
+ _tj_throw2collection(collBat,ws,uri_name,selTagList);
ws_destroy(ws);
} else {
i_start := usec();
- _tj_throw2collection_index(collBats,uri_loc);
+ _tj_throw2collection_index(collBat,uri_loc);
}
if ( timing ) {
ms := (usec()-i_start)/1000;
printf("#C[%s]:add2coll(\"%s\"): index time =
%lld.%03llds.\n",collName,uri_name,/(ms,1000),%(ms,1000));
}
- # var height1 :=
bat("doc_height").fetch(bat("doc_name").reverse().find(uri_name));
- var height1 := 999; # REMOVE
- var coll_oid := bat("tj_collName").reverse().find(collName);
- var height2 := bat("tj_" + collName + "_param").find("height").int();
- bat("tj_" + collName + "_param").replace("height",
max(height1,height2).str());
- bat("tj_" + collName + "_param").replace("status","building");
- commit();
+ _tj_set_parameter(collBat, "status", "building");
if ( verbose ) printf("#TJ:_tj_add2collectiont(\"%s\")
finish.\n",collName);
if ( timing ) {
ms := (usec()-t_start)/1000;
@@ -640,59 +701,57 @@
var coll_lock := tj_get_collection_lock(collName);
lock_set(coll_lock);
var err := CATCH({
- _tj_finalize_collection(collName);
+ print("Warning: This function is obsolete.");
+ #_tj_finalize_collection(collName);
});
lock_unset(coll_lock);
if (not(isnil(err))) ERROR(err);
}
# internal finalize function
-PROC _tj_finalize_collection(str collName) : void
+PROC _tj_finalize_collection(str collName, BAT[str,bat] collBat) : void
{
var t_start := usec();
if ( verbose ) printf("#TJ: _tj_finalize_collection(\"%s\")
called.\n",collName);
#
- # Finalize one collection. No documents should be added after this
- bat("tj_" + collName + "_fragments").access(BAT_READ);
- bat("tj_" + collName + "_fragments")@batloop()
+ var mod_frags := _tj_chk_modified_fragments(collName, collBat);
+ # set all fragments except the last one to BAT_READ
+ mod_frags@batloop()
{
- var ind := str(int($h));
- bat("tj_" + collName + "_tid" + ind).access(BAT_READ);
- bat("tj_" + collName + "_tid" + ind).mmap(1);
- bat("tj_" + collName + "_size" + ind).access(BAT_READ);
- bat("tj_" + collName + "_size" + ind).mmap(1);
+ bat($t).access(BAT_READ);
+ bat($t).mmap(1);
+ collBat.find("submitBats").append($t);
}
- # bat("tj_" + collName + "_pfpre").access(BAT_READ); BUG CANNOT USE
FINALIZED BAT, JF!!!
- bat("tj_" + collName + "_pfpre").mmap(1);
- commit();
- _buildIRindex(collName);
+ collBat.find("_size").access(BAT_APPEND).mmap(1);
+ collBat.find("_tid").access(BAT_APPEND).mmap(1);
+ collBat.find("_pfpre").access(BAT_APPEND).mmap(1);
+ collBat.find("submitBats").append("tj_" + collName + "_size1");
+ collBat.find("submitBats").append("tj_" + collName + "_tid1");
+ collBat.find("submitBats").append("tj_" + collName + "_pfpre");
+ collBat.find("submitBats").append("tj_" + collName + "_fragments");
+ collBat.find("submitBats").append("tj_" + collName + "_doc_name");
+ collBat.find("submitBats").append("tj_" + collName + "_doc_firstpre");
+ collBat.find("submitBats").append("tj_" + collName + "_param");
+
+ _buildIRindex(collName, collBat);
#
- bat("tj_" + collName + "_param").replace("status","finalized");
+ _tj_set_parameter(collBat, "status", "finalized");
var lst_fpre := bat("tj_" + collName + "_param").find("_last_tijahPre");
- bat("tj_" + collName + "_param").replace("_last_finalizedPre", lst_fpre);
+ _tj_set_parameter(collBat, "_last_finalizedPre", lst_fpre);
#
var gterm_sort := bat("tj_globalTerms").reverse().sort().reverse();
- bat("tj_globalTerms").persists(false);
var gtag_sort := bat("tj_globalTags").reverse().sort().reverse();
- bat("tj_globalTags").persists(false);
- commit();
- gterm_sort.persists(true);
- gterm_sort.bbpname("tj_globalTerms");
- gtag_sort.persists(true);
- gtag_sort.bbpname("tj_globalTags");
- if ( false ) {
- # Monet error, server crashes on .acces(BAT_WRITE) when a new
- # collection is created on a fresh Mserver.
- bat("tj_globalTerms").access(BAT_READ);
- bat("tj_globalTags").access(BAT_READ);
- }
- bat("tj_globalTerms").mmap(1);
- if ( false ) {
- # Monet error, server crashes on .acces(BAT_WRITE) when a new
- # collection is created on a fresh Mserver.
- bat("tj_globalTags").mmap(1);
- }
- commit();
+ gterm_sort.mmap(1);
+
+ collBat.replace("_globalTerms", gterm_sort);
+ collBat.replace("_globalTags", gtag_sort);
+ var replaceBats := collBat.find("replaceBats");
+ replaceBats.insert("_globalTerms", "tj_globalTerms");
+ replaceBats.insert("_globalTags", "tj_globalTags");
+
+ collBat.find("submitBats").append("tj_globalTerms");
+ collBat.find("submitBats").append("tj_globalTags");
+
if ( timing ) {
var ms := (usec()-t_start)/1000;
printf("#C[%s]:finalize(): total time =
%lld.%03llds.\n",collName,/(ms,1000),%(ms,1000));
@@ -700,9 +759,9 @@
}
# set a collection parameter
-PROC tj_set_parameter(str collName, str par, str val) : void
+PROC _tj_set_parameter(BAT[str,bat] collBat, str par, str val) : void
{
- var parbat := bat("tj_" + collName + "_param");
+ var parbat := collBat.find("_param");
if ( parbat.exist(par) ) {
parbat.replace(par,val);
@@ -712,9 +771,9 @@
}
# set a collection parameter
-PROC tj_get_parameter(str collName, str par) : str
+PROC _tj_get_parameter(BAT[str,bat] collBat, str par) : str
{
- var parbat := bat("tj_" + collName + "_param");
+ var parbat := collBat.find("_param");
if ( parbat.exist(par) ) {
return parbat.find(par);
@@ -2459,7 +2518,6 @@
{
printf("WARNING: the use of function tj_coll_remove() is
deprecated.\n");
tj_delete_collection(collName);
- commit();
}
# SUGAR, REMOVE IN FUTURE
@@ -2498,11 +2556,29 @@
printf("WARNING: the use of function tj_finalize() is deprecated.\n");
}
-PROC _buildIRindex(str collName) : void :=
+PROC _tj_chk_modified_fragments(str collName, BAT[str,bat] collBat) :
BAT[void,str] :=
{
- var offset := oid(int(bat("tj_" + collName +
"_param").find("_last_finalizedPre")) + 1);
- var frag_offset := int(find_lower(bat("tj_" + collName +
"_fragments").reverse(), offset));
- var frag_last := bat("tj_" + collName + "_fragments").count();
+ var offset := oid(int(_tj_get_parameter(collBat, "_last_finalizedPre"))
+ 1);
+ var fragments := collBat.find("_fragments");
+ var frag_offset := int(find_lower(fragments.reverse(), offset));
+ var frag_last := fragments.count();
+
+ var mod_frags := new(void, str).seqbase(0@0);
+ while (frag_offset < frag_last)
+ {
+ mod_frags.append("tj_" + collName + "_tid" + str(frag_offset));
+ mod_frags.append("tj_" + collName + "_size" + str(frag_offset));
+ }
+
+ return mod_frags;
+}
+
+PROC _buildIRindex(str collName, BAT[str,bat] collBat) : void :=
+{
+ var offset := oid(int(_tj_get_parameter(collBat, "_last_finalizedPre"))
+ 1);
+ var fragments := collBat.find("_fragments");
+ var frag_offset := int(find_lower(fragments.reverse(), offset));
+ var frag_last := fragments.count();
var pre_tid := bat("tj_" + collName + "_tid" + str(frag_offset));
var tids := pre_tid.slice(int(offset), pre_tid.count() - 1);
@@ -2513,57 +2589,44 @@
tids.append(bat("tj_" + collName + "_tid" + str(frag_offset)));
frag_offset :+= 1;
}
- var tmp := tids.kdiff(bat("tj_" + collName + "_pfpre"));
+ var tmp := tids.kdiff(collBat.find("_pfpre"));
tmp := tmp.reverse().sort();
-
- # incremental index merge
+
+ # incremental index merge
if (view_bbp_name().reverse().exist("tj_" + collName + "_TermIndex"))
{
- var tmp := tids.kdiff(bat("tj_" + collName + "_pfpre"));
- tmp := tmp.reverse().sort();
- var i := mergeindex(tmp, bat("tj_" + collName + "_TermIndex"),
- bat("tj_" + collName + "_Terms"),
- bat("tj_globalTerms").count() + 1);
- bat("tj_" + collName + "_TermIndex").persists(false);
- bat("tj_" + collName + "_Terms").persists(false);
- commit();
- # create _TermIndex and _Terms here
- var newindex := i.fetch(0);
- newindex.persists(true).bbpname("tj_" + collName +
"_TermIndex");
- var newpre := i.fetch(1);
- newpre.persists(true).bbpname("tj_" + collName + "_Terms");
+ var replaceBats := collBat.find("replaceBats");
+ var tmp := tids.kdiff(collBat.find("_pfpre"));
+ tmp := tmp.reverse().ssort();
+ var i := mergeindex(tmp, collBat.find("_TermIndex"),
+ collBat.find("_Terms"),
+ collBat.find("_globalTerms").count()
+ 1);
+ collBat.replace("_TermIndex", i.fetch(0));
+ collBat.replace("_Terms", i.fetch(1));
i := nil;
tmp := nil;
- newindex.access(BAT_READ);
- newindex.mmap(1);
- newpre.access(BAT_READ);
- newpre.mmap(1);
- newindex := nil;
- newpre := nil;
- commit();
+ replaceBats.insert("_TermIndex", "tj_" + collName +
"_TermIndex");
+ replaceBats.insert("_Terms", "tj_" + collName + "_Terms");
+ collBat.find("_TermIndex").access(BAT_READ).mmap(1);
+ collBat.find("_Terms").access(BAT_READ).mmap(1);
+ collBat.find("submitBats").append("tj_" + collName +
"_TermIndex");
+ collBat.find("submitBats").append("tj_" + collName + "_Terms");
tmp := tids.semijoin(bat("tj_" + collName + "_pfpre"));
- tmp := tmp.reverse().sort();
- var i := mergeindex(tmp, bat("tj_" + collName + "_TagIndex"),
- bat("tj_" + collName + "_Tags"),
- bat("tj_globalTags").count() + 1);
- bat("tj_" + collName + "_TagIndex").persists(false);
- bat("tj_" + collName + "_Tags").persists(false);
- commit();
- # create _TagIndex and _Tags here
- var newindex := i.fetch(0);
- newindex.persists(true).bbpname("tj_" + collName + "_TagIndex");
- var newpre := i.fetch(1);
- newpre.persists(true).bbpname("tj_" + collName + "_Tags");
+ tmp := tmp.reverse().ssort();
+ i := mergeindex(tmp, collBat.find("_TagIndex"),
+ collBat.find("_Tags"),
+ collBat.find("_globalTags").count() + 1);
+ collBat.replace("_TagIndex", i.fetch(0));
+ collBat.replace("_Tags", i.fetch(1));
i := nil;
tmp := nil;
- newindex.access(BAT_READ);
- newindex.mmap(1);
- newpre.access(BAT_READ);
- newpre.mmap(1);
- newindex := nil;
- newpre := nil;
- commit();
+ replaceBats.insert("_TagIndex", "tj_" + collName + "_TagIndex");
+ replaceBats.insert("_Tags", "tj_" + collName + "_Tags");
+ collBat.find("_TagIndex").access(BAT_READ).mmap(1);
+ collBat.find("_Tags").access(BAT_READ).mmap(1);
+ collBat.find("submitBats").append("tj_" + collName +
"_TagIndex");
+ collBat.find("submitBats").append("tj_" + collName + "_Tags");
}
else # create new index
{
@@ -2581,7 +2644,8 @@
terms.mmap(1);
termindex := nil;
terms := nil;
- commit();
+ collBat.find("submitBats").append("tj_" + collName +
"_TermIndex");
+ collBat.find("submitBats").append("tj_" + collName + "_Terms");
tmp := tids.semijoin(bat("tj_" + collName + "_pfpre"));
tmp := tmp.reverse().ssort();
@@ -2597,7 +2661,8 @@
tags.mmap(1);
tagindex := nil;
tags := nil;
- commit();
+ collBat.find("submitBats").append("tj_" + collName +
"_TagIndex");
+ collBat.find("submitBats").append("tj_" + collName + "_Tags");
}
}
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins