Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv18765/modules/pftijah
Modified Files:
nexi.c pftijah.mx serialize_pftijah.mx
Log Message:
propagated changes of Thursday Feb 22 2007 - Friday Feb 23 2007
from the XQuery_0-16 branch to the development trunk
Index: serialize_pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/serialize_pftijah.mx,v
retrieving revision 1.40
retrieving revision 1.41
diff -u -d -r1.40 -r1.41
--- serialize_pftijah.mx 22 Feb 2007 11:34:42 -0000 1.40
+++ serialize_pftijah.mx 23 Feb 2007 15:11:07 -0000 1.41
@@ -649,11 +649,13 @@
return NULL;
int newFragments = (int)BATcount(fragments) + 1;
/* */
+ BBPunfix(BBPcacheid(res->b_collPre));
res->b_collPre =
pftu_create_bat(pftu_batname1("tj_%s_tid%d",res->name,newFragments),TYPE_void,TYPE_oid,1);
if ( !res->b_collPre )
return NULL;
BBPfix(BBPcacheid(res->b_collPre));
BATseqbase(res->b_collPre,res->tijahPre);
+ BBPunfix(BBPcacheid(res->b_collSize));
res->b_collSize =
pftu_create_bat(pftu_batname1("tj_%s_size%d",res->name,newFragments),TYPE_void,TYPE_int,1);
if ( !res->b_collSize )
return NULL;
@@ -662,6 +664,13 @@
if ( !BUNappend(fragments,&res->tijahPre,0) )
return NULL;
BBPunfix(BBPcacheid(fragments));
+ /* */
+ str pretag = "_tid";
+ bat prebat = BBPcacheid(res->b_collPre);
+ if ( !BUNreplace(tjCtx_BAT,pretag,&prebat,0) ) { return NULL; }
+ str sizetag = "_size";
+ bat sizebat = BBPcacheid(res->b_collSize);
+ if ( !BUNreplace(tjCtx_BAT,sizetag,&sizebat,0) ) { return NULL; }
#ifdef TJ_TRACE
if ( TJ_TRACE ) stream_printf(GDKout,"C[%s]: loadTijahContext() finished creating new fragments\n",res->name);
#endif
Index: nexi.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi.c,v
retrieving revision 1.48
retrieving revision 1.49
diff -u -d -r1.48 -r1.49
--- nexi.c 22 Feb 2007 11:34:42 -0000 1.48
+++ nexi.c 23 Feb 2007 15:11:05 -0000 1.49
@@ -242,6 +242,10 @@
MILPRINTF(MILOUT, "if ( view_bbp_name().reverse().exist(\"%s\") ) {\n", startNodes_name );
MILPRINTF(MILOUT, " startNodes := bat(\"%s\");\n", startNodes_name);
MILPRINTF(MILOUT, " bat(\"%s\").persists(false);\n", startNodes_name);
+ if ( TDEBUG(98) ) {
+ MILPRINTF(MILOUT," printf(\"# tijah-mil-exec: contents of startNodes is:\\n\");\n");
+ MILPRINTF(MILOUT," bat(\"%s\").print();\n",startNodes_name);
+ }
MILPRINTF(MILOUT, "}\n" );
}
Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.97
retrieving revision 1.98
diff -u -d -r1.97 -r1.98
--- pftijah.mx 22 Feb 2007 11:34:42 -0000 1.97
+++ pftijah.mx 23 Feb 2007 15:11:07 -0000 1.98
@@ -53,6 +53,10 @@
.COMMAND tj_dispose_termdb() : void = CMDtj_dispose_termdb;
"INCOMPLETE"
+.COMMAND tj_log(str, int) : void = CMDtj_log;
+ "DEBUGGING function for difficult IO areas"
+
+
.COMMAND serialize_tijah_opt(
BAT[void,bat] ws,
int niters,
@@ -67,12 +71,12 @@
"C interface to pftijah option serialize"
.COMMAND pf2tijah_node(
- BAT[void,str] doc_name,
- BAT[void,int] doc_firstpre,
+ BAT[oid,str] doc_name,
+ BAT[oid,int] doc_firstpre,
BAT[oid,oid] pfpre,
- BAT[void,oid] item,
- BAT[void,int] kind,
- BAT[void,str] doc_loaded)
+ BAT[oid,oid] item,
+ BAT[oid,int] kind,
+ BAT[oid,str] doc_loaded)
: BAT[void,oid] = CMDpf2tijah_node;
"Translate Pathfinder node sequence to tijah node sequence"
@@ -582,8 +586,8 @@
lock_set(coll_lock);
var err := CATCH({
var t_start := usec();
- bat("tj_globalTerms").access(BAT_WRITE);
- bat("tj_globalTags").access(BAT_WRITE);
+ bat("tj_globalTerms").access(BAT_APPEND);
+ bat("tj_globalTags").access(BAT_APPEND);
var collBat := _tj_collection(collName);
_tj_add2collection(collName, collBat, uri_loc, uri_name, store);
_tj_finalize_collection(collName, collBat);
@@ -615,9 +619,10 @@
lock_set(coll_lock);
var err := CATCH({
var t_start := usec();
- bat("tj_globalTerms").access(BAT_WRITE);
- bat("tj_globalTags").access(BAT_WRITE);
- var collBat := _tj_collection(collName);
+ bat("tj_globalTerms").access(BAT_APPEND);
+ bat("tj_globalTags").access(BAT_APPEND);
+ var collBat;
+ collBat := _tj_collection(collName);
[EMAIL PROTECTED]() {
_tj_add2collection(collName, collBat, $h, $t, store);
}
@@ -725,8 +730,9 @@
collBat.find("_size").access(BAT_APPEND).mmap(1);
collBat.find("_tid").access(BAT_APPEND).mmap(1);
collBat.find("_pfpre").access(BAT_APPEND).mmap(1);
- collBat.find("submitBats").append("tj_" + collName + "_size1");
- collBat.find("submitBats").append("tj_" + collName + "_tid1");
+ var fpfx := str(collBat.find("_fragments").count());
+ collBat.find("submitBats").append("tj_" + collName + "_size" + fpfx);
+ collBat.find("submitBats").append("tj_" + collName + "_tid" + fpfx);
collBat.find("submitBats").append("tj_" + collName + "_pfpre");
collBat.find("submitBats").append("tj_" + collName + "_fragments");
collBat.find("submitBats").append("tj_" + collName + "_doc_name");
@@ -740,8 +746,11 @@
_tj_set_parameter(collBat, "_last_finalizedPre", lst_fpre);
#
var gterm_sort := bat("tj_globalTerms").reverse().sort().reverse();
- var gtag_sort := bat("tj_globalTags").reverse().sort().reverse();
+ gterm_sort.access(BAT_APPEND);
gterm_sort.mmap(1);
+ var gtag_sort := bat("tj_globalTags").reverse().sort().reverse();
+ gtag_sort.access(BAT_APPEND);
+ gtag_sort.mmap(1);
collBat.replace("_globalTerms", gterm_sort);
collBat.replace("_globalTags", gtag_sort);
@@ -1362,7 +1371,7 @@
}
- var ctx := region;
+ var ctx := region.sort();
var pre_size := load( "tj_" + collName + "_size1");
var desc := scj_desc( pre_size, ctx, count(pre_size) );
var result := nodes( desc ).project( dbl(scoreBase) );
@@ -1580,7 +1589,7 @@
var pre_size := bat("tj_" + collName + "_size" + ind);
# evaluate doc/term (anc/desc) relationship
- var elem_tid := _containing_desc3(left.mark(0@0), pre_tid, pre_size);
+ var elem_tid := _containing_desc3(left.sort().mark(0@0), pre_tid, pre_size);
pre_tid := nil;
if (elem_tid.count() = 0) {return new(oid,dbl);}
@@ -1623,7 +1632,7 @@
# evaluate doc/term (anc/desc) relationship
var t2 := time();
- var elem_tid := _containing_desc3(left.mark(0@0), pre_tid, pre_size);
+ var elem_tid := _containing_desc3(left.sort().mark(0@0), pre_tid, pre_size);
var t3 := time();
pre_tid := nil;
if (elem_tid.count() = 0) {return new(oid,dbl);}
@@ -1670,7 +1679,7 @@
# evaluate doc/term (anc/desc) relationship
var t2 := time();
- var elem_tid := _containing_desc(left.mark(0@0), pre_tid, pre_size);
+ var elem_tid := _containing_desc(left.sort().mark(0@0), pre_tid, pre_size);
var t3 := time();
pre_tid := nil;
if (elem_tid.count() = 0) {return new(oid,dbl);}
@@ -1861,7 +1870,7 @@
var pre_size := bat("tj_" + collName + "_size1");
# See which document contain the query terms we create a bat of [doc, term-id]:
- var doc_tid := _containing_desc(ctx.mark(0@0).sort(), tid_pre.reverse(), pre_size);
+ var doc_tid := _containing_desc(ctx.sort().mark(0@0), tid_pre.reverse(), pre_size);
# len(doc): [doc, size]
var doc_len := [dbl](bat("tj_" + collName + "_size1").semijoin(doc_tid));
@@ -2568,6 +2577,7 @@
{
mod_frags.append("tj_" + collName + "_tid" + str(frag_offset));
mod_frags.append("tj_" + collName + "_size" + str(frag_offset));
+ frag_offset :+= 1;
}
return mod_frags;
@@ -2581,16 +2591,14 @@
var frag_last := fragments.count();
var pre_tid := bat("tj_" + collName + "_tid" + str(frag_offset));
- var tids := pre_tid.slice(int(offset), pre_tid.count() - 1);
+ var tids := pre_tid.slice(int(offset) - int(pre_tid.seqbase()), pre_tid.count() - 1);
tids.access(BAT_WRITE);
frag_offset :+= 1;
while (frag_offset <= frag_last)
{
tids.append(bat("tj_" + collName + "_tid" + str(frag_offset)));
frag_offset :+= 1;
- }
- var tmp := tids.kdiff(collBat.find("_pfpre"));
- tmp := tmp.reverse().sort();
+ }
# incremental index merge
if (view_bbp_name().reverse().exist("tj_" + collName + "_TermIndex"))
@@ -2598,7 +2606,7 @@
var replaceBats := collBat.find("replaceBats");
var tmp := tids.kdiff(collBat.find("_pfpre"));
tmp := tmp.reverse().ssort();
- var i := mergeindex(tmp, collBat.find("_TermIndex"),
+ var i := mergeindex(tmp, collBat.find("_TermIndex"),
collBat.find("_Terms"),
collBat.find("_globalTerms").count()
+ 1);
collBat.replace("_TermIndex", i.fetch(0));
@@ -2757,6 +2765,21 @@
return monet_exec(str);
}
+#define LOGGERFILE "/tmp/TIJAH_LOGGER"
+
+/*
+ * the CMDtj_log is created to be able to print in very difficult IO areas
+ */
+int CMDtj_log(str format, int* i) {
+ FILE* f;
+
+ if ( (f = fopen(LOGGERFILE,"a")) ) {
+ fprintf(f,(const char*)format,*i);
+ fclose(f);
+ }
+ return GDK_SUCCEED;
+}
+
int CMDtijah_command(bit* res, str command) {
if ( 0 ) stream_printf(GDKout,"# tijah_command(%s)\n",(char*)command);
*res = executeMIL(command); /* OK */
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins