Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv21943/modules/pftijah
Modified Files:
nexi.c pftijah.mx serialize_pftijah.mx
Log Message:
This check-in contains the PFTIJAH implementation using the Algebra version of
Pathfinder. All PFTIJAH functions are implemented using the new fun_call /
fun_param operations and (after some coaching by JanR:) this worked
pretty well.
Notes:
- all functions except tijah:ft-index-info() are implemented.
- The maintenance of the Full-Text indices is now also implemented using a
'tape'. So indices are only modified AFTER the query.
- Implementation may be a little bit buggy because only operations on
small synthetic test examples have been tested and no real complex scenarios
have been tried.
In the coming weeks I will try to solve these small problems, but I thought it
was better to synchronize early.
Jan Flokstra.
U serialize_pftijah.mx
Index: serialize_pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/serialize_pftijah.mx,v
retrieving revision 1.58
retrieving revision 1.59
diff -u -d -r1.58 -r1.59
--- serialize_pftijah.mx 22 Jan 2008 22:14:14 -0000 1.58
+++ serialize_pftijah.mx 10 Apr 2008 13:38:47 -0000 1.59
@@ -25,6 +25,11 @@
@t serialize_pftijah
@h
+/*
+ * TODO: whitelist/blacklist tags implementeren
+ *
+ */
+
struct tijahContextStruct;
extern int handleTijahTerm(struct tijahContextStruct *ctx, char* term);
U nexi.c
Index: nexi.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/nexi.c,v
retrieving revision 1.75
retrieving revision 1.76
diff -u -d -r1.75 -r1.76
--- nexi.c 14 Jan 2008 14:06:05 -0000 1.75
+++ nexi.c 10 Apr 2008 13:38:46 -0000 1.76
@@ -91,8 +91,16 @@
extern int old_main(BAT* optbat, char* startNodes_name);
-char* tijahParse(BAT* optbat, char* startNodes_name, char* query, char**
errBUFF) {
+char* tijahParse(BAT* optbat, char* startNodes_name, char** errBUFF) {
/* setup TijahParserContext structure */
+ BUN bun;
+ if ( (bun = BUNfnd(optbat,"_query")) == BUN_NONE ) {
+ stream_printf(GDKerr,"Error: cannot find \"_query\" tag.\n");
+ return FALSE;
+ }
+ BATiter bi = bat_iterator(optbat);
+ str query = (str)BUNtail(bi,bun);
+
LOGPRINTF(LOGFILE,"- tijahParse([%s])\n",query);
parserCtx->collection = "DFLT_FT_INDEX";
parserCtx->queryText = query;
@@ -274,6 +282,8 @@
SET_TDEBUG(v);
if (TDEBUG(1)) stream_printf(GDKout,"# old_main: setting debug
value to %d.\n",v);
}
+ } else if (strcmp(optName, "_query") == 0) {
+ /* OK, this is the regular query transfer option */
} else if ( strcmp(optName,"timing") == 0 ) {
if ( strcasecmp(optVal,"TRUE") == 0 ) {
MILPRINTF(MILOUT, "timing := TRUE;\n" );
U pftijah.mx
Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.163
retrieving revision 1.164
diff -u -d -r1.163 -r1.164
--- pftijah.mx 4 Mar 2008 21:47:27 -0000 1.163
+++ pftijah.mx 10 Apr 2008 13:38:46 -0000 1.164
@@ -27,7 +27,7 @@
@m
.MODULE pftijah;
-.COMMAND _run_tijah_query(BAT[str,str] opt, bit use_startnodes, BAT[void,oid]
nodes, str q) : BAT[oid,dbl] = CMDtijah_query;
+.COMMAND _run_tijah_query(BAT[str,str] opt, bit use_startnodes, BAT[void,oid]
nodes) : BAT[oid,dbl] = CMDtijah_query;
"INCOMPLETE"
.COMMAND tj_normalizeTerm(str, str) : str = CMDtj_normalizeTerm;
@@ -875,7 +875,6 @@
if (not(isnil(err))) ERROR(err);
}
-
# internal method which return all batnames of a collection in a
# a [void,str] bat
PROC _tj_collection_str(str ftiName) : BAT[void,bat]
@@ -1271,7 +1270,146 @@
# #
#####################################################################
-PROC run_tijah_query(str ftiName, BAT[str,str] opt, bit use_startnodes,
BAT[void,oid] nodes, str q) : BAT[oid,dbl] :=
+# tijah function to 'package' a pathfinder [iter|item|kind|pos] operand
+PROC tj_pfop( BAT[void,oid] iter,
+ BAT[void,any] item,
+ BAT[void,int] kind,
+ BAT[void,oid] pos) : BAT[void,bat] :=
+{
+ var res := new(void,bat).seqbase([EMAIL PROTECTED]);
+
+ res.append(iter);
+ res.append(item);
+ res.append(kind);
+ res.append(pos);
+
+ return res;
+}
+
+# universal tijah query function handler. Sould work in the milprint_summer
+# and in the algebra context.
+PROC tj_query_handler(
+ bit par_storeScore,
+ BAT[void,bat] pfop_sn,
+ BAT[void,bat] pfop_query,
+ BAT[void,bat] pfop_opt,
+ BAT[oid,any] par_loop,
+ BAT[oid,bat] par_ws,
+ BAT[void,lng] par_int_values,
+ BAT[void,dbl] par_dbl_values,
+ BAT[void,str] par_str_values,
+ BAT[void,bat] par_scoreDB
+ ) : BAT[void,bat] :=
+{
+ var result_id;
+ var result_iter;
+ var result_item;
+ var result_pos;
+ var result_frag;
+
+ if ( par_storeScore ) {
+ result_id := new(void,lng).seqbase([EMAIL PROTECTED]);
+ } else {
+ result_iter := new(void,oid).seqbase([EMAIL PROTECTED]);
+ result_item := new(void,oid).seqbase([EMAIL PROTECTED]);
+ result_pos := new(void,oid).seqbase([EMAIL PROTECTED]);
+ result_frag := new(void,oid).seqbase([EMAIL PROTECTED]);
+ }
+
+ var has_sn := (pfop_sn.count() > 0);
+ var has_options := (pfop_opt.count() > 0);
+
+ [EMAIL PROTECTED]() { # begin batloop over queries
+ var optbat;
+ if ( has_options ) {
+ iter := pfop_opt.fetch([EMAIL PROTECTED]).select($t);
+ item := pfop_opt.fetch([EMAIL PROTECTED]).semijoin(iter);
+ kind := pfop_opt.fetch([EMAIL PROTECTED]).semijoin(iter);
+ iter := iter.tmark([EMAIL PROTECTED]);
+ item := item.tmark([EMAIL PROTECTED]);
+ kind := kind.tmark([EMAIL PROTECTED]);
+ optbat :=
serialize_tijah_opt(par_ws,1,iter,iter,item,kind,par_int_values,par_dbl_values,par_str_values);
+ } else {
+ optbat := new(str,str,32);
+ }
+ var ftindex := tj_get_ft_index(optbat,true);
+ tijah_lock := tj_get_collection_lock(ftindex);
+ lock_set(tijah_lock);
+ var startNodes;
+ if ( has_sn ) {
+ iter := pfop_sn.fetch([EMAIL PROTECTED]);
+ var iteration := pfop_query.fetch([EMAIL PROTECTED]).fetch(int($h));
+ iter := iter.select(iteration);
+ item := pfop_sn.fetch([EMAIL PROTECTED]).semijoin(iter);
+ kind := pfop_sn.fetch([EMAIL PROTECTED]).semijoin(iter);
+ item := item.tmark([EMAIL PROTECTED]);
+ kind := kind.tmark([EMAIL PROTECTED]);
+ var xdoc_name := bat("tj_" + ftindex + "_doc_name");
+ var xdoc_firstpre := bat("tj_" + ftindex + "_doc_firstpre");
+ var xpfpre := bat("tj_" + ftindex + "_pfpre");
+ var doc_loaded :=
reverse(par_ws.fetch(OPEN_CONT)).leftfetchjoin(par_ws.fetch(OPEN_NAME));
+ startNodes :=
pf2tijah_node(xdoc_name,xdoc_firstpre,xpfpre,item,kind,doc_loaded);
+ } else {
+ startNodes := new(void,oid);
+ }
+ optbat.access(BAT_WRITE);
+ optbat.insert("_query",pfop_query.fetch([EMAIL
PROTECTED]).fetch(int($h)));
+ var nexi_allscores := run_tijah_query(ftindex,optbat,has_sn,startNodes);
+ var nexi_score;
+ if ( optbat.exist("returnNumber") ) {
+ var retNum := int(optbat.find("returnNumber"));
+ nexi_score := nexi_allscores.slice(0, retNum - 1);
+ } else {
+ nexi_score := nexi_allscores;
+ }
+ var docpre := bat("tj_" + ftindex + "_doc_firstpre").[oid]();
+ var pfpre := bat("tj_" + ftindex + "_pfpre");
+ item := nexi_score.hmark([EMAIL PROTECTED]);
+ var frag := [find_lower](const docpre.reverse().mark([EMAIL PROTECTED]),
item);
+ item := item.join(pfpre).sort().tmark();
+ var needed_docs := bat("tj_" + ftindex +
"_doc_name").semijoin(frag.tunique());
+ lock_unset(tijah_lock);
+ tijah_lock := lock_nil;
+ var loaded_docs := par_ws.fetch(OPEN_NAME).reverse();
+ var docs_to_load :=
kdiff(needed_docs.reverse(),loaded_docs).hmark([EMAIL PROTECTED]);
+ ws_opendoc(par_ws, docs_to_load);
+ var doc_loaded :=
reverse(par_ws.fetch(OPEN_CONT)).leftfetchjoin(par_ws.fetch(OPEN_NAME));
+ var fid_pffid := needed_docs.join(doc_loaded.reverse());
+ frag := frag.join(fid_pffid).sort().tmark();
+ if ( par_storeScore ) {
+ var tID := oid(par_scoreDB.fetch([EMAIL PROTECTED]).count() + 10000);
+ par_scoreDB.fetch([EMAIL
PROTECTED]).insert(lng(tID),lng(nexi_allscores.count()));
+ par_scoreDB.fetch([EMAIL PROTECTED]).append(item.project(tID));
+ par_scoreDB.fetch([EMAIL PROTECTED]).append(frag);
+ par_scoreDB.fetch([EMAIL PROTECTED]).append(item);
+ par_scoreDB.fetch([EMAIL PROTECTED]).append(nexi_score.tmark());
+ result_id.append(lng(tID));
+ } else {
+ result_iter.append(item.project($t));
+ result_pos.append(item.mark([EMAIL PROTECTED]));
+ result_frag.append(frag);
+ result_item.append(item);
+ }
+ } # end batloop over queries
+ if ( par_storeScore ) {
+ item := int_values.addValues(result_id).tmark([EMAIL PROTECTED]);
+ iter := par_loop.tmark(oid(0));
+ ipik := iter;
+ pos := oid(1);
+ kind := INT;
+ } else {
+ iter := result_iter;
+ pos := result_pos;
+ kind := set_kind(result_frag, ELEM);
+ item := result_item;
+ ipik := iter;
+ }
+ var res :=
tj_pfop(iter.materialize(ipik),item.materialize(ipik),kind.materialize(ipik),pos.materialize(ipik));
+ #
+ return res;
+}
+
+PROC run_tijah_query(str ftiName, BAT[str,str] opt, bit use_startnodes,
BAT[void,oid] nodes) : BAT[oid,dbl] :=
{
if ( verbose ) printf("#TJ:run_tijah_query(\"%s\",..)
called.\n",ftiName);
var parambat := bat("tj_" + ftiName + "_param");
@@ -1287,16 +1425,416 @@
_tj_commit(collBat);
}
}
- return _run_tijah_query(opt,use_startnodes,nodes,q);
+ return _run_tijah_query(opt,use_startnodes,nodes);
}
-
#####################################################################
-# #
-# End of the new implementation of the interfaces #
-# #
+# #
+# #
+# Experimental algebra section #
+# #
+# #
#####################################################################
+# tijah function to 'package' an algebra [iter|item|kind|pos] operand
+PROC ALG_tj_pfop(
+ BAT[oid,oid] iter,
+ BAT[oid,any] item,
+ int noKind,
+ BAT[oid,oid] pos) : BAT[void,bat] :=
+{
+ var res := new(void,bat).seqbase([EMAIL PROTECTED]);
+
+ res.append(iter);
+ res.append(item);
+ res.append(item.project(0));
+ res.append(pos);
+
+ return res;
+}
+
+PROC ALG_tj_pfop(
+ BAT[oid,oid] iter,
+ BAT[oid,any] item,
+ BAT[oid,oid] frag,
+ BAT[oid,oid] pos) : BAT[void,bat] :=
+{
+ var res := new(void,bat).seqbase([EMAIL PROTECTED]);
+
+ res.append(iter);
+ res.append(item);
+ res.append(frag);
+ res.append(pos);
+
+ return res;
+}
+
+PROC ALG_tj_query_nodes(
+ BAT[void,any] par_loop,
+ BAT[oid,bat] pfop_id,
+ BAT[oid,bat] tijah_scoreDB
+ ) : BAT[void,bat] :=
+{
+ if ( verbose ) printf("# ALG_tj_query_nodes: START.\n");
+ var item := new(void,oid).seqbase([EMAIL PROTECTED]);
+ var iter := new(void,oid).seqbase([EMAIL PROTECTED]);
+ var pos := new(void,oid).seqbase([EMAIL PROTECTED]);
+ var frag := new(void,oid).seqbase([EMAIL PROTECTED]);
+ [EMAIL PROTECTED]() { # begin of query batloop
+ var qid := oid(pfop_id.fetch(1).fetch(int($h)));
+ var tmp := tijah_scoreDB.fetch([EMAIL PROTECTED]).ord_uselect(qid);
+ item.append(tmp.mirror().leftfetchjoin(tijah_scoreDB.fetch([EMAIL
PROTECTED])));
+ iter.append(tmp.project(par_loop.fetch(int($h))));
+ frag.append(tmp.mirror().leftfetchjoin(tijah_scoreDB.fetch([EMAIL
PROTECTED])));
+ pos.append(tmp.mark([EMAIL PROTECTED]));
+ } # end of query batloop
+ var res := ALG_tj_pfop(iter,item,frag,pos);
+ if ( verbose ) printf("# ALG_tj_query_nodes: FINISH.\n");
+ return res;
+}
+
+PROC ALG_tj_query_score(
+ BAT[void,any] par_loop,
+ BAT[oid,bat] pfop_id,
+ BAT[oid,bat] pfop_nodes,
+ BAT[oid,bat] tijah_scoreDB
+ ) : BAT[void,bat] :=
+{
+ var score := new(oid,dbl);
+ var tmp := [<<]([lng](tijah_scoreDB.fetch([EMAIL PROTECTED])), const
32);
+ var tijah_fragpre := [+](tmp, [lng](tijah_scoreDB.fetch([EMAIL
PROTECTED])));
+ tmp := nil;
+ var item1_unique := pfop_id.fetch(1).tunique();
+ var item := pfop_nodes.fetch(1);
+ var kind := pfop_nodes.fetch(2);
+ [EMAIL PROTECTED]() {
+ var item_part := item.semijoin(pfop_id.fetch(1).uselect($h));
+ var frag_part := kind.semijoin(item_part);
+ frag_part := [<<]([lng](frag_part), const 32);
+ var fragpre_part := [+](frag_part, [lng](item_part));
+
+ item_part := nil;
+ frag_part := nil;
+ tmp := tijah_scoreDB.fetch([EMAIL PROTECTED]).uselect(oid($h));
+ tmp := tmp.mirror().leftfetchjoin(tijah_fragpre);
+ tmp := tmp.join(fragpre_part.reverse());
+ score.insert(tmp.reverse().leftfetchjoin(tijah_scoreDB.fetch([EMAIL
PROTECTED])));
+ }
+ var xitem := kdiff(item,score).project(dbl(0));
+ score.insert(xitem);
+ xitem := nil;
+ score := score.sort().tmark([EMAIL PROTECTED]);
+
+ var iter := par_loop.tmark([EMAIL PROTECTED]);
+ var ipik := iter;
+ var pos := [EMAIL PROTECTED];
+ var res := ALG_tj_pfop(iter,score,0,pos.materialize(ipik));
+ if ( verbose ) printf("# ALG_tj_query_nodes: FINISH.\n");
+ return res;
+}
+
+# temporary algebra query handler
+PROC ALG_tj_query_handler(
+ bit par_storeScore,
+ BAT[oid,bat] pfop_sn,
+ BAT[oid,bat] pfop_query,
+ BAT[oid,bat] pfop_opt,
+ BAT[void,any] par_loop,
+ BAT[oid,bat] par_ws,
+ BAT[oid,bat] par_scoreDB
+ ) : BAT[void,bat] :=
+{
+ var result_id;
+ var result_iter;
+ var result_item;
+ var result_pos;
+ var result_frag;
+
+ if ( verbose ) printf("# ALG_tj_query_handler: START.\n");
+ if ( par_storeScore ) {
+ if ( verbose ) printf("# ALG_tj_query_handler: storeScore=TRUE.\n");
+ result_id := new(void,lng).seqbase([EMAIL PROTECTED]);
+ } else {
+ if ( verbose ) printf("# ALG_tj_query_handler: storeScore=FALSE.\n");
+ result_iter := new(void,oid).seqbase([EMAIL PROTECTED]);
+ result_item := new(void,oid).seqbase([EMAIL PROTECTED]);
+ result_pos := new(void,oid).seqbase([EMAIL PROTECTED]);
+ result_frag := new(void,oid).seqbase([EMAIL PROTECTED]);
+ }
+
+ var has_sn := (pfop_sn.count() > 0);
+ var has_options := (pfop_opt.count() > 0);
+
+ [EMAIL PROTECTED]() {
+ var optbat;
+ if ( verbose ) printf("# ALG_tj_query_handler: loop start, id=%d.\n",$t);
+ if ( has_options ) {
+ if ( verbose ) printf("# ALG_tj_query_handler: running option
handler.\n");
+ iter := pfop_opt.fetch([EMAIL PROTECTED]).select($t);
+ item := pfop_opt.fetch([EMAIL PROTECTED]).semijoin(iter);
+ kind := pfop_opt.fetch([EMAIL PROTECTED]).semijoin(iter);
+ iter := iter.tmark([EMAIL PROTECTED]);
+ item := item.tmark([EMAIL PROTECTED]);
+ kind := kind.tmark([EMAIL PROTECTED]);
+ optbat :=
serialize_tijah_opt(par_ws,1,iter,iter,item,set_kind(kind,ELEM),new(void,lng),new(void,dbl),new(void,str));
+ if ( verbose ) optbat.print();
+ } else {
+ optbat := new(str,str,32);
+ }
+ if ( verbose ) printf("# ALG_tj_query_handler: handle startNodes.\n");
+ var ftindex := tj_get_ft_index(optbat,true);
+ var tijah_lock := tj_get_collection_lock(ftindex);
+ lock_set(tijah_lock);
+ var startNodes;
+ if ( has_sn ) {
+ iter := pfop_sn.fetch([EMAIL PROTECTED]);
+ var iteration := pfop_query.fetch([EMAIL PROTECTED]).fetch(int($h));
+ iter := iter.select(iteration);
+ item := pfop_sn.fetch([EMAIL PROTECTED]).semijoin(iter);
+ kind := pfop_sn.fetch([EMAIL PROTECTED]).semijoin(iter);
+ item := item.tmark([EMAIL PROTECTED]);
+ kind := kind.tmark([EMAIL PROTECTED]);
+
+ var xdoc_name := bat("tj_" + ftindex + "_doc_name");
+ var xdoc_firstpre := bat("tj_" + ftindex + "_doc_firstpre");
+ var xpfpre := bat("tj_" + ftindex + "_pfpre");
+ var doc_loaded :=
reverse(par_ws.fetch(OPEN_CONT)).leftfetchjoin(par_ws.fetch(OPEN_NAME));
+ if ( verbose ) printf("# ALG_tj_query_handler: compute startnodes\n");
+ startNodes :=
pf2tijah_node(xdoc_name,xdoc_firstpre,xpfpre,item,[int](kind),doc_loaded);
+ } else {
+ startNodes := new(void,oid);
+ }
+ optbat.access(BAT_WRITE);
+ optbat.insert("_query",pfop_query.fetch([EMAIL
PROTECTED]).fetch(int($h)));
+ if ( verbose ) printf("# ALG_tj_query_handler: run tijah query.\n");
+ var nexi_allscores := run_tijah_query(ftindex,optbat,has_sn,startNodes);
+ var nexi_score;
+ if ( verbose ) printf("# ALG_tj_query_handler: handling scores.\n");
+ if ( optbat.exist("returnNumber") ) {
+ var retNum := int(optbat.find("returnNumber"));
+ nexi_score := nexi_allscores.slice(0, retNum - 1);
+ } else {
+ nexi_score := nexi_allscores;
+ }
+ var docpre := bat("tj_" + ftindex + "_doc_firstpre").[oid]();
+ var pfpre := bat("tj_" + ftindex + "_pfpre");
+ var item := nexi_score.hmark([EMAIL PROTECTED]);
+ var frag := [find_lower](const docpre.reverse().mark([EMAIL PROTECTED]),
item);
+ item := item.join(pfpre).sort().tmark();
+ var needed_docs := bat("tj_" + ftindex +
"_doc_name").semijoin(frag.tunique());
+ lock_unset(tijah_lock);
+ if ( verbose ) printf("# ALG_tj_query_handler: released lock.\n");
+ tijah_lock := lock_nil;
+ var loaded_docs := par_ws.fetch(OPEN_NAME).reverse();
+ var docs_to_load :=
kdiff(needed_docs.reverse(),loaded_docs).hmark([EMAIL PROTECTED]);
+ ws_opendoc(par_ws, docs_to_load);
+ var doc_loaded :=
reverse(par_ws.fetch(OPEN_CONT)).leftfetchjoin(par_ws.fetch(OPEN_NAME));
+ var fid_pffid := needed_docs.join(doc_loaded.reverse());
+ frag := frag.join(fid_pffid).sort().tmark();
+ if ( verbose ) printf("# ALG_tj_query_handler: handled new
frags/documents.\n");
+ if ( par_storeScore ) {
+ var tID := oid(par_scoreDB.fetch([EMAIL PROTECTED]).count() + 8888);
+ par_scoreDB.fetch([EMAIL
PROTECTED]).insert(lng(tID),lng(nexi_allscores.count()));
+ par_scoreDB.fetch([EMAIL PROTECTED]).append(item.project(tID));
+ par_scoreDB.fetch([EMAIL PROTECTED]).append(frag);
+ par_scoreDB.fetch([EMAIL PROTECTED]).append(item);
+ par_scoreDB.fetch([EMAIL PROTECTED]).append(nexi_score.tmark());
+ result_id.append(lng(tID));
+ if ( verbose ) printf("# ALG_tj_query_handler: stored loop score.\n");
+ } else {
+ result_iter.append(item.project($t));
+ result_pos.append(item.mark([EMAIL PROTECTED]));
+ result_frag.append(frag);
+ result_item.append(item);
+ if ( verbose ) printf("# ALG_tj_query_handler: stored loop nodes in
result.\n");
+ }
+ if ( verbose ) printf("# ALG_tj_query_handler: loop finish,
id=%d.\n",$t);
+ } # end batloop over queries
+ if ( verbose ) printf("# ALG_tj_query_handler: batloop finished.\n");
+ var iter;
+ var item;
+ var ipik;
+ var kind;
+ var pos;
+ if ( par_storeScore ) {
+ if ( verbose ) printf("# ALG_tj_query_handler: create int return.\n");
+ item := result_id;
+ iter := par_loop.tmark(oid(0));
+ ipik := iter;
+ pos := oid(1);
+ kind := new(oid,oid);
+ } else {
+ if ( verbose ) printf("# ALG_tj_query_handler: create node return.\n");
+ iter := result_iter;
+ pos := result_pos;
+ kind := result_frag;
+ item := result_item;
+ ipik := iter;
+ }
+ if ( verbose ) {
+ printf("# ALG_tj_query_handler: iter/item/kind/pos result start\n");
+ iter.print();
+ item.print();
+ kind.print();
+ pos.print();
+ printf("# ALG_tj_query_handler: iter/item/kind/pos result finish\n");
+ }
+ var res := ALG_tj_pfop(iter,item,kind,pos.materialize(ipik));
+ #
+ if ( verbose ) printf("# ALG_tj_query_handler: FINISH.\n");
+ return res;
+}
+
+PROC ALG_tj_add_fti_tape(
+ str op,
+ BAT[oid,bat] pfop_coll,
+ BAT[oid,bat] pfop_opt,
+ BAT[void,any] par_loop,
+ BAT[oid,bat] par_ws,
+ BAT[str,bat] tape
+ ) : BAT[str,bat] :=
+{
+ if ( verbose ) printf("# ALG_tj_add_fti_tape: START.\n");
+ var has_coll := (pfop_coll.count() > 0);
+ var has_opt := (pfop_opt.count() > 0);
+ [EMAIL PROTECTED]() {
+ if ( verbose ) printf("# ALG_tj_query_handler: loop start,
id=%d.\n",$t);
+
+ var collbat;
+ if ( has_coll ) {
+ if ( verbose ) printf("# ALG_tj_add_fti_tape: start
collection handler.\n");
+ var iter := pfop_coll.fetch([EMAIL
PROTECTED]).select($t);
+ collbat := pfop_coll.fetch([EMAIL
PROTECTED]).semijoin(iter);
+
+ if ( collbat.select("*").count() > 0 ) {
+ ERROR("not possible to use wildcards for
pfcollections.");
+ }
+ } else {
+ if ( verbose ) printf("# ALG_tj_add_fti_tape: no
collection.\n");
+ collbat := new(void,str).seqbase([EMAIL PROTECTED]);
+ collbat.append("*");
+ }
+
+ var optbat;
+ if ( has_opt ) {
+ if ( verbose ) printf("# ALG_tj_add_fti_tape: running
option handler.\n");
+ var iter := pfop_opt.fetch([EMAIL
PROTECTED]).select($t);
+ var item := pfop_opt.fetch([EMAIL
PROTECTED]).semijoin(iter);
+ var kind := pfop_opt.fetch([EMAIL
PROTECTED]).semijoin(iter);
+ iter := iter.tmark([EMAIL PROTECTED]);
+ item := item.tmark([EMAIL PROTECTED]);
+ kind := kind.tmark([EMAIL PROTECTED]);
+ optbat :=
serialize_tijah_opt(par_ws,1,iter,iter,item,set_kind(kind,ELEM),new(void,lng),new(void,dbl),new(void,str));
+ if ( verbose ) optbat.print();
+ } else {
+ if ( verbose ) printf("# ALG_tj_add_fti_tape: no
options.\n");
+ optbat := new(str,str,32);
+ }
+
+ if ( verbose ) printf("# ALG_tj_query_handler: writing tape.\n");
+ var bb := new(void,bat).seqbase([EMAIL PROTECTED]);
+ bb.append(collbat);
+ bb.append(optbat);
+ tape.insert(op,bb);
+ if ( verbose ) tape.print();
+ if ( verbose ) printf("# ALG_tj_query_handler: end loop start,
id=%d.\n",$t);
+ }
+ if ( verbose ) printf("# ALG_tj_add_fti_tape: FINISH.\n");
+ return tape;
+}
+
+#PROC DocmgmTape(BAT[void,BAT] ws,
+# BAT[void,str] location,
+# BAT[void,str] docname,
+# BAT[void,str] colname,
+# BAT[void,lng] percentage) : void
+#{
+# var del_doc := percentage.ord_uselect(-1LL).hmark([EMAIL PROTECTED]);
+# var add_doc := percentage.ord_uselect(0LL,lng_nil).hmark([EMAIL
PROTECTED]);
+#
+# shred_doc_base(del_doc(bit_nil, del_doc.leftfetchjoin(docname), true),
+# add_doc.leftfetchjoin(location),
+# add_doc.leftfetchjoin(docname),
+# add_doc.leftfetchjoin(colname),
+# add_doc.leftfetchjoin(percentage),
+# stream_nil, ws_id(ws));
+#}
+
+PROC ALG_tj_docmgmt_tape(BAT[str,bat] tape,
+ BAT[void,BAT] ws,
+ BAT[void,str] location,
+ BAT[void,str] docnames,
+ BAT[void,str] colnames,
+ BAT[void,lng] percentages) : bit :=
+{
+ if ( verbose ) printf("# ALG_tj_docmgmt_tape: START.\n");
+ #
+ # INCOMPLETE, CHECK IF THIS REALLY STILL WORKS
+ #
+ if (isnil(CATCH(bat("tj_collName").count()))) {
+ # pftijah is active
+ if ( verbose ) printf("# ALG_tj_docmgmt_tape: running document
management.\n");
+ var del_doc := percentages.ord_uselect(-1LL).hmark([EMAIL
PROTECTED]);
+ var add_doc :=
percentages.ord_uselect(0LL,lng_nil).hmark([EMAIL PROTECTED]);
+ if ( verbose ) {
+ printf("# ALG_tj_docmgmt_tape: deleted docs are:\n");
+ del_doc.print();
+ printf("# ALG_tj_docmgmt_tape: added docs are:\n");
+ add_doc.print();
+ }
+ #
+ var pfc_name := docnames.reverse().leftfetchjoin(colnames);
+ var pfdep := bat("tj_pfc_fti_dep");
+ var pfdep_star := bat("tj_pfc_fti_dep_star");
+ var fti_dname := pfdep.join(pfc_name.reverse());
+ if ( pfdep_star.count() > 0 ) {
+ fti_dname.insert(pfdep_star.cross(pfc_name.reverse()));
+ }
+
+ var fti_cluster := new(str,bat);
+ [EMAIL PROTECTED]() {
+ var cb;
+ if ( fti_cluster.exist($h) ) {
+ cb := fti_cluster.find($h);
+ } else {
+ cb := new(str,str);
+ fti_cluster.insert($h,cb);
+ }
+ cb.insert(str(nil),$t);
+ }
+ [EMAIL PROTECTED]() {
+ if ( verbose ) { printf("#TJ:tj_play_doc_tape() doing ft-index
\"%s\".\n",$h); $t.print(); }
+ tj_add2collection($h,$t,true);
+ }
+ } else {
+ # pftijah is not active
+ if ( verbose ) printf("# ALG_tj_docmgmt_tape: skipping doc
managemnt.\n");
+ }
+
+ if ( verbose ) printf("# ALG_tj_docmgmt_tape: running collection
management.\n");
+ [EMAIL PROTECTED]() {
+ var op := $h;
+ var collbat := $t.fetch([EMAIL PROTECTED]);
+ var optbat := $t.fetch([EMAIL PROTECTED]);
+ var fti_name := tj_get_ft_index(optbat,(op!="create"));
+
+ if ( op = "create" ) {
+ if ( verbose ) printf("# ALG_tj_docmgmt_tape:
tj_init_collection(%s).\n",fti_name);
+ tj_init_collection(fti_name,optbat,collbat);
+ } else if ( op = "extend" ) {
+ if ( verbose ) printf("# ALG_tj_docmgmt_tape:
tj_extend_collection(%s).\n",fti_name);
+ tj_extend_collection(fti_name,collbat);
+ } else if ( op = "remove" ) {
+ if ( verbose ) printf("# ALG_tj_docmgmt_tape:
tj_delete_collection(%s).\n",fti_name);
+ tj_delete_collection(fti_name);
+ } else {
+ ERROR("ALG_tj_docmgmt_tape: unknown op");
+ }
+ }
+ if ( verbose ) printf("# ALG_tj_docmgmt_tape: FINISH.\n");
+ return true;
+}
+
# INCOMPLETE: henning, what should I do about this (no locking impl).
PROC tj_setBackgroundCollName(str name, BAT[oid,str] qenv) : BAT[void,str] :=
{
@@ -1393,18 +1931,9 @@
cb.insert(str(nil),$t);
}
[EMAIL PROTECTED]() {
- # $h.print();
- # $t.print();
if ( verbose ) { printf("#TJ:tj_play_doc_tape() doing ft-index
\"%s\".\n",$h); $t.print(); }
tj_add2collection($h,$t,true);
}
- # shred_doc_base(del_doc(bit_nil, del_doc.leftfetchjoin(names), true),
- # add_doc.leftfetchjoin(locations),
- # add_doc.leftfetchjoin(names),
- # add_doc.leftfetchjoin(colnames),
- # add_doc.leftfetchjoin(percentages),
- # stream_nil, ws_id(ws));
- # add_doc.leftfetchjoin(names).print();
if ( verbose ) printf("#TJ:tj_play_doc_tape() finished.\n");
}
}
@@ -2775,16 +3304,16 @@
return GDK_SUCCEED;
}
-extern char* tijahParse(BAT* optbat, char* startNodes_name, char* query,
char** errBUFF);
+extern char* tijahParse(BAT* optbat, char* startNodes_name, char** errBUFF);
static int nexiTmpCounter = 0;
-int CMDtijah_query(BAT** res, BAT* optbat, bit* use_startnodes, BAT*
startNodes, str query) {
+int CMDtijah_query(BAT** res, BAT* optbat, bit* use_startnodes, BAT*
startNodes) {
char* err;
char* mil;
char nameBUFF[32], *startNodes_name;
- if (TDEBUG(1)) stream_printf(GDKout,"# CMDtijah_query: start,
query=\"%s\".\n",query);
+ if (TDEBUG(1)) stream_printf(GDKout,"# CMDtijah_query: start.\n");
if ( *use_startnodes ) {
BATmode(startNodes,PERSISTENT);
sprintf(&nameBUFF[0],"%s%d","nexi_start",nexiTmpCounter++);
@@ -2803,7 +3332,7 @@
return GDK_FAIL;
}
if (TDEBUG(2)) stream_printf(GDKout,"# CMDtijah_query: call
tijahParse.\n");
- if ( !(mil=tijahParse(optbat,startNodes_name,query,&err)) ) {
+ if ( !(mil=tijahParse(optbat,startNodes_name,&err)) ) {
GDKerror("CMDtijah_query: %s.\n", err);
return GDK_FAIL;
}
@@ -3060,12 +3589,12 @@
*res = BATnew(TYPE_void, TYPE_oid, BATcount(item));
if ( debug ) stream_printf(GDKout,"* Start of CMDpf2tijah_node():\n");
- if ( 0 && debug ) {
+ if ( 1 && debug ) {
BATprintf(GDKout,item);
BATprintf(GDKout,kind);
BATprintf(GDKout,doc_loaded);
}
- if ( 0 && debug ) {
+ if ( 1 && debug ) {
BATprintf(GDKout,doc_name);
BATprintf(GDKout,doc_firstpre);
BATprintf(GDKout,doc_pfpre);
@@ -3084,11 +3613,16 @@
return GDK_FAIL;
}
int kval = *(int*)Tloc(kind, kindBUN);
+ // ALGEBRA NODES ONLY CONTAIN THE FRAG
+ oid container;
if ( XTRACT_KIND(kval) != ELEM ) {
- stream_printf(GDKout,"CMDpf2tijah_node: startNodes: no node\n");
- return GDK_FAIL;
- }
- oid container = (oid)XTRACT_CONT(kval);
+ // stream_printf(GDKout,"CMDpf2tijah_node: startNodes: no
node\n");
+ // return GDK_FAIL;
+ container = (oid)kval;
+ if ( debug ) stream_printf(GDKout,"* container =
%d.\n",container);
+ } else {
+ container = (oid)XTRACT_CONT(kval);
+ }
int myindex = container - 1;
/* make it a switch */
@@ -3901,6 +4435,6 @@
pftijah_epilogue(void)
{
}
-@
+@
/* vim:set shiftwidth=4 expandtab: */
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins