Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory
sfp-cvsdas-1.v30.ch3.sourceforge.com:/tmp/cvs-serv28905/modules/pftijah
Modified Files:
Tag: Feb2010
pftijah.mx
Log Message:
propagated changes of Wednesday Feb 10 2010
from the Nov2009 branch to the Feb2010 branch
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2010/02/10 - cornuz: modules/pftijah/pftijah.mx,1.238.2.9
- re-enable shred bit in tj_add2collection_frag (previously disabled by
mistake), to shred documents before indexing when needed
- initialize variable last_pre before the loop for indexing in chunks
- some minor fixes to ADDHELP text
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.249.2.4
retrieving revision 1.249.2.5
diff -u -d -r1.249.2.4 -r1.249.2.5
--- pftijah.mx 8 Feb 2010 12:02:08 -0000 1.249.2.4
+++ pftijah.mx 10 Feb 2010 14:15:09 -0000 1.249.2.5
@@ -999,13 +999,13 @@
ADDHELP("tj_add2collection", "flokstra & rode", "Jan 2007",
"PARAMETERS:\n\
-- str ftiName: the name of the collection.\n
-- str uri_loc: the location of the xml document.\n
-- str uri_name: the name of the xml document (optional).\n
+- str ftiName: the name of the collection.\n\
+- str uri_loc: the location of the xml document.\n\
+- str uri_name: the name of the xml document (optional).\n\
- bit shred: when true the doc is shredded when necessary.\n\
DESCRIPTION:\n\
Add a document to a pftijah collection. The document is indexed and if the \n\
-shred parameter is true it is also shredded in Pathfinder.
+shred parameter is true it is also shredded in Pathfinder. \n\
The index is automatically finalized at the end of the method.",
"pftijah");
PROC tj_add2collection(str ftiName, str uri_loc, str uri_name, bit shred) :
void
@@ -1650,14 +1650,14 @@
PROC tj_add2collection_frag(str ftiName, str uri, str filename, bit shred) :
void
{
var uris := new(str,str).insert(uri, filename);
- return tj_add2collection_frag(ftiName, uris, shred);
+ tj_add2collection_frag(ftiName, uris, shred);
}
ADDHELP("tj_add2collection_frag", "flokstra & rode", "Sept 2009",
"PARAMETERS:\n\
--` str ftiName: the name of the collection.\n
+- str ftiName: the name of the collection.\n\
- BAT[str,str]: the bat containing the [location,name] pairs of the xml
docs.\n\
-- bit shred: when true the doc is shredded when necessary (deprecated).\n\
+- bit shred: when true the doc is shredded when necessary.\n\
DESCRIPTION:\n\
Adds a documents to the index. If needed, the index is split into several
fragments.\n\
Each fragment is finalized after it is filled to its maximum capacity.",
@@ -1665,9 +1665,6 @@
PROC tj_add2collection_frag(str ftiName, BAT[str,str] uri, bit shred) : void
{
if ( verbose ) tj_verbose(HASH +"TJ tj_add2collection_frag(\"%s\")
called.\n",ftiName);
- if (shred)
- ERROR("tj_add2collection_frag: shred bit no longer supported");
-
var coll_lock := tj_get_collection_lock(ftiName);
lock_set(coll_lock);
var err := CATCH({
@@ -1678,6 +1675,37 @@
# get first free collection fragment (first fragment that is not yet
filled completely)
var collBat := _tj_get_collection_frag(ftiName, commitBats);
+ # shred documents if needed
+ if ( shred ) {
+ var pf_collection := _tj_get_parameter2(collBat,"pf_collection");
+ uri@batloop() {
+ var uri_loc := $h;
+ var uri_name := $t;
+ if ( isnil(uri_loc) ) {
+ ERROR("tj_add2collection_frag: should specify doc_uri (and
doc_name).");
+ }
+ if ( isnil(uri_name) ) {
+ uri_name := uri_loc;
+ } else if ( uri_name = "" ) {
+ uri_name := uri_loc;
+ }
+ if (not(bat("doc_name").reverse().exist(uri_name))) {
+ var s_start := usec();
+ if ( isnil(pf_collection) ) {
+ shred_doc(uri_loc,uri_name);
+ } else {
+ shred_doc(uri_loc,uri_name,pf_collection,0LL);
+ }
+ if ( timing ) {
+ ms := (usec() - s_start)/1000;
+ printf(HASH +"TJ tj_add2collection_frag(\"%s\"): shred time =
%lld.%03llds.\n",uri_name,/(ms,1000),%(ms,1000));
+ }
+ } else {
+ if ( verbose ) tj_verbose(HASH +"TJ tj_add2collection_frag
doc(\"%s\") already shredded.\n",uri_name);
+ }
+ }
+ }
+
# set access back to BAT_APPEND
_tj_set_forwardindex_access(collBat, BAT_APPEND);
@@ -1694,7 +1722,7 @@
var first_doc := 0;
var last_doc := uri.count()-1;
- var last_pre;
+ var last_pre := collBat.find("size").count_wrd() + 1;
while(first_doc <= last_doc) {
var uri_chunk := uri.slice(first_doc, first_doc+chunksize-1);
var ws_opt := ws_create(0);
------------------------------------------------------------------------------
SOLARIS 10 is the OS for Data Centers - provides features such as DTrace,
Predictive Self Healing and Award Winning ZFS. Get Solaris 10 NOW
http://p.sf.net/sfu/solaris-dev2dev
_______________________________________________
Monetdb-pf-checkins mailing list
Monetdb-pf-checkins@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins