Update of /cvsroot/monetdb/pathfinder/modules/pftijah
In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv25529

Modified Files:
        pftijah.mx serialize_pftijah.mx 
Log Message:
- bugfixes on incremental indexing
- approved output for new test sets



Index: serialize_pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/serialize_pftijah.mx,v
retrieving revision 1.33
retrieving revision 1.34
diff -u -d -r1.33 -r1.34
--- serialize_pftijah.mx        11 Jan 2007 15:30:42 -0000      1.33
+++ serialize_pftijah.mx        17 Jan 2007 13:06:51 -0000      1.34
@@ -747,8 +747,10 @@
        if ( ! tdb_close(tjctx->tdb) )
                return GDK_FAIL;
 #endif
-       BATkey(BATmirror(tjctx->b_globalTerm), TRUE);
-       BATkey(BATmirror(tjctx->b_globalTag), TRUE);
+       /* BATkey(BATmirror(tjctx->b_globalTerm), TRUE); */
+       /* BATkey(BATmirror(tjctx->b_globalTag), TRUE); */
+       BATkey(BATmirror(tjctx->b_collPre), FALSE);
+       BATkey(BATmirror(tjctx->b_collSize), FALSE);
        BBPunfix(BBPcacheid(tjctx->b_globalTerm));
        BBPunfix(BBPcacheid(tjctx->b_globalTag));
        BBPunfix(BBPcacheid(tjctx->b_docName));

Index: pftijah.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/modules/pftijah/pftijah.mx,v
retrieving revision 1.86
retrieving revision 1.87
diff -u -d -r1.86 -r1.87
--- pftijah.mx  17 Jan 2007 12:34:13 -0000      1.86
+++ pftijah.mx  17 Jan 2007 13:06:51 -0000      1.87
@@ -86,8 +86,8 @@
 creates an offset index.
 "
 
-.COMMAND mergeindex( BAT[any,oid] tid_pre, BAT[void,oid] index, BAT[void, oid] 
pre, BAT[void,oid] new_index)
-               : BAT[oid,oid] = CMDmergeindex;
+.COMMAND mergeindex_old( BAT[any,oid] tid_pre, BAT[void,oid] index, BAT[void, 
oid] pre, BAT[void,oid] new_index)
+               : BAT[oid,oid] = CMDmergeindex_old;
 "PARAMETERS:
 BAT[oid,oid] - new tid_pre to merge with the old index
 BAT[void,oid] - join index bat with value-offset
@@ -97,6 +97,17 @@
 the operation merges a new sorted tid_pre with an existing offset index.
 "
 
+.COMMAND mergeindex( BAT[oid,oid] tid_pre, BAT[void,oid] index, BAT[void, oid] 
pre, int indsize)
+               : BAT[void,bat] = CMDmergeindex;
+"PARAMETERS:
+BAT[oid,oid] - new tid_pre to merge with the old index
+BAT[void,oid] - old index bat with value-offset
+BAT[void,oid] - old posting lists (pre order lists)
+int - size of new (dense) offset index.
+DESCRIPTION:
+the operation merges a new sorted tid_pre with an existing offset index.
+"
+
 .COMMAND indexfetchjoin( BAT[any,oid] tid, BAT[void,oid] index, BAT[void, oid] 
pre)
                : BAT[void,oid] = CMDindexfetchjoin;
 "PARAMETERS:
@@ -2340,48 +2351,48 @@
         {
                var tmp := tids.kdiff(bat("tj_" + collName + "_pfpre"));
                 tmp := tmp.reverse().sort();
-                var newindex := bat("tj_globalTerms").hmark([EMAIL 
PROTECTED]).project([EMAIL PROTECTED]);
-                newindex.access(BAT_WRITE);
-                newindex.append([EMAIL PROTECTED]);
-                var newterms := mergeindex(tmp, bat("tj_" + collName + 
"_TermIndex"),
-                                                bat("tj_" + collName + 
"_Terms"),
-                                                newindex);       
+                var i := mergeindex(tmp, bat("tj_" + collName + "_TermIndex"),
+                                         bat("tj_" + collName + "_Terms"),
+                                         bat("tj_globalTerms").count() + 1);   
    
                bat("tj_" + collName + "_TermIndex").persists(false);
                bat("tj_" + collName + "_Terms").persists(false);
                 commit();
                 # create _TermIndex and _Terms here
-               newindex.persists(true).bbpname("tj_" + collName + 
"_TermIndex");
-               newterms.persists(true).bbpname("tj_" + collName + "_Terms");
-               tmp := nil;
+               var newindex := i.fetch(0);
+               newindex.persists(true).bbpname("tj_" + collName + 
"_TermIndex");
+               var newpre := i.fetch(1);
+               newpre.persists(true).bbpname("tj_" + collName + "_Terms");
+               i := nil;
+               tmp := nil;
                newindex.access(BAT_READ);
                newindex.mmap(1);
-               newterms.access(BAT_READ);
-               newterms.mmap(1);
+               newpre.access(BAT_READ);
+               newpre.mmap(1);
                newindex := nil;
-               newterms := nil;
+               newpre := nil;
                commit();
                
                tmp := tids.semijoin(bat("tj_" + collName + "_pfpre"));
                 tmp := tmp.reverse().sort();
-                var newindex := bat("tj_globalTags").hmark([EMAIL 
PROTECTED]).project([EMAIL PROTECTED]);
-                newindex.access(BAT_WRITE);
-                newindex.append([EMAIL PROTECTED]);
-                var newtags := mergeindex(tmp, bat("tj_" + collName + 
"_TagIndex"),
-                                               bat("tj_" + collName + "_Tags"),
-                                               newindex);       
+                var i := mergeindex(tmp, bat("tj_" + collName + "_TagIndex"),
+                                         bat("tj_" + collName + "_Tags"),
+                                         bat("tj_globalTags").count() + 1);    
   
                bat("tj_" + collName + "_TagIndex").persists(false);
                bat("tj_" + collName + "_Tags").persists(false);
                 commit();
                 # create _TagIndex and _Tags here
+               var newindex := i.fetch(0);
                newindex.persists(true).bbpname("tj_" + collName + "_TagIndex");
-               newtags.persists(true).bbpname("tj_" + collName + "_Tags");
-               tmp := nil;
+               var newpre := i.fetch(1);
+               newpre.persists(true).bbpname("tj_" + collName + "_Tags");
+               i := nil;
+               tmp := nil;
                newindex.access(BAT_READ);
                newindex.mmap(1);
-               newtags.access(BAT_READ);
-               newtags.mmap(1);
+               newpre.access(BAT_READ);
+               newpre.mmap(1);
                newindex := nil;
-               newtags := nil;
+               newpre := nil;
                commit();
         }
         else # create new index
@@ -2792,7 +2803,154 @@
        return GDK_SUCCEED;
 }      
 
-int CMDmergeindex ( BAT** result, BAT* tid_pre, BAT* index, BAT* pre, BAT* 
new_index )
+/*
+ * CMDmergeindex: merge a sorted list of new (id, pre) pairs into an existing
+ * offset index and build a fresh index / posting-list pair.
+ *
+ * result   - out: BAT[void,bat] with the new offset index at position 0 and
+ *            the new posting list at position 1.
+ * tidpre   - new (id, pre) pairs; the head (id) must be ordered.
+ * oldindex - old offset index; each tail is a position in oldpre.  The
+ *            postings of an entry end where the next entry starts, so the
+ *            last entry is only a closing limit and not a real id.
+ * oldpre   - old posting lists (pre values), concatenated per id.
+ * indsize  - number of entries of the new offset index: ids 0..*indsize-2
+ *            followed by one closing limit entry.
+ *
+ * Returns GDK_SUCCEED, or GDK_FAIL when tidpre is not head-ordered, a BAT
+ * cannot be allocated or a BUN insert fails.  BATcheck presumably rejects
+ * missing input BATs as well -- TODO confirm against the macro definition.
+ */
+int CMDmergeindex ( BAT** result, BAT* tidpre, BAT* oldindex, BAT* oldpre, int* indsize )
+{
+       char *name = "TJmergeindex";
+       BAT *res = NULL;
+       BAT *newindex = NULL;
+       BAT *newpre = NULL;
+       int bs_tidpre, bs_oldindex, bs_oldpre, bs_newindex, bs_newpre, bs_res;
+       int i,j, ressize = 0;
+       BUN lst_tidpre, lst_oldindex, lst_copy, lst_newindex, lst_newpre, lst_res;
+       BUN cur_tidpre, cur_oldindex, cur_oldpre;
+       oid tid;
+
+       /* --------------------------- checks ---------------------------------- */
+
+       BATcheck(tidpre, name);
+       BATcheck(oldindex, name);
+       BATcheck(oldpre, name);
+
+       /* the merge below walks tidpre by head value, so the head must be sorted */
+       if (!(BAThordered(tidpre) & 1))
+       {
+               GDKerror("%s: term-bat must be ordered on head.\n", name);
+               return GDK_FAIL;
+       }
+
+       /* ---------------------------- inits ---------------------------------- */
+
+       ressize = 2;
+       res = BATnew(TYPE_void, TYPE_bat, ressize);
+       if (res == NULL)
+       {
+               GDKerror("%s: could not allocate a result BAT[void,oid] of size %d.\n",
+                        name, ressize);
+               return(GDK_FAIL);
+       }
+
+       /* worst case: every old posting plus every new pair ends up in newpre */
+       ressize = BATcount(tidpre) + BATcount(oldpre);
+       newpre = BATnew(TYPE_void, TYPE_oid, ressize);
+       if (newpre == NULL)
+       {
+               GDKerror("%s: could not allocate a result BAT[void,oid] of size %d.\n",
+                        name, ressize);
+               BBPunfix(res->batCacheid);
+               return(GDK_FAIL);
+       }
+
+       ressize = *indsize;
+       newindex = BATnew(TYPE_void, TYPE_oid, ressize);
+       if (newindex == NULL)
+       {
+               GDKerror("%s: could not allocate a result BAT[void,oid] of size %d.\n",
+                        name, ressize);
+               BBPunfix(newpre->batCacheid);
+               BBPunfix(res->batCacheid);
+               return(GDK_FAIL);
+       }
+
+       bs_tidpre = BUNsize(tidpre);
+       bs_oldindex = BUNsize(oldindex);
+       bs_oldpre = BUNsize(oldpre);
+       bs_newindex = BUNsize(newindex);
+       bs_newpre = BUNsize(newpre);
+       bs_res = BUNsize(res);
+
+       lst_tidpre = BUNlast(tidpre);
+       lst_oldindex = BUNlast(oldindex) - bs_oldindex; /* last index is not a real term */
+       lst_newindex = BUNlast(newindex);
+       lst_newpre = BUNlast(newpre);
+       lst_res = BUNlast(res);
+
+       cur_tidpre = BUNfirst(tidpre);
+       cur_oldindex = BUNfirst(oldindex);
+       cur_oldpre = BUNfirst(oldpre);
+
+       /* ----------------------------- main ---------------------------------- */
+
+       j = *indsize - 1;
+       for(i = 0; i < j; i++)
+       {
+               tid = (oid) i;
+               /* postings of this id start at the current fill position of newpre */
+               *(oid*)lst_newindex = BUNindex(newpre,lst_newpre);
+               lst_newindex += bs_newindex;
+
+               /* copy old nodes to new index */
+               if (cur_oldindex < lst_oldindex && tid == *(oid*)BUNhead(oldindex, cur_oldindex))
+               {
+                       /* the next index entry marks the end of this id's old
+                        * postings; it is an oid, just like the values written
+                        * to newindex above, so read it as one */
+                       lst_copy = BUNptr(oldpre, *(oid*) BUNtail(oldindex, cur_oldindex + bs_oldindex));
+                       while (cur_oldpre < lst_copy)
+                       {
+                               *(oid*)lst_newpre = *(oid*)BUNtail(oldpre, cur_oldpre);
+                               lst_newpre += bs_newpre;
+                               cur_oldpre += bs_oldpre;
+                       }
+                       cur_oldindex += bs_oldindex;
+               }
+               /* merge-in new nodes */
+               while(cur_tidpre < lst_tidpre && tid == *(oid*)BUNhead(tidpre, cur_tidpre))
+               {
+                       *(oid*)lst_newpre = *(oid*)BUNtail(tidpre, cur_tidpre);
+                       lst_newpre += bs_newpre;
+                       cur_tidpre += bs_tidpre;
+               }
+       }
+
+       /* write limit of index as last item to index bat */
+       *(oid*)lst_newindex = BUNindex(newpre,lst_newpre);
+       lst_newindex += bs_newindex;
+
+       /* ---------------------------- tidy up --------------------------------- */
+
+       /* the BUNs were written directly, so fix up heap size, count and properties */
+       newindex->batBuns->free = lst_newindex - newindex->batBuns->base;
+       BATsetcount(newindex, newindex->batBuns->free / bs_newindex);
+       newindex->batDirty = TRUE;
+       newindex->hsorted = GDK_SORTED;
+       newindex->tsorted = GDK_SORTED;
+       BATkey(newindex, TRUE);
+       BATkey(BATmirror(newindex), FALSE);
+       BATseqbase(newindex, (oid)0);
+
+       newpre->batBuns->free = lst_newpre - newpre->batBuns->base;
+       BATsetcount(newpre, newpre->batBuns->free / bs_newpre);
+       newpre->batDirty = TRUE;
+       newpre->hsorted = GDK_SORTED;
+       newpre->tsorted = FALSE;
+       BATkey(newpre, TRUE);
+       BATkey(BATmirror(newpre), TRUE);
+       BATseqbase(newpre, (oid)0);
+
+       /* insert bats in result */
+       BATseqbase(res, (oid)0);
+       voidany_bunfastins_nocheck_noinc(res, lst_res, 0, &newindex->batCacheid);
+       BBPunfix(newindex->batCacheid);
+       lst_res += bs_res;
+       voidany_bunfastins_nocheck_noinc(res, lst_res, 0, &newpre->batCacheid);
+       BBPunfix(newpre->batCacheid);
+       lst_res += bs_res;
+
+       res->batBuns->free = lst_res - res->batBuns->base;
+       BATsetcount(res, res->batBuns->free / bs_res);
+       res->batDirty = TRUE;
+       BATkey(res, TRUE);
+       BATkey(BATmirror(res), TRUE);
+       res->hsorted = GDK_SORTED;
+       res->tsorted = FALSE;
+
+       *result = res;
+       return GDK_SUCCEED;
+
+       /* NOTE(review): res, newindex and newpre are not released on this path;
+        * which of them still hold a reference depends on how far the inserts
+        * above got -- confirm before adding cleanup here. */
+bunins_failed:
+       GDKerror("%s: BUN insert failed.\n", name);
+       return(GDK_FAIL);
+
+}
+
+int CMDmergeindex_old ( BAT** result, BAT* tid_pre, BAT* index, BAT* pre, BAT* 
new_index )
 {
        char *name = "TJmergeindex";
        BAT *res = NULL;


-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to