Author: j16sdiz
Date: 2008-12-29 15:56:55 +0000 (Mon, 29 Dec 2008)
New Revision: 24821

Added:
   trunk/plugins/XMLSpider/db/
   trunk/plugins/XMLSpider/db/Config.java
   trunk/plugins/XMLSpider/db/Page.java
   trunk/plugins/XMLSpider/db/PageTimeStampComparator.java
   trunk/plugins/XMLSpider/db/PerstRoot.java
   trunk/plugins/XMLSpider/db/Status.java
   trunk/plugins/XMLSpider/db/Term.java
   trunk/plugins/XMLSpider/db/TermPosition.java
Removed:
   trunk/plugins/XMLSpider/Config.java
   trunk/plugins/XMLSpider/MaxPageId.java
   trunk/plugins/XMLSpider/Page.java
   trunk/plugins/XMLSpider/Status.java
   trunk/plugins/XMLSpider/Term.java
   trunk/plugins/XMLSpider/TermPosition.java
Modified:
   trunk/plugins/XMLSpider/IndexWriter.java
   trunk/plugins/XMLSpider/XMLSpider.java
   trunk/plugins/XMLSpider/web/ConfigPage.java
   trunk/plugins/XMLSpider/web/MainPage.java
Log:
Port the whole thing to PERST

Less disk I/O, faster processing, lower CPU usage, messier code

Deleted: trunk/plugins/XMLSpider/Config.java
===================================================================
--- trunk/plugins/XMLSpider/Config.java 2008-12-29 13:03:39 UTC (rev 24820)
+++ trunk/plugins/XMLSpider/Config.java 2008-12-29 15:56:55 UTC (rev 24821)
@@ -1,166 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-import freenet.node.RequestStarter;
-import freenet.support.Logger;
-
-public class Config implements Cloneable {
-       /**
-        * Directory where the generated indices are stored
-        */
-       private String indexDir;
-       private int indexMaxEntries;
-       private long indexSubindexMaxSize;
-
-       private String indexTitle;
-       private String indexOwner;
-       private String indexOwnerEmail;
-
-       private int maxShownURIs;
-       private int maxParallelRequests;
-       private String[] badlistedExtensions;
-       private short requestPriority;
-
-       public Config() {
-       } // for db4o
-
-       public Config(boolean setDefault) {
-               if (!setDefault)
-                       return;
-
-               indexDir = "myindex7/";
-               indexMaxEntries = 2000;
-               indexSubindexMaxSize = 4 * 1024 * 1024;
-
-               indexTitle = "XMLSpider index";
-               indexOwner = "Freenet";
-               indexOwnerEmail = "(nil)";
-
-               maxShownURIs = 15;
-
-               maxParallelRequests = 100;
-
-               badlistedExtensions = new String[] { //
-               ".ico", ".bmp", ".png", ".jpg", ".gif", // image
-                       ".zip", ".jar", ".gz", ".bz2", ".rar", // archive
-                       ".7z", ".rar", ".arj", ".rpm", ".deb", //
-                       ".xpi", ".ace", ".cab", ".lza", ".lzh", //
-                       ".ace", ".exe", ".iso", // binary
-                       ".mpg", ".ogg", ".mp3", ".avi", // media
-                       ".css", ".sig" // other
-               };
-
-               requestPriority = 
RequestStarter.IMMEDIATE_SPLITFILE_PRIORITY_CLASS;
-       }
-
-       public synchronized void setValue(Config config) {
-               synchronized (config) {
-                       indexDir = config.indexDir;
-                       indexMaxEntries = config.indexMaxEntries;
-                       indexSubindexMaxSize = config.indexSubindexMaxSize;
-
-                       indexTitle = config.indexTitle;
-                       indexOwner = config.indexOwner;
-                       indexOwnerEmail = config.indexOwnerEmail;
-
-                       maxShownURIs = config.maxShownURIs;
-
-                       maxParallelRequests = config.maxParallelRequests;
-
-                       badlistedExtensions = config.badlistedExtensions;
-
-                       requestPriority = config.requestPriority;
-               }
-       }
-
-       public synchronized Config clone() {
-               try {
-                       return (Config) super.clone();
-               } catch (CloneNotSupportedException e) {
-                       Logger.error(this, "impossible:", e);
-                       throw new RuntimeException(e);
-               }
-       }
-
-       public synchronized void setIndexDir(String indexDir) {
-               this.indexDir = indexDir;
-       }
-
-       public synchronized String getIndexDir() {
-               return indexDir;
-       }
-
-       public synchronized void setIndexMaxEntries(int indexMaxEntries) {
-               this.indexMaxEntries = indexMaxEntries;
-       }
-
-       public synchronized int getIndexMaxEntries() {
-               return indexMaxEntries;
-       }
-
-       public synchronized void setIndexSubindexMaxSize(long 
indexSubindexMaxSize) {
-               this.indexSubindexMaxSize = indexSubindexMaxSize;
-       }
-
-       public synchronized long getIndexSubindexMaxSize() {
-               return indexSubindexMaxSize;
-       }
-
-       public synchronized void setIndexTitle(String indexTitle) {
-               this.indexTitle = indexTitle;
-       }
-
-       public synchronized String getIndexTitle() {
-               return indexTitle;
-       }
-
-       public synchronized void setIndexOwner(String indexOwner) {
-               this.indexOwner = indexOwner;
-       }
-
-       public synchronized String getIndexOwner() {
-               return indexOwner;
-       }
-
-       public synchronized void setIndexOwnerEmail(String indexOwnerEmail) {
-               this.indexOwnerEmail = indexOwnerEmail;
-       }
-
-       public synchronized void setMaxShownURIs(int maxShownURIs) {
-               this.maxShownURIs = maxShownURIs;
-       }
-
-       public synchronized int getMaxShownURIs() {
-               return maxShownURIs;
-       }
-
-       public synchronized String getIndexOwnerEmail() {
-               return indexOwnerEmail;
-       }
-
-       public synchronized void setMaxParallelRequests(int 
maxParallelRequests) {
-               this.maxParallelRequests = maxParallelRequests;
-       }
-
-       public synchronized int getMaxParallelRequests() {
-               return maxParallelRequests;
-       }
-
-       public synchronized void setBadlistedExtensions(String[] 
badlistedExtensions) {
-               this.badlistedExtensions = badlistedExtensions;
-       }
-
-       public synchronized String[] getBadlistedExtensions() {
-               return badlistedExtensions;
-       }
-
-       public synchronized void setRequestPriority(short requestPriority) {
-               this.requestPriority = requestPriority;
-       }
-
-       public synchronized short getRequestPriority() {
-               return requestPriority;
-       }
-}
\ No newline at end of file

Modified: trunk/plugins/XMLSpider/IndexWriter.java
===================================================================
--- trunk/plugins/XMLSpider/IndexWriter.java    2008-12-29 13:03:39 UTC (rev 
24820)
+++ trunk/plugins/XMLSpider/IndexWriter.java    2008-12-29 15:56:55 UTC (rev 
24821)
@@ -10,6 +10,7 @@
 import java.io.IOException;
 import java.security.NoSuchAlgorithmException;
 import java.util.List;
+import java.util.Set;
 import java.util.Vector;
 
 import javax.xml.parsers.DocumentBuilder;
@@ -25,9 +26,10 @@
 import org.w3c.dom.Element;
 import org.w3c.dom.Text;
 
-import com.db4o.ObjectSet;
-import com.db4o.query.Query;
-
+import plugins.XMLSpider.db.Config;
+import plugins.XMLSpider.db.Page;
+import plugins.XMLSpider.db.Term;
+import plugins.XMLSpider.db.TermPosition;
 import freenet.support.Logger;
 
 /**
@@ -201,25 +203,22 @@
        private void makeSubIndices(Config config) throws Exception {
                Logger.normal(this, "Generating index...");
 
-               Query query = xmlSpider.db.query();
-               query.constrain(Term.class);
-               query.descend("md5").orderAscending();
-               @SuppressWarnings("unchecked")
-               ObjectSet<Term> termSet = query.execute();
+               List<Term> termList = xmlSpider.getDbRoot().getTermList();
+               int termCount = xmlSpider.getDbRoot().getTermCount();
 
                indices = new Vector<String>();
-               int prefix = (int) ((Math.log(termSet.size()) - 
Math.log(config.getIndexMaxEntries())) / Math.log(16)) - 1;
+               int prefix = (int) ((Math.log(termCount) - 
Math.log(config.getIndexMaxEntries())) / Math.log(16)) - 1;
                if (prefix <= 0)
                        prefix = 1;
                match = 1;
                Vector<Term> list = new Vector<Term>();
 
-               Term term0 = termSet.get(0);
-               String currentPrefix = term0.md5.substring(0, prefix);
+               Term term0 = termList.get(0);
+               String currentPrefix = term0.getMD5().substring(0, prefix);
 
                int i = 0;
-               for (Term term : termSet) {
-                       String key = term.md5;
+               for (Term term : termList) {
+                       String key = term.getMD5();
                        //create a list of the words to be added in the same 
subindex
                        if (key.startsWith(currentPrefix)) {
                                i++;
@@ -265,11 +264,11 @@
                        match = p + 1;
                int prefix = p + 1;
                int i = 0;
-               String str = list.get(i).md5;
+               String str = list.get(i).getMD5();
                int index = 0;
                while (i < list.size()) {
                        Term term = list.get(i);
-                       String key = term.md5;
+                       String key = term.getMD5();
                        if ((key.substring(0, prefix)).equals(str.substring(0, 
prefix))) {
                                i++;
                        } else {
@@ -296,7 +295,7 @@
         * @throws Exception
         */
        protected void generateXML(Config config, List<Term> list, int prefix) 
throws TooBigIndexException, Exception {
-               String p = list.get(0).md5.substring(0, prefix);
+               String p = list.get(0).getMD5().substring(0, prefix);
                indices.add(p);
                File outputFile = new File(config.getIndexDir() + "index_" + p 
+ ".xml");
                BufferedOutputStream fos = new BufferedOutputStream(new 
FileOutputStream(outputFile));
@@ -343,21 +342,15 @@
                        for (int i = 0; i < list.size(); i++) {
                                Element wordElement = 
xmlDoc.createElement("word");
                                Term term = list.get(i);
-                               wordElement.setAttribute("v", term.word);
+                               wordElement.setAttribute("v", term.getWord());
 
-                               Query query = xmlSpider.db.query();
-                               query.constrain(TermPosition.class);
+                               Set<Page> pages = term.getPages();
 
-                               query.descend("word").constrain(term.word);
-                               @SuppressWarnings("unchecked")
-                               ObjectSet<TermPosition> set = query.execute();
-
-                               for (TermPosition termPos : set) {
+                               for (Page page : pages) {
+                                       TermPosition termPos = 
page.getTermPosition(term);
+                                       
                                        synchronized (termPos) {
-                                               Page page = 
xmlSpider.getPageById(termPos.pageId);
-
                                                synchronized (page) {
-
                                                        /*
                                                         * adding file 
information uriElement - lists the id of the file
                                                         * containing a 
particular word fileElement - lists the id,key,title of
@@ -365,10 +358,11 @@
                                                         */
                                                        Element uriElement = 
xmlDoc.createElement("file");
                                                        Element fileElement = 
xmlDoc.createElement("file");
-                                                       
uriElement.setAttribute("id", Long.toString(page.id));
-                                                       
fileElement.setAttribute("id", Long.toString(page.id));
-                                                       
fileElement.setAttribute("key", page.uri);
-                                                       
fileElement.setAttribute("title", page.pageTitle != null ? page.pageTitle : 
page.uri);
+                                                       
uriElement.setAttribute("id", Long.toString(page.getId()));
+                                                       
fileElement.setAttribute("id", Long.toString(page.getId()));
+                                                       
fileElement.setAttribute("key", page.getURI());
+                                                       
fileElement.setAttribute("title", page.getPageTitle() != null ? 
page.getPageTitle() : page
+                                                               .getURI());
 
                                                        /* Position by position 
*/
                                                        int[] positions = 
termPos.positions;
@@ -382,8 +376,8 @@
                                                        }
                                                        
uriElement.appendChild(xmlDoc.createTextNode(positionList.toString()));
                                                        
wordElement.appendChild(uriElement);
-                                                       if 
(!fileid.contains(page.id)) {
-                                                               
fileid.add(page.id);
+                                                       if 
(!fileid.contains(page.getId())) {
+                                                               
fileid.add(page.getId());
                                                                
filesElement.appendChild(fileElement);
                                                        }
                                                }

Deleted: trunk/plugins/XMLSpider/MaxPageId.java
===================================================================
--- trunk/plugins/XMLSpider/MaxPageId.java      2008-12-29 13:03:39 UTC (rev 
24820)
+++ trunk/plugins/XMLSpider/MaxPageId.java      2008-12-29 15:56:55 UTC (rev 
24821)
@@ -1,19 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-class MaxPageId {
-       volatile long v;
-
-       MaxPageId() {
-       }
-
-       MaxPageId(long v) {
-               this.v = v;
-       }
-       
-       synchronized long incrementAndGet() {
-               return ++v;
-       }
-}
\ No newline at end of file

Deleted: trunk/plugins/XMLSpider/Page.java
===================================================================
--- trunk/plugins/XMLSpider/Page.java   2008-12-29 13:03:39 UTC (rev 24820)
+++ trunk/plugins/XMLSpider/Page.java   2008-12-29 15:56:55 UTC (rev 24821)
@@ -1,53 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-public class Page {
-       /** Page Id */
-       public long id;
-       /** URI of the page */
-       public String uri;
-       /** Title */
-       public String pageTitle;
-       /** Status */
-       public Status status;
-       /** Last Change Time */
-       public long lastChange;
-       /** Comment, for debugging */
-       public String comment;
-
-       public Page() {}        // for db4o callConstructors(true)
-
-       public Page(long id, String uri, String comment) {
-               this.id = id;
-               this.uri = uri;
-               this.comment = comment;
-               status = Status.QUEUED;
-               lastChange = System.currentTimeMillis();
-       }
-
-       @Override
-       public int hashCode() {
-               return (int) (id ^ (id >>> 32));
-       }
-
-       @Override
-       public boolean equals(Object obj) {
-               if (this == obj)
-                       return true;
-               if (obj == null)
-                       return false;
-               if (getClass() != obj.getClass())
-                       return false;
-
-               return id == ((Page) obj).id;
-       }
-
-       @Override
-       public String toString() {
-               return "[PAGE: id=" + id + ", title=" + pageTitle + ", uri=" + 
uri + ", status=" + status + ", comment="
-               + comment
-               + "]";
-       }
-}

Deleted: trunk/plugins/XMLSpider/Status.java
===================================================================
--- trunk/plugins/XMLSpider/Status.java 2008-12-29 13:03:39 UTC (rev 24820)
+++ trunk/plugins/XMLSpider/Status.java 2008-12-29 15:56:55 UTC (rev 24821)
@@ -1,9 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-public enum Status {
-       /** For simplicity, running is also mark as QUEUED */
-       QUEUED, SUCCEEDED, FAILED
-}
\ No newline at end of file

Deleted: trunk/plugins/XMLSpider/Term.java
===================================================================
--- trunk/plugins/XMLSpider/Term.java   2008-12-29 13:03:39 UTC (rev 24820)
+++ trunk/plugins/XMLSpider/Term.java   2008-12-29 15:56:55 UTC (rev 24821)
@@ -1,57 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-import java.io.UnsupportedEncodingException;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-
-class Term {
-       /** MD5 of the term */
-       String md5;
-       /** Term */
-       String word;
-
-       public Term(String word) {
-               this.word = word;
-               md5 = MD5(word);
-       }
-
-       public Term() {
-       }
-       
-       /*
-        * calculate the md5 for a given string
-        */
-       public static String MD5(String text) {
-               try {
-                       MessageDigest md = MessageDigest.getInstance("MD5");
-                       byte[] md5hash = new byte[32];
-                       byte[] b = text.getBytes("UTF-8");
-                       md.update(b, 0, b.length);
-                       md5hash = md.digest();
-                       return convertToHex(md5hash);
-               } catch (UnsupportedEncodingException e) {
-                       throw new RuntimeException("UTF-8 not supported", e);
-               } catch (NoSuchAlgorithmException e) {
-                       throw new RuntimeException("MD5 not supported", e);
-               }
-       }
-
-       public static String convertToHex(byte[] data) {
-               StringBuilder buf = new StringBuilder();
-               for (int i = 0; i < data.length; i++) {
-                       int halfbyte = (data[i] >>> 4) & 0x0F;
-                       int two_halfs = 0;
-                       do {
-                               if ((0 <= halfbyte) && (halfbyte <= 9))
-                                       buf.append((char) ('0' + halfbyte));
-                               else
-                                       buf.append((char) ('a' + (halfbyte - 
10)));
-                               halfbyte = data[i] & 0x0F;
-                       } while (two_halfs++ < 1);
-               }
-               return buf.toString();
-       }
-}
\ No newline at end of file

Deleted: trunk/plugins/XMLSpider/TermPosition.java
===================================================================
--- trunk/plugins/XMLSpider/TermPosition.java   2008-12-29 13:03:39 UTC (rev 
24820)
+++ trunk/plugins/XMLSpider/TermPosition.java   2008-12-29 15:56:55 UTC (rev 
24821)
@@ -1,16 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-class TermPosition {
-       /** Term */
-       String word;
-       /** Page id */
-       long pageId;
-       /** Position List */
-       int[] positions;
-
-       public TermPosition() {
-       }
-}
\ No newline at end of file

Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java      2008-12-29 13:03:39 UTC (rev 
24820)
+++ trunk/plugins/XMLSpider/XMLSpider.java      2008-12-29 15:56:55 UTC (rev 
24821)
@@ -13,7 +13,6 @@
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
-import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -22,17 +21,15 @@
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
 
+import plugins.XMLSpider.db.Config;
+import plugins.XMLSpider.db.Page;
+import plugins.XMLSpider.db.PerstRoot;
+import plugins.XMLSpider.db.Status;
+import plugins.XMLSpider.db.Term;
+import plugins.XMLSpider.db.TermPosition;
+import plugins.XMLSpider.org.garret.perst.Storage;
+import plugins.XMLSpider.org.garret.perst.StorageFactory;
 import plugins.XMLSpider.web.WebInterface;
-
-import com.db4o.Db4o;
-import com.db4o.ObjectContainer;
-import com.db4o.ObjectSet;
-import com.db4o.config.Configuration;
-import com.db4o.config.QueryEvaluationMode;
-import com.db4o.diagnostic.DiagnosticToConsole;
-import com.db4o.query.Query;
-import com.db4o.reflect.jdk.JdkReflector;
-
 import freenet.client.ClientMetadata;
 import freenet.client.FetchContext;
 import freenet.client.FetchException;
@@ -73,29 +70,19 @@
  *  
  */
 public class XMLSpider implements FredPlugin, FredPluginHTTP, 
FredPluginThreadless, FredPluginVersioned, FredPluginL10n, USKCallback {
-       private Config config;
-
        public Config getConfig() {
                // always return a clone, never allow changing directly
-               return config.clone();
+               return root.getConfig().clone();
        }
 
        // Set config asynchronously
        public void setConfig(Config config) {
                callbackExecutor.execute(new SetConfigCallback(config));
        }
-       
-       public synchronized long getNextPageId() {
-               long x = maxPageId.incrementAndGet();
-               db.store(maxPageId);
-               return x;
-       }
 
        /** Document ID of fetching documents */
        protected Map<Page, ClientGetter> runningFetch = 
Collections.synchronizedMap(new HashMap<Page, ClientGetter>());
 
-       protected MaxPageId maxPageId;
-
        /**
         * Lists the allowed mime types of the fetched page. 
         */
@@ -120,41 +107,33 @@
         * @param uri the new uri that needs to be fetched for further indexing
         */
        public void queueURI(FreenetURI uri, String comment, boolean force) {
-               String sURI = uri.toString();
-               for (String ext : config.getBadlistedExtensions())
-                       if (sURI.endsWith(ext))
-                               return; // be smart
+               db.beginThreadTransaction(Storage.EXCLUSIVE_TRANSACTION);
+               try {
+                       String sURI = uri.toString();
+                       for (String ext : 
root.getConfig().getBadlistedExtensions())
+                               if (sURI.endsWith(ext))
+                                       return; // be smart
 
-               if (uri.isUSK()) {
-                       if(uri.getSuggestedEdition() < 0)
-                               uri = uri.setSuggestedEdition((-1)* 
uri.getSuggestedEdition());
-                       try{
-                               uri = ((USK.create(uri)).getSSK()).getURI();
-                               
(ctx.uskManager).subscribe(USK.create(uri),this, false, this);  
+                       if (uri.isUSK()) {
+                               if (uri.getSuggestedEdition() < 0)
+                                       uri = uri.setSuggestedEdition((-1) * 
uri.getSuggestedEdition());
+                               try {
+                                       uri = 
((USK.create(uri)).getSSK()).getURI();
+                                       
(ctx.uskManager).subscribe(USK.create(uri), this, false, this);
+                               } catch (Exception e) {
+                               }
                        }
-                       catch(Exception e){}
-               }
 
-               synchronized (this) {
-                       Page page = getPageByURI(uri);
-                       if (page == null) {
-                               page = new Page(getNextPageId(), 
uri.toString(), comment);
-
-                               db.store(page);
-                       } else if (force) {
-                               synchronized (page) {
-                                       page.status = Status.QUEUED;
-                                       page.lastChange = 
System.currentTimeMillis();
-
-                                       db.store(page);
-                               }
+                       Page page = root.getPageByURI(uri, true, comment);
+                       if (force && page.getStatus() != Status.QUEUED) {
+                               page.setStatus(Status.QUEUED);
+                               page.setComment(comment);
                        }
+               } finally {
+                       db.endThreadTransaction();
                }
        }
 
-       protected List<Page> queuedRequestCache = new ArrayList<Page>();
-       protected long lastPrefetchedTimeStamp = -1; 
-       
        public void startSomeRequests() {
                ArrayList<ClientGetter> toStart = null;
                synchronized (this) {
@@ -163,62 +142,29 @@
                        synchronized (runningFetch) {
                                int running = runningFetch.size();
 
-                               if (running >= config.getMaxParallelRequests())
+                               if (running >= 
root.getConfig().getMaxParallelRequests())
                                        return;
 
-                               // prefetch 2 * config.getMaxParallelRequests() 
entries
-                               if (queuedRequestCache.isEmpty()) {
-                                       Query query = db.query();
-                                       query.constrain(Page.class);
-                                       
query.descend("status").constrain(Status.QUEUED);
-                                       if (lastPrefetchedTimeStamp != -1) {
-                                               
query.descend("lastChange").constrain(lastPrefetchedTimeStamp - 1000).greater();
-                                               
query.descend("lastChange").constrain(lastPrefetchedTimeStamp + 1800 * 
1000).smaller();
-                                       }                                       
        
-                                       
query.descend("lastChange").orderAscending();
-                                       @SuppressWarnings("unchecked")
-                                       ObjectSet<Page> queuedSet = 
query.execute();
-                                       
-                                       
System.out.println("lastPrefetchedTimeStamp=" + lastPrefetchedTimeStamp + ", 
BLAR = "
-                                               + queuedSet.size());
-                                       if (lastPrefetchedTimeStamp != -1 && 
queuedSet.isEmpty()) {
-                                               lastPrefetchedTimeStamp = -1;
-                                               startSomeRequests();
-                                               return;
-                                       }
-
-                                       while (queuedRequestCache.size() < 
config.getMaxParallelRequests() * 2 && queuedSet.hasNext()) {
-                                               Page page = queuedSet.next();
-                                               assert page.status == 
Status.QUEUED;
-                                               if 
(!runningFetch.containsKey(page)) {
-                                                       
queuedRequestCache.add(page);
-                                                       
-                                                       if (page.lastChange > 
lastPrefetchedTimeStamp)
-                                                               
lastPrefetchedTimeStamp = page.lastChange;
-                                               }
-                                       }
-                               }
-
                                // perpare to start
-                               toStart = new 
ArrayList<ClientGetter>(config.getMaxParallelRequests() - running);
-                               Iterator<Page> it = 
queuedRequestCache.iterator();
+                               toStart = new 
ArrayList<ClientGetter>(root.getConfig().getMaxParallelRequests() - running);
+                               synchronized (root) {
+                                       Iterator<Page> it = 
root.getPages(Status.QUEUED);
 
-                               while (running + toStart.size() < 
config.getMaxParallelRequests() && it.hasNext()) {
-                                       Page page = it.next();
-                                       it.remove();
+                                       while (running + toStart.size() < 
root.getConfig().getMaxParallelRequests() && it.hasNext()) {
+                                               Page page = it.next();
+                                               if 
(runningFetch.containsKey(page))
+                                                       continue;
 
-                                       try {
-                                               ClientGetter getter = 
makeGetter(page);
+                                               try {
+                                                       ClientGetter getter = 
makeGetter(page);
 
-                                               Logger.minor(this, "Starting " 
+ getter + " " + page);
-                                               toStart.add(getter);
-                                               runningFetch.put(page, getter);
-                                       } catch (MalformedURLException e) {
-                                               Logger.error(this, 
"IMPOSSIBLE-Malformed URI: " + page, e);
-
-                                               page.status = Status.FAILED;
-                                               page.lastChange = 
System.currentTimeMillis();
-                                               db.store(page);
+                                                       Logger.minor(this, 
"Starting " + getter + " " + page);
+                                                       toStart.add(getter);
+                                                       runningFetch.put(page, 
getter);
+                                               } catch (MalformedURLException 
e) {
+                                                       Logger.error(this, 
"IMPOSSIBLE-Malformed URI: " + page, e);
+                                                       
page.setStatus(Status.FAILED);
+                                               }
                                        }
                                }
                        }
@@ -284,9 +230,8 @@
        private ClientGetter makeGetter(Page page) throws MalformedURLException 
{
                ClientGetter getter = new ClientGetter(new 
ClientGetterCallback(page),
                                core.requestStarters.chkFetchScheduler,
-                       core.requestStarters.sskFetchScheduler, new 
FreenetURI(page.uri), ctx, config.getRequestPriority(),
-                       this,
-                       null, null);
+                       core.requestStarters.sskFetchScheduler, new 
FreenetURI(page.getURI()), ctx,
+                       getPollingPriorityProgress(), this, null, null);
                return getter;
        }
 
@@ -363,10 +308,8 @@
                }
 
                public void run() {
-                       synchronized (this) {
-                               XMLSpider.this.config.setValue(config);
-                               db.store(XMLSpider.this.config);
-                               db.commit();
+                       synchronized (root) {
+                               root.getConfig().setValue(config);
                        }
                }
        }
@@ -421,7 +364,7 @@
                }
 
                FreenetURI uri = state.getURI();
-
+               db.beginThreadTransaction(Storage.READ_WRITE_TRANSACTION);
                try {
                        ClientMetadata cm = result.getMetadata();
                        Bucket data = result.asBucket();
@@ -434,40 +377,31 @@
                         * provided).
                         */
                        PageCallBack pageCallBack = new PageCallBack(page);
-                       Logger.minor(this, "Successful: " + uri + " : " + 
page.id);
+                       Logger.minor(this, "Successful: " + uri + " : " + 
page.getId());
 
                        try {
                                ContentFilter.filter(data, new 
NullBucketFactory(), mimeType, uri.toURI("http://127.0.0.1:8888/";),
                                        pageCallBack);
-                               pageCallBack.store();
+                               page.setStatus(Status.SUCCEEDED);
+                               db.endThreadTransaction();
 
-                               synchronized (this) {
-                                       page.status = Status.SUCCEEDED;
-                                       page.lastChange = 
System.currentTimeMillis();
-                                       db.store(page);
-                                       db.commit();
-                               }
-                               Logger.minor(this, "Filtered " + uri + " : " + 
page.id);
+                               Logger.minor(this, "Filtered " + uri + " : " + 
page.getId());
                        } catch (UnsafeContentTypeException e) {
-                               Logger.minor(this, "UnsafeContentTypeException 
" + uri + " : " + page.id, e);
-                               synchronized (this) {
-                                       page.status = Status.SUCCEEDED;
-                                       page.lastChange = 
System.currentTimeMillis();
-                                       db.store(page);
-                                       db.commit();
-                               }
+                               Logger.minor(this, "UnsafeContentTypeException 
" + uri + " : " + page.getId(), e);
+                               page.setStatus(Status.SUCCEEDED);
+                               db.endThreadTransaction();
                                return; // Ignore
                        } catch (IOException e) {
-                               db.rollback();
+                               db.rollbackThreadTransaction();
                                Logger.error(this, "Bucket error?: " + e, e);
                        } catch (URISyntaxException e) {
-                               db.rollback();
+                               db.rollbackThreadTransaction();
                                Logger.error(this, "Internal error: " + e, e);
                        } finally {
                                data.free();
                        }
                } catch (RuntimeException e) {
-                       db.rollback();
+                       db.rollbackThreadTransaction();
                        throw e;
                } finally {
                        synchronized (this) {
@@ -485,28 +419,21 @@
                        if (stopped)
                                return;
 
+                       
db.beginThreadTransaction(Storage.EXCLUSIVE_TRANSACTION);
                        synchronized (page) {
                                if (fe.newURI != null) {
                                        // redirect, mark as succeeded
                                        queueURI(fe.newURI, "redirect from " + 
state.getURI(), false);
-
-                                       page.status = Status.SUCCEEDED;
-                                       page.lastChange = 
System.currentTimeMillis();
-                                       db.store(page);
+                                       page.setStatus(Status.SUCCEEDED);
                                } else if (fe.isFatal()) {
                                        // too many tries or fatal, mark as 
failed
-                                       page.status = Status.FAILED;
-                                       page.lastChange = 
System.currentTimeMillis();
-                                       db.store(page);
+                                       page.setStatus(Status.FAILED);
                                } else {
                                        // requeue at back
-                                       page.status = Status.QUEUED;
-                                       page.lastChange = 
System.currentTimeMillis();
-
-                                       db.store(page);
+                                       page.setStatus(Status.QUEUED);
                                }
                        }
-                       db.commit();
+                       db.endThreadTransaction();
                        runningFetch.remove(page);
                }
 
@@ -542,13 +469,8 @@
                        callbackExecutor.shutdownNow();
                }
                try { callbackExecutor.awaitTermination(30, TimeUnit.SECONDS); 
} catch (InterruptedException e) {}
-               try { db.rollback(); } catch (Exception e) {}
                try { db.close(); } catch (Exception e) {}
 
-               synchronized (this) {
-                       termCache.clear();
-               }
-
                Logger.normal(this, "XMLSpider terminated");
        }
 
@@ -582,47 +504,9 @@
 
                stopped = false;
 
-               // Initial DB4O
-               db = initDB4O();
-
-               // Find max Page ID
-               {
-                       Query query = db.query();
-                       query.constrain(MaxPageId.class);
-                       @SuppressWarnings("unchecked")
-                       ObjectSet<MaxPageId> set = query.execute();
-                       
-                       if (set.hasNext())
-                               maxPageId = set.next();
-                       else {
-                               query = db.query();
-                               query.constrain(Page.class);
-                               query.descend("id").orderDescending();
-                               @SuppressWarnings("unchecked")
-                               ObjectSet<Page> set2 = query.execute();
-                               if (set2.hasNext())
-                                       maxPageId = new 
MaxPageId(set2.next().id);
-                               else
-                                       maxPageId = new MaxPageId(0);
-                       }
-               }
+               // Initialize the database
+               db = initDB();
                
-               // Load Config
-               {
-                       Query query = db.query();
-                       query.constrain(Config.class);
-                       @SuppressWarnings("unchecked")
-                       ObjectSet<Config> set = query.execute();
-
-                       if (set.hasNext())
-                               config = set.next();
-                       else {
-                               config = new Config(true);
-                               db.store(config);
-                               db.commit();
-                       }
-               }       
-               
                indexWriter = new IndexWriter(this);
                webInterface = new WebInterface(this);
 
@@ -672,7 +556,7 @@
                        if (stopped)
                                throw new RuntimeException("plugin stopping");
                        Logger.debug(this, "foundURI " + uri + " on " + page);
-                       queueURI(uri, "Added from " + page.uri, false);
+                       queueURI(uri, "Added from " + page.getURI(), false);
                }
 
                protected Integer lastPosition = null;
@@ -681,13 +565,13 @@
                        if (stopped)
                                throw new RuntimeException("plugin stopping");
 
-                       Logger.debug(this, "onText on " + page.id + " (" + 
baseURI + ")");
+                       Logger.debug(this, "onText on " + page.getId() + " (" + 
baseURI + ")");
 
                        if ("title".equalsIgnoreCase(type) && (s != null) && 
(s.length() != 0) && (s.indexOf('\n') < 0)) {
                                /*
                                 * title of the page 
                                 */
-                               page.pageTitle = s;
+                               page.setPageTitle(s);
                                type = "title";
                        }
                        else type = null;
@@ -722,49 +606,9 @@
                        if (word.length() < 3)
                                return;
                        Term term = getTermByWord(word, true);
-                       TermPosition termPos = getTermPosition(term);
-
-                       synchronized (termPos) {
-                               int[] newPositions = new 
int[termPos.positions.length + 1];
-                               System.arraycopy(termPos.positions, 0, 
newPositions, 0, termPos.positions.length);
-                               newPositions[termPos.positions.length] = 
position;
-
-                               termPos.positions = newPositions;
-                       }
+                       TermPosition termPos = page.getTermPosition(term);
+                       termPos.addPositions(position);
                }
-               
-               protected Map<Term, TermPosition> termPosCache = new 
HashMap<Term, TermPosition>();
-
-               public void store() {
-                       // Delete existing TermPosition
-                       Query query = db.query();
-                       query.constrain(TermPosition.class);
-                       query.descend("pageId").constrain(page.id);
-                       @SuppressWarnings("unchecked")
-                       ObjectSet<TermPosition> set = query.execute();
-                       for (TermPosition tp : set) {
-                               assert tp.pageId == page.id;
-                               db.delete(tp);
-                       }
-                       
-                       for (TermPosition tp : termPosCache.values())
-                               db.store(tp);
-                       termPosCache.clear();
-               }
-
-               protected TermPosition getTermPosition(Term term) {
-                       TermPosition cachedTermPos = termPosCache.get(term);
-                       if (cachedTermPos != null)
-                               return cachedTermPos;
-
-                       cachedTermPos = new TermPosition();
-                       cachedTermPos.word = term.word;
-                       cachedTermPos.pageId = page.id;
-                       cachedTermPos.positions = new int[0];
-
-                       termPosCache.put(term, cachedTermPos);
-                       return cachedTermPos;
-               }
        }
 
        public void onFoundEdition(long l, USK key){
@@ -782,145 +626,53 @@
        }
 
        public short getPollingPriorityNormal() {
-               return (short) Math.min(RequestStarter.MINIMUM_PRIORITY_CLASS, 
config.getRequestPriority() + 1);
+               return (short) Math.min(RequestStarter.MINIMUM_PRIORITY_CLASS, 
root.getConfig().getRequestPriority() + 1);
        }
 
        public short getPollingPriorityProgress() {
-               return config.getRequestPriority();
+               return root.getConfig().getRequestPriority();
        }
 
-       protected ObjectContainer db;
+       protected Storage db;
+       protected PerstRoot root;
 
        /**
-        * Initializes DB4O.
-        * 
-        * @return db4o's connector
+        * Initializes Database
         */
-       private ObjectContainer initDB4O() {
-               Configuration cfg = Db4o.newConfiguration();
-               cfg.reflectWith(new JdkReflector(getClass().getClassLoader()));
+       private Storage initDB() {
+               Storage db = StorageFactory.getInstance().createStorage();
+               db.setProperty("perst.object.cache.kind", "soft");
+               db.setProperty("perst.gc.threshold", 16384);
+               db.setProperty("perst.alternative.btree", true);
+               db.setProperty("perst.string.encoding", "UTF-8");
+               db.setProperty("perst.concurrent.iterator", true);
 
-               //- Page
-               cfg.objectClass(Page.class).objectField("id").indexed(true);
-               cfg.objectClass(Page.class).objectField("uri").indexed(true);
-               cfg.objectClass(Page.class).objectField("status").indexed(true);
-               
cfg.objectClass(Page.class).objectField("lastChange").indexed(true);            
+               db.open("XMLSpider-" + version + ".dbs");
 
-               cfg.objectClass(Page.class).callConstructor(true);
+               root = (PerstRoot) db.getRoot();
+               if (root == null)
+                       root = PerstRoot.createRoot(db);
 
-               //- Term
-               cfg.objectClass(Term.class).objectField("md5").indexed(true);
-               cfg.objectClass(Term.class).objectField("word").indexed(true);
-
-               cfg.objectClass(Term.class).callConstructor(true);
-
-               //- TermPosition
-               
cfg.objectClass(TermPosition.class).objectField("pageId").indexed(true);
-               
cfg.objectClass(TermPosition.class).objectField("word").indexed(true);
-
-               cfg.objectClass(TermPosition.class).callConstructor(true);
-
-               //- Other
-               cfg.objectClass(MaxPageId.class).callConstructor(true);
-               cfg.objectClass(Config.class).callConstructor(true);
-
-               cfg.activationDepth(3);
-               cfg.updateDepth(3);
-               cfg.automaticShutDown(false);
-               cfg.queries().evaluationMode(QueryEvaluationMode.LAZY);
-               cfg.diagnostic().addListener(new DiagnosticToConsole());
-
-               ObjectContainer oc = Db4o.openFile(cfg, "XMLSpider-" + version 
+ ".db4o");
-
-               return oc;
+               return db;
        }
        
-       public ObjectContainer getDB() {
-               return db;
+       public PerstRoot getDbRoot() {
+               return root;
        }
 
        protected Page getPageByURI(FreenetURI uri) {
-               Query query = db.query();
-               query.constrain(Page.class);
-               query.descend("uri").constrain(uri.toString());
-               @SuppressWarnings("unchecked")
-               ObjectSet<Page> set = query.execute();
-
-               if (set.hasNext()) {
-                       Page page = set.next();
-                       assert page.uri.equals(uri.toString());
-                       return page;
-               } else
-                       return null;
+               return root.getPageByURI(uri, false, null);
        }
 
        protected Page getPageById(long id) {
-               Query query = db.query();
-               query.constrain(Page.class);
-               query.descend("id").constrain(id);
-               @SuppressWarnings("unchecked")
-               ObjectSet<Page> set = query.execute();
-
-               if (set.hasNext()) {
-                       Page page = set.next();
-                       assert page.id == id;
-                       return page;
-               } else
-                       return null;
+               return root.getPageById(id);
        }
 
-       protected Term getTermByMd5(String md5) {
-               Query query = db.query();
-               query.constrain(Term.class);
-               query.descend("md5").constrain(md5);
-               @SuppressWarnings("unchecked")
-               ObjectSet<Term> set = query.execute();
-
-               if (set.hasNext()) {
-                       Term term = set.next();
-                       assert md5.equals(term.md5);
-                       return term;
-               } else
-                       return null;
-       }
-
-       @SuppressWarnings("serial")     
-       protected Map<String, Term> termCache = new LinkedHashMap<String, 
Term>() {
-               protected boolean removeEldestEntry(Map.Entry<String, Term> 
eldest) {
-                       return size() > 1024;
-               }
-       };
-
        // language for I10N
        private LANGUAGE language;
 
        protected Term getTermByWord(String word, boolean create) {
-               synchronized (this) {
-                       Term cachedTerm = termCache.get(word);
-                       if (cachedTerm != null)
-                               return cachedTerm;
-
-                       Query query = db.query();
-                       query.constrain(Term.class);
-                       query.descend("word").constrain(word);
-                       @SuppressWarnings("unchecked")
-                       ObjectSet<Term> set = query.execute();
-
-                       if (set.hasNext()) {
-                               cachedTerm = set.next();
-                               assert word.equals(cachedTerm.word);
-                               termCache.put(word, cachedTerm);
-
-                               return cachedTerm;
-                       } else if (create) {
-                               cachedTerm = new Term(word);
-                               termCache.put(word, cachedTerm);
-                               db.store(cachedTerm);
-
-                               return cachedTerm;
-                       } else
-                               return null;
-               }
+               return root.getTermByWord(word, create);
        }
 
        public String getString(String key) {

Copied: trunk/plugins/XMLSpider/db/Config.java (from rev 24819, 
trunk/plugins/XMLSpider/Config.java)
===================================================================
--- trunk/plugins/XMLSpider/db/Config.java                              (rev 0)
+++ trunk/plugins/XMLSpider/db/Config.java      2008-12-29 15:56:55 UTC (rev 
24821)
@@ -0,0 +1,177 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider.db;
+
+import plugins.XMLSpider.org.garret.perst.Persistent;
+import plugins.XMLSpider.org.garret.perst.Storage;
+import freenet.node.RequestStarter;
+
+/**
+ * Spider settings (index output, parallelism, request priority) stored as a
+ * Perst persistent object.
+ * <p>
+ * Every individual setter asserts that the instance is <em>not</em> persistent.
+ * To change the stored configuration, edit a {@link #clone()} and apply it to
+ * the stored instance with {@link #setValue(Config)}.
+ */
+public class Config extends Persistent implements Cloneable {
+       /**
+        * Directory where the generated indices are stored
+        */
+       private String indexDir;
+       private int indexMaxEntries;
+       private long indexSubindexMaxSize;
+
+       private String indexTitle;
+       private String indexOwner;
+       private String indexOwnerEmail;
+
+       private int maxShownURIs;
+       private int maxParallelRequests;
+       private String[] badlistedExtensions;
+       private short requestPriority;
+
+       /**
+        * Creates an empty instance with all fields at their Java defaults. It
+        * is not registered with any storage; {@link #clone()} uses it to build
+        * an editable copy.
+        */
+       public Config() {
+       }
+
+       /**
+        * Creates a configuration filled with the default values and registers
+        * it with the given storage.
+        *
+        * @param storage
+        *            storage the new object is made persistent in
+        */
+       public Config(Storage storage) {
+               indexDir = "myindex7/";
+               indexMaxEntries = 2000;
+               indexSubindexMaxSize = 4 * 1024 * 1024;
+
+               indexTitle = "XMLSpider index";
+               indexOwner = "Freenet";
+               indexOwnerEmail = "(nil)";
+
+               maxShownURIs = 15;
+
+               maxParallelRequests = 100;
+
+               // Each extension is listed once; the earlier list repeated
+               // ".rar" and ".ace".
+               badlistedExtensions = new String[] { //
+               ".ico", ".bmp", ".png", ".jpg", ".gif", // image
+                       ".zip", ".jar", ".gz", ".bz2", ".rar", // archive
+                       ".7z", ".arj", ".rpm", ".deb", //
+                       ".xpi", ".ace", ".cab", ".lza", ".lzh", //
+                       ".exe", ".iso", // binary
+                       ".mpg", ".ogg", ".mp3", ".avi", // media
+                       ".css", ".sig" // other
+               };
+
+               requestPriority = RequestStarter.IMMEDIATE_SPLITFILE_PRIORITY_CLASS;
+
+               storage.makePersistent(this);
+       }
+
+       /**
+        * Copies every setting from <code>config</code> into this object and,
+        * if this object is persistent, marks it as modified.
+        * <p>
+        * Locks this object first and then <code>config</code>.
+        *
+        * @param config
+        *            source of the new values; it is not modified
+        */
+       public synchronized void setValue(Config config) {
+               synchronized (config) {
+                       indexDir = config.indexDir;
+                       indexMaxEntries = config.indexMaxEntries;
+                       indexSubindexMaxSize = config.indexSubindexMaxSize;
+
+                       indexTitle = config.indexTitle;
+                       indexOwner = config.indexOwner;
+                       indexOwnerEmail = config.indexOwnerEmail;
+
+                       maxShownURIs = config.maxShownURIs;
+
+                       maxParallelRequests = config.maxParallelRequests;
+
+                       // Copy the array so this object and the source never
+                       // share one mutable array (clone() goes through here).
+                       badlistedExtensions = config.badlistedExtensions == null
+                               ? null
+                               : config.badlistedExtensions.clone();
+
+                       requestPriority = config.requestPriority;
+               }
+
+               if (isPersistent())
+                       modify();
+       }
+
+       /**
+        * Returns a new, separately constructed <code>Config</code> carrying the
+        * same values. The copy is built with the no-argument constructor, so it
+        * is not registered with the storage and may be edited via the setters.
+        */
+       @Override
+       public synchronized Config clone() {
+               Config newConfig = new Config();
+               newConfig.setValue(this);
+               return newConfig;
+       }
+
+       public synchronized void setIndexDir(String indexDir) {
+               assert !isPersistent();
+               this.indexDir = indexDir;
+       }
+
+       public synchronized String getIndexDir() {
+               return indexDir;
+       }
+
+       public synchronized void setIndexMaxEntries(int indexMaxEntries) {
+               assert !isPersistent();
+               this.indexMaxEntries = indexMaxEntries;
+       }
+
+       public synchronized int getIndexMaxEntries() {
+               return indexMaxEntries;
+       }
+
+       public synchronized void setIndexSubindexMaxSize(long indexSubindexMaxSize) {
+               assert !isPersistent();
+               this.indexSubindexMaxSize = indexSubindexMaxSize;
+       }
+
+       public synchronized long getIndexSubindexMaxSize() {
+               return indexSubindexMaxSize;
+       }
+
+       public synchronized void setIndexTitle(String indexTitle) {
+               assert !isPersistent();
+               this.indexTitle = indexTitle;
+       }
+
+       public synchronized String getIndexTitle() {
+               return indexTitle;
+       }
+
+       public synchronized void setIndexOwner(String indexOwner) {
+               assert !isPersistent();
+               this.indexOwner = indexOwner;
+       }
+
+       public synchronized String getIndexOwner() {
+               return indexOwner;
+       }
+
+       public synchronized void setIndexOwnerEmail(String indexOwnerEmail) {
+               assert !isPersistent();
+               this.indexOwnerEmail = indexOwnerEmail;
+       }
+
+       public synchronized String getIndexOwnerEmail() {
+               return indexOwnerEmail;
+       }
+
+       public synchronized void setMaxShownURIs(int maxShownURIs) {
+               assert !isPersistent();
+               this.maxShownURIs = maxShownURIs;
+       }
+
+       public synchronized int getMaxShownURIs() {
+               return maxShownURIs;
+       }
+
+       public synchronized void setMaxParallelRequests(int maxParallelRequests) {
+               assert !isPersistent();
+               this.maxParallelRequests = maxParallelRequests;
+       }
+
+       public synchronized int getMaxParallelRequests() {
+               return maxParallelRequests;
+       }
+
+       public synchronized void setBadlistedExtensions(String[] badlistedExtensions) {
+               assert !isPersistent();
+               this.badlistedExtensions = badlistedExtensions;
+       }
+
+       /**
+        * Returns the internal extension array itself, not a copy; callers
+        * should treat it as read-only.
+        */
+       public synchronized String[] getBadlistedExtensions() {
+               return badlistedExtensions;
+       }
+
+       public synchronized void setRequestPriority(short requestPriority) {
+               assert !isPersistent();
+               this.requestPriority = requestPriority;
+       }
+
+       public synchronized short getRequestPriority() {
+               return requestPriority;
+       }
+}
\ No newline at end of file

Added: trunk/plugins/XMLSpider/db/Page.java
===================================================================
--- trunk/plugins/XMLSpider/db/Page.java                                (rev 0)
+++ trunk/plugins/XMLSpider/db/Page.java        2008-12-29 15:56:55 UTC (rev 
24821)
@@ -0,0 +1,141 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider.db;
+
+import plugins.XMLSpider.org.garret.perst.IPersistentMap;
+import plugins.XMLSpider.org.garret.perst.Persistent;
+import plugins.XMLSpider.org.garret.perst.SortedCollection;
+import plugins.XMLSpider.org.garret.perst.Storage;
+
+/**
+ * Persistent record of one page known to the spider: its URI, title, crawl
+ * status, last-change time and the positions of the terms found on it.
+ * <p>
+ * Mutators that go through {@code preModify()} / {@code postModify()} refresh
+ * {@code lastChange} and move the page inside the per-status collections held
+ * by the storage root.
+ */
+public class Page extends Persistent implements Comparable<Page> {
+       /** Page Id */
+       protected long id;
+       /** URI of the page */
+       protected String uri;
+       /** Title */
+       protected String pageTitle;
+       /** Status */
+       protected Status status;
+       /** Last Change Time */
+       protected long lastChange;
+       /** Comment, for debugging */
+       protected String comment;
+       /** term.md5 -> TermPosition */
+       protected IPersistentMap<String, TermPosition> termPosMap;
+
+       /** Creates an empty page that is not registered with any storage. */
+       public Page() {
+       }
+
+       /**
+        * Creates a page in status {@link Status#QUEUED}, stamps it with the
+        * current time and makes it persistent in <code>storage</code>. The
+        * <code>id</code> field is not assigned here.
+        */
+       Page(String uri, String comment, Storage storage) {
+               this.uri = uri;
+               this.comment = comment;
+               this.status = Status.QUEUED;
+               this.lastChange = System.currentTimeMillis();
+
+               storage.makePersistent(this);
+       }
+
+       /**
+        * Changes the status, moving the page from the collection of its old
+        * status to the collection of the new one.
+        */
+       public synchronized void setStatus(Status status) {
+               preModify();
+               this.status = status;
+               postModify();
+       }
+
+       public Status getStatus() {
+               return status;
+       }
+
+       public synchronized void setComment(String comment) {
+               preModify();
+               this.comment = comment;
+               postModify();
+       }
+
+       public String getComment() {
+               return comment;
+       }
+
+       public String getURI() {
+               return uri;
+       }
+
+       public long getId() {
+               return id;
+       }
+
+       /**
+        * Sets the page title. Synchronized like the other mutators, because it
+        * runs the same remove / re-add sequence on the shared status
+        * collection.
+        */
+       public synchronized void setPageTitle(String pageTitle) {
+               preModify();
+               this.pageTitle = pageTitle;
+               postModify();
+       }
+
+       public String getPageTitle() {
+               return pageTitle;
+       }
+
+       /**
+        * Returns the {@link TermPosition} of <code>term</code> on this page,
+        * creating and registering it on first use.
+        *
+        * @param term
+        *            the term to look up; its md5 is the map key
+        * @return the existing or newly created position record, never
+        *         <code>null</code>
+        */
+       public synchronized TermPosition getTermPosition(Term term) {
+               if (termPosMap == null)
+                       termPosMap = getStorage().createMap(String.class);
+
+               TermPosition tp = termPosMap.get(term.md5);
+               if (tp == null) {
+                       tp = new TermPosition(getStorage());
+                       // Remember the new record; without this every call
+                       // would hand out a fresh object and the positions
+                       // added to earlier ones would be lost.
+                       termPosMap.put(term.md5, tp);
+                       term.pageSet.add(this);
+               }
+
+               return tp;
+       }
+
+       @Override
+       public int hashCode() {
+               return (int) (id ^ (id >>> 32));
+       }
+
+       /** Two pages are equal when they are of the same class and share an id. */
+       @Override
+       public boolean equals(Object obj) {
+               if (this == obj)
+                       return true;
+               if (obj == null)
+                       return false;
+               if (getClass() != obj.getClass())
+                       return false;
+
+               return id == ((Page) obj).id;
+       }
+
+       @Override
+       public String toString() {
+               return "[PAGE: id=" + id + ", title=" + pageTitle + ", uri=" + uri
+                       + ", status=" + status + ", comment=" + comment + "]";
+       }
+
+       /** Orders pages by ascending id. */
+       public int compareTo(Page o) {
+               // Plain comparison; no need to box the id.
+               if (id < o.id)
+                       return -1;
+               if (id > o.id)
+                       return 1;
+               return 0;
+       }
+
+       /**
+        * Removes this page from the collection of its current status. It must
+        * run before <code>status</code> or <code>lastChange</code> change,
+        * because the collection is picked by status and is sorted by
+        * <code>lastChange</code>.
+        */
+       private void preModify() {
+               Storage storage = getStorage();
+
+               if (storage != null) {
+                       PerstRoot root = (PerstRoot) storage.getRoot();
+                       SortedCollection<Page> coll = root.getPageCollection(status);
+                       coll.remove(this);
+               }
+       }
+
+       /**
+        * Stamps the change time, marks the object as modified and inserts it
+        * into the collection of its (possibly new) status.
+        */
+       private void postModify() {
+               lastChange = System.currentTimeMillis();
+
+               modify();
+
+               Storage storage = getStorage();
+
+               if (storage != null) {
+                       PerstRoot root = (PerstRoot) storage.getRoot();
+                       SortedCollection<Page> coll = root.getPageCollection(status);
+                       coll.add(this);
+               }
+       }
+}

Added: trunk/plugins/XMLSpider/db/PageTimeStampComparator.java
===================================================================
--- trunk/plugins/XMLSpider/db/PageTimeStampComparator.java                     
        (rev 0)
+++ trunk/plugins/XMLSpider/db/PageTimeStampComparator.java     2008-12-29 
15:56:55 UTC (rev 24821)
@@ -0,0 +1,25 @@
+/**
+ * 
+ */
+package plugins.XMLSpider.db;
+
+import plugins.XMLSpider.org.garret.perst.PersistentComparator;
+
+/**
+ * Orders {@link Page} objects by ascending <code>lastChange</code> time stamp.
+ */
+final class PageTimeStampComparator extends PersistentComparator<Page> {
+    /**
+     * Compares a member against a search key. A key that is not a
+     * <code>Page</code> compares as equal (0) to every member.
+     */
+    @Override
+    public int compareMemberWithKey(Page p1, Object key) {
+       if (!(key instanceof Page))
+               return 0;
+       return compareMembers(p1, (Page) key);
+    }
+
+    /** Returns -1, 0 or 1 according to the order of the two time stamps. */
+    @Override
+    public int compareMembers(Page p1, Page p2) {
+       final long first = p1.lastChange;
+       final long second = p2.lastChange;
+       return first < second ? -1 : (first > second ? 1 : 0);
+    }
+}
\ No newline at end of file

Added: trunk/plugins/XMLSpider/db/PerstRoot.java
===================================================================
--- trunk/plugins/XMLSpider/db/PerstRoot.java                           (rev 0)
+++ trunk/plugins/XMLSpider/db/PerstRoot.java   2008-12-29 15:56:55 UTC (rev 
24821)
@@ -0,0 +1,120 @@
+package plugins.XMLSpider.db;
+
+import java.util.Iterator;
+import java.util.List;
+
+import plugins.XMLSpider.org.garret.perst.FieldIndex;
+import plugins.XMLSpider.org.garret.perst.Key;
+import plugins.XMLSpider.org.garret.perst.Persistent;
+import plugins.XMLSpider.org.garret.perst.SortedCollection;
+import plugins.XMLSpider.org.garret.perst.Storage;
+import freenet.keys.FreenetURI;
+
+public class PerstRoot extends Persistent {
+       protected FieldIndex<Term> md5Term;
+       protected FieldIndex<Term> wordTerm;
+
+       protected FieldIndex<Page> idPage;
+       protected FieldIndex<Page> uriPage;
+       protected SortedCollection<Page> queuedPages;
+       protected SortedCollection<Page> failedPages;
+       protected SortedCollection<Page> succeededPages;
+       
+       private Config config;
+
+       public PerstRoot() {
+       }
+
+       public static PerstRoot createRoot(Storage storage) {
+               PerstRoot root = new PerstRoot();
+
+               root.md5Term = storage.createFieldIndex(Term.class, "md5", 
true);
+               root.wordTerm = storage.createFieldIndex(Term.class, "word", 
true);
+
+               root.idPage = storage.createFieldIndex(Page.class, "id", true);
+               root.uriPage = storage.createFieldIndex(Page.class, "uri", 
true);
+               root.queuedPages = storage.<Page> createSortedCollection(new 
PageTimeStampComparator(), false);
+               root.failedPages = storage.<Page> createSortedCollection(new 
PageTimeStampComparator(), false);
+               root.succeededPages = storage.<Page> createSortedCollection(new 
PageTimeStampComparator(), false);
+               
+               
+               root.config = new Config(storage);
+               
+               storage.setRoot(root);
+               
+               return root;
+       }
+
+       public synchronized Term getTermByWord(String word, boolean create) {
+               Term term = wordTerm.get(new Key(word));
+
+               if (create && term == null) {
+                       term = new Term(word, getStorage());
+                       md5Term.add(term);
+                       wordTerm.add(term);
+               }
+
+               return term;
+       }
+
+       public synchronized Iterator<Term> getTermIterator() {
+               return md5Term.iterator();
+       }
+       
+       public synchronized List<Term> getTermList() {
+               return md5Term.getList(null, null);
+       }
+
+       public synchronized int getTermCount() {
+               return md5Term.size();
+       }
+       
+       public synchronized Page getPageByURI(FreenetURI uri, boolean create, 
String comment) {
+               Page page = uriPage.get(new Key(uri.toString()));
+
+               if (create && page == null) {
+                       page = new Page(uri.toString(), comment, getStorage());
+
+                       idPage.append(page);
+                       uriPage.add(page);
+                       queuedPages.add(page);
+               }
+
+               return page;
+       }
+
+       public Page getPageById(long id) {
+               Page page = idPage.get(id);
+               return page;
+       }
+       
+       SortedCollection<Page> getPageCollection(Status status) {
+               switch (status) {
+               case FAILED:
+                       return failedPages;
+               case QUEUED:
+                       return queuedPages;
+               case SUCCEEDED:
+                       return succeededPages;
+               default:
+                       return null;
+               }
+       }
+
+       public synchronized Iterator<Page> getPages(Status status) {
+               return getPageCollection(status).iterator();
+       }
+       
+       public synchronized int getPageCount(Status status) {
+               return getPageCollection(status).size();
+       }
+
+       public void setConfig(Config config) {
+           this.config = config;
+           modify();
+    }
+
+       public Config getConfig() {
+           return config;
+    }
+}

Copied: trunk/plugins/XMLSpider/db/Status.java (from rev 24819, 
trunk/plugins/XMLSpider/Status.java)
===================================================================
--- trunk/plugins/XMLSpider/db/Status.java                              (rev 0)
+++ trunk/plugins/XMLSpider/db/Status.java      2008-12-29 15:56:55 UTC (rev 
24821)
@@ -0,0 +1,9 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider.db;
+
+/**
+ * Status of a page; each value is one of the three states listed below.
+ */
+public enum Status {
+       /** For simplicity, a page that is currently running is also marked as QUEUED. */
+       QUEUED, SUCCEEDED, FAILED
+}
\ No newline at end of file

Added: trunk/plugins/XMLSpider/db/Term.java
===================================================================
--- trunk/plugins/XMLSpider/db/Term.java                                (rev 0)
+++ trunk/plugins/XMLSpider/db/Term.java        2008-12-29 15:56:55 UTC (rev 
24821)
@@ -0,0 +1,103 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider.db;
+
+import java.io.UnsupportedEncodingException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Set;
+
+import plugins.XMLSpider.org.garret.perst.IPersistentSet;
+import plugins.XMLSpider.org.garret.perst.Persistent;
+import plugins.XMLSpider.org.garret.perst.Storage;
+
+/**
+ * A word stored together with the MD5 hex digest of that word and the set of
+ * pages that contain it.
+ */
+public class Term extends Persistent {
+       /** MD5 of the term */
+       String md5;
+       /** Term */
+       String word;
+
+       /** Pages containing this Term */
+       IPersistentSet<Page> pageSet;
+
+       /**
+        * Creates a term for <code>word</code>, computes its MD5, allocates
+        * its page set from <code>storage</code> and makes the term persistent.
+        */
+       public Term(String word, Storage storage) {
+               this.word = word;
+               md5 = MD5(word);
+               pageSet = storage.<Page> createScalableSet();
+
+               storage.makePersistent(this);
+       }
+
+       public Term() {
+       }
+
+       /** @return the result of adding <code>page</code> to the page set */
+       public boolean addPage(Page page) {
+               return pageSet.add(page);
+       }
+
+       /** @return the result of removing <code>page</code> from the page set */
+       public boolean removePage(Page page) {
+               return pageSet.remove(page);
+       }
+
+       /** @return the page set itself (not a copy) */
+       public Set<Page> getPages() {
+               return pageSet;
+       }
+
+       public String getWord() {
+               return word;
+       }
+
+       public String getMD5() {
+               return md5;
+       }
+
+       /** Two terms are equal when both their MD5 and their word are equal. */
+       @Override
+       public boolean equals(Object o) {
+               if (this == o)
+                       return true;
+               if (o == null)
+                       return false;
+               if (getClass() != o.getClass())
+                       return false;
+               Term t = (Term) o;
+               return md5.equals(t.md5) && word.equals(t.word);
+       }
+
+       @Override
+       public int hashCode() {
+               return md5.hashCode() ^ word.hashCode();
+       }
+
+       /**
+        * Calculates the MD5 digest of the UTF-8 encoding of a string.
+        *
+        * @param text the string to hash
+        * @return the digest as lower-case hexadecimal
+        * @throws RuntimeException if UTF-8 or MD5 is not available
+        */
+       public static String MD5(String text) {
+               try {
+                       MessageDigest md = MessageDigest.getInstance("MD5");
+                       // digest(byte[]) updates and finishes in one call; no
+                       // pre-allocated output buffer is needed.
+                       return convertToHex(md.digest(text.getBytes("UTF-8")));
+               } catch (UnsupportedEncodingException e) {
+                       throw new RuntimeException("UTF-8 not supported", e);
+               } catch (NoSuchAlgorithmException e) {
+                       throw new RuntimeException("MD5 not supported", e);
+               }
+       }
+
+       /**
+        * Converts bytes to lower-case hexadecimal, two digits per byte, high
+        * nibble first.
+        *
+        * @param data the bytes to convert
+        * @return the hexadecimal string, of length <code>2 * data.length</code>
+        */
+       public static String convertToHex(byte[] data) {
+               final String hexDigits = "0123456789abcdef";
+               StringBuilder buf = new StringBuilder(data.length * 2);
+               for (int i = 0; i < data.length; i++) {
+                       // Mask after shifting so sign extension of negative
+                       // bytes cannot leak into the nibble value.
+                       buf.append(hexDigits.charAt((data[i] >> 4) & 0x0F));
+                       buf.append(hexDigits.charAt(data[i] & 0x0F));
+               }
+               return buf.toString();
+       }
+}
\ No newline at end of file

Added: trunk/plugins/XMLSpider/db/TermPosition.java
===================================================================
--- trunk/plugins/XMLSpider/db/TermPosition.java                                
(rev 0)
+++ trunk/plugins/XMLSpider/db/TermPosition.java        2008-12-29 15:56:55 UTC 
(rev 24821)
@@ -0,0 +1,33 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider.db;
+
+import plugins.XMLSpider.org.garret.perst.Persistent;
+import plugins.XMLSpider.org.garret.perst.Storage;
+
+/**
+ * Persistent, append-only list of integer positions.
+ */
+public class TermPosition extends Persistent {
+       /** Position List */
+       public int[] positions;
+
+       /** Creates an instance without initializing the position list. */
+       public TermPosition() {
+       }
+
+       /**
+        * Creates an instance with an empty position list and makes it
+        * persistent in <code>storage</code>.
+        */
+       public TermPosition(Storage storage) {
+               positions = new int[0];
+               storage.makePersistent(this);
+       }
+
+       /**
+        * Appends one position to the list and marks this object as modified.
+        * A list that has not been initialized yet is treated as empty.
+        *
+        * @param position the position to append
+        */
+       public synchronized void addPositions(int position) {
+               int[] current = (positions == null) ? new int[0] : positions;
+
+               int[] newPositions = new int[current.length + 1];
+               System.arraycopy(current, 0, newPositions, 0, current.length);
+               newPositions[current.length] = position;
+
+               positions = newPositions;
+               modify();
+       }
+
+       /**
+        * @return the position array itself (not a copy)
+        */
+       public synchronized int[] getPositions() {
+               return positions;
+       }
+
+       /**
+        * Misnamed accessor kept for existing callers; it adds nothing.
+        *
+        * @return the position array itself (not a copy)
+        * @deprecated use {@link #getPositions()} instead
+        */
+       @Deprecated
+       public synchronized int[] addPositions() {
+               return getPositions();
+       }
+}
\ No newline at end of file

Modified: trunk/plugins/XMLSpider/web/ConfigPage.java
===================================================================
--- trunk/plugins/XMLSpider/web/ConfigPage.java 2008-12-29 13:03:39 UTC (rev 
24820)
+++ trunk/plugins/XMLSpider/web/ConfigPage.java 2008-12-29 15:56:55 UTC (rev 
24821)
@@ -4,8 +4,8 @@
  */
 package plugins.XMLSpider.web;
 
-import plugins.XMLSpider.Config;
 import plugins.XMLSpider.XMLSpider;
+import plugins.XMLSpider.db.Config;
 import freenet.clients.http.PageMaker;
 import freenet.pluginmanager.PluginRespirator;
 import freenet.support.HTMLNode;

Modified: trunk/plugins/XMLSpider/web/MainPage.java
===================================================================
--- trunk/plugins/XMLSpider/web/MainPage.java   2008-12-29 13:03:39 UTC (rev 
24820)
+++ trunk/plugins/XMLSpider/web/MainPage.java   2008-12-29 15:56:55 UTC (rev 
24821)
@@ -9,14 +9,11 @@
 import java.util.Iterator;
 import java.util.List;
 
-import plugins.XMLSpider.Config;
-import plugins.XMLSpider.Page;
-import plugins.XMLSpider.Status;
 import plugins.XMLSpider.XMLSpider;
-
-import com.db4o.ObjectSet;
-import com.db4o.query.Query;
-
+import plugins.XMLSpider.db.Config;
+import plugins.XMLSpider.db.Page;
+import plugins.XMLSpider.db.PerstRoot;
+import plugins.XMLSpider.db.Status;
 import freenet.clients.http.PageMaker;
 import freenet.keys.FreenetURI;
 import freenet.pluginmanager.PluginRespirator;
@@ -165,8 +162,8 @@
                        int maxURI = config.getMaxShownURIs();
                        for (int i = 0; i < maxURI && pi.hasNext(); i++) {
                                Page page = pi.next();
-                               HTMLNode litem = list.addChild("li", "title", 
page.comment);
-                               litem.addChild("a", "href", "/freenet:" + 
page.uri, page.uri);
+                               HTMLNode litem = list.addChild("li", "title", 
page.getComment());
+                               litem.addChild("a", "href", "/freenet:" + 
page.getURI(), page.getURI());
                        }
                }
                contentNode.addChild(runningBox);
@@ -192,19 +189,18 @@
 
        //-- Utilities
        private PageStatus getPageStatus(Status status) {
-               Query query = xmlSpider.getDB().query();
-               query.constrain(Page.class);
-               query.descend("status").constrain(status);
-               query.descend("lastChange").orderDescending();
+               PerstRoot root = xmlSpider.getDbRoot();
+               synchronized (root) {
+                       int count = root.getPageCount(status);
+                       Iterator<Page> it = root.getPages(status);
 
-               @SuppressWarnings("unchecked")
-               ObjectSet<Page> set = query.execute();
-               List<Page> pages = new ArrayList<Page>();
-               while (set.hasNext() && pages.size() < 
xmlSpider.getConfig().getMaxShownURIs()) {
-                       pages.add(set.next());
-               }
+                       int showURI = xmlSpider.getConfig().getMaxShownURIs();
+                       List<Page> page = new ArrayList();
+                       while (page.size() < showURI && it.hasNext())
+                               page.add(it.next());
 
-               return new PageStatus(set.size(), pages);
+                       return new PageStatus(count, page);
+               }
        }
 
        private void listPages(PageStatus pageStatus, HTMLNode parent) {
@@ -214,8 +210,8 @@
                        HTMLNode list = parent.addChild("ol", "style", 
"overflow: auto; white-space: nowrap;");
 
                        for (Page page : pageStatus.pages) {
-                               HTMLNode litem = list.addChild("li", "title", 
page.comment);
-                               litem.addChild("a", "href", "/freenet:" + 
page.uri, page.uri);
+                               HTMLNode litem = list.addChild("li", "title", 
page.getComment());
+                               litem.addChild("a", "href", "/freenet:" + 
page.getURI(), page.getURI());
                        }
                }
        }

_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs

Reply via email to