Author: j16sdiz
Date: 2008-12-29 15:56:55 +0000 (Mon, 29 Dec 2008)
New Revision: 24821
Added:
trunk/plugins/XMLSpider/db/
trunk/plugins/XMLSpider/db/Config.java
trunk/plugins/XMLSpider/db/Page.java
trunk/plugins/XMLSpider/db/PageTimeStampComparator.java
trunk/plugins/XMLSpider/db/PerstRoot.java
trunk/plugins/XMLSpider/db/Status.java
trunk/plugins/XMLSpider/db/Term.java
trunk/plugins/XMLSpider/db/TermPosition.java
Removed:
trunk/plugins/XMLSpider/Config.java
trunk/plugins/XMLSpider/MaxPageId.java
trunk/plugins/XMLSpider/Page.java
trunk/plugins/XMLSpider/Status.java
trunk/plugins/XMLSpider/Term.java
trunk/plugins/XMLSpider/TermPosition.java
Modified:
trunk/plugins/XMLSpider/IndexWriter.java
trunk/plugins/XMLSpider/XMLSpider.java
trunk/plugins/XMLSpider/web/ConfigPage.java
trunk/plugins/XMLSpider/web/MainPage.java
Log:
Port the whole thing to PERST
Less disk I/O, faster processing, lower CPU usage, messier code
Deleted: trunk/plugins/XMLSpider/Config.java
===================================================================
--- trunk/plugins/XMLSpider/Config.java 2008-12-29 13:03:39 UTC (rev 24820)
+++ trunk/plugins/XMLSpider/Config.java 2008-12-29 15:56:55 UTC (rev 24821)
@@ -1,166 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-import freenet.node.RequestStarter;
-import freenet.support.Logger;
-
-public class Config implements Cloneable {
- /**
- * Directory where the generated indices are stored
- */
- private String indexDir;
- private int indexMaxEntries;
- private long indexSubindexMaxSize;
-
- private String indexTitle;
- private String indexOwner;
- private String indexOwnerEmail;
-
- private int maxShownURIs;
- private int maxParallelRequests;
- private String[] badlistedExtensions;
- private short requestPriority;
-
- public Config() {
- } // for db4o
-
- public Config(boolean setDefault) {
- if (!setDefault)
- return;
-
- indexDir = "myindex7/";
- indexMaxEntries = 2000;
- indexSubindexMaxSize = 4 * 1024 * 1024;
-
- indexTitle = "XMLSpider index";
- indexOwner = "Freenet";
- indexOwnerEmail = "(nil)";
-
- maxShownURIs = 15;
-
- maxParallelRequests = 100;
-
- badlistedExtensions = new String[] { //
- ".ico", ".bmp", ".png", ".jpg", ".gif", // image
- ".zip", ".jar", ".gz", ".bz2", ".rar", // archive
- ".7z", ".rar", ".arj", ".rpm", ".deb", //
- ".xpi", ".ace", ".cab", ".lza", ".lzh", //
- ".ace", ".exe", ".iso", // binary
- ".mpg", ".ogg", ".mp3", ".avi", // media
- ".css", ".sig" // other
- };
-
- requestPriority =
RequestStarter.IMMEDIATE_SPLITFILE_PRIORITY_CLASS;
- }
-
- public synchronized void setValue(Config config) {
- synchronized (config) {
- indexDir = config.indexDir;
- indexMaxEntries = config.indexMaxEntries;
- indexSubindexMaxSize = config.indexSubindexMaxSize;
-
- indexTitle = config.indexTitle;
- indexOwner = config.indexOwner;
- indexOwnerEmail = config.indexOwnerEmail;
-
- maxShownURIs = config.maxShownURIs;
-
- maxParallelRequests = config.maxParallelRequests;
-
- badlistedExtensions = config.badlistedExtensions;
-
- requestPriority = config.requestPriority;
- }
- }
-
- public synchronized Config clone() {
- try {
- return (Config) super.clone();
- } catch (CloneNotSupportedException e) {
- Logger.error(this, "impossible:", e);
- throw new RuntimeException(e);
- }
- }
-
- public synchronized void setIndexDir(String indexDir) {
- this.indexDir = indexDir;
- }
-
- public synchronized String getIndexDir() {
- return indexDir;
- }
-
- public synchronized void setIndexMaxEntries(int indexMaxEntries) {
- this.indexMaxEntries = indexMaxEntries;
- }
-
- public synchronized int getIndexMaxEntries() {
- return indexMaxEntries;
- }
-
- public synchronized void setIndexSubindexMaxSize(long
indexSubindexMaxSize) {
- this.indexSubindexMaxSize = indexSubindexMaxSize;
- }
-
- public synchronized long getIndexSubindexMaxSize() {
- return indexSubindexMaxSize;
- }
-
- public synchronized void setIndexTitle(String indexTitle) {
- this.indexTitle = indexTitle;
- }
-
- public synchronized String getIndexTitle() {
- return indexTitle;
- }
-
- public synchronized void setIndexOwner(String indexOwner) {
- this.indexOwner = indexOwner;
- }
-
- public synchronized String getIndexOwner() {
- return indexOwner;
- }
-
- public synchronized void setIndexOwnerEmail(String indexOwnerEmail) {
- this.indexOwnerEmail = indexOwnerEmail;
- }
-
- public synchronized void setMaxShownURIs(int maxShownURIs) {
- this.maxShownURIs = maxShownURIs;
- }
-
- public synchronized int getMaxShownURIs() {
- return maxShownURIs;
- }
-
- public synchronized String getIndexOwnerEmail() {
- return indexOwnerEmail;
- }
-
- public synchronized void setMaxParallelRequests(int
maxParallelRequests) {
- this.maxParallelRequests = maxParallelRequests;
- }
-
- public synchronized int getMaxParallelRequests() {
- return maxParallelRequests;
- }
-
- public synchronized void setBadlistedExtensions(String[]
badlistedExtensions) {
- this.badlistedExtensions = badlistedExtensions;
- }
-
- public synchronized String[] getBadlistedExtensions() {
- return badlistedExtensions;
- }
-
- public synchronized void setRequestPriority(short requestPriority) {
- this.requestPriority = requestPriority;
- }
-
- public synchronized short getRequestPriority() {
- return requestPriority;
- }
-}
\ No newline at end of file
Modified: trunk/plugins/XMLSpider/IndexWriter.java
===================================================================
--- trunk/plugins/XMLSpider/IndexWriter.java 2008-12-29 13:03:39 UTC (rev
24820)
+++ trunk/plugins/XMLSpider/IndexWriter.java 2008-12-29 15:56:55 UTC (rev
24821)
@@ -10,6 +10,7 @@
import java.io.IOException;
import java.security.NoSuchAlgorithmException;
import java.util.List;
+import java.util.Set;
import java.util.Vector;
import javax.xml.parsers.DocumentBuilder;
@@ -25,9 +26,10 @@
import org.w3c.dom.Element;
import org.w3c.dom.Text;
-import com.db4o.ObjectSet;
-import com.db4o.query.Query;
-
+import plugins.XMLSpider.db.Config;
+import plugins.XMLSpider.db.Page;
+import plugins.XMLSpider.db.Term;
+import plugins.XMLSpider.db.TermPosition;
import freenet.support.Logger;
/**
@@ -201,25 +203,22 @@
private void makeSubIndices(Config config) throws Exception {
Logger.normal(this, "Generating index...");
- Query query = xmlSpider.db.query();
- query.constrain(Term.class);
- query.descend("md5").orderAscending();
- @SuppressWarnings("unchecked")
- ObjectSet<Term> termSet = query.execute();
+ List<Term> termList = xmlSpider.getDbRoot().getTermList();
+ int termCount = xmlSpider.getDbRoot().getTermCount();
indices = new Vector<String>();
- int prefix = (int) ((Math.log(termSet.size()) -
Math.log(config.getIndexMaxEntries())) / Math.log(16)) - 1;
+ int prefix = (int) ((Math.log(termCount) -
Math.log(config.getIndexMaxEntries())) / Math.log(16)) - 1;
if (prefix <= 0)
prefix = 1;
match = 1;
Vector<Term> list = new Vector<Term>();
- Term term0 = termSet.get(0);
- String currentPrefix = term0.md5.substring(0, prefix);
+ Term term0 = termList.get(0);
+ String currentPrefix = term0.getMD5().substring(0, prefix);
int i = 0;
- for (Term term : termSet) {
- String key = term.md5;
+ for (Term term : termList) {
+ String key = term.getMD5();
//create a list of the words to be added in the same
subindex
if (key.startsWith(currentPrefix)) {
i++;
@@ -265,11 +264,11 @@
match = p + 1;
int prefix = p + 1;
int i = 0;
- String str = list.get(i).md5;
+ String str = list.get(i).getMD5();
int index = 0;
while (i < list.size()) {
Term term = list.get(i);
- String key = term.md5;
+ String key = term.getMD5();
if ((key.substring(0, prefix)).equals(str.substring(0,
prefix))) {
i++;
} else {
@@ -296,7 +295,7 @@
* @throws Exception
*/
protected void generateXML(Config config, List<Term> list, int prefix)
throws TooBigIndexException, Exception {
- String p = list.get(0).md5.substring(0, prefix);
+ String p = list.get(0).getMD5().substring(0, prefix);
indices.add(p);
File outputFile = new File(config.getIndexDir() + "index_" + p
+ ".xml");
BufferedOutputStream fos = new BufferedOutputStream(new
FileOutputStream(outputFile));
@@ -343,21 +342,15 @@
for (int i = 0; i < list.size(); i++) {
Element wordElement =
xmlDoc.createElement("word");
Term term = list.get(i);
- wordElement.setAttribute("v", term.word);
+ wordElement.setAttribute("v", term.getWord());
- Query query = xmlSpider.db.query();
- query.constrain(TermPosition.class);
+ Set<Page> pages = term.getPages();
- query.descend("word").constrain(term.word);
- @SuppressWarnings("unchecked")
- ObjectSet<TermPosition> set = query.execute();
-
- for (TermPosition termPos : set) {
+ for (Page page : pages) {
+ TermPosition termPos =
page.getTermPosition(term);
+
synchronized (termPos) {
- Page page =
xmlSpider.getPageById(termPos.pageId);
-
synchronized (page) {
-
/*
* adding file
information uriElement - lists the id of the file
* containing a
particular word fileElement - lists the id,key,title of
@@ -365,10 +358,11 @@
*/
Element uriElement =
xmlDoc.createElement("file");
Element fileElement =
xmlDoc.createElement("file");
-
uriElement.setAttribute("id", Long.toString(page.id));
-
fileElement.setAttribute("id", Long.toString(page.id));
-
fileElement.setAttribute("key", page.uri);
-
fileElement.setAttribute("title", page.pageTitle != null ? page.pageTitle :
page.uri);
+
uriElement.setAttribute("id", Long.toString(page.getId()));
+
fileElement.setAttribute("id", Long.toString(page.getId()));
+
fileElement.setAttribute("key", page.getURI());
+
fileElement.setAttribute("title", page.getPageTitle() != null ?
page.getPageTitle() : page
+ .getURI());
/* Position by position
*/
int[] positions =
termPos.positions;
@@ -382,8 +376,8 @@
}
uriElement.appendChild(xmlDoc.createTextNode(positionList.toString()));
wordElement.appendChild(uriElement);
- if
(!fileid.contains(page.id)) {
-
fileid.add(page.id);
+ if
(!fileid.contains(page.getId())) {
+
fileid.add(page.getId());
filesElement.appendChild(fileElement);
}
}
Deleted: trunk/plugins/XMLSpider/MaxPageId.java
===================================================================
--- trunk/plugins/XMLSpider/MaxPageId.java 2008-12-29 13:03:39 UTC (rev
24820)
+++ trunk/plugins/XMLSpider/MaxPageId.java 2008-12-29 15:56:55 UTC (rev
24821)
@@ -1,19 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-class MaxPageId {
- volatile long v;
-
- MaxPageId() {
- }
-
- MaxPageId(long v) {
- this.v = v;
- }
-
- synchronized long incrementAndGet() {
- return ++v;
- }
-}
\ No newline at end of file
Deleted: trunk/plugins/XMLSpider/Page.java
===================================================================
--- trunk/plugins/XMLSpider/Page.java 2008-12-29 13:03:39 UTC (rev 24820)
+++ trunk/plugins/XMLSpider/Page.java 2008-12-29 15:56:55 UTC (rev 24821)
@@ -1,53 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-public class Page {
- /** Page Id */
- public long id;
- /** URI of the page */
- public String uri;
- /** Title */
- public String pageTitle;
- /** Status */
- public Status status;
- /** Last Change Time */
- public long lastChange;
- /** Comment, for debugging */
- public String comment;
-
- public Page() {} // for db4o callConstructors(true)
-
- public Page(long id, String uri, String comment) {
- this.id = id;
- this.uri = uri;
- this.comment = comment;
- status = Status.QUEUED;
- lastChange = System.currentTimeMillis();
- }
-
- @Override
- public int hashCode() {
- return (int) (id ^ (id >>> 32));
- }
-
- @Override
- public boolean equals(Object obj) {
- if (this == obj)
- return true;
- if (obj == null)
- return false;
- if (getClass() != obj.getClass())
- return false;
-
- return id == ((Page) obj).id;
- }
-
- @Override
- public String toString() {
- return "[PAGE: id=" + id + ", title=" + pageTitle + ", uri=" +
uri + ", status=" + status + ", comment="
- + comment
- + "]";
- }
-}
Deleted: trunk/plugins/XMLSpider/Status.java
===================================================================
--- trunk/plugins/XMLSpider/Status.java 2008-12-29 13:03:39 UTC (rev 24820)
+++ trunk/plugins/XMLSpider/Status.java 2008-12-29 15:56:55 UTC (rev 24821)
@@ -1,9 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-public enum Status {
- /** For simplicity, running is also mark as QUEUED */
- QUEUED, SUCCEEDED, FAILED
-}
\ No newline at end of file
Deleted: trunk/plugins/XMLSpider/Term.java
===================================================================
--- trunk/plugins/XMLSpider/Term.java 2008-12-29 13:03:39 UTC (rev 24820)
+++ trunk/plugins/XMLSpider/Term.java 2008-12-29 15:56:55 UTC (rev 24821)
@@ -1,57 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-import java.io.UnsupportedEncodingException;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-
-class Term {
- /** MD5 of the term */
- String md5;
- /** Term */
- String word;
-
- public Term(String word) {
- this.word = word;
- md5 = MD5(word);
- }
-
- public Term() {
- }
-
- /*
- * calculate the md5 for a given string
- */
- public static String MD5(String text) {
- try {
- MessageDigest md = MessageDigest.getInstance("MD5");
- byte[] md5hash = new byte[32];
- byte[] b = text.getBytes("UTF-8");
- md.update(b, 0, b.length);
- md5hash = md.digest();
- return convertToHex(md5hash);
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException("UTF-8 not supported", e);
- } catch (NoSuchAlgorithmException e) {
- throw new RuntimeException("MD5 not supported", e);
- }
- }
-
- public static String convertToHex(byte[] data) {
- StringBuilder buf = new StringBuilder();
- for (int i = 0; i < data.length; i++) {
- int halfbyte = (data[i] >>> 4) & 0x0F;
- int two_halfs = 0;
- do {
- if ((0 <= halfbyte) && (halfbyte <= 9))
- buf.append((char) ('0' + halfbyte));
- else
- buf.append((char) ('a' + (halfbyte -
10)));
- halfbyte = data[i] & 0x0F;
- } while (two_halfs++ < 1);
- }
- return buf.toString();
- }
-}
\ No newline at end of file
Deleted: trunk/plugins/XMLSpider/TermPosition.java
===================================================================
--- trunk/plugins/XMLSpider/TermPosition.java 2008-12-29 13:03:39 UTC (rev
24820)
+++ trunk/plugins/XMLSpider/TermPosition.java 2008-12-29 15:56:55 UTC (rev
24821)
@@ -1,16 +0,0 @@
-/**
- * @author j16sdiz (1024D/75494252)
- */
-package plugins.XMLSpider;
-
-class TermPosition {
- /** Term */
- String word;
- /** Page id */
- long pageId;
- /** Position List */
- int[] positions;
-
- public TermPosition() {
- }
-}
\ No newline at end of file
Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java 2008-12-29 13:03:39 UTC (rev
24820)
+++ trunk/plugins/XMLSpider/XMLSpider.java 2008-12-29 15:56:55 UTC (rev
24821)
@@ -13,7 +13,6 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
-import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -22,17 +21,15 @@
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import plugins.XMLSpider.db.Config;
+import plugins.XMLSpider.db.Page;
+import plugins.XMLSpider.db.PerstRoot;
+import plugins.XMLSpider.db.Status;
+import plugins.XMLSpider.db.Term;
+import plugins.XMLSpider.db.TermPosition;
+import plugins.XMLSpider.org.garret.perst.Storage;
+import plugins.XMLSpider.org.garret.perst.StorageFactory;
import plugins.XMLSpider.web.WebInterface;
-
-import com.db4o.Db4o;
-import com.db4o.ObjectContainer;
-import com.db4o.ObjectSet;
-import com.db4o.config.Configuration;
-import com.db4o.config.QueryEvaluationMode;
-import com.db4o.diagnostic.DiagnosticToConsole;
-import com.db4o.query.Query;
-import com.db4o.reflect.jdk.JdkReflector;
-
import freenet.client.ClientMetadata;
import freenet.client.FetchContext;
import freenet.client.FetchException;
@@ -73,29 +70,19 @@
*
*/
public class XMLSpider implements FredPlugin, FredPluginHTTP,
FredPluginThreadless, FredPluginVersioned, FredPluginL10n, USKCallback {
- private Config config;
-
public Config getConfig() {
// always return a clone, never allow changing directly
- return config.clone();
+ return root.getConfig().clone();
}
// Set config asynchronously
public void setConfig(Config config) {
callbackExecutor.execute(new SetConfigCallback(config));
}
-
- public synchronized long getNextPageId() {
- long x = maxPageId.incrementAndGet();
- db.store(maxPageId);
- return x;
- }
/** Document ID of fetching documents */
protected Map<Page, ClientGetter> runningFetch =
Collections.synchronizedMap(new HashMap<Page, ClientGetter>());
- protected MaxPageId maxPageId;
-
/**
* Lists the allowed mime types of the fetched page.
*/
@@ -120,41 +107,33 @@
* @param uri the new uri that needs to be fetched for further indexing
*/
public void queueURI(FreenetURI uri, String comment, boolean force) {
- String sURI = uri.toString();
- for (String ext : config.getBadlistedExtensions())
- if (sURI.endsWith(ext))
- return; // be smart
+ db.beginThreadTransaction(Storage.EXCLUSIVE_TRANSACTION);
+ try {
+ String sURI = uri.toString();
+ for (String ext :
root.getConfig().getBadlistedExtensions())
+ if (sURI.endsWith(ext))
+ return; // be smart
- if (uri.isUSK()) {
- if(uri.getSuggestedEdition() < 0)
- uri = uri.setSuggestedEdition((-1)*
uri.getSuggestedEdition());
- try{
- uri = ((USK.create(uri)).getSSK()).getURI();
-
(ctx.uskManager).subscribe(USK.create(uri),this, false, this);
+ if (uri.isUSK()) {
+ if (uri.getSuggestedEdition() < 0)
+ uri = uri.setSuggestedEdition((-1) *
uri.getSuggestedEdition());
+ try {
+ uri =
((USK.create(uri)).getSSK()).getURI();
+
(ctx.uskManager).subscribe(USK.create(uri), this, false, this);
+ } catch (Exception e) {
+ }
}
- catch(Exception e){}
- }
- synchronized (this) {
- Page page = getPageByURI(uri);
- if (page == null) {
- page = new Page(getNextPageId(),
uri.toString(), comment);
-
- db.store(page);
- } else if (force) {
- synchronized (page) {
- page.status = Status.QUEUED;
- page.lastChange =
System.currentTimeMillis();
-
- db.store(page);
- }
+ Page page = root.getPageByURI(uri, true, comment);
+ if (force && page.getStatus() != Status.QUEUED) {
+ page.setStatus(Status.QUEUED);
+ page.setComment(comment);
}
+ } finally {
+ db.endThreadTransaction();
}
}
- protected List<Page> queuedRequestCache = new ArrayList<Page>();
- protected long lastPrefetchedTimeStamp = -1;
-
public void startSomeRequests() {
ArrayList<ClientGetter> toStart = null;
synchronized (this) {
@@ -163,62 +142,29 @@
synchronized (runningFetch) {
int running = runningFetch.size();
- if (running >= config.getMaxParallelRequests())
+ if (running >=
root.getConfig().getMaxParallelRequests())
return;
- // prefetch 2 * config.getMaxParallelRequests()
entries
- if (queuedRequestCache.isEmpty()) {
- Query query = db.query();
- query.constrain(Page.class);
-
query.descend("status").constrain(Status.QUEUED);
- if (lastPrefetchedTimeStamp != -1) {
-
query.descend("lastChange").constrain(lastPrefetchedTimeStamp - 1000).greater();
-
query.descend("lastChange").constrain(lastPrefetchedTimeStamp + 1800 *
1000).smaller();
- }
-
query.descend("lastChange").orderAscending();
- @SuppressWarnings("unchecked")
- ObjectSet<Page> queuedSet =
query.execute();
-
-
System.out.println("lastPrefetchedTimeStamp=" + lastPrefetchedTimeStamp + ",
BLAR = "
- + queuedSet.size());
- if (lastPrefetchedTimeStamp != -1 &&
queuedSet.isEmpty()) {
- lastPrefetchedTimeStamp = -1;
- startSomeRequests();
- return;
- }
-
- while (queuedRequestCache.size() <
config.getMaxParallelRequests() * 2 && queuedSet.hasNext()) {
- Page page = queuedSet.next();
- assert page.status ==
Status.QUEUED;
- if
(!runningFetch.containsKey(page)) {
-
queuedRequestCache.add(page);
-
- if (page.lastChange >
lastPrefetchedTimeStamp)
-
lastPrefetchedTimeStamp = page.lastChange;
- }
- }
- }
-
// perpare to start
- toStart = new
ArrayList<ClientGetter>(config.getMaxParallelRequests() - running);
- Iterator<Page> it =
queuedRequestCache.iterator();
+ toStart = new
ArrayList<ClientGetter>(root.getConfig().getMaxParallelRequests() - running);
+ synchronized (root) {
+ Iterator<Page> it =
root.getPages(Status.QUEUED);
- while (running + toStart.size() <
config.getMaxParallelRequests() && it.hasNext()) {
- Page page = it.next();
- it.remove();
+ while (running + toStart.size() <
root.getConfig().getMaxParallelRequests() && it.hasNext()) {
+ Page page = it.next();
+ if
(runningFetch.containsKey(page))
+ continue;
- try {
- ClientGetter getter =
makeGetter(page);
+ try {
+ ClientGetter getter =
makeGetter(page);
- Logger.minor(this, "Starting "
+ getter + " " + page);
- toStart.add(getter);
- runningFetch.put(page, getter);
- } catch (MalformedURLException e) {
- Logger.error(this,
"IMPOSSIBLE-Malformed URI: " + page, e);
-
- page.status = Status.FAILED;
- page.lastChange =
System.currentTimeMillis();
- db.store(page);
+ Logger.minor(this,
"Starting " + getter + " " + page);
+ toStart.add(getter);
+ runningFetch.put(page,
getter);
+ } catch (MalformedURLException
e) {
+ Logger.error(this,
"IMPOSSIBLE-Malformed URI: " + page, e);
+
page.setStatus(Status.FAILED);
+ }
}
}
}
@@ -284,9 +230,8 @@
private ClientGetter makeGetter(Page page) throws MalformedURLException
{
ClientGetter getter = new ClientGetter(new
ClientGetterCallback(page),
core.requestStarters.chkFetchScheduler,
- core.requestStarters.sskFetchScheduler, new
FreenetURI(page.uri), ctx, config.getRequestPriority(),
- this,
- null, null);
+ core.requestStarters.sskFetchScheduler, new
FreenetURI(page.getURI()), ctx,
+ getPollingPriorityProgress(), this, null, null);
return getter;
}
@@ -363,10 +308,8 @@
}
public void run() {
- synchronized (this) {
- XMLSpider.this.config.setValue(config);
- db.store(XMLSpider.this.config);
- db.commit();
+ synchronized (root) {
+ root.getConfig().setValue(config);
}
}
}
@@ -421,7 +364,7 @@
}
FreenetURI uri = state.getURI();
-
+ db.beginThreadTransaction(Storage.READ_WRITE_TRANSACTION);
try {
ClientMetadata cm = result.getMetadata();
Bucket data = result.asBucket();
@@ -434,40 +377,31 @@
* provided).
*/
PageCallBack pageCallBack = new PageCallBack(page);
- Logger.minor(this, "Successful: " + uri + " : " +
page.id);
+ Logger.minor(this, "Successful: " + uri + " : " +
page.getId());
try {
ContentFilter.filter(data, new
NullBucketFactory(), mimeType, uri.toURI("http://127.0.0.1:8888/"),
pageCallBack);
- pageCallBack.store();
+ page.setStatus(Status.SUCCEEDED);
+ db.endThreadTransaction();
- synchronized (this) {
- page.status = Status.SUCCEEDED;
- page.lastChange =
System.currentTimeMillis();
- db.store(page);
- db.commit();
- }
- Logger.minor(this, "Filtered " + uri + " : " +
page.id);
+ Logger.minor(this, "Filtered " + uri + " : " +
page.getId());
} catch (UnsafeContentTypeException e) {
- Logger.minor(this, "UnsafeContentTypeException
" + uri + " : " + page.id, e);
- synchronized (this) {
- page.status = Status.SUCCEEDED;
- page.lastChange =
System.currentTimeMillis();
- db.store(page);
- db.commit();
- }
+ Logger.minor(this, "UnsafeContentTypeException
" + uri + " : " + page.getId(), e);
+ page.setStatus(Status.SUCCEEDED);
+ db.endThreadTransaction();
return; // Ignore
} catch (IOException e) {
- db.rollback();
+ db.rollbackThreadTransaction();
Logger.error(this, "Bucket error?: " + e, e);
} catch (URISyntaxException e) {
- db.rollback();
+ db.rollbackThreadTransaction();
Logger.error(this, "Internal error: " + e, e);
} finally {
data.free();
}
} catch (RuntimeException e) {
- db.rollback();
+ db.rollbackThreadTransaction();
throw e;
} finally {
synchronized (this) {
@@ -485,28 +419,21 @@
if (stopped)
return;
+
db.beginThreadTransaction(Storage.EXCLUSIVE_TRANSACTION);
synchronized (page) {
if (fe.newURI != null) {
// redirect, mark as succeeded
queueURI(fe.newURI, "redirect from " +
state.getURI(), false);
-
- page.status = Status.SUCCEEDED;
- page.lastChange =
System.currentTimeMillis();
- db.store(page);
+ page.setStatus(Status.SUCCEEDED);
} else if (fe.isFatal()) {
// too many tries or fatal, mark as
failed
- page.status = Status.FAILED;
- page.lastChange =
System.currentTimeMillis();
- db.store(page);
+ page.setStatus(Status.FAILED);
} else {
// requeue at back
- page.status = Status.QUEUED;
- page.lastChange =
System.currentTimeMillis();
-
- db.store(page);
+ page.setStatus(Status.QUEUED);
}
}
- db.commit();
+ db.endThreadTransaction();
runningFetch.remove(page);
}
@@ -542,13 +469,8 @@
callbackExecutor.shutdownNow();
}
try { callbackExecutor.awaitTermination(30, TimeUnit.SECONDS);
} catch (InterruptedException e) {}
- try { db.rollback(); } catch (Exception e) {}
try { db.close(); } catch (Exception e) {}
- synchronized (this) {
- termCache.clear();
- }
-
Logger.normal(this, "XMLSpider terminated");
}
@@ -582,47 +504,9 @@
stopped = false;
- // Initial DB4O
- db = initDB4O();
-
- // Find max Page ID
- {
- Query query = db.query();
- query.constrain(MaxPageId.class);
- @SuppressWarnings("unchecked")
- ObjectSet<MaxPageId> set = query.execute();
-
- if (set.hasNext())
- maxPageId = set.next();
- else {
- query = db.query();
- query.constrain(Page.class);
- query.descend("id").orderDescending();
- @SuppressWarnings("unchecked")
- ObjectSet<Page> set2 = query.execute();
- if (set2.hasNext())
- maxPageId = new
MaxPageId(set2.next().id);
- else
- maxPageId = new MaxPageId(0);
- }
- }
+ // Initial Database
+ db = initDB();
- // Load Config
- {
- Query query = db.query();
- query.constrain(Config.class);
- @SuppressWarnings("unchecked")
- ObjectSet<Config> set = query.execute();
-
- if (set.hasNext())
- config = set.next();
- else {
- config = new Config(true);
- db.store(config);
- db.commit();
- }
- }
-
indexWriter = new IndexWriter(this);
webInterface = new WebInterface(this);
@@ -672,7 +556,7 @@
if (stopped)
throw new RuntimeException("plugin stopping");
Logger.debug(this, "foundURI " + uri + " on " + page);
- queueURI(uri, "Added from " + page.uri, false);
+ queueURI(uri, "Added from " + page.getURI(), false);
}
protected Integer lastPosition = null;
@@ -681,13 +565,13 @@
if (stopped)
throw new RuntimeException("plugin stopping");
- Logger.debug(this, "onText on " + page.id + " (" +
baseURI + ")");
+ Logger.debug(this, "onText on " + page.getId() + " (" +
baseURI + ")");
if ("title".equalsIgnoreCase(type) && (s != null) &&
(s.length() != 0) && (s.indexOf('\n') < 0)) {
/*
* title of the page
*/
- page.pageTitle = s;
+ page.setPageTitle(s);
type = "title";
}
else type = null;
@@ -722,49 +606,9 @@
if (word.length() < 3)
return;
Term term = getTermByWord(word, true);
- TermPosition termPos = getTermPosition(term);
-
- synchronized (termPos) {
- int[] newPositions = new
int[termPos.positions.length + 1];
- System.arraycopy(termPos.positions, 0,
newPositions, 0, termPos.positions.length);
- newPositions[termPos.positions.length] =
position;
-
- termPos.positions = newPositions;
- }
+ TermPosition termPos = page.getTermPosition(term);
+ termPos.addPositions(position);
}
-
- protected Map<Term, TermPosition> termPosCache = new
HashMap<Term, TermPosition>();
-
- public void store() {
- // Delete existing TermPosition
- Query query = db.query();
- query.constrain(TermPosition.class);
- query.descend("pageId").constrain(page.id);
- @SuppressWarnings("unchecked")
- ObjectSet<TermPosition> set = query.execute();
- for (TermPosition tp : set) {
- assert tp.pageId == page.id;
- db.delete(tp);
- }
-
- for (TermPosition tp : termPosCache.values())
- db.store(tp);
- termPosCache.clear();
- }
-
- protected TermPosition getTermPosition(Term term) {
- TermPosition cachedTermPos = termPosCache.get(term);
- if (cachedTermPos != null)
- return cachedTermPos;
-
- cachedTermPos = new TermPosition();
- cachedTermPos.word = term.word;
- cachedTermPos.pageId = page.id;
- cachedTermPos.positions = new int[0];
-
- termPosCache.put(term, cachedTermPos);
- return cachedTermPos;
- }
}
public void onFoundEdition(long l, USK key){
@@ -782,145 +626,53 @@
}
public short getPollingPriorityNormal() {
- return (short) Math.min(RequestStarter.MINIMUM_PRIORITY_CLASS,
config.getRequestPriority() + 1);
+ return (short) Math.min(RequestStarter.MINIMUM_PRIORITY_CLASS,
root.getConfig().getRequestPriority() + 1);
}
public short getPollingPriorityProgress() {
- return config.getRequestPriority();
+ return root.getConfig().getRequestPriority();
}
- protected ObjectContainer db;
+ protected Storage db;
+ protected PerstRoot root;
/**
- * Initializes DB4O.
- *
- * @return db4o's connector
+ * Initializes Database
*/
- private ObjectContainer initDB4O() {
- Configuration cfg = Db4o.newConfiguration();
- cfg.reflectWith(new JdkReflector(getClass().getClassLoader()));
+ private Storage initDB() {
+ Storage db = StorageFactory.getInstance().createStorage();
+ db.setProperty("perst.object.cache.kind", "soft");
+ db.setProperty("perst.gc.threshold", 16384);
+ db.setProperty("perst.alternative.btree", true);
+ db.setProperty("perst.string.encoding", "UTF-8");
+ db.setProperty("perst.concurrent.iterator", true);
- //- Page
- cfg.objectClass(Page.class).objectField("id").indexed(true);
- cfg.objectClass(Page.class).objectField("uri").indexed(true);
- cfg.objectClass(Page.class).objectField("status").indexed(true);
-
cfg.objectClass(Page.class).objectField("lastChange").indexed(true);
+ db.open("XMLSpider-" + version + ".dbs");
- cfg.objectClass(Page.class).callConstructor(true);
+ root = (PerstRoot) db.getRoot();
+ if (root == null)
+ root = PerstRoot.createRoot(db);
- //- Term
- cfg.objectClass(Term.class).objectField("md5").indexed(true);
- cfg.objectClass(Term.class).objectField("word").indexed(true);
-
- cfg.objectClass(Term.class).callConstructor(true);
-
- //- TermPosition
-
cfg.objectClass(TermPosition.class).objectField("pageId").indexed(true);
-
cfg.objectClass(TermPosition.class).objectField("word").indexed(true);
-
- cfg.objectClass(TermPosition.class).callConstructor(true);
-
- //- Other
- cfg.objectClass(MaxPageId.class).callConstructor(true);
- cfg.objectClass(Config.class).callConstructor(true);
-
- cfg.activationDepth(3);
- cfg.updateDepth(3);
- cfg.automaticShutDown(false);
- cfg.queries().evaluationMode(QueryEvaluationMode.LAZY);
- cfg.diagnostic().addListener(new DiagnosticToConsole());
-
- ObjectContainer oc = Db4o.openFile(cfg, "XMLSpider-" + version
+ ".db4o");
-
- return oc;
+ return db;
}
- public ObjectContainer getDB() {
- return db;
+ public PerstRoot getDbRoot() {
+ return root;
}
protected Page getPageByURI(FreenetURI uri) {
- Query query = db.query();
- query.constrain(Page.class);
- query.descend("uri").constrain(uri.toString());
- @SuppressWarnings("unchecked")
- ObjectSet<Page> set = query.execute();
-
- if (set.hasNext()) {
- Page page = set.next();
- assert page.uri.equals(uri.toString());
- return page;
- } else
- return null;
+ return root.getPageByURI(uri, false, null);
}
protected Page getPageById(long id) {
- Query query = db.query();
- query.constrain(Page.class);
- query.descend("id").constrain(id);
- @SuppressWarnings("unchecked")
- ObjectSet<Page> set = query.execute();
-
- if (set.hasNext()) {
- Page page = set.next();
- assert page.id == id;
- return page;
- } else
- return null;
+ return root.getPageById(id);
}
- protected Term getTermByMd5(String md5) {
- Query query = db.query();
- query.constrain(Term.class);
- query.descend("md5").constrain(md5);
- @SuppressWarnings("unchecked")
- ObjectSet<Term> set = query.execute();
-
- if (set.hasNext()) {
- Term term = set.next();
- assert md5.equals(term.md5);
- return term;
- } else
- return null;
- }
-
- @SuppressWarnings("serial")
- protected Map<String, Term> termCache = new LinkedHashMap<String,
Term>() {
- protected boolean removeEldestEntry(Map.Entry<String, Term>
eldest) {
- return size() > 1024;
- }
- };
-
// language for I10N
private LANGUAGE language;
protected Term getTermByWord(String word, boolean create) {
- synchronized (this) {
- Term cachedTerm = termCache.get(word);
- if (cachedTerm != null)
- return cachedTerm;
-
- Query query = db.query();
- query.constrain(Term.class);
- query.descend("word").constrain(word);
- @SuppressWarnings("unchecked")
- ObjectSet<Term> set = query.execute();
-
- if (set.hasNext()) {
- cachedTerm = set.next();
- assert word.equals(cachedTerm.word);
- termCache.put(word, cachedTerm);
-
- return cachedTerm;
- } else if (create) {
- cachedTerm = new Term(word);
- termCache.put(word, cachedTerm);
- db.store(cachedTerm);
-
- return cachedTerm;
- } else
- return null;
- }
+ return root.getTermByWord(word, create);
}
public String getString(String key) {
Copied: trunk/plugins/XMLSpider/db/Config.java (from rev 24819,
trunk/plugins/XMLSpider/Config.java)
===================================================================
--- trunk/plugins/XMLSpider/db/Config.java (rev 0)
+++ trunk/plugins/XMLSpider/db/Config.java 2008-12-29 15:56:55 UTC (rev
24821)
@@ -0,0 +1,230 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider.db;
+
+import plugins.XMLSpider.org.garret.perst.Persistent;
+import plugins.XMLSpider.org.garret.perst.Storage;
+import freenet.node.RequestStarter;
+
+/**
+ * XMLSpider settings kept in the Perst database: index output parameters,
+ * web UI limits and request options.
+ * <p>
+ * A persistent instance is updated as a whole through
+ * {@link #setValue(Config)}. The individual setters assert that the instance
+ * is not persistent, so they are meant for detached copies such as the one
+ * returned by {@link #clone()}.
+ */
+public class Config extends Persistent implements Cloneable {
+    /**
+     * Directory where the generated indices are stored
+     */
+    private String indexDir;
+    private int indexMaxEntries;
+    private long indexSubindexMaxSize;
+
+    private String indexTitle;
+    private String indexOwner;
+    private String indexOwnerEmail;
+
+    private int maxShownURIs;
+    private int maxParallelRequests;
+    private String[] badlistedExtensions;
+    private short requestPriority;
+
+    /** Creates an instance without assigning any setting. */
+    public Config() {
+    }
+
+    /**
+     * Creates an instance holding the default settings and makes it
+     * persistent in {@code storage}.
+     *
+     * @param storage
+     *            storage in which the new object is made persistent
+     */
+    public Config(Storage storage) {
+        indexDir = "myindex7/";
+        indexMaxEntries = 2000;
+        indexSubindexMaxSize = 4 * 1024 * 1024;
+
+        indexTitle = "XMLSpider index";
+        indexOwner = "Freenet";
+        indexOwnerEmail = "(nil)";
+
+        maxShownURIs = 15;
+
+        maxParallelRequests = 100;
+
+        // every extension is listed exactly once
+        badlistedExtensions = new String[] { //
+                ".ico", ".bmp", ".png", ".jpg", ".gif", // image
+                ".zip", ".jar", ".gz", ".bz2", ".rar", // archive
+                ".7z", ".arj", ".rpm", ".deb", //
+                ".xpi", ".ace", ".cab", ".lza", ".lzh", //
+                ".exe", ".iso", // binary
+                ".mpg", ".ogg", ".mp3", ".avi", // media
+                ".css", ".sig" // other
+        };
+
+        requestPriority = RequestStarter.IMMEDIATE_SPLITFILE_PRIORITY_CLASS;
+
+        storage.makePersistent(this);
+    }
+
+    /**
+     * Copies every setting from {@code config} into this object and, if this
+     * object is persistent, marks it as modified.
+     * <p>
+     * The extension array is copied rather than shared, so this object does
+     * not end up referring to the array held by {@code config}.
+     *
+     * @param config
+     *            configuration to copy the values from
+     */
+    public synchronized void setValue(Config config) {
+        synchronized (config) {
+            indexDir = config.indexDir;
+            indexMaxEntries = config.indexMaxEntries;
+            indexSubindexMaxSize = config.indexSubindexMaxSize;
+
+            indexTitle = config.indexTitle;
+            indexOwner = config.indexOwner;
+            indexOwnerEmail = config.indexOwnerEmail;
+
+            maxShownURIs = config.maxShownURIs;
+
+            maxParallelRequests = config.maxParallelRequests;
+
+            // the source may not have an extension list yet
+            String[] extensions = config.badlistedExtensions;
+            badlistedExtensions = extensions == null ? null : extensions.clone();
+
+            requestPriority = config.requestPriority;
+        }
+
+        if (isPersistent())
+            modify();
+    }
+
+    /**
+     * Returns a new {@code Config}, created with the no-argument constructor,
+     * that carries the same values as this one.
+     */
+    public synchronized Config clone() {
+        Config newConfig = new Config();
+        newConfig.setValue(this);
+        return newConfig;
+    }
+
+    /** Sets the index directory; asserts that this instance is not persistent. */
+    public synchronized void setIndexDir(String indexDir) {
+        assert !isPersistent();
+        this.indexDir = indexDir;
+    }
+
+    /** @return directory where the generated indices are stored */
+    public synchronized String getIndexDir() {
+        return indexDir;
+    }
+
+    /** Sets {@code indexMaxEntries}; asserts that this instance is not persistent. */
+    public synchronized void setIndexMaxEntries(int indexMaxEntries) {
+        assert !isPersistent();
+        this.indexMaxEntries = indexMaxEntries;
+    }
+
+    /** @return the current {@code indexMaxEntries} value */
+    public synchronized int getIndexMaxEntries() {
+        return indexMaxEntries;
+    }
+
+    /** Sets {@code indexSubindexMaxSize}; asserts that this instance is not persistent. */
+    public synchronized void setIndexSubindexMaxSize(long indexSubindexMaxSize) {
+        assert !isPersistent();
+        this.indexSubindexMaxSize = indexSubindexMaxSize;
+    }
+
+    /** @return the current {@code indexSubindexMaxSize} value */
+    public synchronized long getIndexSubindexMaxSize() {
+        return indexSubindexMaxSize;
+    }
+
+    /** Sets the index title; asserts that this instance is not persistent. */
+    public synchronized void setIndexTitle(String indexTitle) {
+        assert !isPersistent();
+        this.indexTitle = indexTitle;
+    }
+
+    /** @return the index title */
+    public synchronized String getIndexTitle() {
+        return indexTitle;
+    }
+
+    /** Sets the index owner; asserts that this instance is not persistent. */
+    public synchronized void setIndexOwner(String indexOwner) {
+        assert !isPersistent();
+        this.indexOwner = indexOwner;
+    }
+
+    /** @return the index owner */
+    public synchronized String getIndexOwner() {
+        return indexOwner;
+    }
+
+    /** Sets the index owner's e-mail; asserts that this instance is not persistent. */
+    public synchronized void setIndexOwnerEmail(String indexOwnerEmail) {
+        assert !isPersistent();
+        this.indexOwnerEmail = indexOwnerEmail;
+    }
+
+    /** @return the index owner's e-mail */
+    public synchronized String getIndexOwnerEmail() {
+        return indexOwnerEmail;
+    }
+
+    /** Sets {@code maxShownURIs}; asserts that this instance is not persistent. */
+    public synchronized void setMaxShownURIs(int maxShownURIs) {
+        assert !isPersistent();
+        this.maxShownURIs = maxShownURIs;
+    }
+
+    /** @return the current {@code maxShownURIs} value */
+    public synchronized int getMaxShownURIs() {
+        return maxShownURIs;
+    }
+
+    /** Sets {@code maxParallelRequests}; asserts that this instance is not persistent. */
+    public synchronized void setMaxParallelRequests(int maxParallelRequests) {
+        assert !isPersistent();
+        this.maxParallelRequests = maxParallelRequests;
+    }
+
+    /** @return the current {@code maxParallelRequests} value */
+    public synchronized int getMaxParallelRequests() {
+        return maxParallelRequests;
+    }
+
+    /** Stores the given array itself (no copy); asserts that this instance is not persistent. */
+    public synchronized void setBadlistedExtensions(String[] badlistedExtensions) {
+        assert !isPersistent();
+        this.badlistedExtensions = badlistedExtensions;
+    }
+
+    /** @return the internal extension array itself, not a copy */
+    public synchronized String[] getBadlistedExtensions() {
+        return badlistedExtensions;
+    }
+
+    /** Sets the request priority; asserts that this instance is not persistent. */
+    public synchronized void setRequestPriority(short requestPriority) {
+        assert !isPersistent();
+        this.requestPriority = requestPriority;
+    }
+
+    /** @return the request priority */
+    public synchronized short getRequestPriority() {
+        return requestPriority;
+    }
+}
\ No newline at end of file
Added: trunk/plugins/XMLSpider/db/Page.java
===================================================================
--- trunk/plugins/XMLSpider/db/Page.java (rev 0)
+++ trunk/plugins/XMLSpider/db/Page.java 2008-12-29 15:56:55 UTC (rev
24821)
@@ -0,0 +1,181 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider.db;
+
+import plugins.XMLSpider.org.garret.perst.IPersistentMap;
+import plugins.XMLSpider.org.garret.perst.Persistent;
+import plugins.XMLSpider.org.garret.perst.SortedCollection;
+import plugins.XMLSpider.org.garret.perst.Storage;
+
+/**
+ * A page known to the spider, stored in the Perst database.
+ * <p>
+ * The setters remove the page from the {@link PerstRoot} collection of its
+ * current status before the change and add it to the collection of its
+ * (possibly new) status afterwards, refreshing {@code lastChange} in between.
+ */
+public class Page extends Persistent implements Comparable<Page> {
+    /** Page Id */
+    protected long id;
+    /** URI of the page */
+    protected String uri;
+    /** Title */
+    protected String pageTitle;
+    /** Status */
+    protected Status status;
+    /** Last Change Time */
+    protected long lastChange;
+    /** Comment, for debugging */
+    protected String comment;
+    /** term.md5 -> TermPosition */
+    protected IPersistentMap<String, TermPosition> termPosMap;
+
+    /** Creates an instance without assigning any field. */
+    public Page() {
+    }
+
+    /**
+     * Creates a page in status {@link Status#QUEUED}, stamps it with the
+     * current time and makes it persistent in {@code storage}.
+     */
+    Page(String uri, String comment, Storage storage) {
+        this.uri = uri;
+        this.comment = comment;
+        this.status = Status.QUEUED;
+        this.lastChange = System.currentTimeMillis();
+
+        storage.makePersistent(this);
+    }
+
+    /** Changes the status and moves the page to the matching status collection. */
+    public synchronized void setStatus(Status status) {
+        preModify();
+        this.status = status;
+        postModify();
+    }
+
+    public Status getStatus() {
+        return status;
+    }
+
+    /** Changes the debugging comment and refreshes the last-change time. */
+    public synchronized void setComment(String comment) {
+        preModify();
+        this.comment = comment;
+        postModify();
+    }
+
+    public String getComment() {
+        return comment;
+    }
+
+    public String getURI() {
+        return uri;
+    }
+
+    public long getId() {
+        return id;
+    }
+
+    /**
+     * Changes the title and refreshes the last-change time. Synchronized like
+     * the other setters, because it re-files the page the same way they do.
+     */
+    public synchronized void setPageTitle(String pageTitle) {
+        preModify();
+        this.pageTitle = pageTitle;
+        postModify();
+    }
+
+    public String getPageTitle() {
+        return pageTitle;
+    }
+
+    /**
+     * Returns the position list of {@code term} on this page, creating an
+     * empty one on first use.
+     * <p>
+     * A newly created {@link TermPosition} is stored in the map under the
+     * term's MD5 and this page is added to the term's page set, so a later
+     * call for the same term finds the same object again.
+     */
+    public synchronized TermPosition getTermPosition(Term term) {
+        if (termPosMap == null)
+            termPosMap = getStorage().createMap(String.class);
+
+        TermPosition tp = termPosMap.get(term.md5);
+        if (tp == null) {
+            tp = new TermPosition(getStorage());
+            // register the new entry; otherwise every call would hand out a
+            // fresh, empty position list and recorded positions would be lost
+            termPosMap.put(term.md5, tp);
+            term.pageSet.add(this);
+        }
+
+        return tp;
+    }
+
+    @Override
+    public int hashCode() {
+        return (int) (id ^ (id >>> 32));
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+
+        return id == ((Page) obj).id;
+    }
+
+    @Override
+    public String toString() {
+        return "[PAGE: id=" + id + ", title=" + pageTitle + ", uri=" + uri + ", status=" + status + ", comment="
+                + comment
+                + "]";
+    }
+
+    /** Orders pages by ascending id. */
+    public int compareTo(Page o) {
+        // compare the primitive ids directly; no boxing needed
+        return id < o.id ? -1 : (id > o.id ? 1 : 0);
+    }
+
+    /**
+     * Takes this page out of the collection of its current status. PerstRoot
+     * orders those collections by lastChange, so this must run before that
+     * field is refreshed. Does nothing when the page has no storage.
+     */
+    private void preModify() {
+        Storage storage = getStorage();
+
+        if (storage != null) {
+            PerstRoot root = (PerstRoot) storage.getRoot();
+            SortedCollection<Page> coll = root.getPageCollection(status);
+            coll.remove(this);
+        }
+    }
+
+    /**
+     * Refreshes the last-change time, marks the page as modified and files it
+     * in the collection of its (possibly new) status.
+     */
+    private void postModify() {
+        lastChange = System.currentTimeMillis();
+
+        modify();
+
+        Storage storage = getStorage();
+
+        if (storage != null) {
+            PerstRoot root = (PerstRoot) storage.getRoot();
+            SortedCollection<Page> coll = root.getPageCollection(status);
+            coll.add(this);
+        }
+    }
+}
Added: trunk/plugins/XMLSpider/db/PageTimeStampComparator.java
===================================================================
--- trunk/plugins/XMLSpider/db/PageTimeStampComparator.java
(rev 0)
+++ trunk/plugins/XMLSpider/db/PageTimeStampComparator.java 2008-12-29
15:56:55 UTC (rev 24821)
@@ -0,0 +1,30 @@
+package plugins.XMLSpider.db;
+
+import plugins.XMLSpider.org.garret.perst.PersistentComparator;
+
+/**
+ * Orders {@link Page} objects by ascending {@code lastChange} time stamp.
+ */
+final class PageTimeStampComparator extends PersistentComparator<Page> {
+    /**
+     * Compares a stored page with a search key. A key that is itself a
+     * {@link Page} is compared by time stamp; any other key compares as equal.
+     */
+    @Override
+    public int compareMemberWithKey(Page p1, Object key) {
+        if (!(key instanceof Page))
+            return 0;
+        return compareMembers(p1, (Page) key);
+    }
+
+    /**
+     * @return -1, 0 or 1 when {@code p1} changed earlier than, at the same
+     *         time as, or later than {@code p2}
+     */
+    @Override
+    public int compareMembers(Page p1, Page p2) {
+        long first = p1.lastChange;
+        long second = p2.lastChange;
+        return first < second ? -1 : (first > second ? 1 : 0);
+    }
+}
\ No newline at end of file
Added: trunk/plugins/XMLSpider/db/PerstRoot.java
===================================================================
--- trunk/plugins/XMLSpider/db/PerstRoot.java (rev 0)
+++ trunk/plugins/XMLSpider/db/PerstRoot.java 2008-12-29 15:56:55 UTC (rev
24821)
@@ -0,0 +1,158 @@
+package plugins.XMLSpider.db;
+
+import java.util.Iterator;
+import java.util.List;
+
+import plugins.XMLSpider.org.garret.perst.FieldIndex;
+import plugins.XMLSpider.org.garret.perst.Key;
+import plugins.XMLSpider.org.garret.perst.Persistent;
+import plugins.XMLSpider.org.garret.perst.SortedCollection;
+import plugins.XMLSpider.org.garret.perst.Storage;
+import freenet.keys.FreenetURI;
+
+/**
+ * Root object of the Perst database: holds the term and page indexes, one
+ * time-ordered page collection per {@link Status}, and the {@link Config}.
+ */
+public class PerstRoot extends Persistent {
+    protected FieldIndex<Term> md5Term;
+    protected FieldIndex<Term> wordTerm;
+
+    protected FieldIndex<Page> idPage;
+    protected FieldIndex<Page> uriPage;
+    protected SortedCollection<Page> queuedPages;
+    protected SortedCollection<Page> failedPages;
+    protected SortedCollection<Page> succeededPages;
+
+    private Config config;
+
+    /** Creates an instance without any index or collection. */
+    public PerstRoot() {
+    }
+
+    /**
+     * Builds a root with fresh indexes, collections and a default
+     * {@link Config}, and installs it as the root of {@code storage}.
+     *
+     * @return the newly installed root
+     */
+    public static PerstRoot createRoot(Storage storage) {
+        PerstRoot root = new PerstRoot();
+
+        root.md5Term = storage.createFieldIndex(Term.class, "md5", true);
+        root.wordTerm = storage.createFieldIndex(Term.class, "word", true);
+
+        root.idPage = storage.createFieldIndex(Page.class, "id", true);
+        root.uriPage = storage.createFieldIndex(Page.class, "uri", true);
+        root.queuedPages = storage.<Page> createSortedCollection(new PageTimeStampComparator(), false);
+        root.failedPages = storage.<Page> createSortedCollection(new PageTimeStampComparator(), false);
+        root.succeededPages = storage.<Page> createSortedCollection(new PageTimeStampComparator(), false);
+
+        root.config = new Config(storage);
+
+        storage.setRoot(root);
+
+        return root;
+    }
+
+    /**
+     * Looks up the term for {@code word}.
+     *
+     * @param create
+     *            when {@code true} and no term exists, a new one is created
+     *            and added to both term indexes
+     * @return the term, or {@code null} if it is absent and {@code create}
+     *         is {@code false}
+     */
+    public synchronized Term getTermByWord(String word, boolean create) {
+        Term term = wordTerm.get(new Key(word));
+
+        if (create && term == null) {
+            term = new Term(word, getStorage());
+            md5Term.add(term);
+            wordTerm.add(term);
+        }
+
+        return term;
+    }
+
+    public synchronized Iterator<Term> getTermIterator() {
+        return md5Term.iterator();
+    }
+
+    public synchronized List<Term> getTermList() {
+        return md5Term.getList(null, null);
+    }
+
+    public synchronized int getTermCount() {
+        return md5Term.size();
+    }
+
+    /**
+     * Looks up the page for {@code uri}.
+     *
+     * @param create
+     *            when {@code true} and no page exists, a new one is created,
+     *            added to both page indexes and to the queued collection
+     * @param comment
+     *            comment given to a newly created page
+     * @return the page, or {@code null} if it is absent and {@code create}
+     *         is {@code false}
+     */
+    public synchronized Page getPageByURI(FreenetURI uri, boolean create, String comment) {
+        Page page = uriPage.get(new Key(uri.toString()));
+
+        if (create && page == null) {
+            page = new Page(uri.toString(), comment, getStorage());
+
+            idPage.append(page);
+            uriPage.add(page);
+            queuedPages.add(page);
+        }
+
+        return page;
+    }
+
+    /**
+     * Looks up a page by id. Synchronized like the other index accessors,
+     * since {@link #getPageByURI} appends to the same index under this lock.
+     */
+    public synchronized Page getPageById(long id) {
+        return idPage.get(id);
+    }
+
+    /**
+     * @return the collection holding the pages of the given status, or
+     *         {@code null} for a status without a collection
+     */
+    SortedCollection<Page> getPageCollection(Status status) {
+        switch (status) {
+        case FAILED:
+            return failedPages;
+        case QUEUED:
+            return queuedPages;
+        case SUCCEEDED:
+            return succeededPages;
+        default:
+            return null;
+        }
+    }
+
+    public synchronized Iterator<Page> getPages(Status status) {
+        return getPageCollection(status).iterator();
+    }
+
+    public synchronized int getPageCount(Status status) {
+        return getPageCollection(status).size();
+    }
+
+    /** Replaces the stored configuration and marks the root as modified. */
+    public void setConfig(Config config) {
+        this.config = config;
+        modify();
+    }
+
+    public Config getConfig() {
+        return config;
+    }
+}
Copied: trunk/plugins/XMLSpider/db/Status.java (from rev 24819,
trunk/plugins/XMLSpider/Status.java)
===================================================================
--- trunk/plugins/XMLSpider/db/Status.java (rev 0)
+++ trunk/plugins/XMLSpider/db/Status.java 2008-12-29 15:56:55 UTC (rev
24821)
@@ -0,0 +1,9 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider.db;
+
+public enum Status {
+ /** For simplicity, a running page is also marked as QUEUED */
+ QUEUED, SUCCEEDED, FAILED
+}
\ No newline at end of file
Added: trunk/plugins/XMLSpider/db/Term.java
===================================================================
--- trunk/plugins/XMLSpider/db/Term.java (rev 0)
+++ trunk/plugins/XMLSpider/db/Term.java 2008-12-29 15:56:55 UTC (rev
24821)
@@ -0,0 +1,113 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider.db;
+
+import java.io.UnsupportedEncodingException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Set;
+
+import plugins.XMLSpider.org.garret.perst.IPersistentSet;
+import plugins.XMLSpider.org.garret.perst.Persistent;
+import plugins.XMLSpider.org.garret.perst.Storage;
+
+/**
+ * An indexed word together with its MD5 and the set of pages containing it.
+ */
+public class Term extends Persistent {
+    /** MD5 of the term */
+    String md5;
+    /** Term */
+    String word;
+
+    /** Pages containing this Term */
+    IPersistentSet<Page> pageSet;
+
+    /**
+     * Creates the term for {@code word}, computes its MD5, allocates its page
+     * set and makes the term persistent in {@code storage}.
+     */
+    public Term(String word, Storage storage) {
+        this.word = word;
+        md5 = MD5(word);
+        pageSet = storage.<Page> createScalableSet();
+
+        storage.makePersistent(this);
+    }
+
+    /** Creates an instance without assigning any field. */
+    public Term() {
+    }
+
+    public boolean addPage(Page page) {
+        return pageSet.add(page);
+    }
+
+    public boolean removePage(Page page) {
+        return pageSet.remove(page);
+    }
+
+    /** @return the page set itself, not a copy */
+    public Set<Page> getPages() {
+        return pageSet;
+    }
+
+    public String getWord() {
+        return word;
+    }
+
+    public String getMD5() {
+        return md5;
+    }
+
+    /** Two terms are equal when both their MD5 and their word are equal. */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o)
+            return true;
+        if (o == null)
+            return false;
+        if (getClass() != o.getClass())
+            return false;
+        Term t = (Term) o;
+        return md5.equals(t.md5) && word.equals(t.word);
+    }
+
+    @Override
+    public int hashCode() {
+        return md5.hashCode() ^ word.hashCode();
+    }
+
+    /**
+     * Calculates the MD5 of the UTF-8 encoding of {@code text}.
+     *
+     * @return the digest as 32 lower-case hexadecimal characters
+     * @throws RuntimeException
+     *             if UTF-8 or MD5 is not available
+     */
+    public static String MD5(String text) {
+        try {
+            MessageDigest md = MessageDigest.getInstance("MD5");
+            // digest(byte[]) feeds the input and finishes the hash in one step
+            return convertToHex(md.digest(text.getBytes("UTF-8")));
+        } catch (UnsupportedEncodingException e) {
+            throw new RuntimeException("UTF-8 not supported", e);
+        } catch (NoSuchAlgorithmException e) {
+            throw new RuntimeException("MD5 not supported", e);
+        }
+    }
+
+    /**
+     * Renders each byte of {@code data} as two lower-case hexadecimal
+     * characters, high nibble first.
+     */
+    public static String convertToHex(byte[] data) {
+        StringBuilder buf = new StringBuilder(data.length * 2);
+        for (int i = 0; i < data.length; i++) {
+            buf.append(Character.forDigit((data[i] >>> 4) & 0x0F, 16));
+            buf.append(Character.forDigit(data[i] & 0x0F, 16));
+        }
+        return buf.toString();
+    }
+}
\ No newline at end of file
Added: trunk/plugins/XMLSpider/db/TermPosition.java
===================================================================
--- trunk/plugins/XMLSpider/db/TermPosition.java
(rev 0)
+++ trunk/plugins/XMLSpider/db/TermPosition.java 2008-12-29 15:56:55 UTC
(rev 24821)
@@ -0,0 +1,50 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider.db;
+
+import plugins.XMLSpider.org.garret.perst.Persistent;
+import plugins.XMLSpider.org.garret.perst.Storage;
+
+/**
+ * Persistent list of positions; a Page keeps one of these per term.
+ */
+public class TermPosition extends Persistent {
+    /** Position List */
+    public int[] positions;
+
+    /** Creates an instance without a position array. */
+    public TermPosition() {
+    }
+
+    /** Creates an empty position list and makes it persistent in {@code storage}. */
+    public TermPosition(Storage storage) {
+        positions = new int[0];
+        storage.makePersistent(this);
+    }
+
+    /** Appends {@code position} to the list and marks this object as modified. */
+    public synchronized void addPositions(int position) {
+        int[] newPositions = new int[positions.length + 1];
+        System.arraycopy(positions, 0, newPositions, 0, positions.length);
+        newPositions[positions.length] = position;
+
+        positions = newPositions;
+        modify();
+    }
+
+    /** @return the position array itself, not a copy */
+    public synchronized int[] getPositions() {
+        return positions;
+    }
+
+    /**
+     * Misnamed accessor kept so existing callers still compile.
+     *
+     * @deprecated use {@link #getPositions()}
+     */
+    @Deprecated
+    public synchronized int[] addPositions() {
+        return getPositions();
+    }
+}
\ No newline at end of file
Modified: trunk/plugins/XMLSpider/web/ConfigPage.java
===================================================================
--- trunk/plugins/XMLSpider/web/ConfigPage.java 2008-12-29 13:03:39 UTC (rev
24820)
+++ trunk/plugins/XMLSpider/web/ConfigPage.java 2008-12-29 15:56:55 UTC (rev
24821)
@@ -4,8 +4,8 @@
*/
package plugins.XMLSpider.web;
-import plugins.XMLSpider.Config;
import plugins.XMLSpider.XMLSpider;
+import plugins.XMLSpider.db.Config;
import freenet.clients.http.PageMaker;
import freenet.pluginmanager.PluginRespirator;
import freenet.support.HTMLNode;
Modified: trunk/plugins/XMLSpider/web/MainPage.java
===================================================================
--- trunk/plugins/XMLSpider/web/MainPage.java 2008-12-29 13:03:39 UTC (rev
24820)
+++ trunk/plugins/XMLSpider/web/MainPage.java 2008-12-29 15:56:55 UTC (rev
24821)
@@ -9,14 +9,11 @@
import java.util.Iterator;
import java.util.List;
-import plugins.XMLSpider.Config;
-import plugins.XMLSpider.Page;
-import plugins.XMLSpider.Status;
import plugins.XMLSpider.XMLSpider;
-
-import com.db4o.ObjectSet;
-import com.db4o.query.Query;
-
+import plugins.XMLSpider.db.Config;
+import plugins.XMLSpider.db.Page;
+import plugins.XMLSpider.db.PerstRoot;
+import plugins.XMLSpider.db.Status;
import freenet.clients.http.PageMaker;
import freenet.keys.FreenetURI;
import freenet.pluginmanager.PluginRespirator;
@@ -165,8 +162,8 @@
int maxURI = config.getMaxShownURIs();
for (int i = 0; i < maxURI && pi.hasNext(); i++) {
Page page = pi.next();
- HTMLNode litem = list.addChild("li", "title",
page.comment);
- litem.addChild("a", "href", "/freenet:" +
page.uri, page.uri);
+ HTMLNode litem = list.addChild("li", "title",
page.getComment());
+ litem.addChild("a", "href", "/freenet:" +
page.getURI(), page.getURI());
}
}
contentNode.addChild(runningBox);
@@ -192,19 +189,18 @@
//-- Utilities
private PageStatus getPageStatus(Status status) {
- Query query = xmlSpider.getDB().query();
- query.constrain(Page.class);
- query.descend("status").constrain(status);
- query.descend("lastChange").orderDescending();
+ PerstRoot root = xmlSpider.getDbRoot();
+ synchronized (root) { // one lock around both the count and the iteration
+ int count = root.getPageCount(status);
+ Iterator<Page> it = root.getPages(status);
- @SuppressWarnings("unchecked")
- ObjectSet<Page> set = query.execute();
- List<Page> pages = new ArrayList<Page>();
- while (set.hasNext() && pages.size() < xmlSpider.getConfig().getMaxShownURIs()) {
- pages.add(set.next());
- }
+ int showURI = xmlSpider.getConfig().getMaxShownURIs();
+ List<Page> pages = new ArrayList<Page>(); // typed list instead of a raw ArrayList
+ while (pages.size() < showURI && it.hasNext()) // collect at most showURI pages
+ pages.add(it.next());
- return new PageStatus(set.size(), pages);
+ return new PageStatus(count, pages);
+ }
}
private void listPages(PageStatus pageStatus, HTMLNode parent) {
@@ -214,8 +210,8 @@
HTMLNode list = parent.addChild("ol", "style",
"overflow: auto; white-space: nowrap;");
for (Page page : pageStatus.pages) {
- HTMLNode litem = list.addChild("li", "title",
page.comment);
- litem.addChild("a", "href", "/freenet:" +
page.uri, page.uri);
+ HTMLNode litem = list.addChild("li", "title",
page.getComment());
+ litem.addChild("a", "href", "/freenet:" +
page.getURI(), page.getURI());
}
}
}
_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs