Author: j16sdiz
Date: 2008-12-23 04:17:38 +0000 (Tue, 23 Dec 2008)
New Revision: 24755
Added:
trunk/plugins/XMLSpider/Config.java
Modified:
trunk/plugins/XMLSpider/IndexWriter.java
trunk/plugins/XMLSpider/XMLSpider.java
Log:
move config to separate class (prepare to be stored in db4o)
Added: trunk/plugins/XMLSpider/Config.java
===================================================================
--- trunk/plugins/XMLSpider/Config.java (rev 0)
+++ trunk/plugins/XMLSpider/Config.java 2008-12-23 04:17:38 UTC (rev 24755)
@@ -0,0 +1,116 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider;
+
+import freenet.node.RequestStarter;
+
+class Config {
+ /**
+ * directory where the generated indices are stored. Needs to be
created before it can be used
+ */
+ private String indexDir = "myindex7/";
+ private int indexMaxEntries = 2000;
+ private long indexSubindexMaxSize = 4 * 1024 * 1024;
+
+ private String indexTitle = "XMLSpider index";
+ private String indexOwner = "Freenet";
+ private String indexOwnerEmail = null;
+
+ private int maxShownURIs = 15;
+
+ // Can have many; this limit only exists to save memory.
+ private int maxParallelRequests = 100;
+
+ private String[] badlistedExtensions = new String[] { //
+ ".ico", ".bmp", ".png", ".jpg", ".gif", // image
+ ".zip", ".jar", ".gz", ".bz2", ".rar", // archive
+ ".7z", ".rar", ".arj", ".rpm", ".deb", ".xpi", ".ace", ".cab",
".lza", ".lzh", ".ace", ".exe", ".iso", // binary
+ ".mpg", ".ogg", ".mp3", ".avi", // media
+ ".css", ".sig" // other
+ };
+
+ // Equal to Frost, ARK fetches etc. One step down from Fproxy.
+ // Any lower makes it very difficult to debug. Maybe reduce for
production - after solving the ARK bugs.
+ private short requestPriority =
RequestStarter.IMMEDIATE_SPLITFILE_PRIORITY_CLASS;
+
+ public synchronized void setIndexDir(String indexDir) {
+ this.indexDir = indexDir;
+ }
+
+ public synchronized String getIndexDir() {
+ return indexDir;
+ }
+
+ public synchronized void setIndexMaxEntries(int indexMaxEntries) {
+ this.indexMaxEntries = indexMaxEntries;
+ }
+
+ public synchronized int getIndexMaxEntries() {
+ return indexMaxEntries;
+ }
+
+ public synchronized void setIndexSubindexMaxSize(long
indexSubindexMaxSize) {
+ this.indexSubindexMaxSize = indexSubindexMaxSize;
+ }
+
+ public synchronized long getIndexSubindexMaxSize() {
+ return indexSubindexMaxSize;
+ }
+
+ public synchronized void setIndexTitle(String indexTitle) {
+ this.indexTitle = indexTitle;
+ }
+
+ public synchronized String getIndexTitle() {
+ return indexTitle;
+ }
+
+ public synchronized void setIndexOwner(String indexOwner) {
+ this.indexOwner = indexOwner;
+ }
+
+ public synchronized String getIndexOwner() {
+ return indexOwner;
+ }
+
+ public synchronized void setIndexOwnerEmail(String indexOwnerEmail) {
+ this.indexOwnerEmail = indexOwnerEmail;
+ }
+
+ public synchronized void setMaxShownURIs(int maxShownURIs) {
+ this.maxShownURIs = maxShownURIs;
+ }
+
+ public synchronized int getMaxShownURIs() {
+ return maxShownURIs;
+ }
+
+ public synchronized String getIndexOwnerEmail() {
+ return indexOwnerEmail;
+ }
+
+ public synchronized void setMaxParallelRequests(int
maxParallelRequests) {
+ this.maxParallelRequests = maxParallelRequests;
+ }
+
+ public synchronized int getMaxParallelRequests() {
+ return maxParallelRequests;
+ }
+
+ public synchronized void setBadlistedExtensions(String[]
badlistedExtensions) {
+ this.badlistedExtensions = badlistedExtensions;
+ }
+
+ public synchronized String[] getBadlistedExtensions() {
+ return badlistedExtensions;
+ }
+
+ public synchronized void setRequestPriority(short requestPriority) {
+ this.requestPriority = requestPriority;
+ }
+
+ public synchronized short getRequestPriority() {
+ return requestPriority;
+ }
+}
\ No newline at end of file
Modified: trunk/plugins/XMLSpider/IndexWriter.java
===================================================================
--- trunk/plugins/XMLSpider/IndexWriter.java 2008-12-22 20:12:39 UTC (rev
24754)
+++ trunk/plugins/XMLSpider/IndexWriter.java 2008-12-23 04:17:38 UTC (rev
24755)
@@ -49,6 +49,11 @@
try {
time_taken = System.currentTimeMillis();
+ if (!(new
File(xmlSpider.getConfig().getIndexDir()).mkdirs())) {
+ Logger.error(this, "Cannot create index
directory: " + xmlSpider.getConfig().getIndexDir());
+ return;
+ }
+
makeSubIndices();
makeMainIndex();
@@ -76,7 +81,7 @@
Logger.minor(this, "Producing top index...");
//the main index file
- File outputFile = new File(XMLSpider.DEFAULT_INDEX_DIR +
"index.xml");
+ File outputFile = new File(xmlSpider.getConfig().getIndexDir()
+ "index.xml");
// Use a stream so we can explicitly close - minimise number of
filehandles used.
BufferedOutputStream fos = new BufferedOutputStream(new
FileOutputStream(outputFile));
StreamResult resultStream;
@@ -110,22 +115,22 @@
/* -> title */
Element subHeaderElement =
xmlDoc.createElement("title");
- Text subHeaderText =
xmlDoc.createTextNode(XMLSpider.indexTitle);
+ Text subHeaderText =
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexTitle());
subHeaderElement.appendChild(subHeaderText);
headerElement.appendChild(subHeaderElement);
/* -> owner */
subHeaderElement = xmlDoc.createElement("owner");
- subHeaderText =
xmlDoc.createTextNode(XMLSpider.indexOwner);
+ subHeaderText =
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexOwner());
subHeaderElement.appendChild(subHeaderText);
headerElement.appendChild(subHeaderElement);
/* -> owner email */
- if (XMLSpider.indexOwnerEmail != null) {
+ if (xmlSpider.getConfig().getIndexOwnerEmail() != null)
{
subHeaderElement =
xmlDoc.createElement("email");
- subHeaderText =
xmlDoc.createTextNode(XMLSpider.indexOwnerEmail);
+ subHeaderText =
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexOwnerEmail());
subHeaderElement.appendChild(subHeaderText);
headerElement.appendChild(subHeaderElement);
@@ -200,7 +205,7 @@
ObjectSet<Term> termSet = query.execute();
indices = new Vector<String>();
- int prefix = (int) ((Math.log(termSet.size()) -
Math.log(XMLSpider.MAX_ENTRIES)) / Math.log(16)) - 1;
+ int prefix = (int) ((Math.log(termSet.size()) -
Math.log(xmlSpider.getConfig().getIndexMaxEntries())) / Math.log(16)) - 1;
if (prefix <= 0)
prefix = 1;
match = 1;
@@ -243,7 +248,7 @@
try {
if (list.size() == 0)
return;
- if (list.size() < XMLSpider.MAX_ENTRIES) {
+ if (list.size() <
xmlSpider.getConfig().getIndexMaxEntries()) {
generateXML(list, p);
return;
}
@@ -290,7 +295,7 @@
protected void generateXML(List<Term> list, int prefix) throws
TooBigIndexException, Exception {
String p = list.get(0).md5.substring(0, prefix);
indices.add(p);
- File outputFile = new File(XMLSpider.DEFAULT_INDEX_DIR +
"index_" + p + ".xml");
+ File outputFile = new File(xmlSpider.getConfig().getIndexDir()
+ "index_" + p + ".xml");
BufferedOutputStream fos = new BufferedOutputStream(new
FileOutputStream(outputFile));
StreamResult resultStream;
resultStream = new StreamResult(fos);
@@ -320,7 +325,7 @@
Element headerElement = xmlDoc.createElement("header");
/* -> title */
Element subHeaderElement =
xmlDoc.createElement("title");
- Text subHeaderText =
xmlDoc.createTextNode(XMLSpider.indexTitle);
+ Text subHeaderText =
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexTitle());
subHeaderElement.appendChild(subHeaderText);
headerElement.appendChild(subHeaderElement);
@@ -412,7 +417,7 @@
} finally {
fos.close();
}
- if (outputFile.length() >
XMLSpider.MAX_SUBINDEX_UNCOMPRESSED_SIZE && list.size() > 1) {
+ if (outputFile.length() >
xmlSpider.getConfig().getIndexSubindexMaxSize() && list.size() > 1) {
outputFile.delete();
throw new TooBigIndexException();
}
@@ -464,22 +469,22 @@
/* -> title */
Element subHeaderElement =
xmlDoc.createElement("title");
- Text subHeaderText =
xmlDoc.createTextNode(XMLSpider.indexTitle);
+ Text subHeaderText =
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexTitle());
subHeaderElement.appendChild(subHeaderText);
headerElement.appendChild(subHeaderElement);
/* -> owner */
subHeaderElement = xmlDoc.createElement("owner");
- subHeaderText =
xmlDoc.createTextNode(XMLSpider.indexOwner);
+ subHeaderText =
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexOwner());
subHeaderElement.appendChild(subHeaderText);
headerElement.appendChild(subHeaderElement);
/* -> owner email */
- if (XMLSpider.indexOwnerEmail != null) {
+ if (xmlSpider.getConfig().getIndexOwnerEmail() != null)
{
subHeaderElement =
xmlDoc.createElement("email");
- subHeaderText =
xmlDoc.createTextNode(XMLSpider.indexOwnerEmail);
+ subHeaderText =
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexOwnerEmail());
subHeaderElement.appendChild(subHeaderText);
headerElement.appendChild(subHeaderElement);
Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java 2008-12-22 20:12:39 UTC (rev
24754)
+++ trunk/plugins/XMLSpider/XMLSpider.java 2008-12-23 04:17:38 UTC (rev
24755)
@@ -3,7 +3,6 @@
* http://www.gnu.org/ for further details of the GPL. */
package plugins.XMLSpider;
-import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
@@ -74,6 +73,12 @@
*
*/
public class XMLSpider implements FredPlugin, FredPluginHTTP,
FredPluginThreadless, FredPluginVersioned, FredPluginL10n, USKCallback {
+ private Config config = new Config();
+
+ public Config getConfig() {
+ return config;
+ }
+
public synchronized long getNextPageId() {
long x = maxPageId.incrementAndGet();
db.store(maxPageId);
@@ -84,18 +89,12 @@
protected Map<Page, ClientGetter> runningFetch =
Collections.synchronizedMap(new HashMap<Page, ClientGetter>());
protected MaxPageId maxPageId;
-
+
/**
- * directory where the generated indices are stored.
- * Needs to be created before it can be used
- */
- public static final String DEFAULT_INDEX_DIR = "myindex7/";
- /**
* Lists the allowed mime types of the fetched page.
*/
- public Set<String> allowedMIMETypes;
- static final int MAX_ENTRIES = 2000;
- static final long MAX_SUBINDEX_UNCOMPRESSED_SIZE = 4 * 1024 * 1024;
+ public Set<String> allowedMIMETypes;
+
private static int version = 33;
private static final String pluginName = "XML spider " + version;
@@ -103,47 +102,19 @@
return version + " r" + Version.getSvnRevision();
}
- /**
- * Gives the allowed fraction of total time spent on generating indices
with
- * maximum value = 1; minimum value = 0.
- */
- public static final double MAX_TIME_SPENT_INDEXING = 0.5;
-
- static final String indexTitle = "XMLSpider index";
- static final String indexOwner = "Freenet";
- static final String indexOwnerEmail = null;
-
- // Can have many; this limit only exists to save memory.
- private static final int maxParallelRequests = 100;
- private int maxShownURIs = 15;
-
private NodeClientCore core;
private FetchContext ctx;
- // Equal to Frost, ARK fetches etc. One step down from Fproxy.
- // Any lower makes it very difficult to debug. Maybe reduce for
production - after solving the ARK bugs.
- private final short PRIORITY_CLASS =
RequestStarter.IMMEDIATE_SPLITFILE_PRIORITY_CLASS;
private boolean stopped = true;
private PageMaker pageMaker;
-
- private final static String[] BADLIST_EXTENSTION = new String[] {
- ".ico", ".bmp", ".png", ".jpg", ".gif", // image
- ".zip", ".jar", ".gz" , ".bz2", ".rar", // archive
- ".7z" , ".rar", ".arj", ".rpm", ".deb",
- ".xpi", ".ace", ".cab", ".lza", ".lzh",
- ".ace",
- ".exe", ".iso",
// binary
- ".mpg", ".ogg", ".mp3", ".avi", // media
- ".css", ".sig"
// other
- };
-
+
/**
* Adds the found uri to the list of to-be-retrieved uris. <p>Every usk
uri added as ssk.
* @param uri the new uri that needs to be fetched for further indexing
*/
public void queueURI(FreenetURI uri, String comment, boolean force) {
String sURI = uri.toString();
- for (String ext : BADLIST_EXTENSTION)
+ for (String ext : config.getBadlistedExtensions())
if (sURI.endsWith(ext))
return; // be smart
@@ -188,7 +159,8 @@
synchronized (runningFetch) {
int running = runningFetch.size();
- if (running >= maxParallelRequests) return;
+ if (running >= config.getMaxParallelRequests())
+ return;
if (queuedRequestCache.isEmpty()) {
Query query = db.query();
@@ -199,18 +171,18 @@
ObjectSet<Page> queuedSet =
query.execute();
for (int i = 0 ;
- i < maxParallelRequests * 2 &&
queuedSet.hasNext();
+ i <
config.getMaxParallelRequests() * 2 && queuedSet.hasNext();
i++) { // cache 2 *
maxParallelRequests
queuedRequestCache.add(queuedSet.next());
}
}
queuedRequestCache.removeAll(runningFetch.keySet());
- toStart = new
ArrayList<ClientGetter>(maxParallelRequests - running);
+ toStart = new
ArrayList<ClientGetter>(config.getMaxParallelRequests() - running);
Iterator<Page> it =
queuedRequestCache.iterator();
- while (running + toStart.size() <
maxParallelRequests && it.hasNext()) {
+ while (running + toStart.size() <
config.getMaxParallelRequests() && it.hasNext()) {
Page page = it.next();
it.remove();
@@ -291,7 +263,9 @@
private ClientGetter makeGetter(Page page) throws MalformedURLException
{
ClientGetter getter = new ClientGetter(new
ClientGetterCallback(page),
core.requestStarters.chkFetchScheduler,
- core.requestStarters.sskFetchScheduler, new
FreenetURI(page.uri), ctx, PRIORITY_CLASS, this, null, null);
+ core.requestStarters.sskFetchScheduler, new
FreenetURI(page.uri), ctx, config.getRequestPriority(),
+ this,
+ null, null);
return getter;
}
@@ -538,10 +512,6 @@
stopped = false;
- if (!new File(DEFAULT_INDEX_DIR).mkdirs()) {
- Logger.error(this, "Could not create default index
directory ");
- }
-
// Initial DB4O
db = initDB4O();
@@ -598,7 +568,7 @@
@SuppressWarnings("unchecked")
ObjectSet<Page> set = query.execute();
List<Page> pages = new ArrayList<Page>();
- while (set.hasNext() && pages.size() < maxShownURIs) {
+ while (set.hasNext() && pages.size() <
config.getMaxShownURIs()) {
pages.add(set.next());
}
@@ -676,7 +646,7 @@
HTMLNode nextTableCell = overviewTableRow.addChild("td",
"class", "first");
HTMLNode statusBox = pageMaker.getInfobox("Spider Status");
HTMLNode statusContent = pageMaker.getContentNode(statusBox);
- statusContent.addChild("#", "Running Request: " +
runningFetch.size() + "/" + maxParallelRequests);
+ statusContent.addChild("#", "Running Request: " +
runningFetch.size() + "/" + config.getMaxParallelRequests());
statusContent.addChild("br");
statusContent.addChild("#", "Queued: " + queuedStatus.count);
statusContent.addChild("br");
@@ -742,7 +712,7 @@
HTMLNode list = runningContent.addChild("ol",
"style", "overflow: auto; white-space: nowrap;");
Iterator<Page> pi =
runningFetch.keySet().iterator();
- for (int i = 0; i < maxShownURIs &&
pi.hasNext(); i++) {
+ for (int i = 0; i < config.getMaxShownURIs() &&
pi.hasNext(); i++) {
Page page = pi.next();
HTMLNode litem = list.addChild("li",
"title", page.comment);
litem.addChild("a", "href", "/freenet:"
+ page.uri, page.uri);
@@ -917,11 +887,11 @@
}
public short getPollingPriorityNormal() {
- return (short) Math.min(RequestStarter.MINIMUM_PRIORITY_CLASS,
PRIORITY_CLASS + 1);
+ return (short) Math.min(RequestStarter.MINIMUM_PRIORITY_CLASS,
config.getRequestPriority() + 1);
}
public short getPollingPriorityProgress() {
- return PRIORITY_CLASS;
+ return config.getRequestPriority();
}
protected ObjectContainer db;
_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs