Author: j16sdiz
Date: 2008-12-23 04:17:38 +0000 (Tue, 23 Dec 2008)
New Revision: 24755

Added:
   trunk/plugins/XMLSpider/Config.java
Modified:
   trunk/plugins/XMLSpider/IndexWriter.java
   trunk/plugins/XMLSpider/XMLSpider.java
Log:
Move config to a separate class (prepare for it to be stored in db4o)

Added: trunk/plugins/XMLSpider/Config.java
===================================================================
--- trunk/plugins/XMLSpider/Config.java                         (rev 0)
+++ trunk/plugins/XMLSpider/Config.java 2008-12-23 04:17:38 UTC (rev 24755)
@@ -0,0 +1,116 @@
+/**
+ * @author j16sdiz (1024D/75494252)
+ */
+package plugins.XMLSpider;
+
+import freenet.node.RequestStarter;
+
+class Config {
+       /**
+        * directory where the generated indices are stored. Needs to be created before it can be used
+        */
+       private String indexDir = "myindex7/";
+       private int indexMaxEntries = 2000;
+       private long indexSubindexMaxSize = 4 * 1024 * 1024;
+
+       private String indexTitle = "XMLSpider index";
+       private String indexOwner = "Freenet";
+       private String indexOwnerEmail = null;
+
+       private int maxShownURIs = 15;
+
+       // Can have many; this limit only exists to save memory.
+       private int maxParallelRequests = 100;
+
+       private String[] badlistedExtensions = new String[] { //
+       ".ico", ".bmp", ".png", ".jpg", ".gif", // image
+               ".zip", ".jar", ".gz", ".bz2", ".rar", // archive
+               ".7z", ".rar", ".arj", ".rpm", ".deb", ".xpi", ".ace", ".cab", ".lza", ".lzh", ".ace", ".exe", ".iso", // binary
+               ".mpg", ".ogg", ".mp3", ".avi", // media
+               ".css", ".sig" // other
+       };
+
+       // Equal to Frost, ARK fetches etc. One step down from Fproxy.
+       // Any lower makes it very difficult to debug. Maybe reduce for production - after solving the ARK bugs.
+       private short requestPriority = RequestStarter.IMMEDIATE_SPLITFILE_PRIORITY_CLASS;
+
+       public synchronized void setIndexDir(String indexDir) {
+               this.indexDir = indexDir;
+       }
+
+       public synchronized String getIndexDir() {
+               return indexDir;
+       }
+
+       public synchronized void setIndexMaxEntries(int indexMaxEntries) {
+               this.indexMaxEntries = indexMaxEntries;
+       }
+
+       public synchronized int getIndexMaxEntries() {
+               return indexMaxEntries;
+       }
+
+       public synchronized void setIndexSubindexMaxSize(long indexSubindexMaxSize) {
+               this.indexSubindexMaxSize = indexSubindexMaxSize;
+       }
+
+       public synchronized long getIndexSubindexMaxSize() {
+               return indexSubindexMaxSize;
+       }
+
+       public synchronized void setIndexTitle(String indexTitle) {
+               this.indexTitle = indexTitle;
+       }
+
+       public synchronized String getIndexTitle() {
+               return indexTitle;
+       }
+
+       public synchronized void setIndexOwner(String indexOwner) {
+               this.indexOwner = indexOwner;
+       }
+
+       public synchronized String getIndexOwner() {
+               return indexOwner;
+       }
+
+       public synchronized void setIndexOwnerEmail(String indexOwnerEmail) {
+               this.indexOwnerEmail = indexOwnerEmail;
+       }
+
+       public synchronized void setMaxShownURIs(int maxShownURIs) {
+               this.maxShownURIs = maxShownURIs;
+       }
+
+       public synchronized int getMaxShownURIs() {
+               return maxShownURIs;
+       }
+
+       public synchronized String getIndexOwnerEmail() {
+               return indexOwnerEmail;
+       }
+
+       public synchronized void setMaxParallelRequests(int maxParallelRequests) {
+               this.maxParallelRequests = maxParallelRequests;
+       }
+
+       public synchronized int getMaxParallelRequests() {
+               return maxParallelRequests;
+       }
+
+       public synchronized void setBadlistedExtensions(String[] badlistedExtensions) {
+               this.badlistedExtensions = badlistedExtensions;
+       }
+
+       public synchronized String[] getBadlistedExtensions() {
+               return badlistedExtensions;
+       }
+
+       public synchronized void setRequestPriority(short requestPriority) {
+               this.requestPriority = requestPriority;
+       }
+
+       public synchronized short getRequestPriority() {
+               return requestPriority;
+       }
+}
\ No newline at end of file

Modified: trunk/plugins/XMLSpider/IndexWriter.java
===================================================================
--- trunk/plugins/XMLSpider/IndexWriter.java    2008-12-22 20:12:39 UTC (rev 
24754)
+++ trunk/plugins/XMLSpider/IndexWriter.java    2008-12-23 04:17:38 UTC (rev 
24755)
@@ -49,6 +49,11 @@
                try {
                        time_taken = System.currentTimeMillis();
 
+                       if (!(new File(xmlSpider.getConfig().getIndexDir()).mkdirs())) {
+                               Logger.error(this, "Cannot create index directory: " + xmlSpider.getConfig().getIndexDir());
+                               return;
+                       }
+                       
                        makeSubIndices();
                        makeMainIndex();
 
@@ -76,7 +81,7 @@
                Logger.minor(this, "Producing top index...");
 
                //the main index file 
-               File outputFile = new File(XMLSpider.DEFAULT_INDEX_DIR + 
"index.xml");
+               File outputFile = new File(xmlSpider.getConfig().getIndexDir() 
+ "index.xml");
                // Use a stream so we can explicitly close - minimise number of 
filehandles used.
                BufferedOutputStream fos = new BufferedOutputStream(new 
FileOutputStream(outputFile));
                StreamResult resultStream;
@@ -110,22 +115,22 @@
 
                        /* -> title */
                        Element subHeaderElement = 
xmlDoc.createElement("title");
-                       Text subHeaderText = 
xmlDoc.createTextNode(XMLSpider.indexTitle);
+                       Text subHeaderText = 
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexTitle());
 
                        subHeaderElement.appendChild(subHeaderText);
                        headerElement.appendChild(subHeaderElement);
 
                        /* -> owner */
                        subHeaderElement = xmlDoc.createElement("owner");
-                       subHeaderText = 
xmlDoc.createTextNode(XMLSpider.indexOwner);
+                       subHeaderText = 
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexOwner());
 
                        subHeaderElement.appendChild(subHeaderText);
                        headerElement.appendChild(subHeaderElement);
 
                        /* -> owner email */
-                       if (XMLSpider.indexOwnerEmail != null) {
+                       if (xmlSpider.getConfig().getIndexOwnerEmail() != null) 
{
                                subHeaderElement = 
xmlDoc.createElement("email");
-                               subHeaderText = 
xmlDoc.createTextNode(XMLSpider.indexOwnerEmail);
+                               subHeaderText = 
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexOwnerEmail());
 
                                subHeaderElement.appendChild(subHeaderText);
                                headerElement.appendChild(subHeaderElement);
@@ -200,7 +205,7 @@
                ObjectSet<Term> termSet = query.execute();
 
                indices = new Vector<String>();
-               int prefix = (int) ((Math.log(termSet.size()) - 
Math.log(XMLSpider.MAX_ENTRIES)) / Math.log(16)) - 1;
+               int prefix = (int) ((Math.log(termSet.size()) - 
Math.log(xmlSpider.getConfig().getIndexMaxEntries())) / Math.log(16)) - 1;
                if (prefix <= 0)
                        prefix = 1;
                match = 1;
@@ -243,7 +248,7 @@
                try {
                        if (list.size() == 0)
                                return;
-                       if (list.size() < XMLSpider.MAX_ENTRIES) {
+                       if (list.size() < 
xmlSpider.getConfig().getIndexMaxEntries()) {
                                generateXML(list, p);
                                return;
                        }
@@ -290,7 +295,7 @@
        protected void generateXML(List<Term> list, int prefix) throws 
TooBigIndexException, Exception {
                String p = list.get(0).md5.substring(0, prefix);
                indices.add(p);
-               File outputFile = new File(XMLSpider.DEFAULT_INDEX_DIR + 
"index_" + p + ".xml");
+               File outputFile = new File(xmlSpider.getConfig().getIndexDir() 
+ "index_" + p + ".xml");
                BufferedOutputStream fos = new BufferedOutputStream(new 
FileOutputStream(outputFile));
                StreamResult resultStream;
                resultStream = new StreamResult(fos);
@@ -320,7 +325,7 @@
                        Element headerElement = xmlDoc.createElement("header");
                        /* -> title */
                        Element subHeaderElement = 
xmlDoc.createElement("title");
-                       Text subHeaderText = 
xmlDoc.createTextNode(XMLSpider.indexTitle);
+                       Text subHeaderText = 
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexTitle());
                        subHeaderElement.appendChild(subHeaderText);
                        headerElement.appendChild(subHeaderElement);
 
@@ -412,7 +417,7 @@
                } finally {
                        fos.close();
                }
-               if (outputFile.length() > 
XMLSpider.MAX_SUBINDEX_UNCOMPRESSED_SIZE && list.size() > 1) {
+               if (outputFile.length() > 
xmlSpider.getConfig().getIndexSubindexMaxSize() && list.size() > 1) {
                        outputFile.delete();
                        throw new TooBigIndexException();
                }
@@ -464,22 +469,22 @@
 
                        /* -> title */
                        Element subHeaderElement = 
xmlDoc.createElement("title");
-                       Text subHeaderText = 
xmlDoc.createTextNode(XMLSpider.indexTitle);
+                       Text subHeaderText = 
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexTitle());
 
                        subHeaderElement.appendChild(subHeaderText);
                        headerElement.appendChild(subHeaderElement);
 
                        /* -> owner */
                        subHeaderElement = xmlDoc.createElement("owner");
-                       subHeaderText = 
xmlDoc.createTextNode(XMLSpider.indexOwner);
+                       subHeaderText = 
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexOwner());
 
                        subHeaderElement.appendChild(subHeaderText);
                        headerElement.appendChild(subHeaderElement);
 
                        /* -> owner email */
-                       if (XMLSpider.indexOwnerEmail != null) {
+                       if (xmlSpider.getConfig().getIndexOwnerEmail() != null) 
{
                                subHeaderElement = 
xmlDoc.createElement("email");
-                               subHeaderText = 
xmlDoc.createTextNode(XMLSpider.indexOwnerEmail);
+                               subHeaderText = 
xmlDoc.createTextNode(xmlSpider.getConfig().getIndexOwnerEmail());
 
                                subHeaderElement.appendChild(subHeaderText);
                                headerElement.appendChild(subHeaderElement);

Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java      2008-12-22 20:12:39 UTC (rev 
24754)
+++ trunk/plugins/XMLSpider/XMLSpider.java      2008-12-23 04:17:38 UTC (rev 
24755)
@@ -3,7 +3,6 @@
  * http://www.gnu.org/ for further details of the GPL. */
 package plugins.XMLSpider;
 
-import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URI;
@@ -74,6 +73,12 @@
  *  
  */
 public class XMLSpider implements FredPlugin, FredPluginHTTP, 
FredPluginThreadless, FredPluginVersioned, FredPluginL10n, USKCallback {
+       private Config config = new Config();
+
+       public Config getConfig() {
+               return config;
+       }
+
        public synchronized long getNextPageId() {
                long x = maxPageId.incrementAndGet();
                db.store(maxPageId);
@@ -84,18 +89,12 @@
        protected Map<Page, ClientGetter> runningFetch = 
Collections.synchronizedMap(new HashMap<Page, ClientGetter>());
 
        protected MaxPageId maxPageId;
-       
+
        /**
-        * directory where the generated indices are stored. 
-        * Needs to be created before it can be used
-        */
-       public static final String DEFAULT_INDEX_DIR = "myindex7/";
-       /**
         * Lists the allowed mime types of the fetched page. 
         */
-       public Set<String> allowedMIMETypes;
-       static final int MAX_ENTRIES = 2000;
-       static final long MAX_SUBINDEX_UNCOMPRESSED_SIZE = 4 * 1024 * 1024;
+       public Set<String> allowedMIMETypes;    
+       
        private static int version = 33;
        private static final String pluginName = "XML spider " + version;
 
@@ -103,47 +102,19 @@
                return version + " r" + Version.getSvnRevision();
        }
 
-       /**
-        * Gives the allowed fraction of total time spent on generating indices 
with
-        * maximum value = 1; minimum value = 0. 
-        */
-       public static final double MAX_TIME_SPENT_INDEXING = 0.5;
-
-       static final String indexTitle = "XMLSpider index";
-       static final String indexOwner = "Freenet";
-       static final String indexOwnerEmail = null;
-
-       // Can have many; this limit only exists to save memory.
-       private static final int maxParallelRequests = 100;
-       private int maxShownURIs = 15;
-
        private NodeClientCore core;
        private FetchContext ctx;
-       // Equal to Frost, ARK fetches etc. One step down from Fproxy.
-       // Any lower makes it very difficult to debug. Maybe reduce for 
production - after solving the ARK bugs.
-       private final short PRIORITY_CLASS = 
RequestStarter.IMMEDIATE_SPLITFILE_PRIORITY_CLASS;
        private boolean stopped = true;
 
        private PageMaker pageMaker;
-
-       private final static String[] BADLIST_EXTENSTION = new String[] { 
-               ".ico", ".bmp", ".png", ".jpg", ".gif",         // image
-               ".zip", ".jar", ".gz" , ".bz2", ".rar",         // archive
-               ".7z" , ".rar", ".arj", ".rpm", ".deb",
-               ".xpi", ".ace", ".cab", ".lza", ".lzh",
-               ".ace",
-               ".exe", ".iso",                                                 
        // binary
-               ".mpg", ".ogg", ".mp3", ".avi",                         // media
-               ".css", ".sig"                                                  
        // other
-       };
-
+       
        /**
         * Adds the found uri to the list of to-be-retrieved uris. <p>Every usk 
uri added as ssk.
         * @param uri the new uri that needs to be fetched for further indexing
         */
        public void queueURI(FreenetURI uri, String comment, boolean force) {
                String sURI = uri.toString();
-               for (String ext : BADLIST_EXTENSTION)
+               for (String ext : config.getBadlistedExtensions())
                        if (sURI.endsWith(ext))
                                return; // be smart
 
@@ -188,7 +159,8 @@
                        synchronized (runningFetch) {
                                int running = runningFetch.size();
 
-                               if (running >= maxParallelRequests) return;
+                               if (running >= config.getMaxParallelRequests())
+                                       return;
 
                                if (queuedRequestCache.isEmpty()) {
                                        Query query = db.query();
@@ -199,18 +171,18 @@
                                        ObjectSet<Page> queuedSet = 
query.execute();
 
                                        for (int i = 0 ; 
-                                               i < maxParallelRequests * 2 && 
queuedSet.hasNext();
+                                               i < 
config.getMaxParallelRequests() * 2 && queuedSet.hasNext();
                                                i++) {  // cache 2 * 
maxParallelRequests
                                                
queuedRequestCache.add(queuedSet.next());
                                        }
                                }
                                
queuedRequestCache.removeAll(runningFetch.keySet());
 
-                               toStart = new 
ArrayList<ClientGetter>(maxParallelRequests - running);
+                               toStart = new 
ArrayList<ClientGetter>(config.getMaxParallelRequests() - running);
 
                                Iterator<Page> it = 
queuedRequestCache.iterator();
 
-                               while (running + toStart.size() < 
maxParallelRequests && it.hasNext()) {
+                               while (running + toStart.size() < 
config.getMaxParallelRequests() && it.hasNext()) {
                                        Page page = it.next();
                                        it.remove();
 
@@ -291,7 +263,9 @@
        private ClientGetter makeGetter(Page page) throws MalformedURLException 
{
                ClientGetter getter = new ClientGetter(new 
ClientGetterCallback(page),
                                core.requestStarters.chkFetchScheduler,
-                       core.requestStarters.sskFetchScheduler, new 
FreenetURI(page.uri), ctx, PRIORITY_CLASS, this, null, null);
+                       core.requestStarters.sskFetchScheduler, new 
FreenetURI(page.uri), ctx, config.getRequestPriority(),
+                       this,
+                       null, null);
                return getter;
        }
 
@@ -538,10 +512,6 @@
 
                stopped = false;
 
-               if (!new File(DEFAULT_INDEX_DIR).mkdirs()) {
-                       Logger.error(this, "Could not create default index 
directory ");
-               }
-
                // Initial DB4O
                db = initDB4O();
 
@@ -598,7 +568,7 @@
                @SuppressWarnings("unchecked")
                ObjectSet<Page> set = query.execute();
                List<Page> pages = new ArrayList<Page>();
-               while (set.hasNext() && pages.size() < maxShownURIs) {
+               while (set.hasNext() && pages.size() < 
config.getMaxShownURIs()) {
                        pages.add(set.next());
                }
 
@@ -676,7 +646,7 @@
                HTMLNode nextTableCell = overviewTableRow.addChild("td", 
"class", "first");
                HTMLNode statusBox = pageMaker.getInfobox("Spider Status");
                HTMLNode statusContent = pageMaker.getContentNode(statusBox);
-               statusContent.addChild("#", "Running Request: " + 
runningFetch.size() + "/" + maxParallelRequests);
+               statusContent.addChild("#", "Running Request: " + 
runningFetch.size() + "/" + config.getMaxParallelRequests());
                statusContent.addChild("br");
                statusContent.addChild("#", "Queued: " + queuedStatus.count);
                statusContent.addChild("br");
@@ -742,7 +712,7 @@
                                HTMLNode list = runningContent.addChild("ol", 
"style", "overflow: auto; white-space: nowrap;");
 
                                Iterator<Page> pi = 
runningFetch.keySet().iterator();
-                               for (int i = 0; i < maxShownURIs && 
pi.hasNext(); i++) {
+                               for (int i = 0; i < config.getMaxShownURIs() && 
pi.hasNext(); i++) {
                                        Page page = pi.next();
                                        HTMLNode litem = list.addChild("li", 
"title", page.comment);
                                        litem.addChild("a", "href", "/freenet:" 
+ page.uri, page.uri);
@@ -917,11 +887,11 @@
        }
 
        public short getPollingPriorityNormal() {
-               return (short) Math.min(RequestStarter.MINIMUM_PRIORITY_CLASS, 
PRIORITY_CLASS + 1);
+               return (short) Math.min(RequestStarter.MINIMUM_PRIORITY_CLASS, 
config.getRequestPriority() + 1);
        }
 
        public short getPollingPriorityProgress() {
-               return PRIORITY_CLASS;
+               return config.getRequestPriority();
        }
 
        protected ObjectContainer db;

_______________________________________________
cvs mailing list
[email protected]
http://emu.freenetproject.org/cgi-bin/mailman/listinfo/cvs

Reply via email to