Author: toad
Date: 2008-02-28 22:43:46 +0000 (Thu, 28 Feb 2008)
New Revision: 18236

Modified:
   trunk/plugins/XMLSpider/XMLSpider.java
Log:
XMLSpider #28: Avoid the cooldown queue completely. Each fetch uses one try 
plus 2 retries, so we stay just within the limit. There are 3 queues, so each 
key gets tried 3 times, then waits for everything else at that level, then gets 
tried another 3 times, waits again, gets a final 3 tries, and is then given up on.

Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java      2008-02-28 22:26:54 UTC (rev 
18235)
+++ trunk/plugins/XMLSpider/XMLSpider.java      2008-02-28 22:43:46 UTC (rev 
18236)
@@ -103,8 +103,12 @@
        /**
         * 
         * Lists the uris that are still queued.
+        * 
+        * Since we have limited RAM, and we don't want stuff to be on the 
cooldown queue for a 
+        * long period, we use 2 retries (to stay off the cooldown queue), and 
we go over the queued
+        * list 3 times for each key.
         */
-       public final LinkedList queuedURIList = new LinkedList();
+       public final LinkedList[] queuedURIList = new LinkedList[] { new 
LinkedList(), new LinkedList(), new LinkedList() };
        private final HashMap runningFetchesByURI = new HashMap();

        private final HashMap idsByWord = new HashMap();
@@ -143,7 +147,7 @@
        public Set allowedMIMETypes;
        private static final int MAX_ENTRIES = 2000;
        private static final long MAX_SUBINDEX_UNCOMPRESSED_SIZE = 256*1024;
-       private static int version = 27;
+       private static int version = 28;
        private static final String pluginName = "XML spider "+version;
        /**
         * Gives the allowed fraction of total time spent on generating indices 
with
@@ -188,7 +192,7 @@
                }

                if ((!visitedURIs.contains(uri)) && queuedURISet.add(uri)) {
-                       queuedURIList.addLast(uri);
+                       queuedURIList[0].addLast(uri);
                        visitedURIs.add(uri);
                        uriIds.put(uri, id);
                        idUris.put(id, uri);
@@ -211,7 +215,7 @@
                                return;
                        }
                        int running = runningFetchesByURI.size();
-                       int queued = queuedURIList.size();
+                       int queued = queuedURIList[0].size() + 
queuedURIList[1].size() + queuedURIList[2].size();

                        if ((running >= maxParallelRequests) || (queued == 0))
                                return;
@@ -219,12 +223,17 @@
                        toStart = new ArrayList(Math.min(maxParallelRequests - 
running, queued));

                        for (int i = running; i < maxParallelRequests; i++) {
-                               if (queuedURIList.isEmpty())
-                                       break;
-                               FreenetURI uri = (FreenetURI) 
queuedURIList.removeFirst();
+                               boolean found = false;
+                               for(int j=0;j<queuedURIList.length;j++) {
+                               if(queuedURIList[j].isEmpty()) continue;
+                               FreenetURI uri = (FreenetURI) 
queuedURIList[j].removeFirst();
+                               if(j < queuedURIList.length-1) 
queuedURIList[j+1].add(uri);
                                queuedURISet.remove(uri);
                                ClientGetter getter = makeGetter(uri);
                                toStart.add(getter);
+                               found = true;
+                               }
+                               if(!found) break;
                        }
                }
                for (int i = 0; i < toStart.size(); i++) {
@@ -707,8 +716,8 @@

                this.core = pluginManager.getClientCore();
                this.ctx = core.makeClient((short) 0).getFetchContext();
-               ctx.maxSplitfileBlockRetries = 9; // Will be on the cooldown 
queue 3 times.
-               ctx.maxNonSplitfileRetries = 9; // Will be on the cooldown 
queue 3 times.
+               ctx.maxSplitfileBlockRetries = 2; // Don't let it enter the 
cooldown queue.
+               ctx.maxNonSplitfileRetries = 2; // Don't let it enter the 
cooldown queue.
                ctx.maxTempLength = 2 * 1024 * 1024;
                ctx.maxOutputLength = 2 * 1024 * 1024;
                allowedMIMETypes = new HashSet();
@@ -735,7 +744,8 @@
        public void stopPlugin() {
                synchronized (this) {
                        stopped = true;
-                       queuedURIList.clear();
+                       for(int i=0;i<queuedURIList.length;i++)
+                               queuedURIList[i].clear();
                }
        }

@@ -894,7 +904,8 @@
        public void terminate(){
                synchronized (this) {
                        stopped = true;
-                       queuedURIList.clear();
+                       for(int i=0;i<queuedURIList.length;i++)
+                               queuedURIList[i].clear();
                }
        }

@@ -978,8 +989,8 @@
                        it = (runningFetchesByURI.keySet()).iterator();
                if(listname.equals("visited"))
                        it = (new HashSet(visitedURIs)).iterator();
-               if(listname.equals("queued"))
-                       it = (new ArrayList(queuedURIList)).iterator();
+               if(listname.startsWith("queued"))
+                       it = (new 
ArrayList(queuedURIList[Integer.parseInt(listname.substring("queued".length()))])).iterator();
                if(listname.equals("failed"))
                        it = (new HashSet(failedURIs)).iterator();
                while(it.hasNext())
@@ -999,21 +1010,23 @@
                Set runningFetches;
                Set visited;
                Set failed;
-               List queued;
+               List[] queued = new List[queuedURIList.length];
                synchronized(this) {
                        visited = new HashSet(visitedURIs);
                        failed = new HashSet(failedURIs);
-                       queued = new ArrayList(queuedURIList);
+                       for(int i=0;i<queuedURIList.length;i++)
+                               queued[i] = new ArrayList(queuedURIList[i]);
                        runningFetches = new 
HashSet(runningFetchesByURI.keySet());
                }
                out.append("<p><h3>Running Fetches</h3></p>");
-               Iterator it=queued.iterator();
                out.append("<br/>Size :"+runningFetches.size()+"<br/>");
                appendList(runningFetches,out,stylesheet);
                out.append("<p><a href=\"?list="+"running"+"\">Show 
all</a><br/></p>");
-               out.append("<p><h3>Queued URIs</h3></p>");
-               out.append("<br/>Size :"+queued.size()+"<br/>");
+               for(int j=0;j<queued.length;j++) {
+               out.append("<p><h3>Queued URIs ("+j+")</h3></p>");
+               out.append("<br/>Size :"+queued[j].size()+"<br/>");
                int i = 0;
+               Iterator it=queued[j].iterator();
                while(it.hasNext()){
                        if(i<=maxShownURIs){
                                
out.append("<code>"+it.next().toString()+"</code><br/>");
@@ -1021,7 +1034,8 @@
                        else break;
                        i++;
                }
-               out.append("<p><a href=\"?list="+"queued"+"\">Show 
all</a><br/></p>");
+               out.append("<p><a href=\"?list="+"queued"+j+"\">Show 
all</a><br/></p>");
+               }
                out.append("<p><h3>Visited URIs</h3></p>");
                out.append("<br/>Size :"+visited.size()+"<br/>");
                appendList(visited,out,stylesheet);


Reply via email to