Author: toad
Date: 2008-02-28 22:43:46 +0000 (Thu, 28 Feb 2008)
New Revision: 18236
Modified:
trunk/plugins/XMLSpider/XMLSpider.java
Log:
XMLSpider #28: Avoid the cooldown queue completely. Each fetch gets one try and
2 retries, so we're just within the limit. There are 3 queues, so everything
gets tried 3 times, then waits for everything else at that level, then gets
tried another 3 times, waits again, gets another 3 tries, and then we give up.
Modified: trunk/plugins/XMLSpider/XMLSpider.java
===================================================================
--- trunk/plugins/XMLSpider/XMLSpider.java 2008-02-28 22:26:54 UTC (rev
18235)
+++ trunk/plugins/XMLSpider/XMLSpider.java 2008-02-28 22:43:46 UTC (rev
18236)
@@ -103,8 +103,12 @@
/**
*
* Lists the uris that are still queued.
+ *
+ * Since we have limited RAM, and we don't want stuff to be on the
cooldown queue for a
+ * long period, we use 2 retries (to stay off the cooldown queue), and
we go over the queued
+ * list 3 times for each key.
*/
- public final LinkedList queuedURIList = new LinkedList();
+ public final LinkedList[] queuedURIList = new LinkedList[] { new
LinkedList(), new LinkedList(), new LinkedList() };
private final HashMap runningFetchesByURI = new HashMap();
private final HashMap idsByWord = new HashMap();
@@ -143,7 +147,7 @@
public Set allowedMIMETypes;
private static final int MAX_ENTRIES = 2000;
private static final long MAX_SUBINDEX_UNCOMPRESSED_SIZE = 256*1024;
- private static int version = 27;
+ private static int version = 28;
private static final String pluginName = "XML spider "+version;
/**
* Gives the allowed fraction of total time spent on generating indices
with
@@ -188,7 +192,7 @@
}
if ((!visitedURIs.contains(uri)) && queuedURISet.add(uri)) {
- queuedURIList.addLast(uri);
+ queuedURIList[0].addLast(uri);
visitedURIs.add(uri);
uriIds.put(uri, id);
idUris.put(id, uri);
@@ -211,7 +215,7 @@
return;
}
int running = runningFetchesByURI.size();
- int queued = queuedURIList.size();
+ int queued = queuedURIList[0].size() +
queuedURIList[1].size() + queuedURIList[2].size();
if ((running >= maxParallelRequests) || (queued == 0))
return;
@@ -219,12 +223,17 @@
toStart = new ArrayList(Math.min(maxParallelRequests -
running, queued));
for (int i = running; i < maxParallelRequests; i++) {
- if (queuedURIList.isEmpty())
- break;
- FreenetURI uri = (FreenetURI)
queuedURIList.removeFirst();
+ boolean found = false;
+ for(int j=0;j<queuedURIList.length;j++) {
+ if(queuedURIList[j].isEmpty()) continue;
+ FreenetURI uri = (FreenetURI)
queuedURIList[j].removeFirst();
+ if(j < queuedURIList.length-1)
queuedURIList[j+1].add(uri);
queuedURISet.remove(uri);
ClientGetter getter = makeGetter(uri);
toStart.add(getter);
+ found = true;
+ }
+ if(!found) break;
}
}
for (int i = 0; i < toStart.size(); i++) {
@@ -707,8 +716,8 @@
this.core = pluginManager.getClientCore();
this.ctx = core.makeClient((short) 0).getFetchContext();
- ctx.maxSplitfileBlockRetries = 9; // Will be on the cooldown
queue 3 times.
- ctx.maxNonSplitfileRetries = 9; // Will be on the cooldown
queue 3 times.
+ ctx.maxSplitfileBlockRetries = 2; // Don't let it enter the
cooldown queue.
+ ctx.maxNonSplitfileRetries = 2; // Don't let it enter the
cooldown queue.
ctx.maxTempLength = 2 * 1024 * 1024;
ctx.maxOutputLength = 2 * 1024 * 1024;
allowedMIMETypes = new HashSet();
@@ -735,7 +744,8 @@
public void stopPlugin() {
synchronized (this) {
stopped = true;
- queuedURIList.clear();
+ for(int i=0;i<queuedURIList.length;i++)
+ queuedURIList[i].clear();
}
}
@@ -894,7 +904,8 @@
public void terminate(){
synchronized (this) {
stopped = true;
- queuedURIList.clear();
+ for(int i=0;i<queuedURIList.length;i++)
+ queuedURIList[i].clear();
}
}
@@ -978,8 +989,8 @@
it = (runningFetchesByURI.keySet()).iterator();
if(listname.equals("visited"))
it = (new HashSet(visitedURIs)).iterator();
- if(listname.equals("queued"))
- it = (new ArrayList(queuedURIList)).iterator();
+ if(listname.startsWith("queued"))
+ it = (new
ArrayList(queuedURIList[Integer.parseInt(listname.substring("queued".length()))])).iterator();
if(listname.equals("failed"))
it = (new HashSet(failedURIs)).iterator();
while(it.hasNext())
@@ -999,21 +1010,23 @@
Set runningFetches;
Set visited;
Set failed;
- List queued;
+ List[] queued = new List[queuedURIList.length];
synchronized(this) {
visited = new HashSet(visitedURIs);
failed = new HashSet(failedURIs);
- queued = new ArrayList(queuedURIList);
+ for(int i=0;i<queuedURIList.length;i++)
+ queued[i] = new ArrayList(queuedURIList[i]);
runningFetches = new
HashSet(runningFetchesByURI.keySet());
}
out.append("<p><h3>Running Fetches</h3></p>");
- Iterator it=queued.iterator();
out.append("<br/>Size :"+runningFetches.size()+"<br/>");
appendList(runningFetches,out,stylesheet);
out.append("<p><a href=\"?list="+"running"+"\">Show
all</a><br/></p>");
- out.append("<p><h3>Queued URIs</h3></p>");
- out.append("<br/>Size :"+queued.size()+"<br/>");
+ for(int j=0;j<queued.length;j++) {
+ out.append("<p><h3>Queued URIs ("+j+")</h3></p>");
+ out.append("<br/>Size :"+queued[j].size()+"<br/>");
int i = 0;
+ Iterator it=queued[j].iterator();
while(it.hasNext()){
if(i<=maxShownURIs){
out.append("<code>"+it.next().toString()+"</code><br/>");
@@ -1021,7 +1034,8 @@
else break;
i++;
}
- out.append("<p><a href=\"?list="+"queued"+"\">Show
all</a><br/></p>");
+ out.append("<p><a href=\"?list="+"queued"+j+"\">Show
all</a><br/></p>");
+ }
out.append("<p><h3>Visited URIs</h3></p>");
out.append("<br/>Size :"+visited.size()+"<br/>");
appendList(visited,out,stylesheet);