On Wed, Dec 10, 2008 at 9:01 PM, Matthew Toseland
<toad at amphibian.dyndns.org> wrote:
> On Wednesday 10 December 2008 09:08, j16sdiz at freenetproject.org wrote:
>> Author: j16sdiz
>> Date: 2008-12-10 09:08:25 +0000 (Wed, 10 Dec 2008)
>> New Revision: 24174
>>
>> Added:
>> trunk/plugins/XMLSpider/Version.java
>> Modified:
>> trunk/plugins/XMLSpider/XMLSpider.java
>> Log:
>> initial db4o support
>
> Ooooooh........ does this work?
>>
>> Modified: trunk/plugins/XMLSpider/XMLSpider.java
>> ===================================================================
>> --- trunk/plugins/XMLSpider/XMLSpider.java 2008-12-10 07:43:04 UTC (rev
> 24173)
>> +++ trunk/plugins/XMLSpider/XMLSpider.java 2008-12-10 09:08:25 UTC (rev
> 24174)
>> @@ -74,59 +84,67 @@
>> * @author swati goyal
>
> Add yourself here. :)
>> *
>> */
>> -public class XMLSpider implements FredPlugin, FredPluginHTTP,
> FredPluginThreadless, FredPluginHTTPAdvanced, USKCallback {
>> +public class XMLSpider implements FredPlugin, FredPluginHTTP,
> FredPluginThreadless, FredPluginVersioned,
>> + FredPluginHTTPAdvanced, USKCallback {
>> + static enum Status {
>> + /** For simplicity, running is also mark as QUEUED */
>> + QUEUED, SUCCESSED, FAILED
>
> Typo: "SUCCEEDED"
>
>>
>> private void startSomeRequests() {
>> -
>> -
>> FreenetURI[] initialURIs = core.getBookmarkURIs();
>> for (int i = 0; i < initialURIs.length; i++)
>> - {
>> queueURI(initialURIs[i]);
>> - }
>>
>> ArrayList<ClientGetter> toStart = null;
>> synchronized (this) {
>> if (stopped) {
>> return;
>> }
>> - int running = runningFetchesByURI.size();
>> - int queued = queuedURIList[0].size() +
>> queuedURIList[1].size() +
> queuedURIList[2].size();
>> + int running = runningFetch.size();
>> +
>> + Query query = db.query();
>> + query.constrain(Page.class);
>> + query.descend("status").constrain(Status.QUEUED);
>> + query.descend("lastChange").orderAscending();
>> + ObjectSet<Page> queuedSet = query.execute();
>>
>> - if ((running >= maxParallelRequests) || (queued == 0))
>> + if ((running >= maxParallelRequests) ||
>> (queuedSet.size() - running <=
> 0))
>
> The latter part of this condition doesn't make sense.
Running fetches still have the status QUEUED.
( queuedSet.size() - running == 0 ) means all queued items are already being fetched.
If we saved a "RUNNING" status in the database, we would have to reset it on the next start.
This behaviour is documented in "enum Status".
>
>> return;
>>
>> - toStart = new
>> ArrayList<ClientGetter>(Math.min(maxParallelRequests -
> running, queued));
>> + toStart = new
>> ArrayList<ClientGetter>(maxParallelRequests - running);
>>
>> for (int i = running; i < maxParallelRequests; i++) {
>> - boolean found = false;
>> - for(int j=0;j<queuedURIList.length;j++) {
>> - if(queuedURIList[j].isEmpty())
>> continue;
>> - FreenetURI uri = (FreenetURI)
>> queuedURIList[j].removeFirst();
>> - if(j == queuedURIList.length)
>> queuedURISet.remove(uri);
>> - ClientGetter getter = makeGetter(uri,
>> j);
>> + if (!queuedSet.hasNext())
>> + break;
>> +
>> + Page page = queuedSet.next();
>> + if (runningFetch.containsKey(page))
>> + continue;
>
> You are still incrementing i here.
eeeeee...
I will fix that in the next commit.
>> +
>> + try {
>> + ClientGetter getter = makeGetter(page,
>> 0);
>> +
>> + Logger.minor(this, "Starting " +
>> getter + " " + page);
>> toStart.add(getter);
>> - found = true;
>> - break;
>> + runningFetch.put(page, getter);
>> + } catch (MalformedURLException e) {
>> + Logger.error(this,
>> "IMPOSSIBLE-Malformed URI: " + page, e);
>> +
>> + page.status = Status.FAILED;
>> + page.lastChange =
>> System.currentTimeMillis();
>> + db.store(page);
>> }
>> - if(!found) break;
>> }
>> }
>> - for (int i = 0; i < toStart.size(); i++) {
>> -
>> - ClientGetter g = toStart.get(i);
>> +
>> + for (ClientGetter g : toStart) {
>> try {
>> - runningFetchesByURI.put(g.getURI(), g);
>> g.start();
>> + Logger.minor(this, g + " started");
>> } catch (FetchException e) {
>> - onFailure(e, g,
>> ((MyClientCallback)g.getClientCallback()).tries);
>> + Logger.error(this, "Fetch Exception: " + g, e);
>> + onFailure(e, g, ((MyClientCallback)
>> g.getClientCallback()).page,
> ((MyClientCallback) g
>> + .getClientCallback()).tries);
>> }
>> }
>> }
>>
> ...
>>
>> - public void onFailure(FetchException e, ClientGetter state, int tries)
>> {
>> - FreenetURI uri = state.getURI();
>> - Logger.minor(this, "Failed: "+uri+" : "+e);
>> + public void onFailure(FetchException fe, ClientGetter state, Page page,
> int tries) {
>> + Logger.minor(this, "Failed: [" + tries + "] " + page + " : " +
>> fe, fe);
>>
>> synchronized (this) {
>> - runningFetchesByURI.remove(uri);
>> - failedURIs.add(uri);
>> - tries++;
>> - if(tries < queuedURIList.length && !e.isFatal())
>> - queuedURIList[tries].addLast(uri);
>> + if (fe.newURI != null) {
>> + // redirect, mark as successed
>> + queueURI(fe.newURI);
>> +
>> + runningFetch.remove(page);
>> + page.status = Status.SUCCESSED;
>> + page.lastChange = System.currentTimeMillis();
>> + db.store(page);
>> + } else if (fe.isFatal() || tries > 3) {
>> + // too many tries or fatal, mark as failed
>> + runningFetch.remove(page.id);
>> + page.status = Status.FAILED;
>> + page.lastChange = System.currentTimeMillis();
>> + db.store(page);
>> + } else if (!stopped) {
>> + // Retry
>> + // FIXME why? the original version say this
>> keep off the cooldown queue
>
> That is correct. We can only do 3 tries on each fetch internally, or it ends
> up on the cooldown queue for half an hour. So we retry externally. It would
> probably be better to retry later rather than sooner i.e. to add it to the
> end of the queue with the new retry count.
What if we don't retry immediately, but queue it at the end instead?
>> + ClientGetter getter = null;
>> + try {
>> + getter = makeGetter(page, tries + 1);
>> + getter.start();
>> + runningFetch.put(page, getter);
>> + } catch (MalformedURLException e) {
>> + Logger.error(this,
>> "IMPOSSIBLE-Malformed URI: " + page, e);
>> + } catch (FetchException e) {
>> + onFailure(e, getter,
>> ((MyClientCallback)
> getter.getClientCallback()).page,
>> + ((MyClientCallback)
>> getter.getClientCallback()).tries);
>> + }
>> + }
>> }
>> - if (e.newURI != null)
>> - queueURI(e.newURI);
>>
>> - startSomeRequests();
>> + if (!stopped)
>> + startSomeRequests();
>> }
>>
>> /**
>
> ...
>
>> @@ -922,7 +982,7 @@
>> {
>> appendDefaultHeader(out,null);
>> out.append("<p><h4>"+listname+" URIs</h4></p>");
>> - appendList(listname,out,null);
>> + appendList(listname, out);
>> return out.toString();
>> }
>> appendDefaultPageStart(out,null);
>> @@ -932,8 +992,18 @@
>> try {
>> FreenetURI uri = new FreenetURI(uriParam);
>> synchronized (this) {
>> - failedURIs.remove(uri);
>> - visitedURIs.remove(uri);
>> + // Check if queued already
>> + Page page = getPageByURI(uri);
>> + if (page != null) {
>> + // We have no reliable way to
>> stop a request,
>> + // requeue only if it is
>> successed / failed
>> + if (page.status ==
>> Status.SUCCESSED || page.status == Status.FAILED)
> {
>> + page.lastChange =
>> System.currentTimeMillis();
>> + page.status =
>> Status.QUEUED;
>> +
>> + db.store(page);
>> + }
>> + }
>> }
>
> Why do we re-add it here? Hasn't it already completed?
>
Only if the user adds it manually from the text box.
The old code did the same thing.