On Wed, Dec 10, 2008 at 9:01 PM, Matthew Toseland
<toad at amphibian.dyndns.org> wrote:
> On Wednesday 10 December 2008 09:08, j16sdiz at freenetproject.org wrote:
>> Author: j16sdiz
>> Date: 2008-12-10 09:08:25 +0000 (Wed, 10 Dec 2008)
>> New Revision: 24174
>>
>> Added:
>>    trunk/plugins/XMLSpider/Version.java
>> Modified:
>>    trunk/plugins/XMLSpider/XMLSpider.java
>> Log:
>> initial db4o support
>
> Ooooooh........ does this work?
>>
>> Modified: trunk/plugins/XMLSpider/XMLSpider.java
>> ===================================================================
>> --- trunk/plugins/XMLSpider/XMLSpider.java    2008-12-10 07:43:04 UTC (rev 24173)
>> +++ trunk/plugins/XMLSpider/XMLSpider.java    2008-12-10 09:08:25 UTC (rev 24174)
>> @@ -74,59 +84,67 @@
>>   *  @author swati goyal
>
> Add yourself here. :)
>>   *
>>   */
>> -public class XMLSpider implements FredPlugin, FredPluginHTTP, FredPluginThreadless,  FredPluginHTTPAdvanced, USKCallback {
>> +public class XMLSpider implements FredPlugin, FredPluginHTTP, FredPluginThreadless, FredPluginVersioned,
>> +        FredPluginHTTPAdvanced, USKCallback {
>> +     static enum Status {
>> +             /** For simplicity, running is also mark as QUEUED */
>> +             QUEUED, SUCCESSED, FAILED
>
> Typo: "SUCCEEDED"
>
>>
>>       private void startSomeRequests() {
>> -
>> -
>>               FreenetURI[] initialURIs = core.getBookmarkURIs();
>>               for (int i = 0; i < initialURIs.length; i++)
>> -             {
>>                       queueURI(initialURIs[i]);
>> -             }
>>
>>               ArrayList<ClientGetter> toStart = null;
>>               synchronized (this) {
>>                       if (stopped) {
>>                               return;
>>                       }
>> -                     int running = runningFetchesByURI.size();
>> -                     int queued = queuedURIList[0].size() + queuedURIList[1].size() + queuedURIList[2].size();
>> +                     int running = runningFetch.size();
>> +
>> +                     Query query = db.query();
>> +                     query.constrain(Page.class);
>> +                     query.descend("status").constrain(Status.QUEUED);
>> +                     query.descend("lastChange").orderAscending();
>> +                     ObjectSet<Page> queuedSet = query.execute();
>>
>> -                     if ((running >= maxParallelRequests) || (queued == 0))
>> +                     if ((running >= maxParallelRequests) || (queuedSet.size() - running <= 0))
>
> The latter part of this condition doesn't make sense.

Running fetches still have the status QUEUED.

(queuedSet.size() - running == 0) means every queued item is already being fetched.

If we saved a "RUNNING" status in the database, we would have to reset it on the next
start. This behaviour is documented in "enum Status".
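
To make the trade-off concrete, this is roughly what the extra startup bookkeeping
would look like if we did persist a separate running state (a sketch only, using the
db4o query API the same way as the patch; Status.RUNNING does not exist in the
current code):

// Sketch: pages left in a hypothetical RUNNING state by a previous run
// would have to be pushed back to QUEUED before fetching restarts.
Query query = db.query();
query.constrain(Page.class);
query.descend("status").constrain(Status.RUNNING); // hypothetical status
ObjectSet<Page> orphaned = query.execute();
while (orphaned.hasNext()) {
        Page page = orphaned.next();
        page.status = Status.QUEUED;
        page.lastChange = System.currentTimeMillis();
        db.store(page);
}

Keeping running fetches as QUEUED avoids that pass entirely; the cost is the
slightly odd looking (queuedSet.size() - running) arithmetic above.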


>
>>                               return;
>>
>> -                     toStart = new ArrayList<ClientGetter>(Math.min(maxParallelRequests - running, queued));
>> +                     toStart = new ArrayList<ClientGetter>(maxParallelRequests - running);
>>
>>                       for (int i = running; i < maxParallelRequests; i++) {
>> -                             boolean found = false;
>> -                             for(int j=0;j<queuedURIList.length;j++) {
>> -                                     if(queuedURIList[j].isEmpty()) continue;
>> -                                     FreenetURI uri = (FreenetURI) queuedURIList[j].removeFirst();
>> -                                     if(j == queuedURIList.length) queuedURISet.remove(uri);
>> -                             ClientGetter getter = makeGetter(uri, j);
>> +                             if (!queuedSet.hasNext())
>> +                                     break;
>> +
>> +                             Page page = queuedSet.next();
>> +                             if (runningFetch.containsKey(page))
>> +                                     continue;
>
> You are still incrementing i here.

Eeeeee...
Will fix in the next commit.
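
For reference, something along these lines would avoid burning a slot on skipped
pages (a sketch against the loop in the patch, same field names; not tested):

// Sketch: count fetches actually started instead of loop iterations, so a
// page that is already in runningFetch does not consume one of the
// maxParallelRequests slots.
int started = running;
while (started < maxParallelRequests && queuedSet.hasNext()) {
        Page page = queuedSet.next();
        if (runningFetch.containsKey(page))
                continue; // skip without using up a slot

        try {
                ClientGetter getter = makeGetter(page, 0);
                Logger.minor(this, "Starting " + getter + " " + page);
                toStart.add(getter);
                runningFetch.put(page, getter);
                started++; // only count fetches we actually queued
        } catch (MalformedURLException e) {
                Logger.error(this, "IMPOSSIBLE-Malformed URI: " + page, e);
                page.status = Status.FAILED;
                page.lastChange = System.currentTimeMillis();
                db.store(page);
        }
}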

>> +
>> +                             try {
>> +                                     ClientGetter getter = makeGetter(page, 0);
>> +
>> +                                     Logger.minor(this, "Starting " + getter + " " + page);
>>                                       toStart.add(getter);
>> -                                     found = true;
>> -                                     break;
>> +                                     runningFetch.put(page, getter);
>> +                             } catch (MalformedURLException e) {
>> +                                     Logger.error(this, "IMPOSSIBLE-Malformed URI: " + page, e);
>> +
>> +                                     page.status = Status.FAILED;
>> +                                     page.lastChange = System.currentTimeMillis();
>> +                                     db.store(page);
>>                               }
>> -                             if(!found) break;
>>                       }
>>               }
>> -             for (int i = 0; i < toStart.size(); i++) {
>> -
>> -                     ClientGetter g = toStart.get(i);
>> +
>> +             for (ClientGetter g : toStart) {
>>                       try {
>> -                             runningFetchesByURI.put(g.getURI(), g);
>>                               g.start();
>> +                             Logger.minor(this, g + " started");
>>                       } catch (FetchException e) {
>> -                             onFailure(e, g, ((MyClientCallback)g.getClientCallback()).tries);
>> +                Logger.error(this, "Fetch Exception: " + g, e);
>> +                             onFailure(e, g, ((MyClientCallback) g.getClientCallback()).page, ((MyClientCallback) g
>> +                                     .getClientCallback()).tries);
>>                       }
>>               }
>>       }
>>
> ...
>>
>> -     public void onFailure(FetchException e, ClientGetter state, int tries) {
>> -             FreenetURI uri = state.getURI();
>> -             Logger.minor(this, "Failed: "+uri+" : "+e);
>> +     public void onFailure(FetchException fe, ClientGetter state, Page page, int tries) {
>> +             Logger.minor(this, "Failed: [" + tries + "] " + page + " : " + fe, fe);
>>
>>               synchronized (this) {
>> -                     runningFetchesByURI.remove(uri);
>> -                     failedURIs.add(uri);
>> -                     tries++;
>> -                     if(tries < queuedURIList.length && !e.isFatal())
>> -                             queuedURIList[tries].addLast(uri);
>> +                     if (fe.newURI != null) {
>> +                             // redirect, mark as successed
>> +                             queueURI(fe.newURI);
>> +
>> +                             runningFetch.remove(page);
>> +                             page.status = Status.SUCCESSED;
>> +                             page.lastChange = System.currentTimeMillis();
>> +                             db.store(page);
>> +                     } else if (fe.isFatal() || tries > 3) {
>> +                             // too many tries or fatal, mark as failed
>> +                             runningFetch.remove(page.id);
>> +                             page.status = Status.FAILED;
>> +                             page.lastChange = System.currentTimeMillis();
>> +                             db.store(page);
>> +                     } else if (!stopped) {
>> +                             // Retry
>> +                             // FIXME why? the original version say this keep off the cooldown queue
>
> That is correct. We can only do 3 tries on each fetch internally, or it ends
> up on the cooldown queue for half an hour. So we retry externally. It would
> probably be better to retry later rather than sooner i.e. to add it to the
> end of the queue with the new retry count.

What if we don't retry immediately, but re-queue it at the end instead?
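
Since the query orders by lastChange ascending, "queue at the end" could be as simple
as bumping lastChange and leaving the status as QUEUED, rather than calling
makeGetter() again straight away. A rough sketch (the retries field is hypothetical;
the patch does not persist the retry count yet):

// Sketch: push a failed page to the back of the lastChange-ordered queue
// instead of retrying it immediately; startSomeRequests() picks it up later.
private void requeueForRetry(Page page, int tries) {
        synchronized (this) {
                runningFetch.remove(page);
                page.status = Status.QUEUED;                  // stays eligible for fetching
                page.lastChange = System.currentTimeMillis(); // moves it to the end of the queue
                // page.retries = tries + 1;                  // hypothetical field, not in the patch
                db.store(page);
        }
}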


>> +                             ClientGetter getter = null;
>> +                             try {
>> +                                     getter = makeGetter(page, tries + 1);
>> +                                     getter.start();
>> +                                     runningFetch.put(page, getter);
>> +                             } catch (MalformedURLException e) {
>> +                                     Logger.error(this, "IMPOSSIBLE-Malformed URI: " + page, e);
>> +                             } catch (FetchException e) {
>> +                                     onFailure(e, getter, ((MyClientCallback) getter.getClientCallback()).page,
>> +                                                     ((MyClientCallback) getter.getClientCallback()).tries);
>> +                             }
>> +                     }
>>               }
>> -             if (e.newURI != null)
>> -                     queueURI(e.newURI);
>>
>> -             startSomeRequests();
>> +             if (!stopped)
>> +                     startSomeRequests();
>>       }
>>
>>       /**
>
> ...
>
>> @@ -922,7 +982,7 @@
>>               {
>>                       appendDefaultHeader(out,null);
>>                       out.append("<p><h4>"+listname+" URIs</h4></p>");
>> -                     appendList(listname,out,null);
>> +                     appendList(listname, out);
>>                       return out.toString();
>>               }
>>               appendDefaultPageStart(out,null);
>> @@ -932,8 +992,18 @@
>>                       try {
>>                               FreenetURI uri = new FreenetURI(uriParam);
>>                               synchronized (this) {
>> -                                     failedURIs.remove(uri);
>> -                                     visitedURIs.remove(uri);
>> +                                     // Check if queued already
>> +                                     Page page = getPageByURI(uri);
>> +                                     if (page != null) {
>> +                                             // We have no reliable way to stop a request,
>> +                                             // requeue only if it is successed / failed
>> +                                             if (page.status == Status.SUCCESSED || page.status == Status.FAILED) {
>> +                                                     page.lastChange = System.currentTimeMillis();
>> +                                                     page.status = Status.QUEUED;
>> +
>> +                                                     db.store(page);
>> +                                             }
>> +                                     }
>>                               }
>
> Why do we re-add it here? Hasn't it already completed?
>

Only if the user adds it manually from the text box. The old code did the same
(it removed the URI from failedURIs / visitedURIs).
