On Wednesday 10 December 2008 14:30, Daniel Cheng wrote:
> On Wed, Dec 10, 2008 at 9:01 PM, Matthew Toseland
> <toad at amphibian.dyndns.org> wrote:
> > On Wednesday 10 December 2008 09:08, j16sdiz at freenetproject.org wrote:
> >> Author: j16sdiz
> >> Date: 2008-12-10 09:08:25 +0000 (Wed, 10 Dec 2008)
> >> New Revision: 24174
> >>
> >> Added:
> >> trunk/plugins/XMLSpider/Version.java
> >> Modified:
> >> trunk/plugins/XMLSpider/XMLSpider.java
> >> Log:
> >> initial db4o support
> >
> > Ooooooh........ does this work?
> >>
> >> Modified: trunk/plugins/XMLSpider/XMLSpider.java
> >> ===================================================================
> >> --- trunk/plugins/XMLSpider/XMLSpider.java 2008-12-10 07:43:04 UTC (rev 24173)
> >> +++ trunk/plugins/XMLSpider/XMLSpider.java 2008-12-10 09:08:25 UTC (rev 24174)
> >> @@ -74,59 +84,67 @@
> >> * @author swati goyal
> >
> > Add yourself here. :)
> >> *
> >> */
> >> -public class XMLSpider implements FredPlugin, FredPluginHTTP, FredPluginThreadless, FredPluginHTTPAdvanced, USKCallback {
> >> +public class XMLSpider implements FredPlugin, FredPluginHTTP, FredPluginThreadless, FredPluginVersioned,
> >> +		FredPluginHTTPAdvanced, USKCallback {
> >> + static enum Status {
> >> + /** For simplicity, running is also mark as QUEUED */
> >> + QUEUED, SUCCESSED, FAILED
> >
> > Typo: "SUCCEEDED"
> >
> >>
> >> private void startSomeRequests() {
> >> -
> >> -
> >> FreenetURI[] initialURIs = core.getBookmarkURIs();
> >> for (int i = 0; i < initialURIs.length; i++)
> >> - {
> >> queueURI(initialURIs[i]);
> >> - }
> >>
> >> ArrayList<ClientGetter> toStart = null;
> >> synchronized (this) {
> >> if (stopped) {
> >> return;
> >> }
> >> - int running = runningFetchesByURI.size();
> >> - int queued = queuedURIList[0].size() + queuedURIList[1].size() + queuedURIList[2].size();
> >> + int running = runningFetch.size();
> >> +
> >> + Query query = db.query();
> >> + query.constrain(Page.class);
> >> + query.descend("status").constrain(Status.QUEUED);
> >> + query.descend("lastChange").orderAscending();
> >> + ObjectSet<Page> queuedSet = query.execute();
> >>
> >> - if ((running >= maxParallelRequests) || (queued == 0))
> >> + if ((running >= maxParallelRequests) || (queuedSet.size() - running <= 0))
> >
> > The latter part of this condition doesn't make sense.
>
> Running fetches have the status QUEUED.
>
> ( queuedSet.size() - running == 0 ) means all queued items are being fetched.
>
> If we saved a "RUNNING" status in the database, we would have to reset it on
> the next start. This behaviour is documented in "enum Status".
Hmmm ok, but getting the size of the ObjectSet means loading all the object
IDs, even in lazy evaluation mode.
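
For illustration, a minimal sketch of the alternative, reusing the Page /
Status schema and the runningFetch map from the diff above. db4o's ObjectSet
offers hasNext()/next(), so iterating can stop early instead of forcing the
whole result set the way size() does:

	Query query = db.query();
	query.constrain(Page.class);
	query.descend("status").constrain(Status.QUEUED);
	query.descend("lastChange").orderAscending();
	ObjectSet<Page> queuedSet = query.execute();

	int slots = maxParallelRequests - runningFetch.size();
	while (slots > 0 && queuedSet.hasNext()) {
		Page page = queuedSet.next();
		// running fetches keep status QUEUED, so skip them here
		if (runningFetch.containsKey(page))
			continue;
		// start a fetch for this page, as startSomeRequests() does above
		slots--;
	}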
>
> >> +
> >> + try {
> >> + ClientGetter getter = makeGetter(page, 0);
> >> +
> >> + Logger.minor(this, "Starting " + getter + " " + page);
> >> toStart.add(getter);
> >> - found = true;
> >> - break;
> >> + runningFetch.put(page, getter);
> >> + } catch (MalformedURLException e) {
> >> + Logger.error(this, "IMPOSSIBLE-Malformed URI: " + page, e);
> >> +
> >> + page.status = Status.FAILED;
> >> + page.lastChange = System.currentTimeMillis();
> >> + db.store(page);
> >> }
> >> - if(!found) break;
> >> }
> >> }
> >> - for (int i = 0; i < toStart.size(); i++) {
> >> -
> >> - ClientGetter g = toStart.get(i);
> >> +
> >> + for (ClientGetter g : toStart) {
> >> try {
> >> - runningFetchesByURI.put(g.getURI(), g);
> >> g.start();
> >> + Logger.minor(this, g + " started");
> >> } catch (FetchException e) {
> >> - onFailure(e, g, ((MyClientCallback)g.getClientCallback()).tries);
> >> + Logger.error(this, "Fetch Exception: " + g, e);
> >> + onFailure(e, g, ((MyClientCallback) g.getClientCallback()).page, ((MyClientCallback) g.getClientCallback()).tries);
> >> }
> >> }
> >> }
> >>
> > ...
> >>
> >> - public void onFailure(FetchException e, ClientGetter state, int tries) {
> >> - FreenetURI uri = state.getURI();
> >> - Logger.minor(this, "Failed: "+uri+" : "+e);
> >> + public void onFailure(FetchException fe, ClientGetter state, Page page, int tries) {
> >> + Logger.minor(this, "Failed: [" + tries + "] " + page + " : " + fe, fe);
> >>
> >> synchronized (this) {
> >> - runningFetchesByURI.remove(uri);
> >> - failedURIs.add(uri);
> >> - tries++;
> >> - if(tries < queuedURIList.length && !e.isFatal())
> >> - queuedURIList[tries].addLast(uri);
> >> + if (fe.newURI != null) {
> >> + // redirect, mark as successed
> >> + queueURI(fe.newURI);
> >> +
> >> + runningFetch.remove(page);
> >> + page.status = Status.SUCCESSED;
> >> + page.lastChange = System.currentTimeMillis();
> >> + db.store(page);
> >> + } else if (fe.isFatal() || tries > 3) {
> >> + // too many tries or fatal, mark as failed
> >> + runningFetch.remove(page.id);
> >> + page.status = Status.FAILED;
> >> + page.lastChange = System.currentTimeMillis();
> >> + db.store(page);
> >> + } else if (!stopped) {
> >> + // Retry
> >> + // FIXME why? the original version says this keeps it off the cooldown queue
> >
> > That is correct. We can only do 3 tries on each fetch internally, or it
> > ends up on the cooldown queue for half an hour. So we retry externally.
> > It would probably be better to retry later rather than sooner, i.e. to
> > add it to the end of the queue with the new retry count.
>
> What if we don't retry immediately, but queue it at the end instead?
Probably would be better.
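
For illustration, a minimal sketch of "queue at the end", assuming the schema
in the diff: startSomeRequests() orders the queue by lastChange ascending, so
leaving the page QUEUED and refreshing lastChange moves it to the back. The
retries field is hypothetical, added here only to carry the external retry
count across attempts:

	} else if (!stopped) {
		// Retry later rather than sooner: keep the page QUEUED and
		// push it to the back of the lastChange-ordered queue.
		runningFetch.remove(page);
		page.status = Status.QUEUED; // running pages stay QUEUED anyway
		page.retries = tries + 1;    // hypothetical field, not in the diff
		page.lastChange = System.currentTimeMillis();
		db.store(page);
	}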