On Wednesday 10 December 2008 09:08, j16sdiz at freenetproject.org wrote:
> Author: j16sdiz
> Date: 2008-12-10 09:08:25 +0000 (Wed, 10 Dec 2008)
> New Revision: 24174
>
> Added:
> trunk/plugins/XMLSpider/Version.java
> Modified:
> trunk/plugins/XMLSpider/XMLSpider.java
> Log:
> initial db4o support
Ooooooh........ does this work?
>
> Modified: trunk/plugins/XMLSpider/XMLSpider.java
> ===================================================================
> --- trunk/plugins/XMLSpider/XMLSpider.java 2008-12-10 07:43:04 UTC (rev 24173)
> +++ trunk/plugins/XMLSpider/XMLSpider.java 2008-12-10 09:08:25 UTC (rev 24174)
> @@ -74,59 +84,67 @@
> * @author swati goyal
Add yourself here. :)
> *
> */
> -public class XMLSpider implements FredPlugin, FredPluginHTTP, FredPluginThreadless, FredPluginHTTPAdvanced, USKCallback {
> +public class XMLSpider implements FredPlugin, FredPluginHTTP, FredPluginThreadless, FredPluginVersioned,
> + FredPluginHTTPAdvanced, USKCallback {
> + static enum Status {
> + /** For simplicity, running is also mark as QUEUED */
> + QUEUED, SUCCESSED, FAILED
Typo: "SUCCEEDED"
>
> private void startSomeRequests() {
> -
> -
> FreenetURI[] initialURIs = core.getBookmarkURIs();
> for (int i = 0; i < initialURIs.length; i++)
> - {
> queueURI(initialURIs[i]);
> - }
>
> ArrayList<ClientGetter> toStart = null;
> synchronized (this) {
> if (stopped) {
> return;
> }
> - int running = runningFetchesByURI.size();
> - int queued = queuedURIList[0].size() + queuedURIList[1].size() + queuedURIList[2].size();
> + int running = runningFetch.size();
> +
> + Query query = db.query();
> + query.constrain(Page.class);
> + query.descend("status").constrain(Status.QUEUED);
> + query.descend("lastChange").orderAscending();
> + ObjectSet<Page> queuedSet = query.execute();
>
> - if ((running >= maxParallelRequests) || (queued == 0))
> + if ((running >= maxParallelRequests) || (queuedSet.size() - running <= 0))
The latter part of this condition doesn't make sense.
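If the Status comment is right that running fetches stay marked QUEUED, then
queuedSet.size() - running is presumably meant to be the number of pages
actually waiting; if so, naming it would make the intent explicit. Untested
sketch, reusing the identifiers from the patch:

    // Running fetches are still marked QUEUED, so everything in
    // queuedSet beyond the running ones is actually waiting.
    int waiting = queuedSet.size() - running;
    if (running >= maxParallelRequests || waiting <= 0)
        return;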
> return;
>
> - toStart = new ArrayList<ClientGetter>(Math.min(maxParallelRequests - running, queued));
> + toStart = new ArrayList<ClientGetter>(maxParallelRequests - running);
>
> for (int i = running; i < maxParallelRequests; i++) {
> - boolean found = false;
> - for(int j=0;j<queuedURIList.length;j++) {
> - if(queuedURIList[j].isEmpty()) continue;
> - FreenetURI uri = (FreenetURI) queuedURIList[j].removeFirst();
> - if(j == queuedURIList.length) queuedURISet.remove(uri);
> - ClientGetter getter = makeGetter(uri, j);
> + if (!queuedSet.hasNext())
> + break;
> +
> + Page page = queuedSet.next();
> + if (runningFetch.containsKey(page))
> + continue;
You are still incrementing i here, so every page you skip because it is
already running still uses up one of the parallel slots for this round.
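Untested sketch of a loop that only spends a slot when a fetch actually
starts, using the same identifiers as the patch:

    // Count fetches actually started instead of loop iterations, so a
    // page that is already running does not consume a parallel slot.
    int started = 0;
    while (running + started < maxParallelRequests && queuedSet.hasNext()) {
        Page page = queuedSet.next();
        if (runningFetch.containsKey(page))
            continue; // already running: skip without losing a slot
        try {
            ClientGetter getter = makeGetter(page, 0);
            Logger.minor(this, "Starting " + getter + " " + page);
            toStart.add(getter);
            runningFetch.put(page, getter);
            started++;
        } catch (MalformedURLException e) {
            Logger.error(this, "IMPOSSIBLE-Malformed URI: " + page, e);
            page.status = Status.FAILED;
            page.lastChange = System.currentTimeMillis();
            db.store(page);
        }
    }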
> +
> + try {
> + ClientGetter getter = makeGetter(page, 0);
> +
> + Logger.minor(this, "Starting " + getter + " " + page);
> toStart.add(getter);
> - found = true;
> - break;
> + runningFetch.put(page, getter);
> + } catch (MalformedURLException e) {
> + Logger.error(this, "IMPOSSIBLE-Malformed URI: " + page, e);
> +
> + page.status = Status.FAILED;
> + page.lastChange = System.currentTimeMillis();
> + db.store(page);
> }
> - if(!found) break;
> }
> }
> - for (int i = 0; i < toStart.size(); i++) {
> -
> - ClientGetter g = toStart.get(i);
> +
> + for (ClientGetter g : toStart) {
> try {
> - runningFetchesByURI.put(g.getURI(), g);
> g.start();
> + Logger.minor(this, g + " started");
> } catch (FetchException e) {
> - onFailure(e, g, ((MyClientCallback)g.getClientCallback()).tries);
> + Logger.error(this, "Fetch Exception: " + g, e);
> + onFailure(e, g, ((MyClientCallback) g.getClientCallback()).page, ((MyClientCallback) g
> + .getClientCallback()).tries);
> }
> }
> }
>
...
>
> - public void onFailure(FetchException e, ClientGetter state, int tries) {
> - FreenetURI uri = state.getURI();
> - Logger.minor(this, "Failed: "+uri+" : "+e);
> + public void onFailure(FetchException fe, ClientGetter state, Page page, int tries) {
> + Logger.minor(this, "Failed: [" + tries + "] " + page + " : " + fe, fe);
>
> synchronized (this) {
> - runningFetchesByURI.remove(uri);
> - failedURIs.add(uri);
> - tries++;
> - if(tries < queuedURIList.length && !e.isFatal())
> - queuedURIList[tries].addLast(uri);
> + if (fe.newURI != null) {
> + // redirect, mark as successed
> + queueURI(fe.newURI);
> +
> + runningFetch.remove(page);
> + page.status = Status.SUCCESSED;
> + page.lastChange = System.currentTimeMillis();
> + db.store(page);
> + } else if (fe.isFatal() || tries > 3) {
> + // too many tries or fatal, mark as failed
> + runningFetch.remove(page.id);
> + page.status = Status.FAILED;
> + page.lastChange = System.currentTimeMillis();
> + db.store(page);
> + } else if (!stopped) {
> + // Retry
> + // FIXME why? the original version say this keep off the cooldown queue
That is correct: we can only do 3 tries on each fetch internally before it
ends up on the cooldown queue for half an hour, so we retry externally. It
would probably be better to retry later rather than sooner, i.e. to add it to
the end of the queue with the new retry count.
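Untested sketch: since startSomeRequests() orders the QUEUED pages by
lastChange ascending, flipping the page back to QUEUED with a fresh timestamp
would effectively re-queue it at the end. (The retry count would have to be
stored on the Page itself; the patch keeps it on MyClientCallback.)

    // Re-queue at the back instead of restarting immediately: the QUEUED
    // query sorts by lastChange ascending, so a fresh timestamp puts this
    // page behind everything already waiting.
    runningFetch.remove(page);
    page.status = Status.QUEUED;
    page.lastChange = System.currentTimeMillis();
    db.store(page);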
> + ClientGetter getter = null;
> + try {
> + getter = makeGetter(page, tries + 1);
> + getter.start();
> + runningFetch.put(page, getter);
> + } catch (MalformedURLException e) {
> + Logger.error(this, "IMPOSSIBLE-Malformed URI: " + page, e);
> + } catch (FetchException e) {
> + onFailure(e, getter, ((MyClientCallback) getter.getClientCallback()).page,
> + ((MyClientCallback) getter.getClientCallback()).tries);
> + }
> + }
> }
> - if (e.newURI != null)
> - queueURI(e.newURI);
>
> - startSomeRequests();
> + if (!stopped)
> + startSomeRequests();
> }
>
> /**
...
> @@ -922,7 +982,7 @@
> {
> appendDefaultHeader(out,null);
> out.append("<p><h4>"+listname+" URIs</h4></p>");
> - appendList(listname,out,null);
> + appendList(listname, out);
> return out.toString();
> }
> appendDefaultPageStart(out,null);
> @@ -932,8 +992,18 @@
> try {
> FreenetURI uri = new FreenetURI(uriParam);
> synchronized (this) {
> - failedURIs.remove(uri);
> - visitedURIs.remove(uri);
> + // Check if queued already
> + Page page = getPageByURI(uri);
> + if (page != null) {
> + // We have no reliable way to stop a request,
> + // requeue only if it is successed / failed
> + if (page.status == Status.SUCCESSED || page.status == Status.FAILED) {
> + page.lastChange = System.currentTimeMillis();
> + page.status = Status.QUEUED;
> +
> + db.store(page);
> + }
> + }
> }
Why do we re-add it here? Hasn't it already completed?