Author: toad
Date: 2006-05-26 01:20:55 +0000 (Fri, 26 May 2006)
New Revision: 8873
Added:
trunk/freenet/src/freenet/clients/http/Spider.java
trunk/freenet/src/freenet/clients/http/filter/FoundURICallback.java
Modified:
trunk/freenet/src/freenet/client/async/SingleFileFetcher.java
trunk/freenet/src/freenet/clients/http/BookmarkManager.java
trunk/freenet/src/freenet/clients/http/FProxyToadlet.java
trunk/freenet/src/freenet/clients/http/WelcomeToadlet.java
trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java
trunk/freenet/src/freenet/node/Node.java
trunk/freenet/src/freenet/node/Version.java
Log:
742: Index spider (tested on testnet). Off unless you hack Node.start().
Modified: trunk/freenet/src/freenet/client/async/SingleFileFetcher.java
===================================================================
--- trunk/freenet/src/freenet/client/async/SingleFileFetcher.java
2006-05-25 22:15:22 UTC (rev 8872)
+++ trunk/freenet/src/freenet/client/async/SingleFileFetcher.java
2006-05-26 01:20:55 UTC (rev 8873)
@@ -238,7 +238,7 @@
clientMetadata.mergeNoOverwrite(metadata.getClientMetadata());
// Fetch it from the archive
if(ah == null)
- throw new
FetchException(FetchException.UNKNOWN_METADATA, "Archive redirect not in an
archive");
+ throw new
FetchException(FetchException.UNKNOWN_METADATA, "Archive redirect not in an
archive manifest");
String filename = metadata.getZIPInternalName();
Logger.minor(this, "Fetching "+filename);
Bucket dataBucket = ah.get(filename, actx,
null, recursionLevel+1, true);
Modified: trunk/freenet/src/freenet/clients/http/BookmarkManager.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/BookmarkManager.java 2006-05-25
22:15:22 UTC (rev 8872)
+++ trunk/freenet/src/freenet/clients/http/BookmarkManager.java 2006-05-26
01:20:55 UTC (rev 8873)
@@ -85,6 +85,15 @@
return this.bookmarks.elements();
}
+ /**
+  * Returns the keys of all current bookmarks.
+  *
+  * @return a newly allocated array holding the key of each bookmark;
+  *         empty if there are no bookmarks.
+  */
+ public FreenetURI[] getBookmarkURIs() {
+     // Pass a zero-length array so the collection sizes the snapshot
+     // itself. Pre-sizing from a separate size() call leaves a trailing
+     // null slot (and a NullPointerException below) if a bookmark is
+     // removed between the two calls.
+     Bookmark[] snapshot = (Bookmark[]) bookmarks.toArray(new Bookmark[0]);
+     FreenetURI[] uris = new FreenetURI[snapshot.length];
+     for(int i=0;i<snapshot.length;i++) {
+         uris[i] = snapshot[i].key;
+     }
+     return uris;
+ }
+
public void clear() {
for (Enumeration e = this.bookmarks.elements();
e.hasMoreElements(); ) {
Bookmark i = (Bookmark)e.nextElement();
Modified: trunk/freenet/src/freenet/clients/http/FProxyToadlet.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/FProxyToadlet.java 2006-05-25
22:15:22 UTC (rev 8872)
+++ trunk/freenet/src/freenet/clients/http/FProxyToadlet.java 2006-05-26
01:20:55 UTC (rev 8873)
@@ -179,7 +179,7 @@
try {
if(!force && !forcedownload) {
- data = ContentFilter.filter(data,
ctx.getBucketFactory(), typeName, uri);
+ data = ContentFilter.filter(data,
ctx.getBucketFactory(), typeName, uri, null);
}
if (forcedownload) {
@@ -415,5 +415,4 @@
}
return sb.toString();
}
-
}
Added: trunk/freenet/src/freenet/clients/http/Spider.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/Spider.java 2006-05-25 22:15:22 UTC
(rev 8872)
+++ trunk/freenet/src/freenet/clients/http/Spider.java 2006-05-26 01:20:55 UTC
(rev 8873)
@@ -0,0 +1,253 @@
+package freenet.clients.http;
+
+import java.io.BufferedWriter;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Vector;
+
+import freenet.client.ClientMetadata;
+import freenet.client.FetchException;
+import freenet.client.FetchResult;
+import freenet.client.FetcherContext;
+import freenet.client.InserterException;
+import freenet.client.async.BaseClientPutter;
+import freenet.client.async.ClientCallback;
+import freenet.client.async.ClientGetter;
+import freenet.clients.http.filter.ContentFilter;
+import freenet.clients.http.filter.FoundURICallback;
+import freenet.clients.http.filter.UnsafeContentTypeException;
+import freenet.keys.FreenetURI;
+import freenet.node.Node;
+import freenet.node.RequestStarter;
+import freenet.support.Bucket;
+import freenet.support.Logger;
+
+/**
+ * Spider. Produces an index.
+ */
+public class Spider implements ClientCallback, FoundURICallback {
+
+ // Time of the last index-write attempt (initially the construction
+ // time), as returned by System.currentTimeMillis().
+ long tProducedIndex;
+
+ // URIs visited, or fetching, or queued. Added once then forgotten about.
+ private final HashSet visitedURIs = new HashSet();
+ // URIs for which at least one word has been recorded.
+ private final HashSet urisWithWords = new HashSet();
+ // URIs whose fetch failed.
+ private final HashSet failedURIs = new HashSet();
+ // URIs waiting to be fetched: the set rejects duplicates, the list
+ // keeps them in first-in, first-out order.
+ private final HashSet queuedURISet = new HashSet();
+ private final LinkedList queuedURIList = new LinkedList();
+ // FreenetURI -> ClientGetter for fetches in progress.
+ private final HashMap runningFetchesByURI = new HashMap();
+ // Lower-cased word (String) -> FreenetURI[] of the URIs it was found on.
+ private final HashMap urisByWord = new HashMap();
+ // Can have many; this limit only exists to save memory.
+ private final int maxParallelRequests = 200;
+ private final Node node;
+ // Fetch settings shared by every request this spider creates.
+ private final FetcherContext ctx;
+ // Priority class passed to every ClientGetter this spider creates.
+ private final short PRIORITY_CLASS =
RequestStarter.PREFETCH_PRIORITY_CLASS;
+
+ /**
+  * Creates a spider seeded with the bookmarked URIs and immediately
+  * starts fetching them.
+  *
+  * @param bm   supplies the initial URIs to crawl.
+  * @param node used to create the fetch context and, later, the getters.
+  */
+ public Spider(BookmarkManager bm, Node node) {
+     this.node = node;
+     FetcherContext fetchContext = node.makeClient((short)0).getFetcherContext();
+     fetchContext.maxSplitfileBlockRetries = 10;
+     fetchContext.maxNonSplitfileRetries = 10;
+     // Same 2MB cap for temporary and final data.
+     fetchContext.maxTempLength = 2*1024*1024;
+     fetchContext.maxOutputLength = 2*1024*1024;
+     this.ctx = fetchContext;
+     FreenetURI[] seeds = bm.getBookmarkURIs();
+     int next = 0;
+     while(next < seeds.length) {
+         queueURI(seeds[next]);
+         next++;
+     }
+     tProducedIndex = System.currentTimeMillis();
+     startSomeRequests();
+ }
+
+ /**
+  * Appends a URI to the fetch queue unless it has already been visited
+  * or is already queued.
+  *
+  * @param uri the URI to queue.
+  */
+ private synchronized void queueURI(FreenetURI uri) {
+     if(visitedURIs.contains(uri))
+         return;
+     boolean newlyQueued = queuedURISet.add(uri);
+     if(!newlyQueued)
+         return;
+     Logger.minor(this, "Spider queueing URI: "+uri);
+     queuedURIList.addLast(uri);
+     visitedURIs.add(uri);
+ }
+
+ /**
+  * Starts queued fetches until maxParallelRequests are running or the
+  * queue is empty.
+  *
+  * Getters are created and recorded in runningFetchesByURI while holding
+  * the lock, then started after it is released.
+  */
+ private void startSomeRequests() {
+     Vector toStart;
+     synchronized(this) {
+         int running = runningFetchesByURI.size();
+         int queued = queuedURIList.size();
+         // >= rather than ==, so the Vector below is never given a
+         // negative capacity.
+         if(running >= maxParallelRequests || queued == 0) return;
+         toStart = new Vector(Math.min(maxParallelRequests-running, queued));
+         for(int i=running;i<maxParallelRequests;i++) {
+             if(queuedURIList.isEmpty()) break;
+             FreenetURI uri = (FreenetURI) queuedURIList.removeFirst();
+             queuedURISet.remove(uri);
+             ClientGetter getter = makeGetter(uri);
+             // Record the fetch before it is started, and under the lock:
+             // the map is a plain HashMap shared with the callbacks, and
+             // a fetch that finishes quickly must find its entry to
+             // remove, otherwise a stale entry would occupy a slot
+             // forever.
+             runningFetchesByURI.put(getter.getURI(), getter);
+             toStart.add(getter);
+         }
+     }
+     for(int i=0;i<toStart.size();i++) {
+         ClientGetter g = (ClientGetter) toStart.get(i);
+         try {
+             Logger.minor(this, "Starting "+g+" for "+g.getURI());
+             g.start();
+             Logger.minor(this, "Started "+g+" for "+g.getURI());
+         } catch (FetchException e) {
+             // onFailure removes the entry recorded above.
+             onFailure(e, g);
+         }
+     }
+ }
+
+ /**
+  * Builds a ClientGetter for the given URI using this spider's fetch
+  * context and priority class. The getter is not started here.
+  *
+  * @param uri the URI to fetch.
+  * @return the new, unstarted getter.
+  */
+ private ClientGetter makeGetter(FreenetURI uri) {
+     Logger.minor(this, "Starting getter for "+uri);
+     return new ClientGetter(this, node.chkFetchScheduler, node.sskFetchScheduler, uri, ctx, PRIORITY_CLASS, this, null);
+ }
+
+ /**
+  * Called when a fetch completes. Removes the fetch from the running
+  * set, starts more requests, then passes the data through the content
+  * filter with this spider as the FoundURICallback. The fetched bucket is
+  * always freed.
+  *
+  * @param result the fetched data and its metadata.
+  * @param state  the getter that completed.
+  */
+ public void onSuccess(FetchResult result, ClientGetter state) {
+     FreenetURI fetched = state.getURI();
+     synchronized(this) {
+         runningFetchesByURI.remove(fetched);
+     }
+     Logger.minor(this, "Success: "+fetched);
+     startSomeRequests();
+     ClientMetadata meta = result.getMetadata();
+     Bucket data = result.asBucket();
+     String mimeType = meta.getMIMEType();
+     try {
+         URI base = new URI("http://127.0.0.1:8888/"+fetched.toString(false));
+         // NOTE(review): the Bucket returned by filter() is dropped
+         // without being freed - confirm whether it needs free().
+         ContentFilter.filter(data, ctx.bucketFactory, mimeType, base, this);
+     } catch (UnsafeContentTypeException e) {
+         // Ignore
+     } catch (IOException e) {
+         Logger.error(this, "Bucket error?: "+e, e);
+     } catch (URISyntaxException e) {
+         Logger.error(this, "Internal error: "+e, e);
+     } finally {
+         data.free();
+     }
+ }
+
+ /**
+  * Called when a fetch fails. Records the failure, removes the fetch from
+  * the running set, queues the replacement URI carried by the exception
+  * (if any) and starts more requests.
+  *
+  * @param e     the failure; e.newURI, when non-null, is queued.
+  * @param state the getter that failed.
+  */
+ public void onFailure(FetchException e, ClientGetter state) {
+     FreenetURI failed = state.getURI();
+     Logger.minor(this, "Failed: "+failed);
+     synchronized(this) {
+         runningFetchesByURI.remove(failed);
+         failedURIs.add(failed);
+     }
+     FreenetURI replacement = e.newURI;
+     if(replacement != null)
+         queueURI(replacement);
+     startSomeRequests();
+ }
+
+ /**
+  * Insert-success callback. This class only creates getters, so there is
+  * nothing to do.
+  */
+ public void onSuccess(BaseClientPutter state) {
+ // Ignore
+ }
+
+ /**
+  * Insert-failure callback. This class only creates getters, so there is
+  * nothing to do.
+  */
+ public void onFailure(InserterException e, BaseClientPutter state) {
+ // Ignore
+ }
+
+ /**
+  * Insert URI-generated callback. This class only creates getters, so
+  * there is nothing to do.
+  */
+ public void onGeneratedURI(FreenetURI uri, BaseClientPutter state) {
+ // Ignore
+ }
+
+ /**
+  * FoundURICallback hook: queues the URI (queueURI skips it if already
+  * visited or queued) and then tries to start more requests.
+  *
+  * @param uri a URI found in filtered content.
+  */
+ public void foundURI(FreenetURI uri) {
+ queueURI(uri);
+ startSomeRequests();
+ }
+
+ /**
+  * Indexes a run of plain text. The text is split on every character
+  * outside A-Z, a-z and 0-9; each non-empty piece is lower-cased and
+  * recorded against the page's URI.
+  *
+  * @param s       the text to index.
+  * @param baseURI URI of the page; its path is parsed as a FreenetURI. If
+  *                that fails the error is logged and the text is dropped.
+  */
+ public void onText(String s, URI baseURI) {
+     FreenetURI uri;
+     try {
+         uri = new FreenetURI(baseURI.getPath());
+     } catch (MalformedURLException e) {
+         Logger.error(this, "Caught "+e, e);
+         return;
+     }
+     String[] words = s.split("[^A-Za-z0-9]");
+     for(int i=0;i<words.length;i++) {
+         String word = words[i];
+         // Adjacent separators produce empty pieces.
+         if(word.length() == 0) continue;
+         // Fixed locale: the default-locale form maps "I" to a dotless
+         // i under a Turkish locale, so the index keys would depend on
+         // the machine the spider runs on.
+         addWord(word.toLowerCase(java.util.Locale.ENGLISH), uri);
+     }
+ }
+
+ /**
+  * Records that a word occurs at a URI. Returns early if the pair is
+  * already known. Otherwise, once more than ten seconds have passed since
+  * the last index write, the index file is rewritten.
+  *
+  * @param word the (already lower-cased) word.
+  * @param uri  the URI the word was found on.
+  */
+ private synchronized void addWord(String word, FreenetURI uri) {
+     urisWithWords.add(uri);
+     FreenetURI[] known = (FreenetURI[]) urisByWord.get(word);
+     FreenetURI[] updated;
+     if(known != null) {
+         int count = known.length;
+         for(int k=0;k<count;k++) {
+             if(known[k].equals(uri))
+                 return; // Pair already recorded
+         }
+         // Grow the array by one and append the new URI.
+         updated = new FreenetURI[count+1];
+         System.arraycopy(known, 0, updated, 0, count);
+         updated[count] = uri;
+     } else {
+         updated = new FreenetURI[] { uri };
+     }
+     urisByWord.put(word, updated);
+     Logger.minor(this, "Added word: "+word+" for "+uri);
+     boolean indexDue = tProducedIndex + 10*1000 < System.currentTimeMillis();
+     if(!indexDue)
+         return;
+     try {
+         produceIndex();
+     } catch (IOException e) {
+         Logger.error(this, "Caught "+e+" while creating index", e);
+     }
+     // Reset the timer even after a failure.
+     tProducedIndex = System.currentTimeMillis();
+ }
+
+ /**
+  * Writes the current index to the file "index.new" as UTF-8 text.
+  *
+  * The file holds one "!uri" line per URI with words, followed by one
+  * "?word n n ..." line per word in sorted order, where each n is the
+  * zero-based position of a URI among the "!" lines.
+  *
+  * Nothing is written, and the file is left untouched, when there is
+  * nothing to index.
+  *
+  * @throws IOException if the file cannot be opened or written.
+  */
+ private synchronized void produceIndex() throws IOException {
+     // Check before opening the file, so this path neither truncates an
+     // existing index nor leaves an open stream behind.
+     if(urisByWord.isEmpty() || urisWithWords.isEmpty()) {
+         Logger.minor(this, "No URIs with words");
+         return;
+     }
+     String[] words = (String[]) urisByWord.keySet().toArray(new String[urisByWord.size()]);
+     Arrays.sort(words);
+     FreenetURI[] uris = (FreenetURI[]) urisWithWords.toArray(new FreenetURI[urisWithWords.size()]);
+     // Produce an index file.
+     FileOutputStream fos = new FileOutputStream("index.new");
+     try {
+         BufferedWriter bw;
+         try {
+             bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"));
+         } catch (UnsupportedEncodingException e) {
+             throw new Error(e);
+         }
+         // URI section; remember each URI's line number for the word
+         // section.
+         HashMap urisToNumbers = new HashMap();
+         for(int i=0;i<uris.length;i++) {
+             urisToNumbers.put(uris[i], new Integer(i));
+             bw.write("!" + uris[i].toString(false)+"\n");
+         }
+         // Word section.
+         for(int i=0;i<words.length;i++) {
+             StringBuffer s = new StringBuffer();
+             s.append('?');
+             s.append(words[i]);
+             FreenetURI[] urisForWord = (FreenetURI[]) urisByWord.get(words[i]);
+             for(int j=0;j<urisForWord.length;j++) {
+                 FreenetURI uri = urisForWord[j];
+                 Integer x = (Integer) urisToNumbers.get(uri);
+                 if(x == null)
+                     Logger.error(this, "Eh?");
+                 else {
+                     s.append(' ');
+                     s.append(x.toString());
+                 }
+             }
+             s.append('\n');
+             bw.write(s.toString());
+         }
+         // Flushes the buffered output; a failure here still reaches the
+         // caller as an IOException.
+         bw.close();
+     } finally {
+         // Releases the file if anything above threw. This is a repeat
+         // close, with no effect, after a successful bw.close().
+         fos.close();
+     }
+ }
+
+}
Modified: trunk/freenet/src/freenet/clients/http/WelcomeToadlet.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/WelcomeToadlet.java 2006-05-25
22:15:22 UTC (rev 8872)
+++ trunk/freenet/src/freenet/clients/http/WelcomeToadlet.java 2006-05-26
01:20:55 UTC (rev 8873)
@@ -38,6 +38,7 @@
this.node = n;
this.config = sc;
this.bookmarks = new BookmarkManager(n);
+ node.bookmarkManager = bookmarks;
sc.register("bookmarks", n.isTestnetEnabled() ?
DEFAULT_TESTNET_BOOKMARKS : DEFAULT_DARKNET_BOOKMARKS, 0, false, "List of
bookmarks", "A list of bookmarked freesites", this.bookmarks.makeCB());
Modified: trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
2006-05-25 22:15:22 UTC (rev 8872)
+++ trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
2006-05-26 01:20:55 UTC (rev 8873)
@@ -104,7 +104,7 @@
* Filter some data.
* @throws IOException If an internal error involving buckets occurred.
*/
- public static Bucket filter(Bucket data, BucketFactory bf, String
typeName, URI baseURI) throws UnsafeContentTypeException, IOException {
+ public static Bucket filter(Bucket data, BucketFactory bf, String
typeName, URI baseURI, FoundURICallback cb) throws UnsafeContentTypeException,
IOException {
String type = typeName;
String options = "";
String charset = null;
@@ -154,7 +154,7 @@
charset = detectCharset(data, handler);
}
- return handler.readFilter.readFilter(data, bf,
charset, otherParams, new GenericReadFilterCallback(baseURI));
+ return handler.readFilter.readFilter(data, bf,
charset, otherParams, new GenericReadFilterCallback(baseURI, cb));
}
handler.throwUnsafeContentTypeException();
return null;
Modified: trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
2006-05-25 22:15:22 UTC (rev 8872)
+++ trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
2006-05-26 01:20:55 UTC (rev 8873)
@@ -28,4 +28,9 @@
*/
public String onBaseHref(String baseHref);
+ /**
+ * Process plain-text. Notification only; can't modify.
+ * @param s the text that was encountered.
+ */
+ public void onText(String s);
+
}
Added: trunk/freenet/src/freenet/clients/http/filter/FoundURICallback.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/FoundURICallback.java
2006-05-25 22:15:22 UTC (rev 8872)
+++ trunk/freenet/src/freenet/clients/http/filter/FoundURICallback.java
2006-05-26 01:20:55 UTC (rev 8873)
@@ -0,0 +1,13 @@
+package freenet.clients.http.filter;
+
+import java.net.URI;
+
+import freenet.keys.FreenetURI;
+
+/**
+ * Receives the URIs and plain text found while content is being filtered.
+ */
+public interface FoundURICallback {
+
+ /**
+  * Called with a Freenet URI found in the content.
+  * @param uri the URI that was found.
+  */
+ public void foundURI(FreenetURI uri);
+
+ /**
+  * Called with a run of plain text found in the content.
+  * @param s the text.
+  * @param baseURI the base URI of the content being filtered.
+  */
+ public void onText(String s, URI baseURI);
+
+}
Modified:
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
===================================================================
---
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
2006-05-25 22:15:22 UTC (rev 8872)
+++
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
2006-05-26 01:20:55 UTC (rev 8873)
@@ -14,14 +14,17 @@
public class GenericReadFilterCallback implements FilterCallback {
private URI baseURI;
+ private final FoundURICallback cb;
- public GenericReadFilterCallback(URI uri) {
+ public GenericReadFilterCallback(URI uri, FoundURICallback cb) {
this.baseURI = uri;
+ this.cb = cb;
}
- public GenericReadFilterCallback(FreenetURI uri) {
+ public GenericReadFilterCallback(FreenetURI uri, FoundURICallback cb) {
try {
this.baseURI = new URI("/" + uri.toString(false));
+ this.cb = cb;
} catch (URISyntaxException e) {
throw new Error(e);
}
@@ -143,6 +146,7 @@
// Valid freenet URI, allow it
// Now what about the queries?
HTTPRequest req = new HTTPRequest(uri);
+ if(cb != null) cb.foundURI(furi);
return finishProcess(req, overrideType, "/" +
furi.toString(false), uri, noRelative);
}
@@ -160,5 +164,10 @@
return baseURI.toASCIIString();
}
}
+
+ /**
+  * Forwards the text, together with this callback's base URI, to the
+  * FoundURICallback. Does nothing when no callback was supplied.
+  */
+ public void onText(String s) {
+ if(cb != null)
+ cb.onText(s, baseURI);
+ }
}
Modified: trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
2006-05-25 22:15:22 UTC (rev 8872)
+++ trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
2006-05-26 01:20:55 UTC (rev 8873)
@@ -384,7 +384,10 @@
out.append(c);
}
}
- w.write(out.toString());
+ String sout = out.toString();
+ if(pc.cb != null)
+ pc.cb.onText(sout);
+ w.write(sout);
}
void processTag(Vector splitTag, Writer w, HTMLParseContext pc)
Modified: trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java
2006-05-25 22:15:22 UTC (rev 8872)
+++ trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java
2006-05-26 01:20:55 UTC (rev 8873)
@@ -19,4 +19,8 @@
return null;
}
+ /** Ignores the text; this callback does not act on it. */
+ public void onText(String s) {
+ // Do nothing
+ }
+
}
Modified: trunk/freenet/src/freenet/node/Node.java
===================================================================
--- trunk/freenet/src/freenet/node/Node.java 2006-05-25 22:15:22 UTC (rev
8872)
+++ trunk/freenet/src/freenet/node/Node.java 2006-05-26 01:20:55 UTC (rev
8873)
@@ -41,9 +41,10 @@
import freenet.client.async.ClientPutter;
import freenet.client.async.ClientRequestScheduler;
import freenet.client.async.USKManager;
+import freenet.clients.http.BookmarkManager;
import freenet.clients.http.FProxyToadlet;
import freenet.clients.http.SimpleToadletServer;
-import freenet.config.BooleanCallback;
+import freenet.clients.http.Spider;
import freenet.config.Config;
import freenet.config.FilePersistentConfig;
import freenet.config.IntCallback;
@@ -87,11 +88,11 @@
import freenet.keys.SSKBlock;
import freenet.keys.SSKVerifyException;
import freenet.node.fcp.FCPServer;
+import freenet.node.updater.NodeUpdater;
import freenet.node.useralerts.BuildOldAgeUserAlert;
import freenet.node.useralerts.IPUndetectedUserAlert;
import freenet.node.useralerts.MeaningfulNodeNameUserAlert;
import freenet.node.useralerts.UserAlertManager;
-import freenet.node.updater.NodeUpdater;
import freenet.pluginmanager.PluginManager;
import freenet.store.BerkeleyDBFreenetStore;
import freenet.store.FreenetStore;
@@ -1404,6 +1405,11 @@
if(testnetHandler != null)
testnetHandler.start();
+ // Spider. FIXME.
+
+ //if(testnetEnabled)
+ // new Spider(bookmarkManager, this);
+
persistentTempBucketFactory.completedInit();
redetectAddress();
@@ -2476,6 +2482,7 @@
}
FreenetInetAddress lastIP;
+ public BookmarkManager bookmarkManager;
public void redetectAddress() {
FreenetInetAddress newIP = detectPrimaryIPAddress();
Modified: trunk/freenet/src/freenet/node/Version.java
===================================================================
--- trunk/freenet/src/freenet/node/Version.java 2006-05-25 22:15:22 UTC (rev
8872)
+++ trunk/freenet/src/freenet/node/Version.java 2006-05-26 01:20:55 UTC (rev
8873)
@@ -18,7 +18,7 @@
public static final String protocolVersion = "1.0";
/** The build number of the current revision */
- private static final int buildNumber = 741;
+ private static final int buildNumber = 742;
/** Oldest build of Fred we will talk to */
private static final int lastGoodBuild = 732;