Author: toad
Date: 2007-08-18 16:36:14 +0000 (Sat, 18 Aug 2007)
New Revision: 14788
Modified:
trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
trunk/freenet/src/freenet/clients/http/filter/FoundURICallback.java
trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
Log:
onText() receives *decoded text*.
Better javadocs/comments for FilterCallback and FoundURICallback.
Modified: trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java 2007-08-18 16:03:33 UTC (rev 14787)
+++ trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java 2007-08-18 16:36:14 UTC (rev 14788)
@@ -23,7 +23,11 @@
/**
* Process plain-text. Notification only; can't modify.
* Type can be null, or can correspond, for example to HTML tag name around text
- * (for example: "title")
+ * (for example: "title").
+ *
+ * Note that the string will have been fed through the relevant decoder if
+ * necessary (e.g. HTMLDecoder). It must be re-encoded if it is sent out as
+ * text to a browser.
*/
public void onText(String s, String type);
Modified: trunk/freenet/src/freenet/clients/http/filter/FoundURICallback.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/FoundURICallback.java 2007-08-18 16:03:33 UTC (rev 14787)
+++ trunk/freenet/src/freenet/clients/http/filter/FoundURICallback.java 2007-08-18 16:36:14 UTC (rev 14788)
@@ -9,11 +9,25 @@
public interface FoundURICallback {
+ /**
+ * Called when a Freenet URI is found.
+ * @param uri The URI.
+ * FIXME: Indicate the type of the link e.g. inline image, hyperlink, etc??
+ */
public void foundURI(FreenetURI uri);
- /* type can be null */
- /* but type can also be, for example, HTML tag name around text */
- /* Usefull to find things like titles */
- public void onText(String s, String type, URI baseURI);
+ /**
+ * Called when some plain text is processed. This is used typically by
+ * spiders to index pages by their content.
+ * @param text The text. Will already have been fed through whatever decoding
+ * is necessary depending on the type of the source document e.g. HTMLDecoder.
+ * Will need to be re-encoded before being sent to e.g. a browser.
+ * @param type Can be null, or may be for example the name of the HTML tag
+ * directly surrounding the text. E.g. "title" lets you find page titles.
+ * @param baseURI The current base URI for this page. The base URI is not
+ * necessarily the URI of the page. It's the URI against which URIs on the
+ * page are resolved. It defaults to the URI of the page but can be overridden
+ * by base href in html, for example. */
+ public void onText(String text, String type, URI baseURI);
}
Modified: trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java 2007-08-18 16:03:33 UTC (rev 14787)
+++ trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java 2007-08-18 16:36:14 UTC (rev 14788)
@@ -367,7 +367,7 @@
}
String sout = out.toString();
if(pc.cb != null)
- pc.cb.onText(sout, tagName); /* Tag name is given as type for the text */
+ pc.cb.onText(HTMLDecoder.decode(sout), tagName); /* Tag name is given as type for the text */
w.write(sout);
}