Author: toad
Date: 2006-06-02 00:38:42 +0000 (Fri, 02 Jun 2006)
New Revision: 8977
Added:
trunk/freenet/src/freenet/support/URIPreEncoder.java
Modified:
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
trunk/freenet/src/freenet/node/Version.java
Log:
769: Allow illegal characters in URIs on pages with relative links. Don't
clobber already encoded chars in correct URIs.
Modified:
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
===================================================================
---
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
2006-06-02 00:34:51 UTC (rev 8976)
+++
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
2006-06-02 00:38:42 UTC (rev 8977)
@@ -10,6 +10,7 @@
import freenet.keys.FreenetURI;
import freenet.support.HTMLEncoder;
import freenet.support.Logger;
+import freenet.support.URIPreEncoder;
public class GenericReadFilterCallback implements FilterCallback {
@@ -46,7 +47,8 @@
URI uri;
URI resolved;
try {
- uri = new URI(u).normalize();
+ Logger.minor(this, "Processing "+u);
+ uri = URIPreEncoder.encodeURI(u).normalize();
Logger.minor(this, "Processing "+uri);
if(!noRelative)
resolved = baseURI.resolve(uri);
@@ -54,6 +56,7 @@
resolved = uri;
Logger.minor(this, "Resolved: "+resolved);
} catch (URISyntaxException e1) {
+ Logger.minor(this, "Failed to parse URI: "+e1);
return null;
}
String path = uri.getPath();
Modified: trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
2006-06-02 00:34:51 UTC (rev 8976)
+++ trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
2006-06-02 00:38:42 UTC (rev 8977)
@@ -101,6 +101,7 @@
// Ignore ALL errors
Logger.minor(this, "Caught "+t+" trying to detect MIME
type with "+parseCharset);
}
+ Logger.minor(this, "Returning charset "+pc.detectedCharset);
return pc.detectedCharset;
}
@@ -1968,6 +1969,7 @@
String overrideType,
String overrideCharset,
FilterCallback cb) {
+ Logger.minor(HTMLFilter.class, "Sanitizing URI: "+suri+" (
override type "+overrideType +" override charset "+overrideCharset+" )");
if(overrideCharset != null && overrideCharset.length() > 0)
overrideType += ";charset="+overrideCharset;
return cb.processURI(suri, overrideType);
Modified: trunk/freenet/src/freenet/node/Version.java
===================================================================
--- trunk/freenet/src/freenet/node/Version.java 2006-06-02 00:34:51 UTC (rev
8976)
+++ trunk/freenet/src/freenet/node/Version.java 2006-06-02 00:38:42 UTC (rev
8977)
@@ -18,7 +18,7 @@
public static final String protocolVersion = "1.0";
/** The build number of the current revision */
- private static final int buildNumber = 768;
+ private static final int buildNumber = 769;
/** Oldest build of Fred we will talk to */
private static final int lastGoodBuild = 765;
Added: trunk/freenet/src/freenet/support/URIPreEncoder.java
===================================================================
--- trunk/freenet/src/freenet/support/URIPreEncoder.java 2006-06-02
00:34:51 UTC (rev 8976)
+++ trunk/freenet/src/freenet/support/URIPreEncoder.java 2006-06-02
00:38:42 UTC (rev 8977)
@@ -0,0 +1,88 @@
+package freenet.support;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+/**
+ * Replace any invalid characters in a string (to be converted to a URI) with
+ * percent-encoded chars using UTF-8.
+ *
+ * This does NOT do the same thing as either java.net.URLEncoder or
+ * freenet.support.URLEncoder!
+ *
+ * Its purpose is simply to allow us to accept "dirty" URIs - URIs which may
+ * contain e.g. spaces - by preprocessing them before they reach the
+ * URI(String) constructor.
+ *
+ * I _think_ this may be what URLEncoder is for - but it seems to have become
+ * rather confused. Somebody needs to check all the calls to URLEncoder...
+ */
+public class URIPreEncoder {
+
+	// We deliberately include '%' because we don't want to interfere with
+	// stuff which is already encoded.
+	public final static String allowedChars =
+		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-!.~'()*,;:$&+=?/@%";
+
+	/**
+	 * Percent-encode every character of s that is not in allowedChars, using
+	 * the bytes of its UTF-8 encoding (lower-case hex digits).
+	 *
+	 * @param s the possibly "dirty" URI text; must not be null
+	 * @return s with each disallowed character replaced by %xx escapes
+	 */
+	public static String encode(String s) {
+		StringBuffer output = new StringBuffer(s.length()*2);
+		for(int i=0;i<s.length();i++) {
+			char c = s.charAt(i);
+			if(allowedChars.indexOf(c) >= 0) {
+				output.append(c);
+				continue;
+			}
+			// A character outside the BMP is stored as a surrogate pair and
+			// must be encoded as one unit: encoding each half on its own
+			// does not yield the character's UTF-8 bytes.
+			String tmp;
+			if(isSurrogatePairAt(s, i)) {
+				tmp = s.substring(i, i+2);
+				i++; // the low surrogate has been consumed as well
+			} else {
+				tmp = String.valueOf(c);
+			}
+			try {
+				byte[] utf = tmp.getBytes("UTF-8");
+				for(int j=0;j<utf.length;j++) {
+					int x = utf[j] & 0xff;
+					output.append('%');
+					if(x < 16)
+						output.append('0'); // toHexString gives one digit below 0x10
+					output.append(Integer.toHexString(x));
+				}
+			} catch (UnsupportedEncodingException e) {
+				throw new Error(e);
+			}
+		}
+		return output.toString();
+	}
+
+	/** Whether s holds a high surrogate at index i followed by a low surrogate. */
+	private static boolean isSurrogatePairAt(String s, int i) {
+		if(i + 1 >= s.length())
+			return false;
+		char hi = s.charAt(i);
+		char lo = s.charAt(i+1);
+		return hi >= 0xD800 && hi <= 0xDBFF && lo >= 0xDC00 && lo <= 0xDFFF;
+	}
+
+	/**
+	 * Run s through encode(String) and parse the result as a URI.
+	 *
+	 * @param s the possibly "dirty" URI text; must not be null
+	 * @return the URI parsed from the encoded text
+	 * @throws URISyntaxException if the encoded text is still not a valid URI
+	 */
+	public static URI encodeURI(String s) throws URISyntaxException {
+		return new URI(encode(s));
+	}
+}