Author: toad
Date: 2006-06-02 00:38:42 +0000 (Fri, 02 Jun 2006)
New Revision: 8977

Added:
   trunk/freenet/src/freenet/support/URIPreEncoder.java
Modified:
   trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
   trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
   trunk/freenet/src/freenet/node/Version.java
Log:
769: Allow illegal characters in URIs on pages with relative links. Don't 
clobber already encoded chars in correct URIs.

Modified: 
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
===================================================================
--- 
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java    
    2006-06-02 00:34:51 UTC (rev 8976)
+++ 
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java    
    2006-06-02 00:38:42 UTC (rev 8977)
@@ -10,6 +10,7 @@
 import freenet.keys.FreenetURI;
 import freenet.support.HTMLEncoder;
 import freenet.support.Logger;
+import freenet.support.URIPreEncoder;

 public class GenericReadFilterCallback implements FilterCallback {

@@ -46,7 +47,8 @@
                URI uri;
                URI resolved;
                try {
-                       uri = new URI(u).normalize();
+                       Logger.minor(this, "Processing "+u);
+                       uri = URIPreEncoder.encodeURI(u).normalize();
                        Logger.minor(this, "Processing "+uri);
                        if(!noRelative)
                                resolved = baseURI.resolve(uri);
@@ -54,6 +56,7 @@
                                resolved = uri;
                        Logger.minor(this, "Resolved: "+resolved);
                } catch (URISyntaxException e1) {
+                       Logger.minor(this, "Failed to parse URI: "+e1);
                        return null;
                }
                String path = uri.getPath();

Modified: trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java       
2006-06-02 00:34:51 UTC (rev 8976)
+++ trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java       
2006-06-02 00:38:42 UTC (rev 8977)
@@ -101,6 +101,7 @@
                        // Ignore ALL errors
                        Logger.minor(this, "Caught "+t+" trying to detect MIME 
type with "+parseCharset);
                }
+               Logger.minor(this, "Returning charset "+pc.detectedCharset);
                return pc.detectedCharset;
        }

@@ -1968,6 +1969,7 @@
                String overrideType,
                String overrideCharset,
                FilterCallback cb) {
+               Logger.minor(HTMLFilter.class, "Sanitizing URI: "+suri+" ( 
override type "+overrideType +" override charset "+overrideCharset+" )");
                if(overrideCharset != null && overrideCharset.length() > 0)
                        overrideType += ";charset="+overrideCharset;
                return cb.processURI(suri, overrideType);

Modified: trunk/freenet/src/freenet/node/Version.java
===================================================================
--- trunk/freenet/src/freenet/node/Version.java 2006-06-02 00:34:51 UTC (rev 
8976)
+++ trunk/freenet/src/freenet/node/Version.java 2006-06-02 00:38:42 UTC (rev 
8977)
@@ -18,7 +18,7 @@
        public static final String protocolVersion = "1.0";

        /** The build number of the current revision */
-       private static final int buildNumber = 768;
+       private static final int buildNumber = 769;

        /** Oldest build of Fred we will talk to */
        private static final int lastGoodBuild = 765;

Added: trunk/freenet/src/freenet/support/URIPreEncoder.java
===================================================================
--- trunk/freenet/src/freenet/support/URIPreEncoder.java        2006-06-02 
00:34:51 UTC (rev 8976)
+++ trunk/freenet/src/freenet/support/URIPreEncoder.java        2006-06-02 
00:38:42 UTC (rev 8977)
@@ -0,0 +1,80 @@
+package freenet.support;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+/**
+ * Replace any invalid characters in a string (to be converted to a URI) with encoded chars using UTF-8.
+ *
+ * This does NOT do the same thing as either java.net.URLEncoder or freenet.support.URLEncoder!
+ *
+ * Its purpose is simply to allow us to accept "dirty" URIs - URIs which may contain e.g. spaces -
+ * by preprocessing them before they reach the URI(String) constructor.
+ *
+ * I _think_ this may be what URLEncoder is for - but it seems to have become rather confused.
+ * Somebody needs to check all the calls to URLEncoder...
+ */
+public class URIPreEncoder {
+
+       /**
+        * Characters that are copied through unchanged. We deliberately include '%' because we
+        * don't want to interfere with stuff which is already encoded.
+        */
+       public final static String allowedChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-!.~'()*,;:$&+=?/@%";
+
+       /**
+        * Percent-encode every character of s that is not in allowedChars, using the bytes of
+        * its UTF-8 encoding and lower-case hex digits.
+        *
+        * A high surrogate followed by a low surrogate is encoded as a single code point;
+        * encoding the two halves separately would turn each into a '?' byte (%3f).
+        *
+        * @param s the possibly "dirty" URI string; must not be null
+        * @return s with each disallowed character replaced by %xx escapes
+        */
+       public static String encode(String s) {
+               StringBuffer output = new StringBuffer(s.length()*2);
+               for(int i=0;i<s.length();i++) {
+                       char c = s.charAt(i);
+                       if(allowedChars.indexOf(c) >= 0) {
+                               output.append(c);
+                               continue;
+                       }
+                       // Take both halves of a surrogate pair so UTF-8 sees the whole code point.
+                       int end = i+1;
+                       if(c >= 0xD800 && c <= 0xDBFF && end < s.length()) {
+                               char low = s.charAt(end);
+                               if(low >= 0xDC00 && low <= 0xDFFF)
+                                       end++;
+                       }
+                       String tmp = s.substring(i, end);
+                       i = end-1;
+                       try {
+                               byte[] utf = tmp.getBytes("UTF-8");
+                               for(int j=0;j<utf.length;j++) {
+                                       int x = utf[j] & 0xff;
+                                       output.append('%');
+                                       if(x < 16)
+                                               output.append('0');
+                                       output.append(Integer.toHexString(x));
+                               }
+                       } catch (UnsupportedEncodingException e) {
+                               // UTF-8 support is mandatory on every Java platform.
+                               throw new Error(e);
+                       }
+               }
+               return output.toString();
+       }
+
+       /**
+        * Pre-encode s with encode(String) and parse the result as a URI.
+        *
+        * @param s the possibly "dirty" URI string; must not be null
+        * @return the parsed URI
+        * @throws URISyntaxException if the encoded string is still not a valid URI
+        */
+       public static URI encodeURI(String s) throws URISyntaxException {
+               return new URI(encode(s));
+       }
+}


Reply via email to