Author: niallp
Date: Wed Sep 29 03:44:01 2010
New Revision: 1002457

URL: http://svn.apache.org/viewvc?rev=1002457&view=rev
Log:
IO-166 Fix URL decoding in FileUtils.toFile() - thanks to Benjamin Bentmann for 
the patch

Modified:
    commons/proper/io/trunk/src/java/org/apache/commons/io/FileUtils.java
    commons/proper/io/trunk/src/test/org/apache/commons/io/FileUtilsTestCase.java

Modified: commons/proper/io/trunk/src/java/org/apache/commons/io/FileUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/java/org/apache/commons/io/FileUtils.java?rev=1002457&r1=1002456&r2=1002457&view=diff
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/FileUtils.java (original)
+++ commons/proper/io/trunk/src/java/org/apache/commons/io/FileUtils.java Wed Sep 29 03:44:01 2010
@@ -26,7 +26,9 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.net.URL;
 import java.net.URLConnection;
+import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Date;
@@ -113,6 +115,11 @@ public class FileUtils {
      */
     public static final File[] EMPTY_FILE_ARRAY = new File[0];
 
+    /**
+     * The UTF-8 character set, used to decode octets in URLs.
+     */
+    private static final Charset UTF8 = Charset.forName("UTF-8");
+
     //-----------------------------------------------------------------------
     /**
      * Returns the path to the system temporary directory.
@@ -515,28 +522,70 @@ public class FileUtils {
      * <p>
      * From version 1.1 this method will decode the URL.
      * Syntax such as <code>file:///my%20docs/file.txt</code> will be
-     * correctly decoded to <code>/my docs/file.txt</code>.
+     * correctly decoded to <code>/my docs/file.txt</code>. Starting with version
+     * 1.5, this method uses UTF-8 to decode percent-encoded octets to characters.
+     * Additionally, malformed percent-encoded octets are handled leniently by
+     * passing them through literally.
      *
      * @param url  the file URL to convert, <code>null</code> returns <code>null</code>
      * @return the equivalent <code>File</code> object, or <code>null</code>
      *  if the URL's protocol is not <code>file</code>
-     * @throws IllegalArgumentException if the file is incorrectly encoded
      */
     public static File toFile(URL url) {
-        if (url == null || !url.getProtocol().equals("file")) {
+        if (url == null || !"file".equalsIgnoreCase(url.getProtocol())) {
             return null;
         } else {
             String filename = url.getFile().replace('/', File.separatorChar);
-            int pos =0;
-            while ((pos = filename.indexOf('%', pos)) >= 0) {
-                if (pos + 2 < filename.length()) {
-                    String hexStr = filename.substring(pos + 1, pos + 3);
-                    char ch = (char) Integer.parseInt(hexStr, 16);
-                    filename = filename.substring(0, pos) + ch + filename.substring(pos + 3);
+            filename = decodeUrl(filename);
+            return new File(filename);
+        }
+    }
+
+    /**
+     * Decodes the specified URL as per RFC 3986, i.e. transforms
+     * percent-encoded octets to characters by decoding with the UTF-8 character
+     * set. This function is primarily intended for usage with
+     * {@link java.net.URL} which unfortunately does not enforce proper URLs. As
+     * such, this method will leniently accept invalid characters or malformed
+     * percent-encoded octets and simply pass them literally through to the
+     * result string. Except for rare edge cases, this will make unencoded URLs
+     * pass through unaltered.
+     * 
+     * @param url  The URL to decode, may be <code>null</code>.
+     * @return The decoded URL or <code>null</code> if the input was
+     *         <code>null</code>.
+     */
+    static String decodeUrl(String url) {
+        String decoded = url;
+        if (url != null && url.indexOf('%') >= 0) {
+            int n = url.length();
+            StringBuffer buffer = new StringBuffer();
+            ByteBuffer bytes = ByteBuffer.allocate(n);
+            for (int i = 0; i < n;) {
+                if (url.charAt(i) == '%') {
+                    try {
+                        do {
+                            byte octet = (byte) Integer.parseInt(url.substring(i + 1, i + 3), 16);
+                            bytes.put(octet);
+                            i += 3;
+                        } while (i < n && url.charAt(i) == '%');
+                        continue;
+                    } catch (RuntimeException e) {
+                        // malformed percent-encoded octet, fall through and
+                        // append characters literally
+                    } finally {
+                        if (bytes.position() > 0) {
+                            bytes.flip();
+                            buffer.append(UTF8.decode(bytes).toString());
+                            bytes.clear();
+                        }
+                    }
                 }
+                buffer.append(url.charAt(i++));
             }
-            return new File(filename);
+            decoded = buffer.toString();
         }
+        return decoded;
     }
 
     /**

Modified: commons/proper/io/trunk/src/test/org/apache/commons/io/FileUtilsTestCase.java
URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/org/apache/commons/io/FileUtilsTestCase.java?rev=1002457&r1=1002456&r2=1002457&view=diff
==============================================================================
--- commons/proper/io/trunk/src/test/org/apache/commons/io/FileUtilsTestCase.java (original)
+++ commons/proper/io/trunk/src/test/org/apache/commons/io/FileUtilsTestCase.java Wed Sep 29 03:44:01 2010
@@ -283,9 +283,9 @@ public class FileUtilsTestCase extends F
     }
 
     public void testToFile2() throws Exception {
-        URL url = new URL("file", null, "a/b/c/file%20n%61me.tx%74");
+        URL url = new URL("file", null, "a/b/c/file%20n%61me%2520.tx%74");
         File file = FileUtils.toFile(url);
-        assertEquals(true, file.toString().indexOf("file name.txt") >= 0);
+        assertEquals(true, file.toString().indexOf("file name%20.txt") >= 0);
     }
 
     public void testToFile3() throws Exception {
@@ -294,11 +294,42 @@ public class FileUtilsTestCase extends F
     }
 
     public void testToFile4() throws Exception {
-        URL url = new URL("file", null, "a/b/c/file%2Xn%61me.txt");
-        try {
-            FileUtils.toFile(url);
-            fail();
-        }  catch (IllegalArgumentException ex) {}
+        URL url = new URL("file", null, "a/b/c/file%%20%me.txt%");
+        File file = FileUtils.toFile(url);
+        assertEquals(true, file.toString().indexOf("file% %me.txt%") >= 0);
+    }
+
+    public void testToFileUtf8() throws Exception {
+        URL url = new URL("file", null, "/home/%C3%A4%C3%B6%C3%BC%C3%9F");
+        File file = FileUtils.toFile(url);
+        assertEquals(true, file.toString().indexOf("\u00E4\u00F6\u00FC\u00DF") >= 0);
+    }
+
+    public void testDecodeUrl() {
+        assertEquals("", FileUtils.decodeUrl(""));
+        assertEquals("foo", FileUtils.decodeUrl("foo"));
+        assertEquals("+", FileUtils.decodeUrl("+"));
+        assertEquals("% ", FileUtils.decodeUrl("%25%20"));
+        assertEquals("%20", FileUtils.decodeUrl("%2520"));
+        assertEquals("jar:file:/C:/dir/sub dir/1.0/foo-1.0.jar!/org/Bar.class", FileUtils
+                .decodeUrl("jar:file:/C:/dir/sub%20dir/1.0/foo-1.0.jar!/org/Bar.class"));
+    }
+
+    public void testDecodeUrlLenient() {
+        assertEquals(" ", FileUtils.decodeUrl(" "));
+        assertEquals("\u00E4\u00F6\u00FC\u00DF", FileUtils.decodeUrl("\u00E4\u00F6\u00FC\u00DF"));
+        assertEquals("%", FileUtils.decodeUrl("%"));
+        assertEquals("% ", FileUtils.decodeUrl("%%20"));
+        assertEquals("%2", FileUtils.decodeUrl("%2"));
+        assertEquals("%2G", FileUtils.decodeUrl("%2G"));
+    }
+
+    public void testDecodeUrlNullSafe() {
+        assertNull(FileUtils.decodeUrl(null));
+    }
+
+    public void testDecodeUrlEncodingUtf8() {
+        assertEquals("\u00E4\u00F6\u00FC\u00DF", FileUtils.decodeUrl("%C3%A4%C3%B6%C3%BC%C3%9F"));
     }
 
     // toFiles


Reply via email to