Author: niallp
Date: Wed Sep 29 03:44:01 2010
New Revision: 1002457
URL: http://svn.apache.org/viewvc?rev=1002457&view=rev
Log:
IO-166 Fix URL decoding in FileUtils.toFile() - thanks to Benjamin Bentmann for
the patch
Modified:
commons/proper/io/trunk/src/java/org/apache/commons/io/FileUtils.java
commons/proper/io/trunk/src/test/org/apache/commons/io/FileUtilsTestCase.java
Modified: commons/proper/io/trunk/src/java/org/apache/commons/io/FileUtils.java
URL:
http://svn.apache.org/viewvc/commons/proper/io/trunk/src/java/org/apache/commons/io/FileUtils.java?rev=1002457&r1=1002456&r2=1002457&view=diff
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/FileUtils.java
(original)
+++ commons/proper/io/trunk/src/java/org/apache/commons/io/FileUtils.java Wed
Sep 29 03:44:01 2010
@@ -26,7 +26,9 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
+import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
@@ -113,6 +115,11 @@ public class FileUtils {
*/
public static final File[] EMPTY_FILE_ARRAY = new File[0];
+ /**
+ * The UTF-8 character set, used to decode octets in URLs.
+ */
+ private static final Charset UTF8 = Charset.forName("UTF-8");
+
//-----------------------------------------------------------------------
/**
* Returns the path to the system temporary directory.
@@ -515,28 +522,70 @@ public class FileUtils {
* <p>
* From version 1.1 this method will decode the URL.
* Syntax such as <code>file:///my%20docs/file.txt</code> will be
- * correctly decoded to <code>/my docs/file.txt</code>.
+ * correctly decoded to <code>/my docs/file.txt</code>. Starting with version
+ * 1.5, this method uses UTF-8 to decode percent-encoded octets to characters.
+ * Additionally, malformed percent-encoded octets are handled leniently by
+ * passing them through literally.
*
* @param url the file URL to convert, <code>null</code> returns <code>null</code>
* @return the equivalent <code>File</code> object, or <code>null</code>
* if the URL's protocol is not <code>file</code>
- * @throws IllegalArgumentException if the file is incorrectly encoded
*/
public static File toFile(URL url) {
- if (url == null || !url.getProtocol().equals("file")) {
+ if (url == null || !"file".equalsIgnoreCase(url.getProtocol())) {
return null;
} else {
String filename = url.getFile().replace('/', File.separatorChar);
- int pos =0;
- while ((pos = filename.indexOf('%', pos)) >= 0) {
- if (pos + 2 < filename.length()) {
- String hexStr = filename.substring(pos + 1, pos + 3);
- char ch = (char) Integer.parseInt(hexStr, 16);
- filename = filename.substring(0, pos) + ch +
filename.substring(pos + 3);
+ filename = decodeUrl(filename);
+ return new File(filename);
+ }
+ }
+
+ /**
+ * Decodes the specified URL as per RFC 3986, i.e. transforms
+ * percent-encoded octets to characters by decoding with the UTF-8 character
+ * set. This function is primarily intended for usage with
+ * {@link java.net.URL} which unfortunately does not enforce proper URLs. As
+ * such, this method will leniently accept invalid characters or malformed
+ * percent-encoded octets and simply pass them literally through to the
+ * result string. Except for rare edge cases, this will make unencoded URLs
+ * pass through unaltered.
+ *
+ * @param url The URL to decode, may be <code>null</code>.
+ * @return The decoded URL or <code>null</code> if the input was
+ * <code>null</code>.
+ */
+ static String decodeUrl(String url) {
+ String decoded = url;
+ if (url != null && url.indexOf('%') >= 0) {
+ int n = url.length();
+ StringBuffer buffer = new StringBuffer();
+ ByteBuffer bytes = ByteBuffer.allocate(n);
+ for (int i = 0; i < n;) {
+ if (url.charAt(i) == '%') {
+ try {
+ do {
+ byte octet = (byte)
Integer.parseInt(url.substring(i + 1, i + 3), 16);
+ bytes.put(octet);
+ i += 3;
+ } while (i < n && url.charAt(i) == '%');
+ continue;
+ } catch (RuntimeException e) {
+ // malformed percent-encoded octet, fall through and
+ // append characters literally
+ } finally {
+ if (bytes.position() > 0) {
+ bytes.flip();
+ buffer.append(UTF8.decode(bytes).toString());
+ bytes.clear();
+ }
+ }
}
+ buffer.append(url.charAt(i++));
}
- return new File(filename);
+ decoded = buffer.toString();
}
+ return decoded;
}
/**
Modified:
commons/proper/io/trunk/src/test/org/apache/commons/io/FileUtilsTestCase.java
URL:
http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/org/apache/commons/io/FileUtilsTestCase.java?rev=1002457&r1=1002456&r2=1002457&view=diff
==============================================================================
---
commons/proper/io/trunk/src/test/org/apache/commons/io/FileUtilsTestCase.java
(original)
+++
commons/proper/io/trunk/src/test/org/apache/commons/io/FileUtilsTestCase.java
Wed Sep 29 03:44:01 2010
@@ -283,9 +283,9 @@ public class FileUtilsTestCase extends F
}
public void testToFile2() throws Exception {
- URL url = new URL("file", null, "a/b/c/file%20n%61me.tx%74");
+ URL url = new URL("file", null, "a/b/c/file%20n%61me%2520.tx%74");
File file = FileUtils.toFile(url);
- assertEquals(true, file.toString().indexOf("file name.txt") >= 0);
+ assertEquals(true, file.toString().indexOf("file name%20.txt") >= 0);
}
public void testToFile3() throws Exception {
@@ -294,11 +294,42 @@ public class FileUtilsTestCase extends F
}
public void testToFile4() throws Exception {
- URL url = new URL("file", null, "a/b/c/file%2Xn%61me.txt");
- try {
- FileUtils.toFile(url);
- fail();
- } catch (IllegalArgumentException ex) {}
+ URL url = new URL("file", null, "a/b/c/file%%20%me.txt%");
+ File file = FileUtils.toFile(url);
+ assertEquals(true, file.toString().indexOf("file% %me.txt%") >= 0);
+ }
+
+ public void testToFileUtf8() throws Exception {
+ URL url = new URL("file", null, "/home/%C3%A4%C3%B6%C3%BC%C3%9F");
+ File file = FileUtils.toFile(url);
+ assertEquals(true, file.toString().indexOf("\u00E4\u00F6\u00FC\u00DF")
>= 0);
+ }
+
+ public void testDecodeUrl() {
+ assertEquals("", FileUtils.decodeUrl(""));
+ assertEquals("foo", FileUtils.decodeUrl("foo"));
+ assertEquals("+", FileUtils.decodeUrl("+"));
+ assertEquals("% ", FileUtils.decodeUrl("%25%20"));
+ assertEquals("%20", FileUtils.decodeUrl("%2520"));
+ assertEquals("jar:file:/C:/dir/sub
dir/1.0/foo-1.0.jar!/org/Bar.class", FileUtils
+
.decodeUrl("jar:file:/C:/dir/sub%20dir/1.0/foo-1.0.jar!/org/Bar.class"));
+ }
+
+ public void testDecodeUrlLenient() {
+ assertEquals(" ", FileUtils.decodeUrl(" "));
+ assertEquals("\u00E4\u00F6\u00FC\u00DF",
FileUtils.decodeUrl("\u00E4\u00F6\u00FC\u00DF"));
+ assertEquals("%", FileUtils.decodeUrl("%"));
+ assertEquals("% ", FileUtils.decodeUrl("%%20"));
+ assertEquals("%2", FileUtils.decodeUrl("%2"));
+ assertEquals("%2G", FileUtils.decodeUrl("%2G"));
+ }
+
+ public void testDecodeUrlNullSafe() {
+ assertNull(FileUtils.decodeUrl(null));
+ }
+
+ public void testDecodeUrlEncodingUtf8() {
+ assertEquals("\u00E4\u00F6\u00FC\u00DF",
FileUtils.decodeUrl("%C3%A4%C3%B6%C3%BC%C3%9F"));
}
// toFiles