jericho 01/04/26 05:39:15
Modified: src/webdav/client/src/org/apache/webdav/util URIUtil.java
Log:
- This class should be a utility class for escaping and unescaping only, not
for character encoding.
- When you apply a character encoding such as "UTF8", you must encode the whole
sequence of escaped characters at once.
If you had to concatenate strings, you would have to do it like this:
String s = new String("ASCII".getBytes("UTF8"), "UTF8");
String summation = s + new String("some korean".getBytes("UTF8"), "UTF8");
That is not very extensible for general-purpose use. :(
In practice, nobody follows this style, so it could cause many problems.
- To support a multilingual computing environment, this should be followed,
because you must consider multi-byte languages. It's very important!
Revision Changes Path
1.3 +97 -111
jakarta-slide/src/webdav/client/src/org/apache/webdav/util/URIUtil.java
Index: URIUtil.java
===================================================================
RCS file:
/home/cvs/jakarta-slide/src/webdav/client/src/org/apache/webdav/util/URIUtil.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- URIUtil.java 2001/04/26 02:49:20 1.2
+++ URIUtil.java 2001/04/26 12:39:13 1.3
@@ -1,7 +1,7 @@
/*
- * $Header:
/home/cvs/jakarta-slide/src/webdav/client/src/org/apache/webdav/util/URIUtil.java,v
1.2 2001/04/26 02:49:20 jericho Exp $
- * $Revision: 1.2 $
- * $Date: 2001/04/26 02:49:20 $
+ * $Header:
/home/cvs/jakarta-slide/src/webdav/client/src/org/apache/webdav/util/URIUtil.java,v
1.3 2001/04/26 12:39:13 jericho Exp $
+ * $Revision: 1.3 $
+ * $Date: 2001/04/26 12:39:13 $
*
* ====================================================================
*
@@ -63,17 +63,17 @@
package org.apache.webdav.util;
-import java.io.UnsupportedEncodingException;
import java.io.ByteArrayOutputStream;
import java.io.OutputStreamWriter;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Map;
import java.util.BitSet;
/**
- * General purpose request parsing and encoding utility methods.
+ * General purpose escaping and unescaping utility methods.
+ * For "character encoding", The whole escaped characters must be done.
+ *
+ * It's different between "character encoding" and "escaping of characters".
*
* NOTICE: In order to do URI escaping, using the reserved characters defined
* in this class is not recommended for the the specific protocol.
@@ -82,7 +82,7 @@
* @author Tim Tye
* @author Remy Maucherat
* @author Park, Sung-Gu
- * @version $Revision: 1.2 $ $Date: 2001/04/26 02:49:20 $
+ * @version $Revision: 1.3 $ $Date: 2001/04/26 12:39:13 $
*/
public class URIUtil {
@@ -98,55 +98,49 @@
'A', 'B', 'C', 'D', 'E', 'F'};
- /**
- * The default encoding for URI characters.
- */
- private static final String defaultEnc = "UTF8";
-
-
// ----------------------------------------------------- Instance Variables
/**
- * Array containing the alphanum characters set.
+ * Array containing the alphanum URI character set.
*/
private static BitSet alphanum;
/**
- * Array containing the reserved characters set of the scheme part.
+ * Array containing the reserved URI character set of the scheme part.
*/
- public static BitSet schemeReserved;
+ private static BitSet schemeReserved;
/**
- * Array containing the reserved characters set of the authority part.
+ * Array containing the reserved URI character set of the authority part.
*/
- public static BitSet authorityReserved;
+ private static BitSet authorityReserved;
/**
- * Array containing the reserved characters set of the userinfo part.
+ * Array containing the reserved URI character set of the userinfo part.
*/
- public static BitSet userinfoReserved;
+ private static BitSet userinfoReserved;
/**
- * Array containing the reserved characters set of the host part.
+ * Array containing the reserved URI character set of the host part.
*/
- public static BitSet hostReserved;
+ private static BitSet hostReserved;
/**
- * Array containing the reserved characters set of the path part.
+ * Array containing the reserved URI character set of the path part.
*/
- public static BitSet pathReserved;
+ private static BitSet pathReserved;
/**
- * Array containing the reserved characters set of the query.
+ * Array containing the reserved URI character set of the query.
*/
- public static BitSet queryReserved;
+ private static BitSet queryReserved;
// ----------------------------------------------------- Static Initializer
@@ -154,7 +148,7 @@
static {
- // Save the alphanum characters that is common to do URI escaping.
+ // Save the alphanum URI character that is common to do URI escaping.
alphanum = new BitSet(128);
for (int i = 'a'; i <= 'z'; i++) {
alphanum.set(i);
@@ -166,7 +160,7 @@
alphanum.set(i);
}
- // Save the reserved characters within the sheme component.
+ // Save the reserved URI character within the sheme component.
schemeReserved = new BitSet(128);
/**
* Actually, this should be any combination of lower case letters,
@@ -178,7 +172,7 @@
schemeReserved.set('.');
schemeReserved.set('-');
- // Save the reserved characters within the authority component.
+ // Save the reserved URI character within the authority component.
authorityReserved = new BitSet(128);
authorityReserved.set(';');
authorityReserved.set(':');
@@ -186,7 +180,7 @@
authorityReserved.set('?');
authorityReserved.set('/');
- // Save the reserved characters within the userinfo component.
+ // Save the reserved URI character within the userinfo component.
userinfoReserved = new BitSet(128);
userinfoReserved.set(';');
userinfoReserved.set(':');
@@ -196,19 +190,19 @@
userinfoReserved.set('$');
userinfoReserved.set(',');
- // Save the reserved characters within the host component.
+ // Save the reserved URI character within the host component.
hostReserved = new BitSet(128);
hostReserved.set('.');
hostReserved.set('-');
- // Save the reserved characters within the path component.
+ // Save the reserved URI character within the path component.
pathReserved = new BitSet(128);
pathReserved.set('/');
pathReserved.set(';');
pathReserved.set('=');
pathReserved.set('?');
- // Save the reserved characters within the query component.
+ // Save the reserved URI character within the query component.
queryReserved = new BitSet(128);
queryReserved.set(';');
queryReserved.set('/');
@@ -224,6 +218,65 @@
}
+ // ------------------------------------------------------------ Properties
+
+
+ /**
+ * Get the reserved URI character of alphanum.
+ */
+ public static BitSet alphanum() {
+ return alphanum;
+ }
+
+
+ /**
+ * Get the reserved URI character of the scheme component.
+ */
+ public static BitSet schemeReserved() {
+ return schemeReserved;
+ }
+
+
+ /**
+ * Get the reserved URI character of the authority component.
+ */
+ public static BitSet authorityReserved() {
+ return authorityReserved;
+ }
+
+
+ /**
+ * Get the reserved URI character of the userinfo component.
+ */
+ public static BitSet userinfoReserved() {
+ return userinfoReserved;
+ }
+
+
+ /**
+ * Get the reserved URI character of the host component.
+ */
+ public static BitSet hostReserved() {
+ return hostReserved;
+ }
+
+
+ /**
+ * Get the reserved URI character of the path component.
+ */
+ public static BitSet pathReserved() {
+ return pathReserved;
+ }
+
+
+ /**
+ * Get the reserved URI character of the query component.
+ */
+ public static BitSet queryReserved() {
+ return queryReserved;
+ }
+
+
// -------------------------------------------------------- Private Methods
@@ -245,53 +298,24 @@
/**
* Unescape and return the specified URI-escaped String.
- * When the byte array is converted to a string, the system default
- * character encoding is used. In order to solve this problem, the
- * default encoding should be used.
*
* @param str The uri-escaped string
* @exception IllegalArgumentException if a '%' character is not followed
* by a valid 2-digit hexadecimal number
*/
public static String unescape(String str) {
- return unescape(str, defaultEnc);
+ return unescape(str.getBytes());
}
-
-
- /**
- * Unescape and return the specified URI-escaped String.
- *
- * @param str The uri-escaped string.
- * @param enc The encoding to use.
- * @exception IllegalArgumentException if a '%' character is not followed
- * by a valid 2-digit hexadecimal number
- */
- public static String unescape(String str, String enc) {
- return (str == null) ? null : unescape(str.getBytes(), enc);
- }
-
-
- /**
- * Unescape and return the specified URI-escaped byte array.
- *
- * @param bytes The uri-escaped byte array
- * @exception IllegalArgumentException if a '%' character is not followed
- * by a valid 2-digit hexadecimal number
- */
- public static String unescape(byte[] bytes) {
- return unescape(bytes, defaultEnc);
- }
-
+
/**
* Unescape and return the specified URI-escaped byte array.
*
* @param bytes The uri-escaped byte array
- * @param enc The encoding to use
* @exception IllegalArgumentException if a '%' character is not followed
* by a valid 2-digit hexadecimal number
*/
- public static synchronized String unescape(byte[] bytes, String enc) {
+ public static synchronized String unescape(byte[] bytes) {
if (bytes == null)
return (null);
@@ -309,36 +333,9 @@
}
bytes[ox++] = b;
}
- if (enc != null) {
- try {
- return new String(bytes, 0, ox, enc);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- return new String(bytes, 0, ox);
-
- }
-
-
- /**
- * URI rewriter.
- *
- * @param str The string which has to be rewiten
- */
- public static String escape(String str) {
- return escape(str, defaultEnc);
- }
+ return new String(bytes, 0, ox);
- /**
- * URI rewriter.
- *
- * @param str The string which has to be rewritten.
- * @param enc The encoding to use.
- */
- public static String escape(String str, String enc) {
- return escape(str, enc, null);
}
@@ -346,10 +343,9 @@
* URI rewriter.
*
* @param str The string which has to be rewritten.
- * @param allowed The additional allowed characters not to escape.
*/
- public static String escape(String str, BitSet allowed) {
- return escape(str, defaultEnc, allowed);
+ public static String escape(String str) {
+ return escape(str, null);
}
@@ -357,11 +353,9 @@
* URI rewriter.
*
* @param str The string which has to be rewritten.
- * @param enc The encoding to use.
- * @param allowed The additional allowed characters not to escape.
+ * @param allowed The additional allowed URI character not to escape.
*/
- public static synchronized String escape(String str, String enc,
- BitSet allowed) {
+ public static synchronized String escape(String str, BitSet allowed) {
if (str == null)
return (null);
@@ -370,15 +364,8 @@
int caseDiff = ('a' - 'A');
StringBuffer rewrittenStr = new StringBuffer(str.length());
ByteArrayOutputStream buf = new ByteArrayOutputStream(maxBytesPerChar);
- OutputStreamWriter writer = null;
- try {
- // The same encoding as the one specified above should be used.
- writer = new OutputStreamWriter(buf, enc);
- } catch (Exception e) {
- e.printStackTrace();
- writer = new OutputStreamWriter(buf);
- }
-
+ OutputStreamWriter writer = new OutputStreamWriter(buf);
+
for (int i = 0; i < str.length(); i++) {
int c = (int) str.charAt(i);
if (alphanum.get(c)) {
@@ -386,7 +373,6 @@
} else if (allowed != null && allowed.get(c)) {
rewrittenStr.append((char)c);
} else {
- // convert to external encoding before hex conversion
try {
writer.write(c);
writer.flush();