jericho 02/04/29 00:45:22
Modified: src/util/org/apache/util URI.java
Log:
- Add javadoc examples of resolving relative URI references
- Normalize paths correctly when they have . or .. at the end
- Fix some typos in the path_segments initializer
- Make URI tokenizing work correctly
- This class has been tested for resolving and normalizing...
Revision Changes Path
1.5 +227 -106 jakarta-slide/src/util/org/apache/util/URI.java
Index: URI.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- URI.java 24 Apr 2002 17:24:21 -0000 1.4
+++ URI.java 29 Apr 2002 07:45:22 -0000 1.5
@@ -1,7 +1,7 @@
/*
- * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v 1.4
2002/04/24 17:24:21 jericho Exp $
- * $Revision: 1.4 $
- * $Date: 2002/04/24 17:24:21 $
+ * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v 1.5
2002/04/29 07:45:22 jericho Exp $
+ * $Revision: 1.5 $
+ * $Date: 2002/04/29 07:45:22 $
*
* ====================================================================
*
@@ -128,7 +128,7 @@
* relative URL(RFC 1808).
*
* @author <a href="mailto:[EMAIL PROTECTED]">Sung-Gu</a>
- * @version $Revision: 1.4 $ $Date: 2002/03/14 15:14:01
+ * @version $Revision: 1.5 $ $Date: 2002/03/14 15:14:01
*/
public class URI implements Comparable, java.io.Serializable {
@@ -276,6 +276,41 @@
* </pre></blockquote><p>
* Resolving Relative References to Absolute Form.
*
+ * <strong>Examples of Resolving Relative URI References</strong>
+ *
+ * Within an object with a well-defined base URI of
+ * <p><blockquote><pre>
+ * http://a/b/c/d;p?q
+ * </pre></blockquote><p>
+ * the relative URI would be resolved as follows:
+ *
+ * Normal Examples
+ *
+ * <p><blockquote><pre>
+ * g:h = g:h
+ * g = http://a/b/c/g
+ * ./g = http://a/b/c/g
+ * g/ = http://a/b/c/g/
+ * /g = http://a/g
+ * //g = http://g
+ * ?y = http://a/b/c/?y
+ * g?y = http://a/b/c/g?y
+ * #s = (current document)#s
+ * g#s = http://a/b/c/g#s
+ * g?y#s = http://a/b/c/g?y#s
+ * ;x = http://a/b/c/;x
+ * g;x = http://a/b/c/g;x
+ * g;x?y#s = http://a/b/c/g;x?y#s
+ * . = http://a/b/c/
+ * ./ = http://a/b/c/
+ * .. = http://a/b/
+ * ../ = http://a/b/
+ * ../g = http://a/b/g
+ * ../.. = http://a/
+ * ../../ = http://a/
+ * ../../g = http://a/g
+ * </pre></blockquote><p>
+ *
* Some URI schemes do not allow a hierarchical syntax matching the
* <hier_part> syntax, and thus cannot use relative references.
*
@@ -283,12 +318,12 @@
* @param relative the relative URI
*/
public URI(URI base, URI relative) throws Exception {
- if (base._scheme != null) {
+ if (base._scheme == null) {
throw new IllegalArgumentException("base URI required");
}
- if (relative._scheme != null && // is_relativeURI
- !equals(base._scheme, relative._scheme)) {
- throw new IllegalArgumentException("not relative URI");
+ if (base._scheme != null) {
+ this._scheme = base._scheme;
+ this._authority = base._authority;
}
if (base._is_opaque_part || relative._is_opaque_part) {
this._scheme = base._scheme;
@@ -298,21 +333,21 @@
this.setUriReference();
return;
}
- if (base._scheme != null) {
- this._scheme = base._scheme;
- }
- if (relative._authority != null) {
+ if (relative._scheme != null) {
+ this._scheme = relative._scheme;
this._is_net_path = relative._is_net_path;
this._authority = relative._authority;
if (relative._is_server) {
- this._is_server = relative._is_server;
this._userinfo = relative._userinfo;
this._host = relative._host;
this._port = relative._port;
} else if (relative._is_reg_name) {
this._is_reg_name = relative._is_reg_name;
}
- } else if (base._authority != null) {
+ this._is_abs_path = relative._is_abs_path;
+ this._is_rel_path = relative._is_rel_path;
+ this._path = relative._path;
+ } else if (base._authority != null && relative._scheme == null) {
this._is_net_path = base._is_net_path;
this._authority = base._authority;
if (base._is_server) {
@@ -323,8 +358,26 @@
this._is_reg_name = base._is_reg_name;
}
}
+ if (relative._authority != null) {
+ this._is_net_path = relative._is_net_path;
+ this._authority = relative._authority;
+ if (relative._is_server) {
+ this._is_server = relative._is_server;
+ this._userinfo = relative._userinfo;
+ this._host = relative._host;
+ this._port = relative._port;
+ } else if (relative._is_reg_name) {
+ this._is_reg_name = relative._is_reg_name;
+ }
+ this._is_abs_path = relative._is_abs_path;
+ this._is_rel_path = relative._is_rel_path;
+ this._path = relative._path;
+ }
// resolve the path
- this._path = resolvePath(base._path, relative._path);
+ if (relative._scheme == null && relative._authority == null ||
+ equals(base._scheme, relative._scheme)) {
+ this._path = resolvePath(base._path, relative._path);
+ }
// base._query removed
if (relative._query != null) {
this._query = relative._query;
@@ -463,10 +516,10 @@
protected static final BitSet alpha = new BitSet(256);
// Static initializer for alpha
static {
- for(int i='a';i<='z';i++) {
+ for (int i = 'a'; i <= 'z';i++) {
alpha.set(i);
}
- for(int i='A';i<='Z';i++) {
+ for (int i = 'A'; i <= 'Z';i++) {
alpha.set(i);
}
}
@@ -656,8 +709,8 @@
protected static final BitSet path_segments = new BitSet(256);
// Static initializer for path_segments
static {
- segment.or(segment);
- segment.set('/');
+ path_segments.set('/');
+ path_segments.or(segment);
}
@@ -979,7 +1032,7 @@
static {
hier_part.or(net_path);
hier_part.or(abs_path);
- hier_part.set('?');
+ // hier_part.set('?'); aleady included
hier_part.or(query);
}
@@ -995,7 +1048,7 @@
relativeURI.or(net_path);
relativeURI.or(abs_path);
relativeURI.or(rel_path);
- relativeURI.set('?');
+ // relativeURI.set('?'); aleady included
relativeURI.or(query);
}
@@ -1086,13 +1139,13 @@
/**
- * disallowed rel_segment before escaping
+ * disallowed rel_path before escaping
*/
- public static final BitSet disallowed_rel_segment = new BitSet(256);
- // Static initializer for disallowed_rel_segment
+ public static final BitSet disallowed_rel_path = new BitSet(256);
+ // Static initializer for disallowed_rel_path
static {
- disallowed_rel_segment.or(uric);
- disallowed_rel_segment.andNot(rel_segment);
+ disallowed_rel_path.or(uric);
+ disallowed_rel_path.andNot(rel_path);
}
@@ -1188,18 +1241,18 @@
static {
allowed_abs_path.or(abs_path);
// allowed_abs_path.set('/'); // aleady included
- allowed_abs_path.clear('%');
+ allowed_abs_path.andNot(percent);
}
/**
- * Those characters that are allowed within the rel_segment.
+ * Those characters that are allowed within the rel_path.
*/
- public static final BitSet allowed_rel_segment = new BitSet(256);
- // Static initializer for allowed_rel_segment
+ public static final BitSet allowed_rel_path = new BitSet(256);
+ // Static initializer for allowed_rel_path
static {
- allowed_rel_segment.or(rel_segment);
- allowed_rel_segment.clear('%');
+ allowed_rel_path.or(rel_path);
+ allowed_rel_path.clear('%');
}
@@ -1332,6 +1385,9 @@
}
String octets = new String(octet, _protocolCharset);
char[] preuric = new char[octets.length()];
+ if (octet.length == 0) {
+ return preuric; // defined, but empty
+ }
octets.getChars(0, octets.length(), preuric, 0);
StringBuffer buf = new StringBuffer(preuric.length);
for (int i = 0; i < preuric.length; i++) {
@@ -1339,7 +1395,7 @@
if (allowed.get(c)) {
buf.append(c);
} else {
- byte b = (byte) preuric[i];
+ byte b = (byte) c;
buf.append('%');
char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
buf.append(hexadecimal);
@@ -1375,20 +1431,21 @@
}
byte[] octet = new String(uri).getBytes(_protocolCharset);
int oi = 0; // output index
- for (int ii = 0; ii < uri.length; ) {
+ for (int ii = 0; ii < uri.length; oi++) {
byte b = (byte) octet[ii++];
if (b == '%') {
- b = (byte) Character.digit(
- (char) (octet[ii++] << 4 + octet[ii++]), 16);
+ b = (byte) ((Character.digit((char) octet[ii++], 16) << 4) +
+ Character.digit((char) octet[ii++], 16));
if (b == -1) {
throw new IllegalArgumentException(
"incomplete trailing escape pattern");
}
}
- octet[oi++] = (byte) b;
+ octet[oi] = (byte) b;
}
- octet[oi] = (byte) '\0';
- return octet;
+ byte[] result = new byte[oi];
+ System.arraycopy(octet, 0, result, 0, oi);
+ return result;
}
@@ -1407,8 +1464,9 @@
}
char[] target = component.toCharArray();
for (int i = 0; i < target.length; i++) {
- if (disallowed.get(target[i]))
+ if (disallowed.get(target[i])) {
return false;
+ }
}
return true;
}
@@ -1448,7 +1506,7 @@
BitSet generous) {
// validate each component by generous characters
if (eoffset == -1) {
- eoffset = component.length;
+ eoffset = component.length -1;
}
for (int i = soffset; i < eoffset; i++) {
if (!generous.get(component[i]))
@@ -1509,12 +1567,26 @@
}
/**
+ * The starting index
+ */
+ int from = 0;
+
+ /**
* <p><blockquote><pre>
* @@@@@@@@
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
* </pre></blockquote><p>
*/
- int at = tmp.indexOf(":/?#");
+ int at = indexOf(tmp, ":/?#", from);
+ if (at == -1) {
+ at = 0;
+ }
+
+ /**
+ * The length of the sequence of characters.
+ * It may not be equal to the length of the byte array.
+ */
+ int length = tmp.length();
/**
* <p><blockquote><pre>
@@ -1523,20 +1595,15 @@
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
* </pre></blockquote><p>
*/
- if (at > 0 && tmp.charAt(at) == ':') {
+ if (0 < at && at < length && tmp.charAt(at) == ':') {
char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
if (validate(target, scheme)) {
_scheme = target;
}
+ from = ++at;
}
/**
- * The length of the sequence of characters.
- * It may not be equal to the length of the byte array.
- */
- int length = tmp.length();
-
- /**
* <p><blockquote><pre>
* authority = $4 = jakarta.apache.org
* @@
@@ -1545,52 +1612,25 @@
*/
// Reset flags
_is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
- if (at >= 0 && tmp.charAt(at) == '/') {
+ if (0 <= at && at < length && tmp.charAt(at) == '/') {
// Set flag
_is_hier_part = true;
- if (at+2 < length && tmp.charAt(at+1) == '/') {
- // the temporaray index to start the search from
- int from = at + 2;
- int next = tmp.indexOf("/?#", from); // at, if not -1
+ if (at + 2 < length && tmp.charAt(at + 1) == '/') {
+ // the temporary index to start the search from
+ int next = indexOf(tmp, "/?#", at + 2);
if (next == -1) {
- next = tmp.length();
+ next = (tmp.substring(at + 2).length() == 0) ? at + 2 :
+ tmp.length();
}
- parseAuthority(tmp.substring(from, next));
- at = next;
+ parseAuthority(tmp.substring(at + 2, next));
+ from = at = next;
// Set flag
_is_net_path = true;
}
- if (tmp.charAt(at) == '/') {
+ if (from == at) {
// Set flag
_is_abs_path = true;
}
- } else {
- if (_scheme == null) { // is_relativeURI
- // rel_path = rel_segment [ abs_path ]
- int next = tmp.indexOf('/');
- if (next == -1) {
- next = tmp.length();
- }
- // validating before escape encoding
- if (prevalidate(tmp.substring(at, next),
- disallowed_rel_segment)) {
- // Set flag
- _is_rel_path = true;
- }
- // REMINDME: let us skip the rest of abs_path to validate
- } else { // is_absoluteURI
- // validating before escape encoding
- if (prevalidate(tmp.substring(at), disallowed_opaque_part)) {
- // Set flag
- _is_opaque_part = true;
- }
- }
- if (!_is_rel_path || !_is_opaque_part) {
- // correct validation. possibly, only fragment.
- // is_relativeURI and is_absoluteURI must be false
- // Set flag
- _is_only_fragment = true;
- }
}
/**
@@ -1600,12 +1640,26 @@
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
* </pre></blockquote><p>
*/
- if (tmp.charAt(at) != '?' && tmp.charAt(at) != '#') {
- int from = at;
- int next = tmp.indexOf("?#", from);
+ if (from < length) { // && tmp.charAt(from) != '?' &&
+ // tmp.charAt(from) != '#') {
+ // rel_path = rel_segment [ abs_path ]
+ int next = indexOf(tmp, "?#", from);
if (next == -1) {
next = tmp.length();
}
+ if (prevalidate(tmp.substring(from, next),
+ disallowed_rel_path)) {
+ // Set flag
+ _is_rel_path = true;
+ } else if (prevalidate(tmp.substring(from, next),
+ disallowed_opaque_part)) {
+ // validating before escape encoding // is_absoluteURI
+ // Set flag
+ _is_opaque_part = true;
+ } else {
+ // the path component is never undefined, though it may be empty
+ _path = new char[] {'\0'};
+ }
setPath(tmp.substring(from, next));
at = next;
}
@@ -1617,13 +1671,13 @@
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
* </pre></blockquote><p>
*/
- if (at+1 < length && tmp.charAt(at) == '?') {
- int from = at + 1;
- int next = tmp.indexOf('#', from);
- if (next != -1) {
- _query = encode(tmp.substring(from, next), allowed_query);
- at = next;
+ if (0 <= at && at+1 < length && tmp.charAt(at) == '?') {
+ int next = tmp.indexOf('#', at + 1);
+ if (next == -1) {
+ next = tmp.length();
}
+ _query = encode(tmp.substring(at + 1, next), allowed_query);
+ at = next;
}
/**
@@ -1633,9 +1687,14 @@
* ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
* </pre></blockquote><p>
*/
- if (at+1 < length && tmp.charAt(at) == '#') {
- int from = at + 1;
- _fragment = encode(tmp.substring(from), allowed_fragment);
+ if (0 <= at && at+1 < length && tmp.charAt(at) == '#') {
+ _fragment = encode(tmp.substring(at + 1), allowed_fragment);
+ if (!_is_abs_path && !_is_rel_path && !_is_opaque_part) {
+ // correct validation. possibly, only fragment.
+ // is_relativeURI and is_absoluteURI must be false
+ // Set flag
+ _is_only_fragment = true;
+ }
}
// set this URI.
@@ -1644,6 +1703,54 @@
/**
+ * Get the earlier one among indexs that the characters as to be indexed
+ * are from the given string.
+ *
+ * @param s the string to be indexed
+ * @param delims the delimiters used to index
+ * @return the earlier index if there are delimiters
+ */
+ protected int indexOf(String s, String delims) {
+ return indexOf(s, delims, -1);
+ }
+
+
+ /**
+ * Get the earlier one among indexs that the characters as to be indexed
+ * are from the given string.
+ *
+ * @param s the string to be indexed
+ * @param delims the delimiters used to index
+ * @param offset the from index
+ * @return the earlier index if there are delimiters
+ */
+ protected int indexOf(String s, String delims, int offset) {
+ if (s == null || s.length() == 0) {
+ return -1;
+ }
+ if (delims == null || delims.length() == 0) {
+ return -1;
+ }
+ // check boundaries
+ if (offset < 0) {
+ offset = 0;
+ } else if (offset > s.length()) {
+ return -1;
+ }
+ // s is never null
+ int min = s.length();
+ char[] delim = delims.toCharArray();
+ for (int i = 0; i < delim.length; i++) {
+ int at = s.indexOf(delim[i], offset);
+ if (at >= 0 && at < min) {
+ min = at;
+ }
+ }
+ return (min == s.length()) ? -1 : min;
+ }
+
+
+ /**
* Parse the authority component.
*
* @param original the original character sequence of authority component
@@ -1658,7 +1765,7 @@
int from = 0;
int next = original.indexOf('@');
- if (next != -1) { // neither -1 and 0
+ if (next != -1) { // neither -1 and 0
// if next == 0, for example, in ftp, userinfo = 'anonymous'
// each protocol extented from URI supports the specific userinfo
_userinfo = encode(original.substring(0, next), allowed_userinfo);
@@ -1682,12 +1789,12 @@
if (next == -1) {
next = original.length();
}
+ // REMINDME: it doesn't need the pre-validation
+ _host = original.substring(from, next).toCharArray();
if (validate(_host, IPv4address)) {
- _host = original.substring(from, next).toCharArray();
// Set flag
_is_IPv4address = true;
} else if (validate(_host, hostname)) {
- _host = original.substring(from, next).toCharArray();
// Set flag
_is_hostname = true;
} else {
@@ -1702,7 +1809,8 @@
// set a registry-based naming authority
_authority = encode(original.toString(), allowed_reg_name);
} else {
- if (original.charAt(next) == ':') {
+ next = original.indexOf('/', from);
+ if (next > 0 && original.charAt(next) == ':') { // not empty
from = next + 1;
_port = Integer.parseInt(original.substring(from));
}
@@ -1747,8 +1855,11 @@
}
if (_opaque != null && _is_opaque_part) {
buf.append(_opaque);
- } else if (_path != null) { // _is_hier_part or _is_relativeURI
- buf.append(_path);
+ } else if (_path != null) { // && _path.length != 0) {
+ // _is_hier_part or _is_relativeURI
+ if (_path.length != 0) {
+ buf.append(_path);
+ }
if (_query != null) { // has_query
buf.append('?');
buf.append(_query);
@@ -2147,10 +2258,10 @@
StringBuffer buff = new StringBuffer(path.length());
int at = path.indexOf('/');
if (at > 0) { // never 0
- buff.append(encode(path.substring(0, at), allowed_rel_segment));
+ buff.append(encode(path.substring(0, at), allowed_rel_path));
buff.append(encode(path.substring(at), allowed_abs_path));
} else {
- buff.append(encode(path, allowed_rel_segment));
+ buff.append(encode(path, allowed_rel_path));
}
_path = buff.toString().toCharArray();
} else if (_is_opaque_part) {
@@ -2176,7 +2287,7 @@
base_path = base.substring(0, at + 1).toCharArray();
}
// _path could be empty
- if (rel_path.length == 0) {
+ if (rel_path == null || rel_path.length == 0) {
return normalize(base_path);
} else if (rel_path[0] == '/') {
return rel_path;
@@ -2493,9 +2604,14 @@
return null;
}
String normalized = new String(path);
+ boolean endsWithSlash = true;
// precondition
if (!normalized.endsWith("/")) {
normalized += '/';
+ endsWithSlash = false;
+ }
+ if (normalized.endsWith("/./") || normalized.endsWith("/../")) {
+ endsWithSlash = true;
}
// Resolve occurrences of "/./" in the normalized path
while (true) {
@@ -2533,6 +2649,11 @@
}
normalized = normalized.substring(0, at) +
normalized.substring(at + 1);
+ }
+ if (!endsWithSlash && normalized.endsWith("/")) {
+ normalized = normalized.substring(0, normalized.length()-1);
+ } else if (endsWithSlash && !normalized.endsWith("/")) {
+ normalized = normalized + "/";
}
// Set the normalized path that we have completed
return normalized.toCharArray();
--
To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>