jericho 02/03/30 02:00:50
Modified: src/util/org/apache/util URI.java
Log:
- Fix compile errors in this class that were committed accidentally. :( This class isn't
complete yet; I'm still working on the URI implementation. But you can check the mechanism and
give me some advice. This class will replace the GenericURI class, I hope. I'll do
the rest of it sometime later...
Revision Changes Path
1.3 +1411 -83 jakarta-slide/src/util/org/apache/util/URI.java
Index: URI.java
===================================================================
RCS file: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- URI.java 28 Mar 2002 06:12:06 -0000 1.2
+++ URI.java 30 Mar 2002 10:00:49 -0000 1.3
@@ -1,7 +1,7 @@
/*
- * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v 1.2
2002/03/28 06:12:06 jericho Exp $
- * $Revision: 1.2 $
- * $Date: 2002/03/28 06:12:06 $
+ * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v 1.3
2002/03/30 10:00:49 jericho Exp $
+ * $Revision: 1.3 $
+ * $Date: 2002/03/30 10:00:49 $
*
* ====================================================================
*
@@ -22,14 +22,14 @@
* the documentation and/or other materials provided with the
* distribution.
*
- * 3. the end-user documentation included with the redistribution, if
+ * 3. The end-user documentation included with the redistribution, if
* any, must include the following acknowlegement:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowlegement may appear in the software itself,
* if and wherever such third-party acknowlegements normally appear.
*
- * 4. the names "The Jakarta Project", "Slide", and "Apache Software
+ * 4. The names "The Jakarta Project", "Slide", and "Apache Software
* Foundation" must not be used to endorse or promote products derived
* from this software without prior written permission. For written
* permission, please contact [EMAIL PROTECTED]
@@ -63,34 +63,51 @@
package org.apache.util;
+import java.io.UnsupportedEncodingException;
+import java.util.BitSet;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import sun.security.action.GetBooleanAction;
+import sun.security.action.GetPropertyAction;
+
/**
- * the interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
- *
+ * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
+ * The purpose of this class is to support parsing a URI reference, to be
+ * extended for specific protocols, and to account for the character encoding
+ * of the transport protocol and the charset of the document.
+ * <p>
* A URI is always in an "escaped" form, since escaping or unescaping a
* completed URI might change its semantics.
- *
+ * <p>
* Implementers should be careful not to escape or unescape the same string
* more than once, since unescaping an already unescaped string might lead to
* misinterpreting a percent data character as another escaped character,
* or vice versa in the case of escaping an already escaped string.
- *
+ * <p>
* In order to avoid these problems, data types used as follows:
+ * <p><blockquote><pre>
* URI character sequence: char
* octet sequence: byte
* original character sequence: String
+ * </pre></blockquote><p>
*
* So, a URI is a sequence of characters as an array of a char type, which
* is not always represented as a sequence of octets as an array of byte.
- *
+ * <p>
+ *
* URI Syntactic Components
* - In general, written as follows:
+ * <p><blockquote><pre>
* Absolute URI = <scheme>:<scheme-specific-part>
* Generic URI = <scheme>://<authority><path>?<query>
+ * </pre></blockquote><p>
* - Syntax
+ * <p><blockquote><pre>
* absoluteURI = scheme ":" ( hier_part | opaque_part )
* hier_part = ( net_path | abs_path ) [ "?" query ]
* net_path = "//" authority [ abs_path ]
* abs_path = "/" path_segments
+ * </pre></blockquote><p>
*
* the following examples illustrate URI that are in common use.
* ftp://ftp.is.co.za/rfc/rfc1808.txt
@@ -105,9 +122,12 @@
* -- news scheme for USENET news groups and articles
* telnet://melvyl.ucop.edu/
* -- telnet scheme for interactive services via the TELNET Protocol
+ * <p>
+ * Please, notice that there are many modifications from URL(RFC 1738) and
+ * relative URL(RFC 1808).
*
* @author <a href="mailto:[EMAIL PROTECTED]">Sung-Gu</a>
- * @version $Revision: 1.2 $ $Date: 2002/03/14 15:14:01
+ * @version $Revision: 1.3 $ $Date: 2002/03/14 15:14:01
*/
public class URI implements java.io.Serializable {
@@ -117,85 +137,89 @@
/**
- * This Constructor
- *
- * @param escapedURI the escaped URI string.
+ * Construct a URI from the given string.
+ * <p><blockquote><pre>
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ * </pre></blockquote><p>
+ *
+ * @param str the string to be represented to URI character sequence
+ * It is one of absoluteURI and relativeURI.
+ * @exception Exception
*/
- public URI(String escapedURI) {
- URI = escapedURI;
+ public URI(String str) throws Exception {
+ parseUriReference(str);
}
/**
- * This Constructor
- *
- * @param scheme the scheme string.
- * @param host the host string.
- * @param port the port number.
+ * Construct a general URI from the given components.
+ * <p><blockquote><pre>
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ * absoluteURI = scheme ":" ( hier_part | opaque_part )
+ * opaque_part = uric_no_slash *uric
+ * </pre></blockquote><p>
+ * In general, absolute URI = <scheme>:<scheme-specific-part>#<fragment>
+ *
+ * @param scheme the scheme string
+ * @param scheme_specific_part scheme_specific_part
+ * @param fragment the fragment string
*/
- public URI(String scheme, String host, int port) {
- this(scheme, host, port, null, null);
+ public URI(String scheme, String scheme_specific_part, String fragment)
+ throws Exception {
+ // TODO: validate and construct the URI character sequence
}
/**
- * This Constructor
- *
- * @param scheme the scheme string.
- * @param host the host string.
- * @param path the path string.
+ * Construct a general URI from the given components.
+ * <p><blockquote><pre>
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ * absoluteURI = scheme ":" ( hier_part | opaque_part )
+ * hier_part = ( net_path | abs_path ) [ "?" query ]
+ * </pre></blockquote><p>
+ * In general, generic URI = <scheme>://<authority><path>?<query>#<fragment>
+ *
+ * @param scheme the scheme string
+ * @param authority the authority string
+ * @param path the path string
+ * @param query the query string
+ * @param fragment the fragment string
*/
- public URI(String scheme, String host, String path) {
- this(scheme, host, -1, path, null);
+ public URI(String scheme, String authority, String path, String query,
+ String fragment) throws Exception {
+ // TODO: validate and construct the URI character sequence
}
/**
- * This Constructor
+ * Construct a general URI with the given relative URI.
+ * <p><blockquote><pre>
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
+ * </pre></blockquote><p>
*
- * @param scheme the scheme string.
- * @param host the host string.
- * @param port the port number.
- * @param path the path string.
+ * @param base the base URI
+ * @param relative the relative URI
*/
- public URI(String scheme, String host, int port, String path) {
- this(scheme, host, port, path, null);
+ public URI(URI base, URI relative) throws Exception {
+ // TODO: validate and construct the URI character sequence
}
/**
- * This Constructor
+ * Construct a URI from the given components.
*
- * @param scheme the scheme string.
- * @param host the host string.
- * @param path the path string.
- * @param query the query string.
+ * @param scheme the scheme string
+ * @param userinfo the userinfo string
+ * @param host the host string
+ * @param port the port number
+ * @param path the path string
+ * @param query the query string
+ * @param fragment the fragment string
*/
- public URI(String scheme, String host, String path, String query) {
- this(scheme, host, -1, path, null);
- }
-
-
- /**
- * This Constructor
- *
- * @param scheme the scheme string.
- * @param host the host string.
- * @param port the port number.
- * @param path the path string.
- * @param query the query string.
- */
- public URI(String scheme, String host, int port, String path,
- String query) {
-
- URI = URIUtil.escape(scheme, URIUtil.schemeReserved()) + "://" +
- URIUtil.escape(host, URIUtil.hostReserved()) +
- ((port == defaultPort || port == -1) ? "" : ":" + port) +
- ((path == null || path.equals("")) ? "/" :
- URIUtil.escape((!path.startsWith("/")) ?
- "/" + path : path, URIUtil.pathReserved())) +
- ((query == null || query.equals("")) ? "" :
- "?" + URIUtil.escape(query, URIUtil.queryReserved()));
+ public URI(String scheme, String userinfo, String host, int port,
+ String path, String query, String fragment) throws Exception {
+ // TODO: validate and construct the URI character sequence
}
@@ -204,6 +228,8 @@
/**
* This Uniform Resource Identifier (URI).
+ * The URI is always in an "escaped" form, since escaping or unescaping
+ * a completed URI might change its semantics.
*/
protected char[] _uri = null;
@@ -211,64 +237,800 @@
/**
* The default charset of the protocol. RFC 2277, 2396
*/
- protected String _protocolCharset = "UTF-8";
+ protected static String _protocolCharset = "UTF-8";
/**
* The default charset of the document. RFC 2277, 2396
+ * The platform's charset is used for the document by default.
+ */
+ protected static String _documentCharset = null;
+ // Static initializer for _documentCharset
+ static {
+ _documentCharset = (String)AccessController.doPrivileged (
+ new GetPropertyAction("file.encoding")
+ );
+ }
+
+ /**
+ * The scheme.
+ */
+ protected char[] _scheme = null;
+
+
+ /**
+ * The authority.
*/
- protected String _documentCharset = null;
+ protected char[] _authority = null;
/**
- * The default scheme.
+ * The host.
*/
- protected String _scheme = null;
+ protected char[] _host = null;
/**
- * The default port.
+ * The port.
*/
protected int _port = -1;
- // --------------------------------------------------- Protected methods
+ /**
+ * The path.
+ */
+ protected char[] _path = null;
+
+
+ /**
+ * The query.
+ */
+ protected char[] _query = null;
+
+
+ /**
+ * The fragment.
+ */
+ protected char[] _fragment = null;
+
+
+ // ---------------------- Generous characters for each component validation
+
+ /**
+ * <p><blockquote><pre>
+ * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
+ * "8" | "9"
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet digit = new BitSet(256);
+ // Static initializer for digit
+ static {
+ for(int i='0';i<='9';i++) {
+ digit.set(i);
+ }
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * alpha = lowalpha | upalpha
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet alpha = new BitSet(256);
+ // Static initializer for alpha
+ static {
+ for(int i='a';i<='z';i++) {
+ alpha.set(i);
+ }
+ for(int i='A';i<='Z';i++) {
+ alpha.set(i);
+ }
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * alphanum = alpha | digit
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet alphanum = new BitSet(256);
+ // Static initializer for alphanum
+ static {
+ alphanum.or(alpha);
+ alphanum.or(digit);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
+ * "a" | "b" | "c" | "d" | "e" | "f"
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet hex = new BitSet(256);
+ // Static initializer for hex
+ static {
+ hex.or(digit);
+ for(int i='a';i<='f';i++) {
+ hex.set(i);
+ }
+ for(int i='A';i<='F';i++) {
+ hex.set(i);
+ }
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * escaped = "%" hex hex
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet escaped = new BitSet(256);
+ // Static initializer for escaped
+ static {
+ escaped.set('%');
+ escaped.or(hex);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
+ * "(" | ")"
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet mark = new BitSet(256);
+ // Static initializer for mark
+ static {
+ mark.set('-');
+ mark.set('_');
+ mark.set('.');
+ mark.set('!');
+ mark.set('~');
+ mark.set('*');
+ mark.set('\'');
+ mark.set('(');
+ mark.set(')');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * unreserved = alphanum | mark
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet unreserved = new BitSet(256);
+ // Static initializer for unreserved
+ static {
+ unreserved.or(alphanum);
+ unreserved.or(mark);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+ * "$" | ","
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet reserved = new BitSet(256);
+ // Static initializer for reserved
+ static {
+ reserved.set(';');
+ reserved.set('/');
+ reserved.set('?');
+ reserved.set(':');
+ reserved.set('@');
+ reserved.set('&');
+ reserved.set('=');
+ reserved.set('+');
+ reserved.set('$');
+ reserved.set(',');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * uric = reserved | unreserved | escaped
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet uric = new BitSet(256);
+ // Static initializer for uric
+ static {
+ uric.or(reserved);
+ uric.or(unreserved);
+ uric.or(escaped);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * fragment = *uric
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet fragment = uric;
+
+
+ /**
+ * <p><blockquote><pre>
+ * query = *uric
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet query = uric;
+
+
+ /**
+ * <p><blockquote><pre>
+ * pchar = unreserved | escaped |
+ * ":" | "@" | "&" | "=" | "+" | "$" | ","
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet pchar = new BitSet(256);
+ // Static initializer for pchar
+ static {
+ pchar.or(unreserved);
+ pchar.or(escaped);
+ pchar.set(':');
+ pchar.set('@');
+ pchar.set('&');
+ pchar.set('=');
+ pchar.set('+');
+ pchar.set('$');
+ pchar.set(',');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * param = *pchar
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet param = pchar;
+
+
+ /**
+ * <p><blockquote><pre>
+ * segment = *pchar *( ";" param )
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet segment = new BitSet(256);
+ // Static initializer for segment
+ static {
+ segment.or(pchar);
+ segment.set(';');
+ segment.or(param);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * path_segments = segment *( "/" segment )
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet path_segments = new BitSet(256);
+ // Static initializer for path_segments: all segment characters plus the "/" separator.
+ static {
+ path_segments.or(segment);
+ path_segments.set('/');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * abs_path = "/" path_segments
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet abs_path = new BitSet(256);
+ // Static initializer for abs_path
+ static {
+ abs_path.set('/');
+ abs_path.or(path_segments);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
+ * "&" | "=" | "+" | "$" | ","
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet uric_no_slash = new BitSet(256);
+ // Static initializer for uric_no_slash: unreserved, escaped and every reserved character except "/".
+ static {
+ uric_no_slash.or(unreserved);
+ uric_no_slash.or(escaped);
+ uric_no_slash.set(';');
+ uric_no_slash.set('?');
+ uric_no_slash.set(':');
+ uric_no_slash.set('@');
+ uric_no_slash.set('&');
+ uric_no_slash.set('=');
+ uric_no_slash.set('+');
+ uric_no_slash.set('$');
+ uric_no_slash.set(',');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * opaque_part = uric_no_slash *uric
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet opaque_part = new BitSet(256);
+ // Static initializer for opaque_part
+ static {
+ opaque_part.or(uric_no_slash);
+ opaque_part.or(uric);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * path = [ abs_path | opaque_part ]
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet path = new BitSet(256);
+ // Static initializer for path
+ static {
+ path.or(abs_path);
+ path.or(opaque_part);
+ }
+
+
+ /**
+ * port
+ */
+ protected static final BitSet port = digit;
+
+
+ /**
+ * <p><blockquote><pre>
+ * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet IPv4address = new BitSet(256);
+ // Static initializer for IPv4address
+ static {
+ IPv4address.or(digit);
+ IPv4address.set('.');
+ }
+
+
+ /**
+ * RFC 2373
+ * <p><blockquote><pre>
+ * IPv6address = hexpart [ ":" IPv4address ]
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet IPv6address = new BitSet(256);
+ // Static initializer for IPv6address reference
+ static {
+ IPv6address.or(hex); // hexpart
+ IPv6address.set(':');
+ IPv6address.or(IPv4address);
+ }
+
+
+ /**
+ * RFC 2732, 2373
+ * <p><blockquote><pre>
+ * IPv6reference = "[" IPv6address "]"
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet IPv6reference = new BitSet(256);
+ // Static initializer for IPv6reference
+ static {
+ IPv6reference.set('[');
+ IPv6reference.or(IPv6address);
+ IPv6reference.set(']');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet toplabel = new BitSet(256);
+ // Static initializer for toplabel
+ static {
+ toplabel.or(alphanum);
+ toplabel.set('-');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet domainlabel = toplabel;
+
+
+ /**
+ * <p><blockquote><pre>
+ * hostname = *( domainlabel "." ) toplabel [ "." ]
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet hostname = new BitSet(256);
+ // Static initializer for hostname
+ static {
+ hostname.or(toplabel);
+ // hostname.or(domainlabel);
+ hostname.set('.');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * host = hostname | IPv4address | IPv6reference
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet host = new BitSet(256);
+ // Static initializer for host
+ static {
+ host.or(hostname);
+ // host.or(IPv4address);
+ host.or(IPv6reference); // IPv4address
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * hostport = host [ ":" port ]
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet hostport = new BitSet(256);
+ // Static initializer for hostport: host characters, the ":" separator and port digits.
+ static {
+ hostport.or(host);
+ hostport.set(':');
+ hostport.or(port);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * userinfo = *( unreserved | escaped |
+ * ";" | ":" | "&" | "=" | "+" | "$" | "," )
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet userinfo = new BitSet(256);
+ // Static initializer for userinfo
+ static {
+ userinfo.or(unreserved);
+ userinfo.or(escaped);
+ userinfo.set(';');
+ userinfo.set(':');
+ userinfo.set('&');
+ userinfo.set('=');
+ userinfo.set('+');
+ userinfo.set('$');
+ userinfo.set(',');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * server = [ [ userinfo "@" ] hostport ]
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet server = new BitSet(256);
+ // Static initializer for server: userinfo characters, the "@" separator and hostport characters.
+ static {
+ server.or(userinfo);
+ server.set('@');
+ server.or(hostport);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * reg_name = 1*( unreserved | escaped | "$" | "," |
+ * ";" | ":" | "@" | "&" | "=" | "+" )
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet reg_name = new BitSet(256);
+ // Static initializer for reg_name
+ static {
+ reg_name.or(unreserved);
+ reg_name.or(escaped);
+ reg_name.set('$');
+ reg_name.set(',');
+ reg_name.set(';');
+ reg_name.set(':');
+ reg_name.set('@');
+ reg_name.set('&');
+ reg_name.set('=');
+ reg_name.set('+');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * authority = server | reg_name
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet authority = new BitSet(256);
+ // Static initializer for authority
+ static {
+ authority.or(server);
+ authority.or(reg_name);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * scheme = alpha *( alpha | digit | "+" | "-" | "." )
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet scheme = new BitSet(256);
+ // Static initializer for scheme
+ static {
+ scheme.or(alpha);
+ scheme.or(digit);
+ scheme.set('+');
+ scheme.set('-');
+ scheme.set('.');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * rel_segment = 1*( unreserved | escaped |
+ * ";" | "@" | "&" | "=" | "+" | "$" | "," )
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet rel_segment = new BitSet(256);
+ // Static initializer for rel_segment
+ static {
+ rel_segment.or(unreserved);
+ rel_segment.or(escaped);
+ rel_segment.set(';');
+ rel_segment.set('@');
+ rel_segment.set('&');
+ rel_segment.set('=');
+ rel_segment.set('+');
+ rel_segment.set('$');
+ rel_segment.set(',');
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * rel_path = rel_segment [ abs_path ]
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet rel_path = new BitSet(256);
+ // Static initializer for rel_path
+ static {
+ rel_path.or(rel_segment);
+ rel_path.or(abs_path);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * net_path = "//" authority [ abs_path ]
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet net_path = new BitSet(256);
+ // Static initializer for net_path
+ static {
+ net_path.set('/');
+ net_path.or(authority);
+ net_path.or(abs_path);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * hier_part = ( net_path | abs_path ) [ "?" query ]
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet hier_part = new BitSet(256);
+ // Static initializer for hier_part
+ static {
+ hier_part.or(net_path);
+ hier_part.or(abs_path);
+ hier_part.set('?');
+ hier_part.or(query);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet relativeURI = new BitSet(256);
+ // Static initializer for relativeURI
+ static {
+ relativeURI.or(net_path);
+ relativeURI.or(abs_path);
+ relativeURI.or(rel_path);
+ relativeURI.set('?');
+ relativeURI.or(query);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * absoluteURI = scheme ":" ( hier_part | opaque_part )
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet absoluteURI = new BitSet(256);
+ // Static initializer for absoluteURI
+ static {
+ absoluteURI.or(scheme);
+ absoluteURI.set(':');
+ absoluteURI.or(hier_part);
+ absoluteURI.or(opaque_part);
+ }
+
+
+ /**
+ * <p><blockquote><pre>
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ * </pre></blockquote><p>
+ */
+ protected static final BitSet URI_reference = new BitSet(256);
+ // Static initializer for URI_reference
+ static {
+ URI_reference.or(absoluteURI);
+ URI_reference.or(relativeURI);
+ URI_reference.set('#');
+ URI_reference.or(fragment);
+ }
+
+ // ------------------------------- Characters allowed within each component
+
+ /**
+ * Those characters that are allowed within the authority component.
+ */
+ public static final BitSet allowedAuthority = new BitSet(256);
+ // Static initializer for allowedAuthority
+ static {
+ // FIXME: you can verify with validate method.
+ allowedAuthority.or(unreserved);
+ allowedAuthority.or(authority);
+ }
+
+
+ /**
+ * Those characters that are allowed within the path component.
+ */
+ public static final BitSet allowedPath = new BitSet(256);
+ // Static initializer for allowedPath
+ static {
+ // FIXME: you can verify with validate method.
+ allowedPath.or(unreserved);
+ allowedPath.or(path);
+ }
+
+
+ /**
+ * Those characters that are allowed within the query component.
+ */
+ public static final BitSet allowedQuery = new BitSet(256);
+ // Static initializer for allowedQuery
+ static {
+ // FIXME: you can verify with validate method.
+ allowedQuery.or(unreserved);
+ }
+
+
+ /**
+ * Those characters that are allowed within the fragment component.
+ */
+ public static final BitSet allowedFragment = new BitSet(256);
+ // Static initializer for allowedFragment
+ static {
+ // FIXME: you can verify with validate method.
+ allowedFragment.or(unreserved);
+ }
+
+
+ /**
+ * Those characters that are allowed within the userinfo component.
+ * <p><blockquote><pre>
+ * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
+ * </pre></blockquote><p>
+ */
+ public static final BitSet allowedUnwise = new BitSet(256);
+ // Static initializer for allowedUnwise
+ static {
+ // FIXME: you can verify with validate method.
+ allowedUnwise.or(unreserved);
+ // allowedUnwise.or(unwise);
+ }
+
+ // ------------------------------------------- Flags for this URI-reference
+
+ // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ protected boolean _is_absoluteURI;
+ protected boolean _is_relativeURI;
+ // absoluteURI = scheme ":" ( hier_part | opaque_part )
+ protected boolean _is_hier_part;
+ protected boolean _is_opaque_part;
+ // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
+ // hier_part = ( net_path | abs_path ) [ "?" query ]
+ protected boolean _is_net_path;
+ protected boolean _is_abs_path;
+ protected boolean _is_rel_path;
+ // net_path = "//" authority [ abs_path ]
+ protected boolean _has_authority;
+ // authority = server | reg_name
+ protected boolean _is_reg_name;
+ protected boolean _is_server; // = _has_server
+ // server = [ [ userinfo "@" ] hostport ]
+ protected boolean _has_userinfo;
+ // hostport = host [ ":" port ]
+ protected boolean _has_hostport;
+ // host = hostname | IPv4address | IPv6reference
+ protected boolean _is_hostname;
+ protected boolean _is_IPv4address;
+ protected boolean _is_IPv6reference;
+
+ // ------------------------------------------------------ Protected methods
/**
* This is a two mapping, one from original characters to octets, and
* subsequently a second from octets to URI characters:
+ * <p><blockquote><pre>
* original character sequence->octet sequence->URI character sequence
+ * </pre></blockquote><p>
*
- * the only time escape encodings can safely be made is when a URI is
- * being created from its component parts
+ * The only time escape encodings can safely be made is when a URI is
+ * being created from its component parts. The escape and validate method
+ * might be performed in this method internally.
*
* @param original the original character sequence
- * @param safe those characters that are allowed within a component
+ * @param allowed those characters that are allowed within a component
* @return URI character sequence
+ * @exception UnsupportedEncodingException
* @see escape
*/
- protected char[] encode(String original, BitSet safe);
+ protected char[] encode(String original, BitSet allowed)
+ throws UnsupportedEncodingException {
+
+ if (original == null) return null;
+ byte[] octet = original.getBytes(_documentCharset);
+ // TODO: decode octet to uri characters.
+ // new String(octet, _protocolCharset);
+ return null;
+ }
/**
* This is a two mapping, one from URI characters to octets, and
* subsequently a second from octets to original characters:
+ * <p><blockquote><pre>
* URI character sequence->octet sequence->original character sequence
+ * </pre></blockquote><p>
*
* A URI must be separated into its components before the escaped
- * characters within those components can be safely decoded.
+ * characters within those components can be safely decoded.
+ * The unescape method is performed in this method internally.
*
* @param octet the octet sequence
* @return original character sequence
+ * @exception UnsupportedEncodingException
* @see unescape
*/
- protected String decode(char[] uri);
+ protected String decode(char[] uri)
+ throws UnsupportedEncodingException {
+ // TODO: decode octet to uri characters.
+ return null;
+ }
/**
* This is a mapping from octets to URI characters:
+ * <p><blockquote><pre>
* octet sequence->URI character sequence
+ * </pre></blockquote><p>
*
* An escaped octet is encoded as a character triplet, consisting of the
* percent character "%" followed by the two hexadecimal digits
@@ -276,15 +1038,20 @@
* encoding for the US-ASCII space character.
*
* @param octet the octet sequence to be escaped
- * @param safe those characters that are allowed within a component
+ * @param allowed those characters that are allowed within a component
* @return URI character sequence
*/
- protected char[] escape(byte[] octet, BitSet safe);
+ protected char[] escape(byte[] octet, BitSet allowed) {
+ // TODO: escape octet to uri characters.
+ return null;
+ }
/**
* This is a mapping from URI characters to octets:
+ * <p><blockquote><pre>
* URI character sequence->octet sequence
+ * </pre></blockquote><p>
*
* the percent "%" character always has the reserved purpose of being
* the escape indicator, it must be escaped as "%25" in order to be used
@@ -293,10 +1060,356 @@
* @param uri the URI character sequence
* @return octet sequence
*/
- protected byte[] unescape(char[] uri);
+ protected byte[] unescape(char[] uri) {
+ // TODO: unescape uri characters to octets
+ return null;
+ }
+
+
+ /**
+ * Validate the URI characters within the specific component.
+ * The check is generous rather than strict; strict validation might be
+ * performed before this method is called.
+ *
+ * @param component the characters sequence within the component
+ * @param generous those characters that are allowed within a component
+ * @return if true, it's the URI character sequence
+ */
+ protected boolean validate(char[] component, BitSet generous) {
+ // TODO: validate each component with generous characters
+ return true;
+ }
+
+
+ /**
+ * Parse a URI reference, given as a <code>String</code> in the character
+ * encoding of the local system or the document, into its components.
+ * Working on the character sequence avoids any possibility of conflict
+ * with non-ASCII characters.
+ * <p>
+ * The following line is the regular expression for breaking-down a URI
+ * reference into its components.
+ * <p><blockquote><pre>
+ * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+ * 12 3 4 5 6 7 8 9
+ * </pre></blockquote><p>
+ * For example, matching the above expression to
+ * http://jakarta.apache.org/slide/ietf/uri/#Related
+ * results in the following subexpression matches:
+ * <p><blockquote><pre>
+ * $1 = http:
+ * scheme = $2 = http
+ * $3 = //jakarta.apache.org
+ * authority = $4 = jakarta.apache.org
+ * path = $5 = /slide/ietf/uri/
+ * $6 = &lt;undefined&gt;
+ * query = $7 = &lt;undefined&gt;
+ * $8 = #Related
+ * fragment = $9 = Related
+ * </pre></blockquote><p>
+ * The scheme, path, query and fragment fields and the classification
+ * flags are set as a side effect; the authority is handed over to
+ * <code>parseAuthority</code>. A component that is absent from the
+ * reference leaves its field untouched.
+ *
+ * @param original the original character sequence
+ * @exception Exception if the document charset is not supported, or if
+ * parsing the authority or encoding a component fails
+ */
+ protected void parseUriReference(String original) throws Exception {
+
+     String tmp = original.trim();
+
+     // Consider the character encoding of the document.
+     // The platform's charset is used for the document by default.
+     if (_documentCharset != null) {
+         tmp = new String(tmp.getBytes(_documentCharset), _documentCharset);
+     }
+
+     // The length of the sequence of characters.
+     // It may not be equal to the length of the byte array.
+     int length = tmp.length();
+
+     // Index of the first character that has not been consumed yet.
+     int at = 0;
+
+     // scheme = $2: the text before the first delimiter, if that
+     // delimiter is ':' and the text is not empty.
+     _is_absoluteURI = false;
+     int delimiter = indexOfAnyDelimiter(tmp, ":/?#", 0);
+     if (delimiter > 0 && tmp.charAt(delimiter) == ':') {
+         _scheme = tmp.substring(0, delimiter).trim().toLowerCase()
+             .toCharArray();
+         // Set flag
+         _is_absoluteURI = true;
+         // Continue right after the ':'
+         at = delimiter + 1;
+     }
+     _is_relativeURI = !_is_absoluteURI;
+
+     // authority = $4: the text between a leading "//" and the next
+     // '/', '?' or '#' (or the end of the reference).
+     _is_net_path = false;
+     _is_abs_path = false;
+     _is_rel_path = false;
+     _is_hier_part = false;
+     if (at < length && tmp.charAt(at) == '/') {
+         // Set flag
+         _is_hier_part = true;
+         if (at + 1 < length && tmp.charAt(at + 1) == '/') {
+             int from = at + 2;
+             int next = indexOfAnyDelimiter(tmp, "/?#", from);
+             if (next < 0) {
+                 next = length;
+             }
+             parseAuthority(tmp.substring(from, next));
+             // Set flag
+             _is_net_path = true;
+             at = next;
+         } else {
+             // Set flag
+             _is_abs_path = true;
+         }
+     } else if (_is_relativeURI) {
+         // Set flag
+         _is_rel_path = true;
+     }
+     // An absolute URI without a leading '/' (e.g. after "mailto:") sets
+     // none of the path flags above and ends up as opaque_part.
+     // NOTE(review): a relative reference without a leading '/' is
+     // flagged as opaque_part as well, as in the original logic -
+     // confirm that this is intended.
+     _is_opaque_part = !_is_hier_part;
+
+     // path = $5: everything up to the first '?' or '#'.
+     if (at < length && tmp.charAt(at) != '?' && tmp.charAt(at) != '#') {
+         int next = indexOfAnyDelimiter(tmp, "?#", at);
+         if (next < 0) {
+             next = length;
+         }
+         _path = encode(tmp.substring(at, next), allowedPath);
+         at = next;
+     }
+
+     // query = $7: everything between '?' and the first '#'.
+     if (at < length && tmp.charAt(at) == '?') {
+         int from = at + 1;
+         int next = tmp.indexOf('#', from);
+         if (next < 0) {
+             next = length;
+         }
+         _query = encode(tmp.substring(from, next), allowedQuery);
+         at = next;
+     }
+
+     // fragment = $9: everything after '#'.
+     if (at < length && tmp.charAt(at) == '#') {
+         _fragment = encode(tmp.substring(at + 1), allowedFragment);
+     }
+ }
+
+
+ /**
+ * Find the first position, at or after the given index, of any one of
+ * the delimiter characters.
+ *
+ * @param s the character sequence to search
+ * @param delimiters the set of delimiter characters
+ * @param from the index to start the search from
+ * @return the index of the first delimiter found, or -1 if there is none
+ */
+ private static int indexOfAnyDelimiter(String s, String delimiters,
+                                        int from) {
+     for (int i = Math.max(from, 0); i < s.length(); i++) {
+         if (delimiters.indexOf(s.charAt(i)) >= 0) {
+             return i;
+         }
+     }
+     return -1;
+ }
+
+
+ /**
+ * Parse the authority part.
+ * <p>
+ * Not implemented yet: the body currently holds only TODO notes, so
+ * calling this method has no effect.
+ *
+ * @param original the original character sequence
+ * @exception Exception declared for the planned implementation; the
+ * current body never throws
+ */
+ protected void parseAuthority(String original) throws Exception {
+ // TODO: validate the authority part and
+ // confirm the component within the authority part.
+ //_authority = encode(original, allowedAuthority);
+ // TODO: Set reg_name, server, userinfo, hostport
+ // TODO: _is_reg_name, _is_server, _has_userinfo, _has_hostport
+ // _is_hostname, _is_IPv4address, _is_IPv6reference
+ }
+
+
+ // --------------------------------------------------------- Public methods
+
+
+ /**
+ * Tell whether or not this URI is absolute.
+ *
+ * @return true if and only if this URI is absoluteURI
+ */
+ public boolean is_absoluteURI() {
+ return _is_absoluteURI;
+ }
+
+
+ /**
+ * Tell whether or not this URI is relative.
+ *
+ * @return true if and only if this URI is relativeURI
+ */
+ public boolean is_relativeURI() {
+ return _is_relativeURI;
+ }
+
+
+ /**
+ * Tell whether or not this URI is hier_part.
+ *
+ * @return true if and only if this URI is hier_part
+ */
+ public boolean is_hier_part() {
+ return _is_hier_part;
+ }
+
+
+ /**
+ * Tell whether or not this URI is opaque_part.
+ *
+ * @return true if and only if this URI is opaque_part
+ */
+ public boolean is_opaque_part() {
+ return _is_opaque_part;
+ }
+
+
+ /**
+ * Tell whether or not this URI is net_path.
+ *
+ * @return true if and only if this URI is net_path
+ */
+ public boolean is_net_path() {
+ return _is_net_path;
+ }
+
+
+ /**
+ * Tell whether or not this URI is abs_path.
+ *
+ * @return true if and only if this URI is abs_path
+ */
+ public boolean is_abs_path() {
+ return _is_abs_path;
+ }
+
+
+ /**
+ * Tell whether or not this URI is rel_path.
+ *
+ * @return true if and only if this URI is rel_path
+ */
+ public boolean is_rel_path() {
+ return _is_rel_path;
+ }
+
+
+ /**
+ * Tell whether or not this URI has authority.
+ *
+ * @return true if and only if this URI has authority
+ */
+ public boolean has_authority() {
+ return _has_authority;
+ }
+
+ /**
+ * Tell whether or not this URI is reg_name.
+ *
+ * @return true if and only if this URI is reg_name
+ */
+ public boolean is_reg_name() {
+ return _is_reg_name;
+ }
+
+
+ /**
+ * Tell whether or not this URI is server.
+ *
+ * @return true if and only if this URI is server
+ */
+ public boolean is_server() {
+ return _is_server;
+ }
+
+
+ /**
+ * Tell whether or not this URI has userinfo.
+ *
+ * @return true if and only if this URI has userinfo
+ */
+ public boolean has_userinfo() {
+ return _has_userinfo;
+ }
+
+
+ /**
+ * Tell whether or not this URI has hostport.
+ *
+ * @return true if and only if this URI has hostport
+ */
+ public boolean has_hostport() {
+ return _has_hostport;
+ }
+
+
+ /**
+ * Tell whether or not this URI is hostname.
+ *
+ * @return true if and only if this URI is hostname
+ */
+ public boolean is_hostname() {
+ return _is_hostname;
+ }
- // ------------------------------------------------------- Public methods
+ /**
+ * Tell whether or not this URI is IPv4address.
+ *
+ * @return true if and only if this URI is IPv4address
+ */
+ public boolean is_IPv4address() {
+ return _is_IPv4address;
+ }
+
+
+ /**
+ * Tell whether or not this URI is IPv6reference.
+ *
+ * @return true if and only if this URI is IPv6reference
+ */
+ public boolean is_IPv6reference() {
+ return _is_IPv6reference;
+ }
+
+ // ---------------------------------------------------------------- Charset
/**
@@ -344,14 +1457,229 @@
return _documentCharset;
}
+ // ------------------------------------------------------------- The scheme
+
+ /**
+ * Get the scheme.
+ *
+ * @return the scheme
+ * @exception Exception
+ * UnsupportedEncodingException
+ */
+ public String getScheme() throws Exception {
+ return decode(_scheme);
+ }
+
+ // ---------------------------------------------------------- The authority
+
+ /**
+ * Set the authority.
+ *
+ * @param authority the authority
+ * @exception Exception
+ * UnsupportedEncodingException
+ */
+ public void setAuthority(String authority) throws Exception {
+ _authority = encode(authority, allowedAuthority);
+ }
+
+
+ /**
+ * Get the authority.
+ *
+ * @return the authority
+ * @exception Exception
+ * UnsupportedEncodingException
+ */
+ public String getAuthority() throws Exception {
+ return decode(_authority);
+ }
+
+ // --------------------------------------------------------------- The host
+
+ /**
+ * Set the host.
+ *
+ * @param host the host
+ * @exception Exception
+ * UnsupportedEncodingException
+ */
+ public void setHost(String host) throws Exception {
+ // Support the non-ASCII host configuration
+ _host = encode(host, this.host); // Notice that there isn't allowedHost
+ }
+
+
+ /**
+ * Get the host.
+ *
+ * @return the host
+ * @exception Exception
+ * UnsupportedEncodingException
+ */
+ public String getHost() throws Exception {
+ // Support the non-ASCII host configuration
+ return decode(_host);
+ }
+
+ // --------------------------------------------------------------- The port
+
+ /**
+ * Set the port.
+ *
+ * @param port the port
+ */
+ public void setPort(int port) {
+ _port = port;
+ }
+
+
+ /**
+ * Get the port.
+ *
+ * @return the port
+ */
+ public int getPort() {
+ return _port;
+ }
+
+ // --------------------------------------------------------------- The path
+
+ /**
+ * Set the path.
+ *
+ * @param path the path string
+ * @exception Exception
+ * UnsupportedEncodingException
+ */
+ public void setPath(String path) throws Exception {
+ _path = encode(path, allowedPath);
+ }
+
+
+ /**
+ * Get the path.
+ *
+ * @return the path string
+ * @exception Exception
+ * UnsupportedEncodingException
+ */
+ public String getPath() throws Exception {
+ return decode(_path);
+ }
+
+
+ /**
+ * Get the escaped path.
+ *
+ * @return the escaped path string, or null if no path has been set
+ */
+ public String getEscapedPath() {
+     // new String((char[]) null) would throw a NullPointerException
+     return (_path == null) ? null : new String(_path);
+ }
+
+
+ // -------------------------------------------------------------- The query
+
+ /**
+ * Set the query.
+ *
+ * @param query the query string.
+ * @exception Exception
+ * UnsupportedEncodingException
+ */
+ public void setQuery(String query) throws Exception {
+ _query = encode(query, allowedQuery);
+ }
+
+
+ /**
+ * Get the query.
+ *
+ * @return the query string.
+ * @exception Exception
+ * UnsupportedEncodingException
+ */
+ public String getQuery() throws Exception {
+ return decode(_query);
+ }
+
+
+ /**
+ * Get the escaped query.
+ *
+ * @return the escaped query string, or null if no query has been set
+ */
+ public String getEscapedQuery() {
+     // new String((char[]) null) would throw a NullPointerException
+     return (_query == null) ? null : new String(_query);
+ }
+
+ // ----------------------------------------------------------- The fragment
+
+ /**
+ * Set the fragment.
+ *
+ * @param fragment the fragment string.
+ * @exception Exception
+ * UnsupportedEncodingException
+ */
+ public void setFragment(String fragment) throws Exception {
+ _fragment = encode(fragment, allowedFragment);
+ }
+
+
+ /**
+ * Get the fragment.
+ *
+ * @return the fragment string.
+ * @exception Exception
+ * UnsupportedEncodingException
+ */
+ public String getFragment() throws Exception {
+ return decode(_fragment);
+ }
+
+ /**
+ * Get the escaped fragment.
+ *
+ * @return the escaped fragment string, or null if no fragment has been
+ * set
+ */
+ public String getEscapedFragment() {
+     // new String((char[]) null) would throw a NullPointerException
+     return (_fragment == null) ? null : new String(_fragment);
+ }
+
+ // ------------------------------------------------------------- Utilities
+
+ /**
+ * Normalize this URI.
+ *
+ * @return the normalized URI
+ */
+ public URI normalize() {
+ // TODO: normalize and return URI
+ return null;
+ }
+
+ /**
+ * Test whether this object is equal to another.
+ *
+ * @param obj an object to compare
+ * @return true if two URI objects are equal
+ */
+ public boolean equals(Object obj) {
+ // TODO: normalize and test each components
+ return false;
+ }
+
/**
- * Get the scheme for this Generic URI.
+ * Get the escaped URI string.
+ * For the purpose of the protocol to be transported, it's useful.
*
- * @return The scheme for this Generic URI.
+ * @return the escaped URI string
*/
- public String getScheme() {
- return _scheme;
+ public String toString() {
+ return new String(_uri);
}
}
--
To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>