util URI.java

jericho Mon, 29 Apr 2002 00:28:04 -0700

jericho     02/04/29 00:45:22

  Modified:    src/util/org/apache/util URI.java
  Log:
  - Add javadoc examples of resolving relative paths
  - Normalize paths correctly when they end with . or ..
  - Fix typos in the path_segments initializer
  - Make URI tokenizing work correctly
  - This class has been tested for resolving and normalizing relative references
  
  Revision  Changes    Path
  1.5       +227 -106  jakarta-slide/src/util/org/apache/util/URI.java
  
  Index: URI.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- URI.java  24 Apr 2002 17:24:21 -0000      1.4
  +++ URI.java  29 Apr 2002 07:45:22 -0000      1.5
  @@ -1,7 +1,7 @@
   /*
  - * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v 1.4 2002/04/24 17:24:21 jericho Exp $
  - * $Revision: 1.4 $
  - * $Date: 2002/04/24 17:24:21 $
  + * $Header: /home/cvs/jakarta-slide/src/util/org/apache/util/URI.java,v 1.5 2002/04/29 07:45:22 jericho Exp $
  + * $Revision: 1.5 $
  + * $Date: 2002/04/29 07:45:22 $
    *
    * ====================================================================
    *
  @@ -128,7 +128,7 @@
    * relative URL(RFC 1808).
    *
    * @author <a href="mailto:[EMAIL PROTECTED]">Sung-Gu</a>
  - * @version $Revision: 1.4 $ $Date: 2002/03/14 15:14:01 
  + * @version $Revision: 1.5 $ $Date: 2002/03/14 15:14:01 
    */
   
   public class URI implements Comparable, java.io.Serializable {
  @@ -276,6 +276,41 @@
        * </pre></blockquote><p>
        * Resolving Relative References to Absolute Form.
        *
  +     * <strong>Examples of Resolving Relative URI References</strong>
  +     *
  +     * Within an object with a well-defined base URI of
  +     * <p><blockquote><pre>
  +     *   http://a/b/c/d;p?q
  +     * </pre></blockquote><p>
  +     * the relative URI would be resolved as follows:
  +     *
  +     * Normal Examples
  +     *
  +     * <p><blockquote><pre>
  +     *   g:h           =  g:h
  +     *   g             =  http://a/b/c/g
  +     *   ./g           =  http://a/b/c/g
  +     *   g/            =  http://a/b/c/g/
  +     *   /g            =  http://a/g
  +     *   //g           =  http://g
  +     *   ?y            =  http://a/b/c/?y
  +     *   g?y           =  http://a/b/c/g?y
  +     *   #s            =  (current document)#s
  +     *   g#s           =  http://a/b/c/g#s
  +     *   g?y#s         =  http://a/b/c/g?y#s
  +     *   ;x            =  http://a/b/c/;x
  +     *   g;x           =  http://a/b/c/g;x
  +     *   g;x?y#s       =  http://a/b/c/g;x?y#s
  +     *   .             =  http://a/b/c/
  +     *   ./            =  http://a/b/c/
  +     *   ..            =  http://a/b/
  +     *   ../           =  http://a/b/
  +     *   ../g          =  http://a/b/g
  +     *   ../..         =  http://a/
  +     *   ../../        =  http://a/ 
  +     *   ../../g       =  http://a/g
  +     * </pre></blockquote><p>
  +     *
        * Some URI schemes do not allow a hierarchical syntax matching the
        * <hier_part> syntax, and thus cannot use relative references.
        *
  @@ -283,12 +318,12 @@
        * @param relative the relative URI
        */
       public URI(URI base, URI relative) throws Exception {
  -        if (base._scheme != null) {
  +        if (base._scheme == null) {
               throw new IllegalArgumentException("base URI required");
           }
  -        if (relative._scheme != null && // is_relativeURI
  -                !equals(base._scheme, relative._scheme)) {
  -            throw new IllegalArgumentException("not relative URI");
  +        if (base._scheme != null) {
  +            this._scheme = base._scheme;
  +            this._authority = base._authority;
           }
           if (base._is_opaque_part || relative._is_opaque_part) {
               this._scheme = base._scheme;
  @@ -298,21 +333,21 @@
               this.setUriReference();
               return;
           }
  -        if (base._scheme != null) {
  -            this._scheme = base._scheme;
  -        }
  -        if (relative._authority != null) {
  +        if (relative._scheme != null) {
  +            this._scheme = relative._scheme;
               this._is_net_path = relative._is_net_path;
               this._authority = relative._authority;
               if (relative._is_server) {
  -                this._is_server = relative._is_server;
                   this._userinfo = relative._userinfo;
                   this._host = relative._host;
                   this._port = relative._port;
               } else if (relative._is_reg_name) {
                   this._is_reg_name = relative._is_reg_name;
               }
  -        } else if (base._authority != null) {
  +            this._is_abs_path = relative._is_abs_path;
  +            this._is_rel_path = relative._is_rel_path;
  +            this._path = relative._path;
  +        } else if (base._authority != null && relative._scheme == null) {
               this._is_net_path = base._is_net_path;
               this._authority = base._authority;
               if (base._is_server) {
  @@ -323,8 +358,26 @@
                   this._is_reg_name = base._is_reg_name;
               }
           }
  +        if (relative._authority != null) {
  +            this._is_net_path = relative._is_net_path;
  +            this._authority = relative._authority;
  +            if (relative._is_server) {
  +                this._is_server = relative._is_server;
  +                this._userinfo = relative._userinfo;
  +                this._host = relative._host;
  +                this._port = relative._port;
  +            } else if (relative._is_reg_name) {
  +                this._is_reg_name = relative._is_reg_name;
  +            }
  +            this._is_abs_path = relative._is_abs_path;
  +            this._is_rel_path = relative._is_rel_path;
  +            this._path = relative._path;
  +        }
           // resolve the path
  -        this._path = resolvePath(base._path, relative._path);
  +        if (relative._scheme == null && relative._authority == null || 
  +                equals(base._scheme, relative._scheme)) {
  +            this._path = resolvePath(base._path, relative._path);
  +        }
           // base._query removed
           if (relative._query != null) {
               this._query = relative._query;
  @@ -463,10 +516,10 @@
       protected static final BitSet alpha = new BitSet(256);
       // Static initializer for alpha
       static {
  -        for(int i='a';i<='z';i++) {
  +        for (int i = 'a'; i <= 'z';i++) {
               alpha.set(i);
           }
  -        for(int i='A';i<='Z';i++) {
  +        for (int i = 'A'; i <= 'Z';i++) {
               alpha.set(i);
           }
       }
  @@ -656,8 +709,8 @@
       protected static final BitSet path_segments = new BitSet(256);
       // Static initializer for path_segments
       static {
  -        segment.or(segment);
  -        segment.set('/');
  +        path_segments.set('/');
  +        path_segments.or(segment);
       }
   
   
  @@ -979,7 +1032,7 @@
       static {
           hier_part.or(net_path);
           hier_part.or(abs_path);
  -        hier_part.set('?');
  +        // hier_part.set('?'); already included
           hier_part.or(query);
       }
   
  @@ -995,7 +1048,7 @@
           relativeURI.or(net_path);
           relativeURI.or(abs_path);
           relativeURI.or(rel_path);
  -        relativeURI.set('?');
  +        // relativeURI.set('?'); already included
           relativeURI.or(query);
       }
   
  @@ -1086,13 +1139,13 @@
   
   
       /**
  -     * disallowed rel_segment before escaping
  +     * disallowed rel_path before escaping
        */
  -    public static final BitSet disallowed_rel_segment = new BitSet(256);
  -    // Static initializer for disallowed_rel_segment
  +    public static final BitSet disallowed_rel_path = new BitSet(256);
  +    // Static initializer for disallowed_rel_path
       static {
  -        disallowed_rel_segment.or(uric);
  -        disallowed_rel_segment.andNot(rel_segment);
  +        disallowed_rel_path.or(uric);
  +        disallowed_rel_path.andNot(rel_path);
       }
   
   
  @@ -1188,18 +1241,18 @@
       static {
           allowed_abs_path.or(abs_path);
           // allowed_abs_path.set('/');  // aleady included
  -        allowed_abs_path.clear('%');
  +        allowed_abs_path.andNot(percent);
       }
   
   
       /**
  -     * Those characters that are allowed within the rel_segment.
  +     * Those characters that are allowed within the rel_path.
        */
  -    public static final BitSet allowed_rel_segment = new BitSet(256);
  -    // Static initializer for allowed_rel_segment
  +    public static final BitSet allowed_rel_path = new BitSet(256);
  +    // Static initializer for allowed_rel_path
       static {
  -        allowed_rel_segment.or(rel_segment);
  -        allowed_rel_segment.clear('%');
  +        allowed_rel_path.or(rel_path);
  +        allowed_rel_path.clear('%');
       }
   
   
  @@ -1332,6 +1385,9 @@
           }
           String octets = new String(octet, _protocolCharset);
           char[] preuric = new char[octets.length()];
  +        if (octet.length == 0) {
  +            return preuric;  // defined, but empty
  +        }
           octets.getChars(0, octets.length(), preuric, 0);
           StringBuffer buf = new StringBuffer(preuric.length);
           for (int i = 0; i < preuric.length; i++) {
  @@ -1339,7 +1395,7 @@
               if (allowed.get(c)) {
                   buf.append(c);
               } else {
  -                byte b = (byte) preuric[i];
  +                byte b = (byte) c;
                   buf.append('%');
                   char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
                   buf.append(hexadecimal);
  @@ -1375,20 +1431,21 @@
           }
           byte[] octet = new String(uri).getBytes(_protocolCharset);
           int oi = 0; // output index
  -        for (int ii = 0; ii < uri.length; ) {
  +        for (int ii = 0; ii < uri.length; oi++) {
               byte b = (byte) octet[ii++];
               if (b == '%') {
  -                b = (byte) Character.digit(
  -                        (char) (octet[ii++] << 4 + octet[ii++]), 16);
  +                b = (byte) ((Character.digit((char) octet[ii++], 16) << 4) +
  +                Character.digit((char) octet[ii++], 16));
                   if (b == -1) {
                       throw new IllegalArgumentException(
                               "incomplete trailing escape pattern");
                   }
               }
  -            octet[oi++] = (byte) b;
  +            octet[oi] = (byte) b;
           }
  -        octet[oi] = (byte) '\0';
  -        return octet;
  +        byte[] result = new byte[oi];
  +        System.arraycopy(octet, 0, result, 0, oi);
  +        return result;
       }
   
   
  @@ -1407,8 +1464,9 @@
           }
           char[] target = component.toCharArray();
           for (int i = 0; i < target.length; i++) {
  -            if (disallowed.get(target[i]))
  +            if (disallowed.get(target[i])) {
                   return false;
  +            }
           }
           return true;
       }
  @@ -1448,7 +1506,7 @@
               BitSet generous) {
           // validate each component by generous characters
           if (eoffset == -1) {
  -            eoffset = component.length;
  +            eoffset = component.length -1;
           }
           for (int i = soffset; i < eoffset; i++) {
               if (!generous.get(component[i]))
  @@ -1509,12 +1567,26 @@
           }
   
           /**
  +         * The starting index
  +         */
  +        int from = 0;
  +
  +        /**
            * <p><blockquote><pre>
            *     @@@@@@@@
            *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
            * </pre></blockquote><p>
            */
  -        int at = tmp.indexOf(":/?#");
  +        int at = indexOf(tmp, ":/?#", from);
  +        if (at == -1) {
  +            at = 0;
  +        }
  +
  +        /**
  +         * The length of the sequence of characters.
  +         * It may not be equal to the length of the byte array.
  +         */
  +        int length = tmp.length();
   
           /**
            * <p><blockquote><pre>
  @@ -1523,20 +1595,15 @@
            *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
            * </pre></blockquote><p>
            */
  -        if (at > 0 && tmp.charAt(at) == ':') {
  +        if (0 < at && at < length && tmp.charAt(at) == ':') {
               char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
               if (validate(target, scheme)) {
                   _scheme = target;
               }
  +            from = ++at;
           }
   
           /**
  -         * The length of the sequence of characters.
  -         * It may not be equal to the length of the byte array.
  -         */
  -        int length = tmp.length();
  -
  -        /**
            * <p><blockquote><pre>
            *  authority =  $4 = jakarta.apache.org
            *                  @@
  @@ -1545,52 +1612,25 @@
            */
           // Reset flags
           _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
  -        if (at >= 0 && tmp.charAt(at) == '/') {
  +        if (0 <= at && at < length && tmp.charAt(at) == '/') {
               // Set flag
               _is_hier_part = true;
  -            if (at+2 < length && tmp.charAt(at+1) == '/') {
  -                // the temporaray index to start the search from
  -                int from = at + 2;
  -                int next = tmp.indexOf("/?#", from);  // at, if not -1
  +            if (at + 2 < length && tmp.charAt(at + 1) == '/') {
  +                // the temporary index to start the search from
  +                int next = indexOf(tmp, "/?#", at + 2);
                   if (next == -1) {
  -                    next = tmp.length();
  +                    next = (tmp.substring(at + 2).length() == 0) ? at + 2 :
  +                    tmp.length();
                   }
  -                parseAuthority(tmp.substring(from, next));
  -                at = next;
  +                parseAuthority(tmp.substring(at + 2, next));
  +                from = at = next;
                   // Set flag
                   _is_net_path = true;
               }
  -            if (tmp.charAt(at) == '/') {
  +            if (from == at) {
                   // Set flag
                   _is_abs_path = true;
               }
  -        } else {
  -            if (_scheme == null) { // is_relativeURI
  -                // rel_path = rel_segment [ abs_path ]
  -                int next = tmp.indexOf('/');
  -                if (next == -1) {
  -                    next = tmp.length();
  -                }
  -                // validating before escape encoding
  -                if (prevalidate(tmp.substring(at, next),
  -                            disallowed_rel_segment)) {
  -                    // Set flag
  -                    _is_rel_path = true;
  -                }
  -                // REMINDME: let us skip the rest of abs_path to validate
  -            } else { // is_absoluteURI
  -                // validating before escape encoding
  -                if (prevalidate(tmp.substring(at), disallowed_opaque_part)) {
  -                    // Set flag
  -                    _is_opaque_part = true;
  -                }
  -            }
  -            if (!_is_rel_path || !_is_opaque_part) {
  -                // correct validation.  possibly, only fragment.
  -                // is_relativeURI and is_absoluteURI must be false
  -                // Set flag
  -                _is_only_fragment = true;
  -            }
           }
   
           /**
  @@ -1600,12 +1640,26 @@
            *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
            * </pre></blockquote><p>
            */
  -        if (tmp.charAt(at) != '?' && tmp.charAt(at) != '#') {
  -            int from = at;
  -            int next = tmp.indexOf("?#", from);
  +        if (from < length) { // && tmp.charAt(from) != '?' &&
  +                // tmp.charAt(from) != '#') {
  +            // rel_path = rel_segment [ abs_path ]
  +            int next = indexOf(tmp, "?#", from);
               if (next == -1) {
                   next = tmp.length();
               }
  +            if (prevalidate(tmp.substring(from, next),
  +                        disallowed_rel_path)) {
  +                // Set flag
  +                _is_rel_path = true;
  +            } else if (prevalidate(tmp.substring(from, next),
  +                        disallowed_opaque_part)) {
  +                // validating before escape encoding // is_absoluteURI
  +                // Set flag
  +                _is_opaque_part = true;
  +            } else {
  +                // the path component is never undefined, though it may be empty
  +                _path = new char[] {'\0'};
  +            }
               setPath(tmp.substring(from, next));
               at = next;
           }
  @@ -1617,13 +1671,13 @@
            *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
            * </pre></blockquote><p>
            */
  -        if (at+1 < length && tmp.charAt(at) == '?') {
  -            int from = at + 1;
  -            int next = tmp.indexOf('#', from);
  -            if (next != -1) {
  -                _query = encode(tmp.substring(from, next), allowed_query);
  -                at = next;
  +        if (0 <= at && at+1 < length && tmp.charAt(at) == '?') {
  +            int next = tmp.indexOf('#', at + 1);
  +            if (next == -1) {
  +                next = tmp.length();
               }
  +            _query = encode(tmp.substring(at + 1, next), allowed_query);
  +            at = next;
           }
   
           /**
  @@ -1633,9 +1687,14 @@
            *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
            * </pre></blockquote><p>
            */
  -        if (at+1 < length && tmp.charAt(at) == '#') {
  -            int from = at + 1;
  -            _fragment = encode(tmp.substring(from), allowed_fragment);
  +        if (0 <= at && at+1 < length && tmp.charAt(at) == '#') {
  +            _fragment = encode(tmp.substring(at + 1), allowed_fragment);
  +            if (!_is_abs_path && !_is_rel_path && !_is_opaque_part) {
  +                // correct validation.  possibly, only fragment.
  +                // is_relativeURI and is_absoluteURI must be false
  +                // Set flag
  +                _is_only_fragment = true;
  +            }
           }
   
           // set this URI.
  @@ -1644,6 +1703,54 @@
   
   
       /**
  +     * Get the earliest index at which any of the given delimiter characters
  +     * occurs in the given string.
  +     *
  +     * @param s the string to be indexed
  +     * @param delims the delimiters used to index
  +     * @return the earliest index of any delimiter, or -1 if none is found
  +     */
  +    protected int indexOf(String s, String delims) {
  +        return indexOf(s, delims, -1);
  +    }
  +
  +
  +    /**
  +     * Get the earliest index, at or after the given offset, at which any of
  +     * the given delimiter characters occurs in the given string.
  +     *
  +     * @param s the string to be indexed
  +     * @param delims the delimiters used to index
  +     * @param offset the index to start the search from
  +     * @return the earliest index of any delimiter, or -1 if none is found
  +     */
  +    protected int indexOf(String s, String delims, int offset) {
  +        if (s == null || s.length() == 0) {
  +            return -1;
  +        }
  +        if (delims == null || delims.length() == 0) {
  +            return -1;
  +        }
  +        // check boundaries
  +        if (offset < 0) {
  +            offset = 0;
  +        } else if (offset > s.length()) {
  +            return -1;
  +        }
  +        // s is never null
  +        int min = s.length();
  +        char[] delim = delims.toCharArray();
  +        for (int i = 0; i < delim.length; i++) {
  +            int at = s.indexOf(delim[i], offset);
  +            if (at >= 0 && at < min) {
  +                min = at;
  +            }
  +        }
  +        return (min == s.length()) ? -1 : min;
  +    }
  +
  +
  +    /**
        * Parse the authority component.
        *
        * @param original the original character sequence of authority component
  @@ -1658,7 +1765,7 @@
   
           int from = 0;
           int next = original.indexOf('@');
  -        if (next != -1) {  // neither -1 and 0
  +        if (next != -1) { // neither -1 and 0
               // if next == 0, for example, in ftp, userinfo = 'anonymous'
               // each protocol extented from URI supports the specific userinfo
               _userinfo = encode(original.substring(0, next), allowed_userinfo);
  @@ -1682,12 +1789,12 @@
               if (next == -1) {
                   next = original.length();
               }
  +            // REMINDME: it doesn't need the pre-validation
  +            _host = original.substring(from, next).toCharArray();
               if (validate(_host, IPv4address)) {
  -                _host = original.substring(from, next).toCharArray();
                   // Set flag
                   _is_IPv4address = true;
               } else if (validate(_host, hostname)) {
  -                _host = original.substring(from, next).toCharArray();
                   // Set flag
                   _is_hostname = true;
               } else {
  @@ -1702,7 +1809,8 @@
               // set a registry-based naming authority
               _authority = encode(original.toString(), allowed_reg_name);
           } else {
  -            if (original.charAt(next) == ':') {
  +            next = original.indexOf('/', from);
  +            if (next > 0 && original.charAt(next) == ':') { // not empty
                   from = next + 1;
                   _port = Integer.parseInt(original.substring(from));
               }
  @@ -1747,8 +1855,11 @@
           }
           if (_opaque != null && _is_opaque_part) {
               buf.append(_opaque);
  -        } else if (_path != null) { // _is_hier_part or _is_relativeURI
  -            buf.append(_path);
  +        } else if (_path != null) { //  && _path.length != 0) {
  +            // _is_hier_part or _is_relativeURI
  +            if (_path.length != 0) {
  +                buf.append(_path);
  +            }
               if (_query != null) { // has_query
                   buf.append('?');
                   buf.append(_query);
  @@ -2147,10 +2258,10 @@
               StringBuffer buff = new StringBuffer(path.length());
               int at = path.indexOf('/');
               if (at > 0) {  // never 0
  -                buff.append(encode(path.substring(0, at), allowed_rel_segment));
  +                buff.append(encode(path.substring(0, at), allowed_rel_path));
                   buff.append(encode(path.substring(at), allowed_abs_path));
               } else {
  -                buff.append(encode(path, allowed_rel_segment));
  +                buff.append(encode(path, allowed_rel_path));
               }
               _path = buff.toString().toCharArray();
           } else if (_is_opaque_part) {
  @@ -2176,7 +2287,7 @@
               base_path = base.substring(0, at + 1).toCharArray();
           }
           // _path could be empty
  -        if (rel_path.length == 0) {
  +        if (rel_path == null || rel_path.length == 0) {
               return normalize(base_path);
           } else if (rel_path[0] == '/') {
               return rel_path;
  @@ -2493,9 +2604,14 @@
               return null;
           }
           String normalized = new String(path);
  +        boolean endsWithSlash = true;
           // precondition
           if (!normalized.endsWith("/")) {
               normalized += '/';
  +            endsWithSlash = false;
  +        }
  +        if (normalized.endsWith("/./") || normalized.endsWith("/../")) {
  +            endsWithSlash = true;
           }
           // Resolve occurrences of "/./" in the normalized path
           while (true) {
  @@ -2533,6 +2649,11 @@
               }
               normalized = normalized.substring(0, at) +
               normalized.substring(at + 1);
  +        }
  +        if (!endsWithSlash && normalized.endsWith("/")) {
  +            normalized = normalized.substring(0, normalized.length()-1);
  +        } else if (endsWithSlash && !normalized.endsWith("/")) {
  +            normalized = normalized + "/";
           }
           // Set the normalized path that we have completed
           return normalized.toCharArray();


--
To unsubscribe, e-mail:   <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>

cvs commit: jakarta-slide/src/util/org/apache/util URI.java

Reply via email to