mrglavas 2004/09/15 21:29:28 Modified: java/src/org/apache/xerces/util URI.java Log: Adding a new initialization method which has a parameter that
controls whether an exception is thrown if the URI specified is relative. Also factoring the URI resolution code into an absolutize method as well as a method which returns whether the URI is absolute. This will enable us to avoid throwing an exception for relative URIs, but allow us to detect a relative URI so that we can absolutize it against a base. This contribution is a slightly modified patch from John Kim, IBM. Revision Changes Path 1.20 +243 -58 xml-xerces/java/src/org/apache/xerces/util/URI.java Index: URI.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/URI.java,v retrieving revision 1.19 retrieving revision 1.20 diff -u -r1.19 -r1.20 --- URI.java 15 Aug 2004 21:22:16 -0000 1.19 +++ URI.java 16 Sep 2004 04:29:28 -0000 1.20 @@ -270,7 +270,30 @@ public URI(String p_uriSpec) throws MalformedURIException { this((URI)null, p_uriSpec); } - + + /** + * Construct a new URI from a URI specification string. If the + * specification follows the "generic URI" syntax, (two slashes + * following the first colon), the specification will be parsed + * accordingly - setting the scheme, userinfo, host,port, path, query + * string and fragment fields as necessary. If the specification does + * not follow the "generic URI" syntax, the specification is parsed + * into a scheme and scheme-specific part (stored as the path) only. + * Construct a relative URI if boolean is assigned to "true" + * and p_uriSpec is not valid absolute URI, instead of throwing an exception. + * + * @param p_uriSpec the URI specification string (cannot be null or + * empty) + * @param allowRelativeURI true to set up not throwing an exception + * in case of relative URI, false otherwise. 
+ * + * @exception MalformedURIException if p_uriSpec violates any syntax + * rules + */ + public URI(String p_uriSpec, boolean allowRelativeURI) throws MalformedURIException { + this((URI)null, p_uriSpec, allowRelativeURI); + } + /** * Construct a new URI from a base URI and a URI specification string. * The URI specification string may be a relative URI. @@ -286,6 +309,27 @@ public URI(URI p_base, String p_uriSpec) throws MalformedURIException { initialize(p_base, p_uriSpec); } + + /** + * Construct a new URI from a base URI and a URI specification string. + * The URI specification string may be a relative URI. + * Construct a relative URI if boolean is assigned to "true" + * and p_uriSpec is not valid absolute URI and p_base is null + * instead of throwing an exception. + * + * @param p_base the base URI (cannot be null if p_uriSpec is null or + * empty) + * @param p_uriSpec the URI specification string (cannot be null or + * empty if p_base is null) + * @param allowRelativeURI true to set up not throwing an exception + * in case of relative URI, false otherwise. + * + * @exception MalformedURIException if p_uriSpec violates any syntax + * rules + */ + public URI(URI p_base, String p_uriSpec, boolean allowRelativeURI) throws MalformedURIException { + initialize(p_base, p_uriSpec, allowRelativeURI); + } /** * Construct a new URI that does not follow the generic URI syntax. @@ -421,6 +465,125 @@ m_queryString = p_other.getQueryString(); m_fragment = p_other.getFragment(); } + + /** + * Initializes this URI from a base URI and a URI specification string. + * See RFC 2396 Section 4 and Appendix B for specifications on parsing + * the URI and Section 5 for specifications on resolving relative URIs + * and relative paths. 
+ * + * @param p_base the base URI (may be null if p_uriSpec is an absolute + * URI) + * @param p_uriSpec the URI spec string which may be an absolute or + * relative URI (can only be null/empty if p_base + * is not null) + * @param allowRelativeURI true to set up not throwing an exception + * in case of relative URI, false otherwise. + * + * @exception MalformedURIException if p_base is null and p_uriSpec + * is not an absolute URI or if + * p_uriSpec violates syntax rules + */ + private void initialize(URI p_base, String p_uriSpec, boolean allowRelativeURI) + throws MalformedURIException { + + String uriSpec = p_uriSpec; + int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0; + + if (p_base == null && uriSpecLen == 0) { + throw new MalformedURIException("Cannot initialize URI with empty parameters."); + } + + // just make a copy of the base if spec is empty + if (uriSpecLen == 0) { + initialize(p_base); + return; + } + + int index = 0; + + // Check for scheme, which must be before '/', '?' or '#'. Also handle + // names with DOS drive letters ('D:'), so 1-character schemes are not + // allowed. + int colonIdx = uriSpec.indexOf(':'); + if (colonIdx != -1) { + final int searchFrom = colonIdx - 1; + // search backwards starting from character before ':'. + int slashIdx = uriSpec.lastIndexOf('/', searchFrom); + int queryIdx = uriSpec.lastIndexOf('?', searchFrom); + int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom); + + if (colonIdx < 2 || slashIdx != -1 || + queryIdx != -1 || fragmentIdx != -1) { + // A standalone base is a valid URI according to spec + if (colonIdx == 0 || (p_base == null && fragmentIdx != 0 && !allowRelativeURI)) { + throw new MalformedURIException("No scheme found in URI."); + } + } + else { + initializeScheme(uriSpec); + index = m_scheme.length()+1; + + // Neither 'scheme:' or 'scheme:#fragment' are valid URIs. 
+ if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') { + throw new MalformedURIException("Scheme specific part cannot be empty."); + } + } + } + else if (p_base == null && uriSpec.indexOf('#') != 0 && !allowRelativeURI) { + throw new MalformedURIException("No scheme found in URI."); + } + + // Two slashes means we may have authority, but definitely means we're either + // matching net_path or abs_path. These two productions are ambiguous in that + // every net_path (except those containing an IPv6Reference) is an abs_path. + // RFC 2396 resolves this ambiguity by applying a greedy left most matching rule. + // Try matching net_path first, and if that fails we don't have authority so + // then attempt to match abs_path. + // + // net_path = "//" authority [ abs_path ] + // abs_path = "/" path_segments + if (((index+1) < uriSpecLen) && + (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) { + index += 2; + int startPos = index; + + // Authority will be everything up to path, query or fragment + char testChar = '\0'; + while (index < uriSpecLen) { + testChar = uriSpec.charAt(index); + if (testChar == '/' || testChar == '?' || testChar == '#') { + break; + } + index++; + } + + // Attempt to parse authority. If the section is an empty string + // this is a valid server based authority, so set the host to this + // value. + if (index > startPos) { + // If we didn't find authority we need to back up. Attempt to + // match against abs_path next. 
+ if (!initializeAuthority(uriSpec.substring(startPos, index))) { + index = startPos - 2; + } + } + else { + m_host = ""; + } + } + + initializePath(uriSpec, index); + + // Resolve relative URI to base URI - see RFC 2396 Section 5.2 + // In some cases, it might make more sense to throw an exception + // (when scheme is specified is the string spec and the base URI + // is also specified, for example), but we're just following the + // RFC specifications + if (p_base != null) { + absolutize(p_base); + } + } /** * Initializes this URI from a base URI and a URI specification string. @@ -536,6 +699,16 @@ // is also specified, for example), but we're just following the // RFC specifications if (p_base != null) { + absolutize(p_base); + } + } + + /** + * Absolutize URI with given base URI. + * + * @param p_base base URI for absolutization + */ + public void absolutize(URI p_base) { // check to see if this is the current doc - RFC 2396 5.2 #2 // note that this is slightly different from the RFC spec in that @@ -546,108 +719,109 @@ // identified this as a bug in the RFC if (m_path.length() == 0 && m_scheme == null && m_host == null && m_regAuthority == null) { - m_scheme = p_base.getScheme(); - m_userinfo = p_base.getUserinfo(); - m_host = p_base.getHost(); - m_port = p_base.getPort(); - m_regAuthority = p_base.getRegBasedAuthority(); - m_path = p_base.getPath(); - - if (m_queryString == null) { - m_queryString = p_base.getQueryString(); - } - return; + m_scheme = p_base.getScheme(); + m_userinfo = p_base.getUserinfo(); + m_host = p_base.getHost(); + m_port = p_base.getPort(); + m_regAuthority = p_base.getRegBasedAuthority(); + m_path = p_base.getPath(); + + if (m_queryString == null) { + m_queryString = p_base.getQueryString(); + } + return; } - + // check for scheme - RFC 2396 5.2 #3 // if we found a scheme, it means absolute URI, so we're done if (m_scheme == null) { - m_scheme = p_base.getScheme(); + m_scheme = p_base.getScheme(); } else { - return; + return; } - + // 
check for authority - RFC 2396 5.2 #4 // if we found a host, then we've got a network path, so we're done if (m_host == null && m_regAuthority == null) { - m_userinfo = p_base.getUserinfo(); - m_host = p_base.getHost(); - m_port = p_base.getPort(); - m_regAuthority = p_base.getRegBasedAuthority(); + m_userinfo = p_base.getUserinfo(); + m_host = p_base.getHost(); + m_port = p_base.getPort(); + m_regAuthority = p_base.getRegBasedAuthority(); } else { - return; + return; } - + // check for absolute path - RFC 2396 5.2 #5 if (m_path.length() > 0 && - m_path.startsWith("/")) { - return; + m_path.startsWith("/")) { + return; } - + // if we get to this point, we need to resolve relative path // RFC 2396 5.2 #6 String path = ""; String basePath = p_base.getPath(); - + // 6a - get all but the last segment of the base URI path if (basePath != null && basePath.length() > 0) { - int lastSlash = basePath.lastIndexOf('/'); - if (lastSlash != -1) { - path = basePath.substring(0, lastSlash+1); - } + int lastSlash = basePath.lastIndexOf('/'); + if (lastSlash != -1) { + path = basePath.substring(0, lastSlash+1); + } } else if (m_path.length() > 0) { - path = "/"; + path = "/"; } - + // 6b - append the relative URI path path = path.concat(m_path); - + // 6c - remove all "./" where "." is a complete path segment - index = -1; + int index = -1; while ((index = path.indexOf("/./")) != -1) { - path = path.substring(0, index+1).concat(path.substring(index+3)); + path = path.substring(0, index+1).concat(path.substring(index+3)); } - + // 6d - remove "." if path ends with "." as a complete path segment if (path.endsWith("/.")) { - path = path.substring(0, path.length()-1); + path = path.substring(0, path.length()-1); } - + // 6e - remove all "<segment>/../" where "<segment>" is a complete // path segment not equal to ".." 
index = 1; int segIndex = -1; String tempString = null; - + while ((index = path.indexOf("/../", index)) > 0) { - tempString = path.substring(0, path.indexOf("/../")); - segIndex = tempString.lastIndexOf('/'); - if (segIndex != -1) { - if (!tempString.substring(segIndex).equals("..")) { - path = path.substring(0, segIndex+1).concat(path.substring(index+4)); - index = segIndex; + tempString = path.substring(0, path.indexOf("/../")); + segIndex = tempString.lastIndexOf('/'); + if (segIndex != -1) { + if (!tempString.substring(segIndex).equals("..")) { + path = path.substring(0, segIndex+1).concat(path.substring(index+4)); + index = segIndex; + } + else { + index += 4; + } + } + else { + index += 4; } - else - index += 4; - } - else - index += 4; } - + // 6f - remove ending "<segment>/.." where "<segment>" is a // complete path segment if (path.endsWith("/..")) { - tempString = path.substring(0, path.length()-3); - segIndex = tempString.lastIndexOf('/'); - if (segIndex != -1) { - path = path.substring(0, segIndex+1); - } + tempString = path.substring(0, path.length()-3); + segIndex = tempString.lastIndexOf('/'); + if (segIndex != -1) { + path = path.substring(0, segIndex+1); + } } m_path = path; - } } /** @@ -1524,6 +1698,17 @@ // presence of the host (whether valid or empty) means // double-slashes which means generic uri return (m_host != null); + } + + /** + * Returns whether this URI represents an absolute URI. + * + * @return true if this URI represents an absolute URI, false + * otherwise + */ + public boolean isAbsoluteURI() { + // presence of the scheme means absolute uri + return (m_scheme != null); } /** --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]