mrglavas 2004/09/15 21:29:28
Modified: java/src/org/apache/xerces/util URI.java
Log:
Adding a new initialization method which has a parameter that
controls whether an exception is thrown if the URI specified
is relative. Also factoring the URI resolution code into an
absolutize method as well as a method which returns whether
the URI is absolute. This will enable us to avoid throwing an
exception for relative URIs, but allow us to detect a relative
URI so that we can absolutize it against a base.
This contribution is a slightly modified patch from John Kim, IBM.
Revision Changes Path
1.20 +243 -58 xml-xerces/java/src/org/apache/xerces/util/URI.java
Index: URI.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/URI.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- URI.java 15 Aug 2004 21:22:16 -0000 1.19
+++ URI.java 16 Sep 2004 04:29:28 -0000 1.20
@@ -270,7 +270,30 @@
public URI(String p_uriSpec) throws MalformedURIException {
this((URI)null, p_uriSpec);
}
-
+
+ /**
+ * Construct a new URI from a URI specification string. If the
+ * specification follows the "generic URI" syntax, (two slashes
+ * following the first colon), the specification will be parsed
+ * accordingly - setting the scheme, userinfo, host, port, path, query
+ * string and fragment fields as necessary. If the specification does
+ * not follow the "generic URI" syntax, the specification is parsed
+ * into a scheme and scheme-specific part (stored as the path) only.
+ * If allowRelativeURI is true and p_uriSpec is not a valid absolute URI,
+ * a relative URI is constructed instead of throwing an exception.
+ *
+ * @param p_uriSpec the URI specification string (cannot be null or
+ * empty)
+ * @param allowRelativeURI true to accept a relative URI without throwing
+ * an exception, false otherwise.
+ *
+ * @exception MalformedURIException if p_uriSpec violates any syntax
+ * rules
+ */
+ public URI(String p_uriSpec, boolean allowRelativeURI) throws
MalformedURIException {
+ this((URI)null, p_uriSpec, allowRelativeURI);
+ }
+
/**
* Construct a new URI from a base URI and a URI specification string.
* The URI specification string may be a relative URI.
@@ -286,6 +309,27 @@
public URI(URI p_base, String p_uriSpec) throws MalformedURIException {
initialize(p_base, p_uriSpec);
}
+
+ /**
+ * Construct a new URI from a base URI and a URI specification string.
+ * The URI specification string may be a relative URI.
+ * If allowRelativeURI is true, p_uriSpec is not a valid absolute URI
+ * and p_base is null, a relative URI is constructed instead of
+ * throwing an exception.
+ *
+ * @param p_base the base URI (cannot be null if p_uriSpec is null or
+ * empty)
+ * @param p_uriSpec the URI specification string (cannot be null or
+ * empty if p_base is null)
+ * @param allowRelativeURI true to accept a relative URI without throwing
+ * an exception, false otherwise.
+ *
+ * @exception MalformedURIException if p_uriSpec violates any syntax
+ * rules
+ */
+ public URI(URI p_base, String p_uriSpec, boolean allowRelativeURI) throws
MalformedURIException {
+ initialize(p_base, p_uriSpec, allowRelativeURI);
+ }
/**
* Construct a new URI that does not follow the generic URI syntax.
@@ -421,6 +465,125 @@
m_queryString = p_other.getQueryString();
m_fragment = p_other.getFragment();
}
+
+ /**
+ * Initializes this URI from a base URI and a URI specification string.
+ * See RFC 2396 Section 4 and Appendix B for specifications on parsing
+ * the URI and Section 5 for specifications on resolving relative URIs
+ * and relative paths.
+ *
+ * @param p_base the base URI (may be null if p_uriSpec is an absolute
+ * URI)
+ * @param p_uriSpec the URI spec string which may be an absolute or
+ * relative URI (can only be null/empty if p_base
+ * is not null)
+ * @param allowRelativeURI true to accept a relative URI without throwing
+ * an exception, false otherwise.
+ *
+ * @exception MalformedURIException if p_base is null, allowRelativeURI is
+ * false and p_uriSpec is not an absolute
+ * URI, or if p_uriSpec violates syntax rules
+ */
+ private void initialize(URI p_base, String p_uriSpec, boolean allowRelativeURI)
+ throws MalformedURIException {
+
+ String uriSpec = p_uriSpec;
+ int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0;
+
+ if (p_base == null && uriSpecLen == 0) {
+ throw new MalformedURIException("Cannot initialize URI with empty
parameters.");
+ }
+
+ // just make a copy of the base if spec is empty
+ if (uriSpecLen == 0) {
+ initialize(p_base);
+ return;
+ }
+
+ int index = 0;
+
+ // Check for scheme, which must be before '/', '?' or '#'. Also handle
+ // names with DOS drive letters ('D:'), so 1-character schemes are not
+ // allowed.
+ int colonIdx = uriSpec.indexOf(':');
+ if (colonIdx != -1) {
+ final int searchFrom = colonIdx - 1;
+ // search backwards starting from character before ':'.
+ int slashIdx = uriSpec.lastIndexOf('/', searchFrom);
+ int queryIdx = uriSpec.lastIndexOf('?', searchFrom);
+ int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom);
+
+ if (colonIdx < 2 || slashIdx != -1 ||
+ queryIdx != -1 || fragmentIdx != -1) {
+ // A standalone base is a valid URI according to spec
+ if (colonIdx == 0 || (p_base == null && fragmentIdx != 0 &&
!allowRelativeURI)) {
+ throw new MalformedURIException("No scheme found in URI.");
+ }
+ }
+ else {
+ initializeScheme(uriSpec);
+ index = m_scheme.length()+1;
+
+ // Neither 'scheme:' nor 'scheme:#fragment' is a valid URI.
+ if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') {
+ throw new MalformedURIException("Scheme specific part cannot be
empty.");
+ }
+ }
+ }
+ else if (p_base == null && uriSpec.indexOf('#') != 0 && !allowRelativeURI) {
+ throw new MalformedURIException("No scheme found in URI.");
+ }
+
+ // Two slashes means we may have authority, but definitely means we're either
+ // matching net_path or abs_path. These two productions are ambiguous in that
+ // every net_path (except those containing an IPv6Reference) is an abs_path.
+ // RFC 2396 resolves this ambiguity by applying a greedy left most matching rule.
+ // Try matching net_path first, and if that fails we don't have authority so
+ // then attempt to match abs_path.
+ //
+ // net_path = "//" authority [ abs_path ]
+ // abs_path = "/" path_segments
+ if (((index+1) < uriSpecLen) &&
+ (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) {
+ index += 2;
+ int startPos = index;
+
+ // Authority will be everything up to path, query or fragment
+ char testChar = '\0';
+ while (index < uriSpecLen) {
+ testChar = uriSpec.charAt(index);
+ if (testChar == '/' || testChar == '?' || testChar == '#') {
+ break;
+ }
+ index++;
+ }
+
+ // Attempt to parse authority. If the section is an empty string
+ // this is a valid server based authority, so set the host to this
+ // value.
+ if (index > startPos) {
+ // If we didn't find authority we need to back up. Attempt to
+ // match against abs_path next.
+ if (!initializeAuthority(uriSpec.substring(startPos, index))) {
+ index = startPos - 2;
+ }
+ }
+ else {
+ m_host = "";
+ }
+ }
+
+ initializePath(uriSpec, index);
+
+ // Resolve relative URI to base URI - see RFC 2396 Section 5.2
+ // In some cases, it might make more sense to throw an exception
+ // (when scheme is specified in the string spec and the base URI
+ // is also specified, for example), but we're just following the
+ // RFC specifications
+ if (p_base != null) {
+ absolutize(p_base);
+ }
+ }
/**
* Initializes this URI from a base URI and a URI specification string.
@@ -536,6 +699,16 @@
// is also specified, for example), but we're just following the
// RFC specifications
if (p_base != null) {
+ absolutize(p_base);
+ }
+ }
+
+ /**
+ * Absolutize URI with given base URI.
+ *
+ * @param p_base base URI for absolutization
+ */
+ public void absolutize(URI p_base) {
// check to see if this is the current doc - RFC 2396 5.2 #2
// note that this is slightly different from the RFC spec in that
@@ -546,108 +719,109 @@
// identified this as a bug in the RFC
if (m_path.length() == 0 && m_scheme == null &&
m_host == null && m_regAuthority == null) {
- m_scheme = p_base.getScheme();
- m_userinfo = p_base.getUserinfo();
- m_host = p_base.getHost();
- m_port = p_base.getPort();
- m_regAuthority = p_base.getRegBasedAuthority();
- m_path = p_base.getPath();
-
- if (m_queryString == null) {
- m_queryString = p_base.getQueryString();
- }
- return;
+ m_scheme = p_base.getScheme();
+ m_userinfo = p_base.getUserinfo();
+ m_host = p_base.getHost();
+ m_port = p_base.getPort();
+ m_regAuthority = p_base.getRegBasedAuthority();
+ m_path = p_base.getPath();
+
+ if (m_queryString == null) {
+ m_queryString = p_base.getQueryString();
+ }
+ return;
}
-
+
// check for scheme - RFC 2396 5.2 #3
// if we found a scheme, it means absolute URI, so we're done
if (m_scheme == null) {
- m_scheme = p_base.getScheme();
+ m_scheme = p_base.getScheme();
}
else {
- return;
+ return;
}
-
+
// check for authority - RFC 2396 5.2 #4
// if we found a host, then we've got a network path, so we're done
if (m_host == null && m_regAuthority == null) {
- m_userinfo = p_base.getUserinfo();
- m_host = p_base.getHost();
- m_port = p_base.getPort();
- m_regAuthority = p_base.getRegBasedAuthority();
+ m_userinfo = p_base.getUserinfo();
+ m_host = p_base.getHost();
+ m_port = p_base.getPort();
+ m_regAuthority = p_base.getRegBasedAuthority();
}
else {
- return;
+ return;
}
-
+
// check for absolute path - RFC 2396 5.2 #5
if (m_path.length() > 0 &&
- m_path.startsWith("/")) {
- return;
+ m_path.startsWith("/")) {
+ return;
}
-
+
// if we get to this point, we need to resolve relative path
// RFC 2396 5.2 #6
String path = "";
String basePath = p_base.getPath();
-
+
// 6a - get all but the last segment of the base URI path
if (basePath != null && basePath.length() > 0) {
- int lastSlash = basePath.lastIndexOf('/');
- if (lastSlash != -1) {
- path = basePath.substring(0, lastSlash+1);
- }
+ int lastSlash = basePath.lastIndexOf('/');
+ if (lastSlash != -1) {
+ path = basePath.substring(0, lastSlash+1);
+ }
}
else if (m_path.length() > 0) {
- path = "/";
+ path = "/";
}
-
+
// 6b - append the relative URI path
path = path.concat(m_path);
-
+
// 6c - remove all "./" where "." is a complete path segment
- index = -1;
+ int index = -1;
while ((index = path.indexOf("/./")) != -1) {
- path = path.substring(0, index+1).concat(path.substring(index+3));
+ path = path.substring(0, index+1).concat(path.substring(index+3));
}
-
+
// 6d - remove "." if path ends with "." as a complete path segment
if (path.endsWith("/.")) {
- path = path.substring(0, path.length()-1);
+ path = path.substring(0, path.length()-1);
}
-
+
// 6e - remove all "<segment>/../" where "<segment>" is a complete
// path segment not equal to ".."
index = 1;
int segIndex = -1;
String tempString = null;
-
+
while ((index = path.indexOf("/../", index)) > 0) {
- tempString = path.substring(0, path.indexOf("/../"));
- segIndex = tempString.lastIndexOf('/');
- if (segIndex != -1) {
- if (!tempString.substring(segIndex).equals("..")) {
- path = path.substring(0, segIndex+1).concat(path.substring(index+4));
- index = segIndex;
+ tempString = path.substring(0, path.indexOf("/../"));
+ segIndex = tempString.lastIndexOf('/');
+ if (segIndex != -1) {
+ if (!tempString.substring(segIndex).equals("..")) {
+ path = path.substring(0,
segIndex+1).concat(path.substring(index+4));
+ index = segIndex;
+ }
+ else {
+ index += 4;
+ }
+ }
+ else {
+ index += 4;
}
- else
- index += 4;
- }
- else
- index += 4;
}
-
+
// 6f - remove ending "<segment>/.." where "<segment>" is a
// complete path segment
if (path.endsWith("/..")) {
- tempString = path.substring(0, path.length()-3);
- segIndex = tempString.lastIndexOf('/');
- if (segIndex != -1) {
- path = path.substring(0, segIndex+1);
- }
+ tempString = path.substring(0, path.length()-3);
+ segIndex = tempString.lastIndexOf('/');
+ if (segIndex != -1) {
+ path = path.substring(0, segIndex+1);
+ }
}
m_path = path;
- }
}
/**
@@ -1524,6 +1698,17 @@
// presence of the host (whether valid or empty) means
// double-slashes which means generic uri
return (m_host != null);
+ }
+
+ /**
+ * Returns whether this URI represents an absolute URI.
+ *
+ * @return true if this URI represents an absolute URI, false
+ * otherwise
+ */
+ public boolean isAbsoluteURI() {
+ // presence of the scheme means absolute uri
+ return (m_scheme != null);
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]