mrglavas    2004/09/15 21:29:28

  Modified:    java/src/org/apache/xerces/util URI.java
  Log:
  Adding a new initialization method which has a parameter that

  controls whether an exception is thrown if the URI specified

  is relative. Also factoring the URI resolution code into an

  absolutize method as well as a method which returns whether

  the URI is absolute. This will enable us to avoid throwing an

  exception for relative URIs, but allow us to detect a relative

  URI so that we can absolutize it against a base.

  

  This contribution is a slightly modified patch from John Kim, IBM.
  
  Revision  Changes    Path
  1.20      +243 -58   xml-xerces/java/src/org/apache/xerces/util/URI.java
  
  Index: URI.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/URI.java,v
  retrieving revision 1.19
  retrieving revision 1.20
  diff -u -r1.19 -r1.20
  --- URI.java  15 Aug 2004 21:22:16 -0000      1.19
  +++ URI.java  16 Sep 2004 04:29:28 -0000      1.20
  @@ -270,7 +270,30 @@
     public URI(String p_uriSpec) throws MalformedURIException {
       this((URI)null, p_uriSpec);
     }
  -
  +  
  +  /**
  +   * Construct a new URI from a URI specification string. If the
  +   * specification follows the "generic URI" syntax, (two slashes
  +   * following the first colon), the specification will be parsed
  +   * accordingly - setting the scheme, userinfo, host,port, path, query
  +   * string and fragment fields as necessary. If the specification does
  +   * not follow the "generic URI" syntax, the specification is parsed
  +   * into a scheme and scheme-specific part (stored as the path) only.
  +   * Construct a relative URI if boolean is assigned to "true"
  +   * and p_uriSpec is not valid absolute URI, instead of throwing an exception. 
  +   * 
  +   * @param p_uriSpec the URI specification string (cannot be null or
  +   *                  empty)
  +   * @param allowRelativeURI true to set up not throwing an exception 
  +   *                      in case of relative URI, false otherwise.
  +   *
  +   * @exception MalformedURIException if p_uriSpec violates any syntax
  +   *                                   rules
  +   */
  +  public URI(String p_uriSpec, boolean allowRelativeURI) throws 
MalformedURIException {
  +      this((URI)null, p_uriSpec, allowRelativeURI);
  +  }
  +  
    /**
     * Construct a new URI from a base URI and a URI specification string.
     * The URI specification string may be a relative URI.
  @@ -286,6 +309,27 @@
     public URI(URI p_base, String p_uriSpec) throws MalformedURIException {
       initialize(p_base, p_uriSpec);
     }
  +  
  +  /**
  +   * Construct a new URI from a base URI and a URI specification string.
  +   * The URI specification string may be a relative URI.
  +   * Construct a relative URI if boolean is assigned to "true"
  +   * and p_uriSpec is not valid absolute URI and p_base is null
  +   * instead of throwing an exception. 
  +   *
  +   * @param p_base the base URI (cannot be null if p_uriSpec is null or
  +   *               empty)
  +   * @param p_uriSpec the URI specification string (cannot be null or
  +   *                  empty if p_base is null)
  +   * @param allowRelativeURI true to set up not throwing an exception 
  +   *                      in case of relative URI, false otherwise.
  +   *
  +   * @exception MalformedURIException if p_uriSpec violates any syntax
  +   *                                  rules
  +   */
  +  public URI(URI p_base, String p_uriSpec, boolean allowRelativeURI) throws 
MalformedURIException {
  +      initialize(p_base, p_uriSpec, allowRelativeURI);
  +  }
   
    /**
     * Construct a new URI that does not follow the generic URI syntax.
  @@ -421,6 +465,125 @@
       m_queryString = p_other.getQueryString();
       m_fragment = p_other.getFragment();
     }
  +  
  +  /**
  +   * Initializes this URI from a base URI and a URI specification string.
  +   * See RFC 2396 Section 4 and Appendix B for specifications on parsing
  +   * the URI and Section 5 for specifications on resolving relative URIs
  +   * and relative paths.
  +   *
  +   * @param p_base the base URI (may be null if p_uriSpec is an absolute
  +   *               URI)
  +   * @param p_uriSpec the URI spec string which may be an absolute or
  +   *                  relative URI (can only be null/empty if p_base
  +   *                  is not null)
  +   * @param allowRelativeURI true to set up not throwing an exception 
  +   *                         in case of relative URI, false otherwise.
  +   *
  +   * @exception MalformedURIException if p_base is null and p_uriSpec
  +   *                                  is not an absolute URI or if
  +   *                                  p_uriSpec violates syntax rules
  +   */
  +  private void initialize(URI p_base, String p_uriSpec, boolean allowRelativeURI)
  +      throws MalformedURIException {
  +      
  +      String uriSpec = p_uriSpec;
  +      int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0;
  +      
  +      if (p_base == null && uriSpecLen == 0) {
  +          throw new MalformedURIException("Cannot initialize URI with empty 
parameters.");
  +      }
  +      
  +      // just make a copy of the base if spec is empty
  +      if (uriSpecLen == 0) {
  +          initialize(p_base);
  +          return;
  +      }
  +      
  +      int index = 0;
  +      
  +      // Check for scheme, which must be before '/', '?' or '#'. Also handle
  +      // names with DOS drive letters ('D:'), so 1-character schemes are not
  +      // allowed.
  +      int colonIdx = uriSpec.indexOf(':');
  +      if (colonIdx != -1) {
  +          final int searchFrom = colonIdx - 1;
  +          // search backwards starting from character before ':'.
  +          int slashIdx = uriSpec.lastIndexOf('/', searchFrom);
  +          int queryIdx = uriSpec.lastIndexOf('?', searchFrom);
  +          int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom);
  +          
  +          if (colonIdx < 2 || slashIdx != -1 || 
  +              queryIdx != -1 || fragmentIdx != -1) {
  +              // A standalone base is a valid URI according to spec
  +              if (colonIdx == 0 || (p_base == null && fragmentIdx != 0 && 
!allowRelativeURI)) {
  +                  throw new MalformedURIException("No scheme found in URI.");
  +              }
  +          }
  +          else {
  +              initializeScheme(uriSpec);
  +              index = m_scheme.length()+1;
  +              
  +              // Neither 'scheme:' or 'scheme:#fragment' are valid URIs.
  +              if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') {
  +                  throw new MalformedURIException("Scheme specific part cannot be 
empty.");   
  +              }
  +          }
  +      }
  +      else if (p_base == null && uriSpec.indexOf('#') != 0 && !allowRelativeURI) {
  +          throw new MalformedURIException("No scheme found in URI.");    
  +      }
  +      
  +      // Two slashes means we may have authority, but definitely means we're either
  +      // matching net_path or abs_path. These two productions are ambiguous in that
  +      // every net_path (except those containing an IPv6Reference) is an abs_path. 
  +      // RFC 2396 resolves this ambiguity by applying a greedy left most matching 
rule. 
  +      // Try matching net_path first, and if that fails we don't have authority so 
  +      // then attempt to match abs_path.
  +      //
  +      // net_path = "//" authority [ abs_path ]
  +      // abs_path = "/"  path_segments
  +      if (((index+1) < uriSpecLen) &&
  +          (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) {
  +          index += 2;
  +          int startPos = index;
  +          
  +          // Authority will be everything up to path, query or fragment
  +          char testChar = '\0';
  +          while (index < uriSpecLen) {
  +              testChar = uriSpec.charAt(index);
  +              if (testChar == '/' || testChar == '?' || testChar == '#') {
  +                  break;
  +              }
  +              index++;
  +          }
  +          
  +          // Attempt to parse authority. If the section is an empty string
  +          // this is a valid server based authority, so set the host to this
  +          // value.
  +          if (index > startPos) {
  +              // If we didn't find authority we need to back up. Attempt to
  +              // match against abs_path next.
  +              if (!initializeAuthority(uriSpec.substring(startPos, index))) {
  +                  index = startPos - 2;
  +              }
  +          }
  +          else {
  +              m_host = "";
  +          }
  +      }
  +      
  +      initializePath(uriSpec, index);
  +      
  +      // Resolve relative URI to base URI - see RFC 2396 Section 5.2
  +      // In some cases, it might make more sense to throw an exception
  +      // (when scheme is specified is the string spec and the base URI
  +      // is also specified, for example), but we're just following the
  +      // RFC specifications
  +      if (p_base != null) {
  +          absolutize(p_base);
  +      }
  +  }
   
    /**
     * Initializes this URI from a base URI and a URI specification string.
  @@ -536,6 +699,16 @@
       // is also specified, for example), but we're just following the
       // RFC specifications
       if (p_base != null) {
  +        absolutize(p_base);
  +    }
  +  }
  +
  +  /**
  +   * Absolutize URI with given base URI.
  +   *
  +   * @param p_base base URI for absolutization
  +   */
  +  public void absolutize(URI p_base) {
   
         // check to see if this is the current doc - RFC 2396 5.2 #2
         // note that this is slightly different from the RFC spec in that
  @@ -546,108 +719,109 @@
         // identified this as a bug in the RFC
         if (m_path.length() == 0 && m_scheme == null &&
             m_host == null && m_regAuthority == null) {
  -        m_scheme = p_base.getScheme();
  -        m_userinfo = p_base.getUserinfo();
  -        m_host = p_base.getHost();
  -        m_port = p_base.getPort();
  -        m_regAuthority = p_base.getRegBasedAuthority();
  -        m_path = p_base.getPath();
  -
  -        if (m_queryString == null) {
  -          m_queryString = p_base.getQueryString();
  -        }
  -        return;
  +          m_scheme = p_base.getScheme();
  +          m_userinfo = p_base.getUserinfo();
  +          m_host = p_base.getHost();
  +          m_port = p_base.getPort();
  +          m_regAuthority = p_base.getRegBasedAuthority();
  +          m_path = p_base.getPath();
  +          
  +          if (m_queryString == null) {
  +              m_queryString = p_base.getQueryString();
  +          }
  +          return;
         }
  -
  +      
         // check for scheme - RFC 2396 5.2 #3
         // if we found a scheme, it means absolute URI, so we're done
         if (m_scheme == null) {
  -        m_scheme = p_base.getScheme();
  +          m_scheme = p_base.getScheme();
         }
         else {
  -        return;
  +          return;
         }
  -
  +      
         // check for authority - RFC 2396 5.2 #4
         // if we found a host, then we've got a network path, so we're done
         if (m_host == null && m_regAuthority == null) {
  -        m_userinfo = p_base.getUserinfo();
  -        m_host = p_base.getHost();
  -        m_port = p_base.getPort();
  -        m_regAuthority = p_base.getRegBasedAuthority();
  +          m_userinfo = p_base.getUserinfo();
  +          m_host = p_base.getHost();
  +          m_port = p_base.getPort();
  +          m_regAuthority = p_base.getRegBasedAuthority();
         }
         else {
  -        return;
  +          return;
         }
  -
  +      
         // check for absolute path - RFC 2396 5.2 #5
         if (m_path.length() > 0 &&
  -          m_path.startsWith("/")) {
  -        return;
  +              m_path.startsWith("/")) {
  +          return;
         }
  -
  +      
         // if we get to this point, we need to resolve relative path
         // RFC 2396 5.2 #6
         String path = "";
         String basePath = p_base.getPath();
  -
  +      
         // 6a - get all but the last segment of the base URI path
         if (basePath != null && basePath.length() > 0) {
  -        int lastSlash = basePath.lastIndexOf('/');
  -        if (lastSlash != -1) {
  -          path = basePath.substring(0, lastSlash+1);
  -        }
  +          int lastSlash = basePath.lastIndexOf('/');
  +          if (lastSlash != -1) {
  +              path = basePath.substring(0, lastSlash+1);
  +          }
         }
         else if (m_path.length() > 0) {
  -             path = "/";
  +          path = "/";
         }
  -
  +      
         // 6b - append the relative URI path
         path = path.concat(m_path);
  -
  +      
         // 6c - remove all "./" where "." is a complete path segment
  -      index = -1;
  +      int index = -1;
         while ((index = path.indexOf("/./")) != -1) {
  -        path = path.substring(0, index+1).concat(path.substring(index+3));
  +          path = path.substring(0, index+1).concat(path.substring(index+3));
         }
  -
  +      
         // 6d - remove "." if path ends with "." as a complete path segment
         if (path.endsWith("/.")) {
  -        path = path.substring(0, path.length()-1);
  +          path = path.substring(0, path.length()-1);
         }
  -
  +      
         // 6e - remove all "<segment>/../" where "<segment>" is a complete
         // path segment not equal to ".."
         index = 1;
         int segIndex = -1;
         String tempString = null;
  -
  +      
         while ((index = path.indexOf("/../", index)) > 0) {
  -        tempString = path.substring(0, path.indexOf("/../"));
  -        segIndex = tempString.lastIndexOf('/');
  -        if (segIndex != -1) {
  -          if (!tempString.substring(segIndex).equals("..")) {
  -            path = path.substring(0, segIndex+1).concat(path.substring(index+4));
  -            index = segIndex;
  +          tempString = path.substring(0, path.indexOf("/../"));
  +          segIndex = tempString.lastIndexOf('/');
  +          if (segIndex != -1) {
  +              if (!tempString.substring(segIndex).equals("..")) {
  +                  path = path.substring(0, 
segIndex+1).concat(path.substring(index+4));
  +                  index = segIndex;
  +              }
  +              else {
  +                  index += 4;
  +              }
  +          }
  +          else {
  +              index += 4;
             }
  -          else
  -            index += 4;
  -        }
  -        else
  -          index += 4;
         }
  -
  +      
         // 6f - remove ending "<segment>/.." where "<segment>" is a
         // complete path segment
         if (path.endsWith("/..")) {
  -        tempString = path.substring(0, path.length()-3);
  -        segIndex = tempString.lastIndexOf('/');
  -        if (segIndex != -1) {
  -          path = path.substring(0, segIndex+1);
  -        }
  +          tempString = path.substring(0, path.length()-3);
  +          segIndex = tempString.lastIndexOf('/');
  +          if (segIndex != -1) {
  +              path = path.substring(0, segIndex+1);
  +          }
         }
         m_path = path;
  -    }
     }
   
    /**
  @@ -1524,6 +1698,17 @@
       // presence of the host (whether valid or empty) means
       // double-slashes which means generic uri
       return (m_host != null);
  +  }
  +  
  +  /**
  +   * Returns whether this URI represents an absolute URI.
  +   *
  +   * @return true if this URI represents an absolute URI, false
  +   *         otherwise
  +   */
  +  public boolean isAbsoluteURI() {
  +      // presence of the scheme means absolute uri
  +      return (m_scheme != null);
     }
   
    /**
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to