mrglavas    2003/07/18 22:06:03

  Modified:    java/src/org/apache/xerces/util URI.java
  Log:
  More fixes to the URI implementation.
  Using the definition of IPv6 addresses from section 2.2 of RFC 2373, since the BNF 
in Appendix B is incorrect.
  Labels in hostnames must be <= 63 chars.
  
  Revision  Changes    Path
  1.11      +77 -63    xml-xerces/java/src/org/apache/xerces/util/URI.java
  
  Index: URI.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/URI.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- URI.java  15 Jul 2003 21:23:47 -0000      1.10
  +++ URI.java  19 Jul 2003 05:06:02 -0000      1.11
  @@ -1391,7 +1391,8 @@
       }
   
       char testChar;
  -    for (int i = 1; i < p_scheme.length(); i++) {
  +    int schemeLength = p_scheme.length();
  +    for (int i = 1; i < schemeLength; ++i) {
         testChar = p_scheme.charAt(i);
         if (!isSchemeCharacter(testChar)) {
           return false;
  @@ -1406,7 +1407,7 @@
     * a valid IPv4 address, IPv6 reference or the domain name of a network host. 
     * A valid IPv4 address consists of four decimal digit groups separated by a
     * '.'. Each group must consist of one to three digits. See RFC 2732 Section 3,
  -  * and RFC 2373 Appendix B, for the definition of IPv6 references. A hostname 
  +  * and RFC 2373 Section 2.2, for the definition of IPv6 references. A hostname 
     * consists of domain labels (each of which must begin and end with an 
alphanumeric 
     * but may contain '-') separated & by a '.'. See RFC 2396 Section 3.2.2.
     *
  @@ -1428,7 +1429,10 @@
         return isWellFormedIPv6Reference(address);
       }
   
  -    if (address.startsWith(".") || address.startsWith("-")) {
  +    // Cannot start with a '.', '-', or end with a '-'.
  +    if (address.startsWith(".") || 
  +        address.startsWith("-") || 
  +        address.endsWith("-")) {
         return false;
       }
   
  @@ -1458,6 +1462,7 @@
         // domain labels can contain alphanumerics and '-"
         // but must start and end with an alphanumeric
         char testChar;
  +      int labelCharCount = 0;
   
         for (int i = 0; i < addrLength; i++) {
           testChar = address.charAt(i);
  @@ -1468,10 +1473,15 @@
             if (i+1 < addrLength && !isAlphanum(address.charAt(i+1))) {
               return false;
             }
  +          labelCharCount = 0;
           }
           else if (!isAlphanum(testChar) && testChar != '-') {
             return false;
           }
  +        // RFC 1034: Labels must be 63 characters or less.
  +        else if (++labelCharCount > 63) {
  +          return false;
  +        }
         }
       }
       return true;
  @@ -1484,7 +1494,7 @@
      * the 32-bit address constraint, each segment of the address cannot 
      * be greater than 255 (8 bits of information).</p>
      *
  -   * <p>IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT</p>
  +   * <p><code>IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 
1*3DIGIT</code></p>
      *
      * @return true if the string is a syntactically valid IPv4 address
      */
  @@ -1541,22 +1551,25 @@
     }
     
     /**
  -   * <p>Determines whether a string is an IPv6 reference.</p>
  +   * <p>Determines whether a string is an IPv6 reference as defined
  +   * by RFC 2732, where IPv6address is defined in RFC 2373. The 
  +   * IPv6 address is parsed according to Section 2.2 of RFC 2373,
  +   * with the additional constraint that the address be composed of
  +   * 128 bits of information.</p>
  +   *
  +   * <p><code>IPv6reference = "[" IPv6address "]"</code></p>
      *
  -   * <p>IPv6reference = "[" IPv6address "]" <br>
  -   * IPv6address   = hexpart [ ":" IPv4address ] <br>
  -   * IPv4address   = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT <br>
  -   * hexpart       = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ] <br>
  -   * hexseq        = hex4 *( ":" hex4) <br>
  -   * hex4          = 1*4HEXDIG</p>
  +   * <p>Note: The BNF expressed in RFC 2732 Appendix B does not 
  +   * accurately describe section 2.2, and was in fact removed from
  +   * RFC 3513, the successor of RFC 2732.</p>
      *
      * @return true if the string is a syntactically valid IPv6 reference
      */
     public static boolean isWellFormedIPv6Reference(String address) {
  +
         int addrLength = address.length();
  -      int start = 1;
  +      int index = 1;
         int end = addrLength-1;
  -      int index = start;
         
         // Check if string is a potential match for IPv6reference.
         if (!(addrLength > 2 && address.charAt(0) == '[' 
  @@ -1564,77 +1577,71 @@
             return false;
         }
         
  -      // The production hexpart can be rewritten as:
  -      // hexpart = hexseq | [hexseq] "::" [hexseq]
  -      // which means as long as we see one of the following
  -      // three groups, then we have a match.
  +      // Counter for the number of 16-bit sections read in the address.
  +      int [] counter = new int[1];
         
  -      // 1. Scan hex sequence before possible '::'.
  -      index = scanHexSequence(address, index, end);
  +      // Scan hex sequence before possible '::' or IPv4 address.
  +      index = scanHexSequence(address, index, end, counter);
         if (index == -1) {
             return false;
         }
  +      // Address must contain 128-bits of information.
         else if (index == end) {
  -          return true;
  +          return (counter[0] == 8);
         }
         
  -      // 2. Skip '::' if present.
  -      if (index + 1 < end && address.charAt(index) == ':') {
  +      if (index+1 < end && address.charAt(index) == ':') {
             if (address.charAt(index+1) == ':') {
  +              // '::' represents at least one 16-bit group of zeros.
  +              if (++counter[0] > 8) {
  +                  return false;
  +              }
                 index += 2;
  +              // Trailing zeros will fill out the rest of the address.
                 if (index == end) {
                    return true;
                 }
             }
  -          // If the second character wasn't ':', the remainder of the
  -          // string must match IPv4Address. IPv6Address cannot 
  -          // start with [":" IPv4Address].
  +          // If the second character wasn't ':', in order to be valid,
  +          // the remainder of the string must match IPv4Address, 
  +          // and we must have read exactly 6 16-bit groups.
             else {
  -              return (index > start) && 
  +              return (counter[0] == 6) && 
                     isWellFormedIPv4Address(address.substring(index+1, end));
             }
         }
  -      
  -      // 3. Scan hex sequence after '::'.
  -      index = scanHexSequence(address, index, end);
  -      if (index == -1) {
  +      else {
             return false;
         }
  -      else if (index == end) {
  -               return true;
  -      }
         
  -      // If we've gotten this far then the string is a valid
  -      // IPv6 reference only if it contained a valid hexpart,
  -      // and it has an IPv4 address.
  -      //
  -      // REVISIT: The example given for an IPv6 reference
  -      // http://[::192.9.5.5]/ipng in RFC 2732 is an error, or
  -      // the BNF for IPv6address is incorrect. In order to be
  -      // valid for the grammar defined in RFC 2373, if the hexpart
  -      // is only '::', and if the address contains an IPv4 address,
  -      // '::' must be followed by another ':'. Going with the BNF
  -      // from RFC 2373 for now. - mrglavas
  -      if (index > start && index+1 < end && address.charAt(index) == ':') {
  -          return isWellFormedIPv4Address(address.substring(index+1, end));
  -      }
  -      
  -      return false;
  +      // 3. Scan hex sequence after '::'.
  +      int prevCount = counter[0];
  +      index = scanHexSequence(address, index, end, counter);
  +
  +      // We've either reached the end of the string, the address ends in
  +      // an IPv4 address, or it is invalid. scanHexSequence has already 
  +      // made sure that we have the right number of bits. 
  +      return (index == end) || 
  +          (index != -1 && isWellFormedIPv4Address(
  +          address.substring((counter[0] > prevCount) ? index+1 : index, end)));
     }
     
     /**
  -   * Helper method for isWellFormedIPv6Reference which scans hex sequeunces.
  -   * It returns the index of the next character to scan, or -1 if the
  -   * string region cannot match a valid IPv6 address. 
  +   * Helper method for isWellFormedIPv6Reference which scans the 
  +   * hex sequences of an IPv6 address. It returns the index of the 
  +   * next character to scan in the address, or -1 if the string 
  +   * cannot match a valid IPv6 address. 
      *
  -   * @param sequence the string to be scanned
  +   * @param address the string to be scanned
      * @param index the beginning index (inclusive)
      * @param end the ending index (exclusive)
  +   * @param counter a counter for the number of 16-bit sections read
  +   * in the address
      *
      * @return the index of the next character to scan, or -1 if the
  -   * string region cannot match a valid IPv6 address
  +   * string cannot match a valid IPv6 address
      */
  -  private static int scanHexSequence (String sequence, int index, int end) {
  +  private static int scanHexSequence (String address, int index, int end, int [] 
counter) {
        
         char testChar;
         int numDigits = 0;
  @@ -1644,26 +1651,33 @@
         // hexseq = hex4 *( ":" hex4)
         // hex4   = 1*4HEXDIG
         for (; index < end; ++index) {
  -             testChar = sequence.charAt(index);
  +             testChar = address.charAt(index);
                if (testChar == ':') {
  -                 if (numDigits == 0 || ((index+1 < end) && sequence.charAt(index+1) 
== ':')) {
  +                 // IPv6 addresses are 128-bit, so there can be at most eight 
sections.
  +                 if (numDigits > 0 && ++counter[0] > 8) {
  +                     return -1;
  +                 }
  +                 // This could be '::'.
  +                 if (numDigits == 0 || ((index+1 < end) && address.charAt(index+1) 
== ':')) {
                        return index;
                    }
                    numDigits = 0;
           }
           // This might be invalid or an IPv4address. If it's potentially an 
IPv4address,
  -        // backup to the ':' before the first hex digit in this group.
  +        // backup to just after the last valid character that matches hexseq.
           else if (!isHex(testChar)) {
  -            int back = index - numDigits - 1;
  -            return (testChar == '.' && numDigits < 4 && numDigits > 0
  -                    && back >= start && sequence.charAt(back) == ':') ? back : -1;
  +            if (testChar == '.' && numDigits < 4 && numDigits > 0 && counter[0] <= 
6) {
  +                int back = index - numDigits - 1;
  +                return (back >= start) ? back : (back+1);
  +            }
  +            return -1;
           }
           // There can be at most 4 hex digits per group.
           else if (++numDigits > 4) {
               return -1;
           }
         }
  -      return (numDigits > 0) ? end : -1;
  +      return (numDigits > 0 && ++counter[0] <= 8) ? end : -1;
     } 
   
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to