mrglavas 2003/07/18 22:06:03 Modified: java/src/org/apache/xerces/util URI.java Log: More fixes to the URI implementation. Using the definition of IPv6 addresses from section 2.2 of RFC 2373, since the BNF in Appendix B is incorrect. Labels in hostnames must be <= 63 chars. Revision Changes Path 1.11 +77 -63 xml-xerces/java/src/org/apache/xerces/util/URI.java Index: URI.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/URI.java,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- URI.java 15 Jul 2003 21:23:47 -0000 1.10 +++ URI.java 19 Jul 2003 05:06:02 -0000 1.11 @@ -1391,7 +1391,8 @@ } char testChar; - for (int i = 1; i < p_scheme.length(); i++) { + int schemeLength = p_scheme.length(); + for (int i = 1; i < schemeLength; ++i) { testChar = p_scheme.charAt(i); if (!isSchemeCharacter(testChar)) { return false; @@ -1406,7 +1407,7 @@ * a valid IPv4 address, IPv6 reference or the domain name of a network host. * A valid IPv4 address consists of four decimal digit groups separated by a * '.'. Each group must consist of one to three digits. See RFC 2732 Section 3, - * and RFC 2373 Appendix B, for the definition of IPv6 references. A hostname + * and RFC 2373 Section 2.2, for the definition of IPv6 references. A hostname * consists of domain labels (each of which must begin and end with an alphanumeric * but may contain '-') separated & by a '.'. See RFC 2396 Section 3.2.2. * @@ -1428,7 +1429,10 @@ return isWellFormedIPv6Reference(address); } - if (address.startsWith(".") || address.startsWith("-")) { + // Cannot start with a '.', '-', or end with a '-'. 
+ if (address.startsWith(".") || + address.startsWith("-") || + address.endsWith("-")) { return false; } @@ -1458,6 +1462,7 @@ // domain labels can contain alphanumerics and '-" // but must start and end with an alphanumeric char testChar; + int labelCharCount = 0; for (int i = 0; i < addrLength; i++) { testChar = address.charAt(i); @@ -1468,10 +1473,15 @@ if (i+1 < addrLength && !isAlphanum(address.charAt(i+1))) { return false; } + labelCharCount = 0; } else if (!isAlphanum(testChar) && testChar != '-') { return false; } + // RFC 1034: Labels must be 63 characters or less. + else if (++labelCharCount > 63) { + return false; + } } } return true; @@ -1484,7 +1494,7 @@ * the 32-bit address constraint, each segment of the address cannot * be greater than 255 (8 bits of information).</p> * - * <p>IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT</p> + * <p><code>IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT</code></p> * * @return true if the string is a syntactically valid IPv4 address */ @@ -1541,22 +1551,25 @@ } /** - * <p>Determines whether a string is an IPv6 reference.</p> + * <p>Determines whether a string is an IPv6 reference as defined + * by RFC 2732, where IPv6address is defined in RFC 2373. The + * IPv6 address is parsed according to Section 2.2 of RFC 2373, + * with the additional constraint that the address be composed of + * 128 bits of information.</p> + * + * <p><code>IPv6reference = "[" IPv6address "]"</code></p> * - * <p>IPv6reference = "[" IPv6address "]" <br> - * IPv6address = hexpart [ ":" IPv4address ] <br> - * IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 
1*3DIGIT <br> - * hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ] <br> - * hexseq = hex4 *( ":" hex4) <br> - * hex4 = 1*4HEXDIG</p> + * <p>Note: The BNF expressed in RFC 2373 Appendix B does not + * accurately describe section 2.2, and was in fact removed from + * RFC 3513, the successor of RFC 2373.</p> * * @return true if the string is a syntactically valid IPv6 reference */ public static boolean isWellFormedIPv6Reference(String address) { + int addrLength = address.length(); - int start = 1; + int index = 1; int end = addrLength-1; - int index = start; // Check if string is a potential match for IPv6reference. if (!(addrLength > 2 && address.charAt(0) == '[' @@ -1564,77 +1577,71 @@ return false; } - // The production hexpart can be rewritten as: - // hexpart = hexseq | [hexseq] "::" [hexseq] - // which means as long as we see one of the following - // three groups, then we have a match. + // Counter for the number of 16-bit sections read in the address. + int [] counter = new int[1]; - // 1. Scan hex sequence before possible '::'. - index = scanHexSequence(address, index, end); + // Scan hex sequence before possible '::' or IPv4 address. + index = scanHexSequence(address, index, end, counter); if (index == -1) { return false; } + // Address must contain 128-bits of information. else if (index == end) { - return true; + return (counter[0] == 8); } - // 2. Skip '::' if present. - if (index + 1 < end && address.charAt(index) == ':') { + if (index+1 < end && address.charAt(index) == ':') { if (address.charAt(index+1) == ':') { + // '::' represents at least one 16-bit group of zeros. + if (++counter[0] > 8) { + return false; + } index += 2; + // Trailing zeros will fill out the rest of the address. if (index == end) { return true; } } - // If the second character wasn't ':', the remainder of the - // string must match IPv4Address. IPv6Address cannot - // start with [":" IPv4Address]. 
+ // If the second character wasn't ':', in order to be valid, + // the remainder of the string must match IPv4Address, + // and we must have read exactly 6 16-bit groups. else { - return (index > start) && + return (counter[0] == 6) && isWellFormedIPv4Address(address.substring(index+1, end)); } } - - // 3. Scan hex sequence after '::'. - index = scanHexSequence(address, index, end); - if (index == -1) { + else { return false; } - else if (index == end) { - return true; - } - // If we've gotten this far then the string is a valid - // IPv6 reference only if it contained a valid hexpart, - // and it has an IPv4 address. - // - // REVISIT: The example given for an IPv6 reference - // http://[::192.9.5.5]/ipng in RFC 2732 is an error, or - // the BNF for IPv6address is incorrect. In order to be - // valid for the grammar defined in RFC 2373, if the hexpart - // is only '::', and if the address contains an IPv4 address, - // '::' must be followed by another ':'. Going with the BNF - // from RFC 2373 for now. - mrglavas - if (index > start && index+1 < end && address.charAt(index) == ':') { - return isWellFormedIPv4Address(address.substring(index+1, end)); - } - - return false; + // 3. Scan hex sequence after '::'. + int prevCount = counter[0]; + index = scanHexSequence(address, index, end, counter); + + // We've either reached the end of the string, the address ends in + // an IPv4 address, or it is invalid. scanHexSequence has already + // made sure that we have the right number of bits. + return (index == end) || + (index != -1 && isWellFormedIPv4Address( + address.substring((counter[0] > prevCount) ? index+1 : index, end))); } /** - * Helper method for isWellFormedIPv6Reference which scans hex sequeunces. - * It returns the index of the next character to scan, or -1 if the - * string region cannot match a valid IPv6 address. + * Helper method for isWellFormedIPv6Reference which scans the + * hex sequences of an IPv6 address. 
It returns the index of the + * next character to scan in the address, or -1 if the string + * cannot match a valid IPv6 address. * - * @param sequence the string to be scanned + * @param address the string to be scanned * @param index the beginning index (inclusive) * @param end the ending index (exclusive) + * @param counter a counter for the number of 16-bit sections read + * in the address * * @return the index of the next character to scan, or -1 if the - * string region cannot match a valid IPv6 address + * string cannot match a valid IPv6 address */ - private static int scanHexSequence (String sequence, int index, int end) { + private static int scanHexSequence (String address, int index, int end, int [] counter) { char testChar; int numDigits = 0; @@ -1644,26 +1651,33 @@ // hexseq = hex4 *( ":" hex4) // hex4 = 1*4HEXDIG for (; index < end; ++index) { - testChar = sequence.charAt(index); + testChar = address.charAt(index); if (testChar == ':') { - if (numDigits == 0 || ((index+1 < end) && sequence.charAt(index+1) == ':')) { + // IPv6 addresses are 128-bit, so there can be at most eight sections. + if (numDigits > 0 && ++counter[0] > 8) { + return -1; + } + // This could be '::'. + if (numDigits == 0 || ((index+1 < end) && address.charAt(index+1) == ':')) { return index; } numDigits = 0; } // This might be invalid or an IPv4address. If it's potentially an IPv4address, - // backup to the ':' before the first hex digit in this group. + // backup to just after the last valid character that matches hexseq. else if (!isHex(testChar)) { - int back = index - numDigits - 1; - return (testChar == '.' && numDigits < 4 && numDigits > 0 - && back >= start && sequence.charAt(back) == ':') ? back : -1; + if (testChar == '.' && numDigits < 4 && numDigits > 0 && counter[0] <= 6) { + int back = index - numDigits - 1; + return (back >= start) ? back : (back+1); + } + return -1; } // There can be at most 4 hex digits per group. 
else if (++numDigits > 4) { return -1; } } - return (numDigits > 0) ? end : -1; + return (numDigits > 0 && ++counter[0] <= 8) ? end : -1; }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]