gareth 2003/07/25 03:15:17 Modified: c/src/xercesc/util XMLUri.cpp XMLUri.hpp Log: Patch by Michael Glavassevich The patch fixes Bugzilla #19787, #20006, #20009, #20010 and #20287, and several other issues. A summary of the changes is listed below: 1. Added '[' and ']' to reserved characters as per RFC 2732. 2. '[' and ']' added in RFC 2732, are not allowed in path segments, but may appear in the opaque part. 3. No URI can begin with a ':'. 4. URI has no scheme if ':' occurs in a URI after '?' or '#'; it's part of the query string or fragment. 5. Whitespace (even escaped as %20) is not permitted in the authority portion of a URI. 6. IPv4 addresses must match 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT (since RFC 2732). 7. IPv4 addresses are 32-bit, therefore no segment may be larger than 255. This isn't expressed by the grammar. 8. Hostnames cannot end with a '-'. 9. Labels in a hostname must be 63 bytes or less [RFC 1034]. 10. Hostnames may be no longer than 255 bytes [RFC 1034]. (That restriction was already there. I just moved it inwards.) 11. Added support for IPv6 references added in RFC 2732. URIs such as http://[::ffff:1.2.3.4] are valid. The BNF in RFC 2373 isn't correct. IPv6 addresses are read according to section 2.2 of RFC 2373. Revision Changes Path 1.14 +411 -132 xml-xerces/c/src/xercesc/util/XMLUri.cpp Index: XMLUri.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLUri.cpp,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- XMLUri.cpp 24 Jul 2003 09:32:52 -0000 1.13 +++ XMLUri.cpp 25 Jul 2003 10:15:16 -0000 1.14 @@ -1,7 +1,7 @@ /* * The Apache Software License, Version 1.1 * - * Copyright (c) 2001 The Apache Software Foundation. All rights + * Copyright (c) 2001-2003 The Apache Software Foundation. All rights * reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -82,7 +82,8 @@ const XMLCh XMLUri::RESERVED_CHARACTERS[] = { chSemiColon, chForwardSlash, chQuestion, chColon, chAt, - chAmpersand, chEqual, chPlus, chDollarSign, chComma, chNull + chAmpersand, chEqual, chPlus, chDollarSign, chComma, chOpenSquare, + chCloseSquare, chNull }; // @@ -113,6 +114,16 @@ chDollarSign, chPeriod, chNull }; +// pchar plus ';' and '/'. +// pchar = unreserved | escaped | +// ":" | "@" | "&" | "=" | "+" | "$" | "," +const XMLCh XMLUri::PATH_CHARACTERS[] = +{ + chSemiColon, chForwardSlash, chColon, chAt, chAmpersand, + chEqual, chPlus, chDollarSign, chComma, chNull +}; + + // --------------------------------------------------------------------------- // Local methods and data // --------------------------------------------------------------------------- @@ -397,27 +408,31 @@ int index = 0; bool foundScheme = false; - // Check for scheme, which must be before `/'. Also handle names with - // DOS drive letters ('D:'), so 1-character schemes are not allowed. - int colonIdx = XMLString::indexOf(trimedUriSpec, chColon); - int slashIdx = XMLString::indexOf(trimedUriSpec, chForwardSlash); - - if ((colonIdx < 2) || - (colonIdx > slashIdx && slashIdx != -1) ) - { - int fragmentIdx = XMLString::indexOf(trimedUriSpec, chPound); //'#' - // A standalone base is a valid URI according to spec - if ( !baseURI && fragmentIdx != 0 ) + // Check for scheme, which must be before `/', '?' or '#'. + // Also handle names with DOS drive letters ('D:'), + // so 1-character schemes are not allowed. 
+ int colonIdx = XMLString::indexOf(trimedUriSpec, chColon); + int slashIdx = XMLString::indexOf(trimedUriSpec, chForwardSlash); + int queryIdx = XMLString::indexOf(trimedUriSpec, chQuestion); + int fragmentIdx = XMLString::indexOf(trimedUriSpec, chPound); + + if ((colonIdx < 2) || + (colonIdx > slashIdx && slashIdx != -1) || + (colonIdx > queryIdx && queryIdx != -1) || + (colonIdx > fragmentIdx && fragmentIdx != -1)) { - ThrowXML(MalformedURLException, XMLExcepts::XMLNUM_URI_No_Scheme); + // A standalone base is a valid URI according to spec + if ( colonIdx == 0 || (!baseURI && fragmentIdx != 0) ) + { + ThrowXML(MalformedURLException, XMLExcepts::XMLNUM_URI_No_Scheme); + } + } + else + { + foundScheme = true; + initializeScheme(trimedUriSpec); + index = XMLString::stringLen(fScheme)+1; } - } - else - { - foundScheme = true; - initializeScheme(trimedUriSpec); - index = XMLString::stringLen(fScheme)+1; - } // It's an error if we stop here if (index == trimedUriSpecLen || (foundScheme && (trimedUriSpec[index] == chPound))) @@ -678,12 +693,12 @@ { int index = 0; - int start = 0; + int start = 0; const int end = XMLString::stringLen(uriSpec); // // server = [ [ userinfo "@" ] hostport ] - // userinfo is everything up @, + // userinfo is everything up @, // XMLCh* userinfo = (XMLCh*) fMemoryManager->allocate ( @@ -696,7 +711,7 @@ { XMLString::subString(userinfo, &(uriSpec[start]), 0, index); index++; // skip the @ - start += index; + start += index; } else { @@ -705,25 +720,41 @@ // // hostport = host [ ":" port ] - // host is everything up to ':' + // host is everything up to ':', or up to + // and including ']' if followed by ':'. // - XMLCh* host = (XMLCh*) fMemoryManager->allocate + XMLCh* host = (XMLCh*) fMemoryManager->allocate ( (end+1) * sizeof(XMLCh) );//new XMLCh[end+1]; ArrayJanitor<XMLCh> hostName(host, fMemoryManager); - index = XMLString::indexOf(&(uriSpec[start]), chColon); + + // Search for port boundary. 
+ if (start < end && uriSpec[start] == chOpenSquare) + { + index = XMLString::indexOf(&(uriSpec[start]), chCloseSquare); + if (index != -1) + { + // skip the ']' + index = ((start + index + 1) < end + && uriSpec[start + index + 1] == chColon) ? index+1 : -1; + } + } + else + { + index = XMLString::indexOf(&(uriSpec[start]), chColon); + } - if ( index != -1) + if ( index != -1 ) { XMLString::subString(host, &(uriSpec[start]), 0, index); index++; // skip the : - start +=index; + start +=index; } else { XMLString::subString(host, &(uriSpec[start]), 0, end-start); - start=end; + start = end; } // port is everything after ":" @@ -758,7 +789,7 @@ // setHost(host); setPort(port); - setUserInfo(userinfo); + setUserInfo(userinfo); } // scheme = alpha *( alpha | digit | "+" | "-" | "." ) @@ -785,55 +816,111 @@ void XMLUri::initializePath(const XMLCh* const uriSpec) { - if ( !uriSpec ) + if ( !uriSpec ) { ThrowXML1(MalformedURLException , XMLExcepts::XMLNUM_URI_Component_Empty , errMsg_PATH); - } + } - int index = 0; - int start = 0; + int index = 0; + int start = 0; int end = XMLString::stringLen(uriSpec); - XMLCh testChar; + XMLCh testChar; - // path - everything up to query string or fragment - while (index < end) + // path - everything up to query string or fragment + if (start < end) { - testChar = uriSpec[index]; - if (testChar == chQuestion || testChar == chPound) - { - break; - } - - // check for valid escape sequence - if (testChar == chPercent) + // RFC 2732 only allows '[' and ']' to appear in the opaque part. + if (!getScheme() || uriSpec[start] == chForwardSlash) { - if (index+2 >= end || - !XMLString::isHex(uriSpec[index+1]) || - !XMLString::isHex(uriSpec[index+2])) + // Scan path. 
+ // abs_path = "/" path_segments + // rel_path = rel_segment [ abs_path ] + while (index < end) { - XMLString::moveChars(value1, &(uriSpec[index]), 3); - value1[3] = chNull; - ThrowXML2(MalformedURLException - , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence - , errMsg_PATH - , value1); - } + testChar = uriSpec[index]; + if (testChar == chQuestion || testChar == chPound) + { + break; + } + + // check for valid escape sequence + if (testChar == chPercent) + { + if (index+2 >= end || + !XMLString::isHex(uriSpec[index+1]) || + !XMLString::isHex(uriSpec[index+2])) + { + XMLString::moveChars(value1, &(uriSpec[index]), 3); + value1[3] = chNull; + ThrowXML2(MalformedURLException + , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence + , errMsg_PATH + , value1); + } + } + else if (!isUnreservedCharacter(testChar) && + !isPathCharacter(testChar)) + { + value1[0] = testChar; + value1[1] = chNull; + ThrowXML2(MalformedURLException + , XMLExcepts::XMLNUM_URI_Component_Invalid_Char + , errMsg_PATH + , value1); + } + + index++; + }//while (index < end) } - else if (!isReservedCharacter(testChar) && - !isUnreservedCharacter(testChar)) + else { - value1[0] = testChar; - value1[1] = chNull; - ThrowXML2(MalformedURLException - , XMLExcepts::XMLNUM_URI_Component_Invalid_Char - , errMsg_PATH - , value1); - } + // Scan opaque part. 
+ // opaque_part = uric_no_slash *uric + while (index < end) + { + testChar = uriSpec[index]; + if (testChar == chQuestion || testChar == chPound) + { + break; + } - index++; - }//while (index < end) + // check for valid escape sequence + if (testChar == chPercent) + { + if (index+2 >= end || + !XMLString::isHex(uriSpec[index+1]) || + !XMLString::isHex(uriSpec[index+2])) + { + XMLString::moveChars(value1, &(uriSpec[index]), 3); + value1[3] = chNull; + ThrowXML2(MalformedURLException + , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence + , errMsg_PATH + , value1); + } + } + // If the scheme specific part is opaque, it can contain '[' + // and ']'. uric_no_slash wasn't modified by RFC 2732, which + // I've interpreted as an error in the spec, since the + // production should be equivalent to (uric - '/'), and uric + // contains '[' and ']'. + else if (!isUnreservedCharacter(testChar) && + !isReservedCharacter(testChar)) + { + value1[0] = testChar; + value1[1] = chNull; + ThrowXML2(MalformedURLException + , XMLExcepts::XMLNUM_URI_Component_Invalid_Char + , errMsg_PATH + , value1); + } + + index++; + }//while (index < end) + } + } //if (start < end) if (getPath()) { @@ -843,8 +930,8 @@ fPath = (XMLCh*) fMemoryManager->allocate((index+1) * sizeof(XMLCh));//new XMLCh[index+1]; XMLString::subString(fPath, uriSpec, start, index); - // query - starts with ? and up to fragment or end - if (testChar == chQuestion) + // query - starts with ? 
and up to fragment or end + if (testChar == chQuestion) { index++; start = index; @@ -870,8 +957,8 @@ , value1); } } - else if (!isReservedCharacter(testChar) && - !isUnreservedCharacter(testChar)) + else if (!isUnreservedCharacter(testChar) && + !isReservedCharacter(testChar)) { value1[0] = testChar; value1[1] = chNull; @@ -895,8 +982,8 @@ XMLString::subString(fQueryString, uriSpec, start, index); } - // fragment - starts with # - if (testChar == chPound) + // fragment - starts with # + if (testChar == chPound) { index++; start = index; @@ -918,8 +1005,8 @@ , value1); } } - else if (!isReservedCharacter(testChar) && - !isUnreservedCharacter(testChar)) + else if (!isUnreservedCharacter(testChar) && + !isReservedCharacter(testChar)) { value1[0] = testChar; value1[1] = chNull; @@ -1034,8 +1121,7 @@ void XMLUri::setHost(const XMLCh* const newHost) { - if ( !newHost || - XMLString::isAllWhiteSpace(newHost)) + if ( !newHost ) { if (getHost()) fMemoryManager->deallocate(fHost);//delete [] fHost; @@ -1323,34 +1409,35 @@ // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum // toplabel = alpha | alpha *( alphanum | "-" ) alphanum // -// IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit +// IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT // bool XMLUri::isWellFormedAddress(const XMLCh* const addrString) { - if (!addrString) - return false; - - // - // check length - // - XMLCh* tmpAddr = XMLString::replicate(addrString, XMLPlatformUtils::fgMemoryManager); - ArrayJanitor<XMLCh> janName(tmpAddr, XMLPlatformUtils::fgMemoryManager); - XMLString::trim(tmpAddr); - if ((!tmpAddr || !*tmpAddr) || - (XMLString::stringLen(tmpAddr) > 255) ) + // Check that we have a non-zero length string. + if (!addrString || !*addrString) return false; + + // Get address length. + int addrStrLen = XMLString::stringLen(addrString); + + // Check if the host is a valid IPv6reference. 
+ if (*addrString == chOpenSquare) + { + return isWellFormedIPv6Reference(addrString, addrStrLen); + } // - // the frist letter shall NOT be "." or "-" + // Cannot start with a '.', '-', or end with a '-'. // - if (*addrString == chPeriod || - *addrString == chDash ) + if (*addrString == chPeriod || + *addrString == chDash || + addrString[addrStrLen-1] == chDash) return false; - // rightmost domain label starting with digit indicates IP address - // since top level domain label can only start with an alpha - // see RFC 2396 Section 3.2.2 - int addrStrLen = XMLString::stringLen(addrString); + // rightmost domain label starting with digit indicates IP address + // since top level domain label can only start with an alpha + // see RFC 2396 Section 3.2.2 + int lastPeriodPos = XMLString::lastIndexOf(addrString, chPeriod); // if the string ends with "." @@ -1369,48 +1456,26 @@ return false; } - if (XMLString::isDigit(addrString[lastPeriodPos + 1])) + if (XMLString::isDigit(addrString[lastPeriodPos + 1])) { - // - // IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit - // - // make sure that - // 1) we see only digits and dot separators, - // 2) that any dot separator is preceded and followed by a digit - // 3) that we find 3 dots - - int numDots = 0; - for (int i = 0; i < addrStrLen; i++) - { - if (addrString[i] == chPeriod) - { - if (((i > 0) && - (!XMLString::isDigit(addrString[i-1]))) || - ((i + 1 < addrStrLen) && - (!XMLString::isDigit(addrString[i+1]))) ) - { - return false; - } - numDots++; - } - else if (!XMLString::isDigit(addrString[i])) - { - return false; - } - } //for - - if (numDots != 3) - { - return false; - } + return isWellFormedIPv4Address(addrString, addrStrLen); } // end of IPv4address - else + else { // // hostname = *( domainlabel "." ) toplabel [ "." 
] // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum // toplabel = alpha | alpha *( alphanum | "-" ) alphanum - // + + // RFC 2396 states that hostnames take the form described in + // RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According + // to RFC 1034, hostnames are limited to 255 characters. + if (addrStrLen > 255) { + return false; + } + + unsigned int labelCharCount = 0; + // domain labels can contain alphanumerics and '-" // but must start and end with an alphanumeric for (int i = 0; i < addrStrLen; i++) @@ -1424,16 +1489,230 @@ { return false; } + labelCharCount = 0; } else if (!XMLString::isAlphaNum(addrString[i]) && addrString[i] != chDash) { return false; } + // RFC 1034: Labels must be 63 characters or less. + else if (++labelCharCount > 63) { + return false; + } } //for } return true; +} + +// +// RFC 2732 amended RFC 2396 by replacing the definition +// of IPv4address with the one defined by RFC 2373. +// +// IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT +// +bool XMLUri::isWellFormedIPv4Address(const XMLCh* const addr, const int& length) +{ + int numDots = 0; + int numDigits = 0; + + // + // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT + // + // make sure that + // 1) we see only digits and dot separators, + // 2) that any dot separator is preceded and followed by a digit + // 3) that we find 3 dots + // 4) that each segment contains 1 to 3 digits. + // 5) that each segment is not greater than 255. + + for (int i = 0; i < length; ++i) + { + if (addr[i] == chPeriod) + { + if (((i > 0) && + (!XMLString::isDigit(addr[i-1]))) || + ((i + 1 < length) && + (!XMLString::isDigit(addr[i+1]))) ) + { + return false; + } + numDigits = 0; + if (++numDots > 3) + { + return false; + } + } + else if (!XMLString::isDigit(addr[i])) + { + return false; + } + // Check that there are no more than three digits + // in this segment. 
+ else if (++numDigits > 3) + { + return false; + } + // Check that this segment is not greater than 255. + else if (numDigits == 3) + { + XMLCh first = addr[i-2]; + XMLCh second = addr[i-1]; + XMLCh last = addr[i]; + if (!(first < chDigit_2 || + (first == chDigit_2 && + (second < chDigit_5 || + (second == chDigit_5 && last <= chDigit_5))))) + { + return false; + } + } + } //for + return (numDots == 3); +} + +// +// IPv6reference = "[" IPv6address "]" +// +bool XMLUri::isWellFormedIPv6Reference(const XMLCh* const addr, const int& length) +{ + int index = 1; + int end = length-1; + + // Check if string is a potential match for IPv6reference. + if (!(length > 2 && addr[0] == chOpenSquare && addr[end] == chCloseSquare)) + { + return false; + } + + // Counter for the number of 16-bit sections read in the address. + int counter = 0; + + // Scan hex sequence before possible '::' or IPv4 address. + index = scanHexSequence(addr, index, end, counter); + if (index == -1) + { + return false; + } + // Address must contain 128-bits of information. + else if (index == end) + { + return (counter == 8); + } + + if (index+1 < end && addr[index] == chColon) + { + if (addr[index+1] == chColon) + { + // '::' represents at least one 16-bit group of zeros. + if (++counter > 8) + { + return false; + } + index += 2; + // Trailing zeros will fill out the rest of the address. + if (index == end) + { + return true; + } + } + // If the second character wasn't ':', in order to be valid, + // the remainder of the string must match IPv4Address, + // and we must have read exactly 6 16-bit groups. 
+ else + { + if (counter == 6) + { + XMLCh* ipVfour = (XMLCh*) + XMLPlatformUtils::fgMemoryManager->allocate((length+1) * sizeof(XMLCh)); + XMLString::subString(ipVfour, addr, index+1, end); + ArrayJanitor<XMLCh> janitor(ipVfour, XMLPlatformUtils::fgMemoryManager); + int newLength = XMLString::stringLen(ipVfour); + return isWellFormedIPv4Address(ipVfour, newLength); + } + else + { + return false; + } + } + } + else + { + return false; + } + + // 3. Scan hex sequence after '::'. + int prevCount = counter; + index = scanHexSequence(addr, index, end, counter); + if (index == -1) + { + return false; + } + // If this is the end of the address then + // we've got 128-bits of information. + else if (index == end) + { + return true; + } + + // The address ends in an IPv4 address, or it is invalid. + // scanHexSequence has already made sure that we have the right number of bits. + XMLCh* ipVfour = + (XMLCh*) XMLPlatformUtils::fgMemoryManager->allocate((length+1) * sizeof(XMLCh)); + XMLString::subString(ipVfour, addr, (counter > prevCount) ? index+1 : index, end); + ArrayJanitor<XMLCh> janitor(ipVfour, XMLPlatformUtils::fgMemoryManager); + int newLength = XMLString::stringLen(ipVfour); + return isWellFormedIPv4Address(ipVfour, newLength); +} + +// +// For use with isWellFormedIPv6Reference only. +// +int XMLUri::scanHexSequence (const XMLCh* const addr, int index, int end, int& counter) +{ + XMLCh testChar = chNull; + int numDigits = 0; + int start = index; + + // Trying to match the following productions: + // hexseq = hex4 *( ":" hex4) + // hex4 = 1*4HEXDIG + for (; index < end; ++index) + { + testChar = addr[index]; + if (testChar == chColon) + { + // IPv6 addresses are 128-bit, so there can be at most eight sections. + if (numDigits > 0 && ++counter > 8) + { + return -1; + } + // This could be '::'. + if (numDigits == 0 || ((index+1 < end) && addr[index+1] == chColon)) + { + return index; + } + numDigits = 0; + } + // This might be invalid or an IPv4address. 
If it's potentially an IPv4address, + // backup to just after the last valid character that matches hexseq. + else if (!XMLString::isHex(testChar)) + { + if (testChar == chPeriod && numDigits < 4 && numDigits > 0 && counter <= 6) + { + int back = index - numDigits - 1; + return (back >= start) ? back : start; + } + return -1; + } + // There can be at most 4 hex digits per group. + else if (++numDigits > 4) + { + return -1; + } + } + return (numDigits > 0 && ++counter <= 8) ? end : -1; } bool XMLUri::isGenericURI() 1.10 +64 -2 xml-xerces/c/src/xercesc/util/XMLUri.hpp Index: XMLUri.hpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLUri.hpp,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- XMLUri.hpp 16 May 2003 06:01:53 -0000 1.9 +++ XMLUri.hpp 25 Jul 2003 10:15:16 -0000 1.10 @@ -57,6 +57,32 @@ /* * $Id$ * $Log$ + * Revision 1.10 2003/07/25 10:15:16 gareth + * Patch by Michael Glavassevich + * + * The patch fixes Bugzilla #19787, #20006, #20009, #20010 and #20287, and + * several other issues. A summary of the changes is listed below: + * + * 1. Added '[' and ']' to reserved characters as per RFC 2732. + * 2. '[' and ']' added in RFC 2732, are not allowed in path segments, but + * may appear in the opaque part. + * 3. No URI can begin with a ':'. + * 4. URI has no scheme if ':' occurs in a URI after '?' or '#', it's part of + * the query string or fragment. + * 5. Whitespace (even escaped as %20) is not permitted in the authority + * portion of a URI. + * 6. IPv4 addresses must match 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." + * 1*3DIGIT. Since RFC 2732. + * 7. IPv4 addresses are 32-bit, therefore no segment may be larger than 255. + * This isn't expressed by the grammar. + * 8. Hostnames cannot end with a '-'. + * 9. Labels in a hostname must be 63 bytes or less [RFC 1034]. + * 10. Hostnames may be no longer than 255 bytes [RFC 1034]. 
(That + * restriction was already there. I just moved it inwards.) + * 11. Added support for IPv6 references added in RFC 2732. URIs such as + * http://[::ffff:1.2.3.4] are valid. The BNF in RFC 2373 isn't correct. IPv6 + * addresses are read according to section 2.2 of RFC 2373. + * * Revision 1.9 2003/05/16 06:01:53 knoaman * Partial implementation of the configurable memory manager. * @@ -342,6 +368,7 @@ static const XMLCh MARK_CHARACTERS[]; static const XMLCh SCHEME_CHARACTERS[]; static const XMLCh USERINFO_CHARACTERS[]; + static const XMLCh PATH_CHARACTERS[]; //helper method for getUriText void buildFullText(); @@ -356,6 +383,13 @@ * @return true if the string contains any reserved characters */ static bool isReservedCharacter(const XMLCh theChar); + + /** + * Determine whether a character is a path character. + * + * @return true if the character is a path character + */ + static bool isPathCharacter(const XMLCh theChar); /** * Determine whether a char is an unreserved character. @@ -381,11 +415,14 @@ static void isConformantUserInfo(const XMLCh* const userInfo); /** * Determine whether a string is syntactically capable of representing - * a valid IPv4 address or the domain name of a network host. + * a valid IPv4 address, IPv6 reference or the domain name of a network host. * * A valid IPv4 address consists of four decimal digit groups * separated by a '.'. * + * See RFC 2732 Section 3, and RFC 2373 Section 2.2, for the + * definition of IPv6 references. + * * A hostname consists of domain labels (each of which must begin and * end with an alphanumeric but may contain '-') separated by a '.'. * See RFC 2396 Section 3.2.2. @@ -394,6 +431,49 @@ * or hostname */ static bool isWellFormedAddress(const XMLCh* const addr); + + /** + * Determines whether a string is an IPv4 address as defined by + * RFC 2373, and under the further constraint that it must be a 32-bit + * address. 
Though not expressed in the grammar, in order to satisfy + * the 32-bit address constraint, each segment of the address cannot + * be greater than 255 (8 bits of information). + * + * @return true if the string is a syntactically valid IPv4 address + */ + static bool isWellFormedIPv4Address(const XMLCh* const addr, const int& length); + + /** + * Determines whether a string is an IPv6 reference as defined + * by RFC 2732, where IPv6address is defined in RFC 2373. The + * IPv6 address is parsed according to Section 2.2 of RFC 2373, + * with the additional constraint that the address be composed of + * 128 bits of information. + * + * Note: The BNF expressed in RFC 2373 Appendix B does not + * accurately describe section 2.2, and was in fact removed from + * RFC 3513, the successor of RFC 2373. + * + * @return true if the string is a syntactically valid IPv6 reference + */ + static bool isWellFormedIPv6Reference(const XMLCh* const addr, const int& length); + + /** + * Helper function for isWellFormedIPv6Reference which scans the + * hex sequences of an IPv6 address. It returns the index of the + * next character to scan in the address, or -1 if the string + * cannot match a valid IPv6 address. 
+ * + * @param addr the string to be scanned + * @param index the beginning index (inclusive) + * @param end the ending index (exclusive) + * @param counter a counter for the number of 16-bit sections read + * in the address + * + * @return the index of the next character to scan, or -1 if the + * string cannot match a valid IPv6 address + */ + static int scanHexSequence (const XMLCh* const addr, int index, int end, int& counter); /** * Get the indicator as to whether this URI uses the "generic URI" @@ -539,6 +619,11 @@ inline bool XMLUri::isReservedCharacter(const XMLCh theChar) { return (XMLString::indexOf(RESERVED_CHARACTERS, theChar) != -1); +} + +inline bool XMLUri::isPathCharacter(const XMLCh theChar) +{ + return (XMLString::indexOf(PATH_CHARACTERS, theChar) != -1); } inline bool XMLUri::isUnreservedCharacter(const XMLCh theChar)
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]