gareth      2003/07/25 03:15:17

  Modified:    c/src/xercesc/util XMLUri.cpp XMLUri.hpp
  Log:
  Patch by Michael Glavassevich
  
  The patch fixes Bugzilla #19787, #20006, #20009, #20010 and #20287, and
  several other issues. A summary of the changes is listed below:
  
  1. Added '[' and ']' to reserved characters as per RFC 2732.
  2. '[' and ']', added in RFC 2732, are not allowed in path segments, but
  may appear in the opaque part.
  3. No URI can begin with a ':'.
  4. A URI has no scheme if ':' occurs in the URI after '?' or '#'; in that
  case the ':' is part of the query string or fragment.
  5. Whitespace (even escaped as %20) is not permitted in the authority
  portion of a URI.
  6. IPv4 addresses must match 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "."
  1*3DIGIT (as of RFC 2732).
  7. IPv4 addresses are 32-bit, therefore no segment may be larger than 255.
  This isn't expressed by the grammar.
  8. Hostnames cannot end with a '-'.
  9. Labels in a hostname must be 63 bytes or less [RFC 1034].
  10. Hostnames may be no longer than 255 bytes [RFC 1034]. (That
  restriction was already there. I just moved it inwards.)
  11. Added support for IPv6 references added in RFC 2732. URIs such as
  http://[::ffff:1.2.3.4] are valid. The BNF in RFC 2373 isn't correct. IPv6
  addresses are read according to section 2.2 of RFC 2373.
  
  Revision  Changes    Path
  1.14      +411 -132  xml-xerces/c/src/xercesc/util/XMLUri.cpp
  
  Index: XMLUri.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLUri.cpp,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- XMLUri.cpp        24 Jul 2003 09:32:52 -0000      1.13
  +++ XMLUri.cpp        25 Jul 2003 10:15:16 -0000      1.14
  @@ -1,7 +1,7 @@
   /*
    * The Apache Software License, Version 1.1
    *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * Copyright (c) 2001-2003 The Apache Software Foundation.  All rights
    * reserved.
    *
    * Redistribution and use in source and binary forms, with or without
  @@ -82,7 +82,8 @@
   const XMLCh XMLUri::RESERVED_CHARACTERS[] =
   {
       chSemiColon, chForwardSlash, chQuestion, chColon, chAt,
  -    chAmpersand, chEqual, chPlus, chDollarSign, chComma, chNull
  +    chAmpersand, chEqual, chPlus, chDollarSign, chComma, chOpenSquare,
  +    chCloseSquare, chNull
   };
   
   //
  @@ -113,6 +114,16 @@
       chDollarSign, chPeriod, chNull
   };
   
  +//      pchar plus ';' and '/'.
  +//      pchar         = unreserved | escaped |
  +//                      ":" | "@" | "&" | "=" | "+" | "$" | ","
  +const XMLCh XMLUri::PATH_CHARACTERS[] =
  +{
  +    chSemiColon, chForwardSlash, chColon, chAt, chAmpersand, 
  +    chEqual, chPlus, chDollarSign, chComma, chNull
  +};
  +
  +
   // ---------------------------------------------------------------------------
   //  Local methods and data
   // ---------------------------------------------------------------------------
  @@ -397,27 +408,31 @@
        int index = 0;
        bool foundScheme = false;
   
  -     // Check for scheme, which must be before `/'. Also handle names with
  -     // DOS drive letters ('D:'), so 1-character schemes are not allowed.
  -    int colonIdx = XMLString::indexOf(trimedUriSpec, chColon);
  -    int slashIdx = XMLString::indexOf(trimedUriSpec, chForwardSlash);
  -
  -     if ((colonIdx < 2)                         ||
  -        (colonIdx > slashIdx && slashIdx != -1) )
  -    {
  -        int fragmentIdx = XMLString::indexOf(trimedUriSpec, chPound);  //'#'
  -        // A standalone base is a valid URI according to spec
  -        if ( !baseURI && fragmentIdx != 0 )
  +     // Check for scheme, which must be before `/', '?' or '#'. 
  +     // Also handle names with DOS drive letters ('D:'), 
  +     // so 1-character schemes are not allowed.
  +        int colonIdx = XMLString::indexOf(trimedUriSpec, chColon);
  +        int slashIdx = XMLString::indexOf(trimedUriSpec, chForwardSlash);
  +        int queryIdx = XMLString::indexOf(trimedUriSpec, chQuestion);
  +        int fragmentIdx = XMLString::indexOf(trimedUriSpec, chPound);
  +
  +        if ((colonIdx < 2) ||
  +            (colonIdx > slashIdx && slashIdx != -1) ||
  +            (colonIdx > queryIdx && queryIdx != -1) ||
  +            (colonIdx > fragmentIdx && fragmentIdx != -1))
           {
  -            ThrowXML(MalformedURLException, XMLExcepts::XMLNUM_URI_No_Scheme);
  +            // A standalone base is a valid URI according to spec
  +            if ( colonIdx == 0 || (!baseURI && fragmentIdx != 0) )
  +            {
  +                ThrowXML(MalformedURLException, XMLExcepts::XMLNUM_URI_No_Scheme);
  +            }
  +        }
  +        else
  +        {
  +            foundScheme = true;
  +            initializeScheme(trimedUriSpec);
  +            index = XMLString::stringLen(fScheme)+1;
           }
  -    }
  -     else
  -    {
  -        foundScheme = true;
  -     initializeScheme(trimedUriSpec);
  -        index = XMLString::stringLen(fScheme)+1;
  -    }
   
       // It's an error if we stop here
       if (index == trimedUriSpecLen || (foundScheme && (trimedUriSpec[index] == 
chPound)))
  @@ -678,12 +693,12 @@
   {
   
       int index = 0;
  -     int start = 0;
  +    int start = 0;
       const int end = XMLString::stringLen(uriSpec);
   
       //
       // server = [ [ userinfo "@" ] hostport ]
  -     // userinfo is everything up @,
  +    // userinfo is everything up @,
       //
       XMLCh* userinfo = (XMLCh*) fMemoryManager->allocate
       (
  @@ -696,7 +711,7 @@
       {
           XMLString::subString(userinfo, &(uriSpec[start]), 0, index);
           index++; // skip the @
  -             start += index;
  +        start += index;
       }
       else
       {
  @@ -705,25 +720,41 @@
   
       //
       // hostport = host [ ":" port ]
  -     // host is everything up to ':'
  +    // host is everything up to ':', or up to 
  +    // and including ']' if followed by ':'.
       //
  -     XMLCh* host = (XMLCh*) fMemoryManager->allocate
  +    XMLCh* host = (XMLCh*) fMemoryManager->allocate
       (
           (end+1) * sizeof(XMLCh)
       );//new XMLCh[end+1];
       ArrayJanitor<XMLCh> hostName(host, fMemoryManager);
  -    index = XMLString::indexOf(&(uriSpec[start]), chColon);
  +    
  +    // Search for port boundary.
  +    if (start < end && uriSpec[start] == chOpenSquare)
  +    {
  +     index = XMLString::indexOf(&(uriSpec[start]), chCloseSquare);
  +     if (index != -1)
  +     {
  +            // skip the ']'
  +            index = ((start + index + 1) < end 
  +              && uriSpec[start + index + 1] == chColon) ? index+1 : -1;
  +     }
  +    }
  +    else
  +    {
  +        index = XMLString::indexOf(&(uriSpec[start]), chColon);
  +    }
   
  -    if ( index != -1)
  +    if ( index != -1 )
       {
           XMLString::subString(host, &(uriSpec[start]), 0, index);
           index++;  // skip the :
  -             start +=index;
  +        start +=index;
       }
       else
       {
           XMLString::subString(host, &(uriSpec[start]), 0, end-start);
  -             start=end;
  +        start = end;
       }
   
       // port is everything after ":"
  @@ -758,7 +789,7 @@
       //
       setHost(host);
       setPort(port);
  -     setUserInfo(userinfo);
  +    setUserInfo(userinfo);
   }
   
   // scheme = alpha *( alpha | digit | "+" | "-" | "." )
  @@ -785,55 +816,111 @@
   
   void XMLUri::initializePath(const XMLCh* const uriSpec)
   {
  -     if ( !uriSpec )
  +    if ( !uriSpec )
       {
           ThrowXML1(MalformedURLException
                   , XMLExcepts::XMLNUM_URI_Component_Empty
                   , errMsg_PATH);
  -     }
  +    }
   
  -     int index = 0;
  -     int start = 0;
  +    int index = 0;
  +    int start = 0;
       int end = XMLString::stringLen(uriSpec);
  -     XMLCh testChar;
  +    XMLCh testChar;
   
  -     // path - everything up to query string or fragment
  -     while (index < end)
  +    // path - everything up to query string or fragment
  +    if (start < end)
       {
  -        testChar = uriSpec[index];
  -        if (testChar == chQuestion || testChar == chPound)
  -        {
  -            break;
  -        }
  -
  -        // check for valid escape sequence
  -        if (testChar == chPercent)
  +        // RFC 2732 only allows '[' and ']' to appear in the opaque part.
  +        if (!getScheme() || uriSpec[start] == chForwardSlash)
           {
  -            if (index+2 >= end ||
  -                !XMLString::isHex(uriSpec[index+1]) ||
  -                !XMLString::isHex(uriSpec[index+2]))
  +            // Scan path.
  +            // abs_path = "/"  path_segments
  +            // rel_path = rel_segment [ abs_path ]
  +            while (index < end)
               {
  -                XMLString::moveChars(value1, &(uriSpec[index]), 3);
  -                value1[3] = chNull;
  -                ThrowXML2(MalformedURLException
  -                        , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
  -                        , errMsg_PATH
  -                        , value1);
  -            }
  +                testChar = uriSpec[index];
  +                if (testChar == chQuestion || testChar == chPound)
  +                {
  +                    break;
  +                }
  +
  +                // check for valid escape sequence
  +                if (testChar == chPercent)
  +                {
  +                    if (index+2 >= end ||
  +                        !XMLString::isHex(uriSpec[index+1]) ||
  +                        !XMLString::isHex(uriSpec[index+2]))
  +                    {
  +                        XMLString::moveChars(value1, &(uriSpec[index]), 3);
  +                        value1[3] = chNull;
  +                        ThrowXML2(MalformedURLException
  +                                , 
XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
  +                                , errMsg_PATH
  +                                , value1);
  +                    }
  +                }
  +                else if (!isUnreservedCharacter(testChar) &&
  +                         !isPathCharacter(testChar))
  +                {
  +                    value1[0] = testChar;
  +                    value1[1] = chNull;
  +                    ThrowXML2(MalformedURLException
  +                            , XMLExcepts::XMLNUM_URI_Component_Invalid_Char
  +                            , errMsg_PATH
  +                            , value1);
  +                }
  +
  +                index++;
  +            }//while (index < end)
           }
  -        else if (!isReservedCharacter(testChar) &&
  -                 !isUnreservedCharacter(testChar))
  +        else
           {
  -            value1[0] = testChar;
  -            value1[1] = chNull;
  -            ThrowXML2(MalformedURLException
  -                    , XMLExcepts::XMLNUM_URI_Component_Invalid_Char
  -                    , errMsg_PATH
  -                    , value1);
  -        }
  +            // Scan opaque part.
  +            // opaque_part = uric_no_slash *uric
  +            while (index < end)
  +            {
  +                testChar = uriSpec[index];
  +                if (testChar == chQuestion || testChar == chPound)
  +                {
  +                    break;
  +                }
   
  -        index++;
  -    }//while (index < end)
  +                // check for valid escape sequence
  +                if (testChar == chPercent)
  +                {
  +                    if (index+2 >= end ||
  +                        !XMLString::isHex(uriSpec[index+1]) ||
  +                        !XMLString::isHex(uriSpec[index+2]))
  +                    {
  +                        XMLString::moveChars(value1, &(uriSpec[index]), 3);
  +                        value1[3] = chNull;
  +                        ThrowXML2(MalformedURLException
  +                                , 
XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
  +                                , errMsg_PATH
  +                                , value1);
  +                    }
  +                }
  +                // If the scheme specific part is opaque, it can contain '['
  +                // and ']'. uric_no_slash wasn't modified by RFC 2732, which
  +                // I've interpreted as an error in the spec, since the 
  +                // production should be equivalent to (uric - '/'), and uric
  +                // contains '[' and ']'.
  +                else if (!isUnreservedCharacter(testChar) &&
  +                         !isReservedCharacter(testChar))
  +                {
  +                    value1[0] = testChar;
  +                    value1[1] = chNull;
  +                    ThrowXML2(MalformedURLException
  +                            , XMLExcepts::XMLNUM_URI_Component_Invalid_Char
  +                            , errMsg_PATH
  +                            , value1);
  +                }
  +
  +                index++;
  +            }//while (index < end)           
  +        }
  +    } //if (start < end)
   
       if (getPath())
       {
  @@ -843,8 +930,8 @@
       fPath = (XMLCh*) fMemoryManager->allocate((index+1) * sizeof(XMLCh));//new 
XMLCh[index+1];
       XMLString::subString(fPath, uriSpec, start, index);
   
  -     // query - starts with ? and up to fragment or end
  -     if (testChar == chQuestion)
  +    // query - starts with ? and up to fragment or end
  +    if (testChar == chQuestion)
       {
           index++;
           start = index;
  @@ -870,8 +957,8 @@
                               , value1);
                   }
               }
  -            else if (!isReservedCharacter(testChar) &&
  -                     !isUnreservedCharacter(testChar))
  +            else if (!isUnreservedCharacter(testChar) &&
  +                     !isReservedCharacter(testChar))
               {
                   value1[0] = testChar;
                   value1[1] = chNull;
  @@ -895,8 +982,8 @@
           XMLString::subString(fQueryString, uriSpec, start, index);
       }
   
  -     // fragment - starts with #
  -     if (testChar == chPound)
  +    // fragment - starts with #
  +    if (testChar == chPound)
       {
           index++;
           start = index;
  @@ -918,8 +1005,8 @@
                               , value1);
                   }
               }
  -            else if (!isReservedCharacter(testChar) &&
  -                     !isUnreservedCharacter(testChar))
  +            else if (!isUnreservedCharacter(testChar) &&
  +                     !isReservedCharacter(testChar))
               {
                   value1[0] = testChar;
                   value1[1] = chNull;
  @@ -1034,8 +1121,7 @@
   
   void XMLUri::setHost(const XMLCh* const newHost)
   {
  -    if ( !newHost ||
  -        XMLString::isAllWhiteSpace(newHost))
  +    if ( !newHost )
       {
           if (getHost())
               fMemoryManager->deallocate(fHost);//delete [] fHost;
  @@ -1323,34 +1409,35 @@
   //  domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
   //  toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
   //
  -//  IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
  +//  IPv4address   = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
   //
   bool XMLUri::isWellFormedAddress(const XMLCh* const addrString)
   {
  -     if (!addrString)
  -        return false;
  -
  -    //
  -    // check length
  -    //
  -    XMLCh* tmpAddr = XMLString::replicate(addrString, 
XMLPlatformUtils::fgMemoryManager);
  -    ArrayJanitor<XMLCh>  janName(tmpAddr, XMLPlatformUtils::fgMemoryManager);
  -    XMLString::trim(tmpAddr);
  -    if ((!tmpAddr || !*tmpAddr) ||
  -        (XMLString::stringLen(tmpAddr) > 255) )
  +    // Check that we have a non-zero length string.
  +    if (!addrString || !*addrString)
           return false;
  +        
  +    // Get address length.
  +    int addrStrLen = XMLString::stringLen(addrString);
  +    
  +    // Check if the host is a valid IPv6reference.
  +    if (*addrString == chOpenSquare)
  +    {
  +        return isWellFormedIPv6Reference(addrString, addrStrLen);
  +    }
   
       //
  -    // the frist letter shall NOT be "." or "-"
  +    // Cannot start with a '.', '-', or end with a '-'.
       //
  -     if (*addrString == chPeriod ||
  -        *addrString == chDash    )
  +    if (*addrString == chPeriod ||
  +        *addrString == chDash ||
  +        addrString[addrStrLen-1] == chDash)
           return false;
   
  -     // rightmost domain label starting with digit indicates IP address
  -     // since top level domain label can only start with an alpha
  -     // see RFC 2396 Section 3.2.2
  -    int addrStrLen = XMLString::stringLen(addrString);
  +    // rightmost domain label starting with digit indicates IP address
  +    // since top level domain label can only start with an alpha
  +    // see RFC 2396 Section 3.2.2
  +    
       int lastPeriodPos = XMLString::lastIndexOf(addrString, chPeriod);
   
       // if the string ends with "."
  @@ -1369,48 +1456,26 @@
                        return false;
       }
   
  -     if (XMLString::isDigit(addrString[lastPeriodPos + 1]))
  +    if (XMLString::isDigit(addrString[lastPeriodPos + 1]))
       {
  -        //
  -        // IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
  -        //
  -        // make sure that
  -        // 1) we see only digits and dot separators,
  -        // 2) that any dot separator is preceded and followed by a digit
  -        // 3) that we find 3 dots
  -
  -        int numDots = 0;
  -        for (int i = 0; i < addrStrLen; i++)
  -        {
  -            if (addrString[i] == chPeriod)
  -            {
  -                if (((i > 0)  &&
  -                    (!XMLString::isDigit(addrString[i-1]))) ||
  -                    ((i + 1 < addrStrLen) &&
  -                    (!XMLString::isDigit(addrString[i+1])))  )
  -                {
  -                    return false;
  -                }
  -                numDots++;
  -            }
  -            else if (!XMLString::isDigit(addrString[i]))
  -            {
  -                return false;
  -            }
  -        } //for
  -
  -        if (numDots != 3)
  -        {
  -            return false;
  -        }
  +        return isWellFormedIPv4Address(addrString, addrStrLen);
       } // end of IPv4address
  -     else
  +    else
       {
           //
           //  hostname      = *( domainlabel "." ) toplabel [ "." ]
           //  domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
           //  toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
  -        //
  +        
  +        // RFC 2396 states that hostnames take the form described in 
  +        // RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According
  +        // to RFC 1034, hostnames are limited to 255 characters.
  +        if (addrStrLen > 255) {
  +            return false;
  +        }
  +        
  +        unsigned int labelCharCount = 0;
  +        
           // domain labels can contain alphanumerics and '-"
           // but must start and end with an alphanumeric
           for (int i = 0; i < addrStrLen; i++)
  @@ -1424,16 +1489,230 @@
                   {
                       return false;
                   }
  +                labelCharCount = 0;
               }
               else if (!XMLString::isAlphaNum(addrString[i]) &&
                         addrString[i] != chDash)
               {
                   return false;
               }
  +            // RFC 1034: Labels must be 63 characters or less.
  +            else if (++labelCharCount > 63) {
  +                return false;
  +            }
           } //for
       }
   
       return true;
  +}
  +
  +//
  +//  RFC 2732 amended RFC 2396 by replacing the definition 
  +//  of IPv4address with the one defined by RFC 2373.
  +//
  +//  IPv4address   = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
  +//
  +bool XMLUri::isWellFormedIPv4Address(const XMLCh* const addr, const int& length)
  +{
  +    int numDots = 0;
  +    int numDigits = 0;
  +    
  +    //
  +    // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
  +    //
  +    // make sure that
  +    // 1) we see only digits and dot separators,
  +    // 2) that any dot separator is preceded and followed by a digit
  +    // 3) that we find 3 dots
  +    // 4) that each segment contains 1 to 3 digits.
  +    // 5) that each segment is not greater than 255.
  +    
  +    for (int i = 0; i < length; ++i)
  +    {
  +        if (addr[i] == chPeriod)
  +        {
  +            if (((i > 0)  &&
  +                (!XMLString::isDigit(addr[i-1]))) ||
  +                ((i + 1 < length) &&
  +                (!XMLString::isDigit(addr[i+1])))  )
  +            {
  +               return false;
  +            }
  +            numDigits = 0;
  +            if (++numDots > 3)
  +            {
  +                return false;
  +            }
  +        }
  +        else if (!XMLString::isDigit(addr[i]))
  +        {
  +            return false;
  +        }
  +        // Check that that there are no more than three digits
  +        // in this segment.
  +        else if (++numDigits > 3)
  +        {
  +            return false;
  +        }
  +        // Check that this segment is not greater than 255.
  +        else if (numDigits == 3)
  +        {
  +            XMLCh first = addr[i-2];
  +            XMLCh second = addr[i-1];
  +            XMLCh last = addr[i];
  +            if (!(first < chDigit_2 || 
  +                 (first == chDigit_2 && 
  +                 (second < chDigit_5 || 
  +                 (second == chDigit_5 && last <= chDigit_5)))))
  +            {
  +                return false;
  +            }
  +        }
  +    } //for
  +    return (numDots == 3);
  +}
  +
  +//
  +//  IPv6reference = "[" IPv6address "]"
  +//
  +bool XMLUri::isWellFormedIPv6Reference(const XMLCh* const addr, const int& length)
  +{
  +    int index = 1;
  +    int end = length-1;
  +    
  +    // Check if string is a potential match for IPv6reference.
  +    if (!(length > 2 && addr[0] == chOpenSquare && addr[end] == chCloseSquare))
  +    {
  +        return false;
  +    }
  +    
  +    // Counter for the number of 16-bit sections read in the address.
  +    int counter = 0;
  +      
  +    // Scan hex sequence before possible '::' or IPv4 address.
  +    index = scanHexSequence(addr, index, end, counter);
  +    if (index == -1) 
  +    {
  +        return false;
  +    }
  +    // Address must contain 128-bits of information.
  +    else if (index == end) 
  +    {
  +       return (counter == 8);
  +    }
  +      
  +    if (index+1 < end && addr[index] == chColon) 
  +    {
  +        if (addr[index+1] == chColon) 
  +        {
  +            // '::' represents at least one 16-bit group of zeros.
  +            if (++counter > 8) 
  +            {
  +                return false;
  +            }
  +            index += 2;
  +            // Trailing zeros will fill out the rest of the address.
  +            if (index == end)
  +            {
  +                return true;
  +            }
  +        }
  +        // If the second character wasn't ':', in order to be valid,
  +        // the remainder of the string must match IPv4Address, 
  +        // and we must have read exactly 6 16-bit groups.
  +        else 
  +        {
  +            if (counter == 6)
  +            {
  +                XMLCh* ipVfour = (XMLCh*) 
  +                  XMLPlatformUtils::fgMemoryManager->allocate((length+1) * 
sizeof(XMLCh));
  +                XMLString::subString(ipVfour, addr, index+1, end);
  +                ArrayJanitor<XMLCh> janitor(ipVfour, 
XMLPlatformUtils::fgMemoryManager);
  +                int newLength = XMLString::stringLen(ipVfour);
  +                return isWellFormedIPv4Address(ipVfour, newLength);
  +            }
  +            else
  +            {
  +                return false;
  +            }
  +        }
  +    }
  +    else 
  +    {
  +       return false;
  +    }
  +      
  +    // 3. Scan hex sequence after '::'.
  +    int prevCount = counter;
  +    index = scanHexSequence(addr, index, end, counter);
  +    if (index == -1) 
  +    {
  +        return false;
  +    }
  +    // If this is the end of the address then
  +    // we've got 128-bits of information.
  +    else if (index == end) 
  +    {
  +        return true;
  +    }
  +
  +    // The address ends in an IPv4 address, or it is invalid. 
  +    // scanHexSequence has already made sure that we have the right number of bits. 
  +    XMLCh* ipVfour = 
  +      (XMLCh*) XMLPlatformUtils::fgMemoryManager->allocate((length+1) * 
sizeof(XMLCh));
  +    XMLString::subString(ipVfour, addr, (counter > prevCount) ? index+1 : index, 
end);
  +    ArrayJanitor<XMLCh> janitor(ipVfour, XMLPlatformUtils::fgMemoryManager);
  +    int newLength = XMLString::stringLen(ipVfour);
  +    return isWellFormedIPv4Address(ipVfour, newLength);
  +}
  +
  +//
  +//  For use with isWellFormedIPv6Reference only.
  +//
  +int XMLUri::scanHexSequence (const XMLCh* const addr, int index, int end, int& 
counter)
  +{
  +    XMLCh testChar = chNull;
  +    int numDigits = 0;
  +    int start = index;
  +      
  +    // Trying to match the following productions:
  +    // hexseq = hex4 *( ":" hex4)
  +    // hex4   = 1*4HEXDIG
  +    for (; index < end; ++index)
  +    {
  +             testChar = addr[index];
  +             if (testChar == chColon)
  +             {
  +                 // IPv6 addresses are 128-bit, so there can be at most eight 
sections.
  +                 if (numDigits > 0 && ++counter > 8)
  +                 {
  +                     return -1;
  +                 }
  +                 // This could be '::'.
  +                 if (numDigits == 0 || ((index+1 < end) && addr[index+1] == 
chColon))
  +                 {
  +                     return index;
  +                 }
  +                 numDigits = 0;
  +        }
  +        // This might be invalid or an IPv4address. If it's potentially an 
IPv4address,
  +        // backup to just after the last valid character that matches hexseq.
  +        else if (!XMLString::isHex(testChar))
  +        {
  +            if (testChar == chPeriod && numDigits < 4 && numDigits > 0 && counter 
<= 6)
  +            {
  +                int back = index - numDigits - 1;
  +                return (back >= start) ? back : start;
  +            }
  +            return -1;
  +        }
  +        // There can be at most 4 hex digits per group.
  +        else if (++numDigits > 4)
  +        {
  +            return -1;
  +        }
  +    }
  +    return (numDigits > 0 && ++counter <= 8) ? end : -1;
   }
   
   bool XMLUri::isGenericURI()
  
  
  
  1.10      +64 -2     xml-xerces/c/src/xercesc/util/XMLUri.hpp
  
  Index: XMLUri.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLUri.hpp,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- XMLUri.hpp        16 May 2003 06:01:53 -0000      1.9
  +++ XMLUri.hpp        25 Jul 2003 10:15:16 -0000      1.10
  @@ -57,6 +57,32 @@
   /*
    * $Id$
    * $Log$
  + * Revision 1.10  2003/07/25 10:15:16  gareth
  + * Patch by Michael Glavassevich
  + *
  + * The patch fixes Bugzilla #19787, #20006, #20009, #20010 and #20287, and
  + * several other issues. A summary of the changes is listed below:
  + *
  + * 1. Added '[' and ']' to reserved characters as per RFC 2732.
  + * 2. '[' and ']' added in RFC 2732, are not allowed in path segments, but
  + * may appear in the opaque part.
  + * 3. No URI can begin with a ':'.
  + * 4. URI has no scheme if ':' occurs in a URI after '?' or '#', it's part of
  + * the query string or fragment.
  + * 5. Whitespace (even escaped as %20) is not permitted in the authority
  + * portion of a URI.
  + * 6. IPv4 addresses must match 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "."
  + * 1*3DIGIT. Since RFC 2732.
  + * 7. IPv4 addresses are 32-bit, therefore no segment may be larger than 255.
  + * This isn't expressed by the grammar.
  + * 8. Hostnames cannot end with a '-'.
  + * 9. Labels in a hostname must be 63 bytes or less [RFC 1034].
  + * 10. Hostnames may be no longer than 255 bytes [RFC 1034]. (That
  + * restriction was already there. I just moved it inwards.
  + * 11. Added support for IPv6 references added in RFC 2732. URIs such as
  + * http://[::ffff:1.2.3.4] are valid. The BNF in RFC 2373 isn't correct. IPv6
  + * addresses are read according to section 2.2 of RFC 2373.
  + *
    * Revision 1.9  2003/05/16 06:01:53  knoaman
    * Partial implementation of the configurable memory manager.
    *
  @@ -342,6 +368,7 @@
       static const XMLCh MARK_CHARACTERS[];
       static const XMLCh SCHEME_CHARACTERS[];
       static const XMLCh USERINFO_CHARACTERS[];
  +    static const XMLCh PATH_CHARACTERS[];
   
       //helper method for getUriText
       void buildFullText();
  @@ -356,6 +383,13 @@
        * @return true if the string contains any reserved characters
        */
       static bool isReservedCharacter(const XMLCh theChar);
  +    
  +    /**
  +     * Determine whether a character is a path character:
  +     *
  +     * @return true if the character is path character
  +     */
  +    static bool isPathCharacter(const XMLCh theChar);
   
       /**
        * Determine whether a char is an unreserved character.
  @@ -381,11 +415,14 @@
       static void isConformantUserInfo(const XMLCh* const userInfo);
       /**
        * Determine whether a string is syntactically capable of representing
  -     * a valid IPv4 address or the domain name of a network host.
  +     * a valid IPv4 address, IPv6 reference or the domain name of a network host.
        *
        * A valid IPv4 address consists of four decimal digit groups
        * separated by a '.'.
        *
  +     * See RFC 2732 Section 3, and RFC 2373 Section 2.2, for the 
  +     * definition of IPv6 references.
  +     *
        * A hostname consists of domain labels (each of which must begin and
        * end with an alphanumeric but may contain '-') separated by a '.'.
        * See RFC 2396 Section 3.2.2.
  @@ -394,6 +431,49 @@
        *              or hostname
        */
        static bool isWellFormedAddress(const XMLCh* const addr);
  +     
  +    /**
  +     * Determines whether a string is an IPv4 address as defined by 
  +     * RFC 2373, and under the further constraint that it must be a 32-bit
  +     * address. Though not expressed in the grammar, in order to satisfy 
  +     * the 32-bit address constraint, each segment of the address cannot 
  +     * be greater than 255 (8 bits of information).
  +     *
  +     * @return true if the string is a syntactically valid IPv4 address
  +     */
  +     static bool isWellFormedIPv4Address(const XMLCh* const addr, const int& 
length);
  +     
  +    /**
  +     * Determines whether a string is an IPv6 reference as defined
  +     * by RFC 2732, where IPv6address is defined in RFC 2373. The 
  +     * IPv6 address is parsed according to Section 2.2 of RFC 2373,
  +     * with the additional constraint that the address be composed of
  +     * 128 bits of information.
  +     *
  +     * Note: The BNF expressed in RFC 2373 Appendix B does not 
  +     * accurately describe section 2.2, and was in fact removed from
  +     * RFC 3513, the successor of RFC 2373.
  +     *
  +     * @return true if the string is a syntactically valid IPv6 reference
  +     */
  +     static bool isWellFormedIPv6Reference(const XMLCh* const addr, const int& 
length);
  +     
  +    /**
  +     * Helper function for isWellFormedIPv6Reference which scans the 
  +     * hex sequences of an IPv6 address. It returns the index of the 
  +     * next character to scan in the address, or -1 if the string 
  +     * cannot match a valid IPv6 address. 
  +     *
  +     * @param address the string to be scanned
  +     * @param index the beginning index (inclusive)
  +     * @param end the ending index (exclusive)
  +     * @param counter a counter for the number of 16-bit sections read
  +     * in the address
  +     *
  +     * @return the index of the next character to scan, or -1 if the
  +     * string cannot match a valid IPv6 address
  +     */
  +     static int scanHexSequence (const XMLCh* const addr, int index, int end, int& 
counter);
   
       /**
        * Get the indicator as to whether this URI uses the "generic URI"
  @@ -539,6 +619,11 @@
   inline bool XMLUri::isReservedCharacter(const XMLCh theChar)
   {
       return (XMLString::indexOf(RESERVED_CHARACTERS, theChar) != -1);
  +}
  +
  +inline bool XMLUri::isPathCharacter(const XMLCh theChar)
  +{
  +    return (XMLString::indexOf(PATH_CHARACTERS, theChar) != -1);
   }
   
   inline bool XMLUri::isUnreservedCharacter(const XMLCh theChar)
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to