knoaman     2003/09/30 17:20:41

  Modified:    c/src/xercesc/util XMLUri.hpp XMLUri.cpp
  Log:
  Add a static method to check whether a given string is a valid URI.
  
  Revision  Changes    Path
  1.12      +21 -4     xml-xerces/c/src/xercesc/util/XMLUri.hpp
  
  Index: XMLUri.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLUri.hpp,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- XMLUri.hpp        25 Sep 2003 22:23:25 -0000      1.11
  +++ XMLUri.hpp        1 Oct 2003 00:20:41 -0000       1.12
  @@ -57,6 +57,9 @@
   /*
    * $Id$
    * $Log$
  + * Revision 1.12  2003/10/01 00:20:41  knoaman
  + * Add a static method to check whether a given string is a valid URI.
  + *
    * Revision 1.11  2003/09/25 22:23:25  peiyongz
    * Implementation of Serialization/Deserialization
    *
  @@ -144,7 +147,7 @@
    *
    */
   
  - class XMLUTIL_EXPORT XMLUri : public XSerializable, public XMemory
  +class XMLUTIL_EXPORT XMLUri : public XSerializable, public XMemory
   {
   public:
   
  @@ -366,6 +369,12 @@
        */
       static bool isURIString(const XMLCh* const uric);
   
  +    /**
  +     * Determine whether a given string is a valid URI
  +     */
  +    static bool isValidURI( const XMLUri* const baseURI
  +                          , const XMLCh* const uriStr);
  +
       /***
        * Support for Serialization/De-serialization
        ***/
  @@ -452,7 +461,7 @@
        *
        * @return true if the string is a syntactically valid IPv4 address
        */
  -     static bool isWellFormedIPv4Address(const XMLCh* const addr, const int& length);
  +     static bool isWellFormedIPv4Address(const XMLCh* const addr, const int length);
        
       /**
        * Determines whether a string is an IPv6 reference as defined
  @@ -467,7 +476,7 @@
        *
        * @return true if the string is a syntactically valid IPv6 reference
        */
  -     static bool isWellFormedIPv6Reference(const XMLCh* const addr, const int& length);
  +     static bool isWellFormedIPv6Reference(const XMLCh* const addr, const int length);
        
       /**
        * Helper function for isWellFormedIPv6Reference which scans the 
  @@ -553,6 +562,14 @@
         *
         */
        void cleanUp();
  +
  +    static bool isConformantSchemeName(const XMLCh* const scheme,
  +                                       const int schemeLen);
  +    static bool processScheme(const XMLCh* const uriStr, int& index);
  +    static bool processAuthority(const XMLCh* const uriStr, const int authLen);
  +    static bool isWellFormedAddress(const XMLCh* const addr, const int addrLen);
  +    static bool processPath(const XMLCh* const pathStr, const int pathStrLen,
  +                            const bool isSchemePresent);
   
       // -----------------------------------------------------------------------
       //  Data members
  
  
  
  1.16      +412 -32   xml-xerces/c/src/xercesc/util/XMLUri.cpp
  
  Index: XMLUri.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLUri.cpp,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -r1.15 -r1.16
  --- XMLUri.cpp        25 Sep 2003 22:23:25 -0000      1.15
  +++ XMLUri.cpp        1 Oct 2003 00:20:41 -0000       1.16
  @@ -61,13 +61,10 @@
   // ---------------------------------------------------------------------------
   //  Includes
   // ---------------------------------------------------------------------------
  -#include <xercesc/util/PlatformUtils.hpp>
   #include <xercesc/util/Janitor.hpp>
   #include <xercesc/util/XMLURL.hpp>
   #include <xercesc/util/XMLUri.hpp>
  -#include <xercesc/util/XMLString.hpp>
  -#include <xercesc/util/XMLUniDefs.hpp>
  -#include <xercesc/util/XMLUni.hpp>
  +#include <xercesc/util/XMLChar.hpp>
   
   XERCES_CPP_NAMESPACE_BEGIN
   
  @@ -1512,12 +1509,11 @@
   //
   //  IPv4address   = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
   //
  -bool XMLUri::isWellFormedIPv4Address(const XMLCh* const addr, const int& length)
  +bool XMLUri::isWellFormedIPv4Address(const XMLCh* const addr, const int length)
   {
       int numDots = 0;
       int numDigits = 0;
       
  -    //
       // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
       //
       // make sure that
  @@ -1525,24 +1521,20 @@
       // 2) that any dot separator is preceded and followed by a digit
       // 3) that we find 3 dots
       // 4) that each segment contains 1 to 3 digits.
  -    // 5) that each segment is not greater than 255.
  -    
  +    // 5) that each segment is not greater than 255.    
       for (int i = 0; i < length; ++i)
       {
           if (addr[i] == chPeriod)
           {
  -            if (((i > 0)  &&
  -                (!XMLString::isDigit(addr[i-1]))) ||
  -                ((i + 1 < length) &&
  -                (!XMLString::isDigit(addr[i+1])))  )
  +            if ((i == 0) ||
  +                (i+1 == length) ||
  +                !XMLString::isDigit(addr[i+1]))
               {
                  return false;
               }
               numDigits = 0;
               if (++numDots > 3)
  -            {
                   return false;
  -            }
           }
           else if (!XMLString::isDigit(addr[i]))
           {
  @@ -1575,7 +1567,7 @@
   //
   //  IPv6reference = "[" IPv6address "]"
   //
  -bool XMLUri::isWellFormedIPv6Reference(const XMLCh* const addr, const int& length)
  +bool XMLUri::isWellFormedIPv6Reference(const XMLCh* const addr, const int length)
   {
       int index = 1;
       int end = length-1;
  @@ -1623,18 +1615,9 @@
           else 
           {
               if (counter == 6)
  -            {
  -                XMLCh* ipVfour = (XMLCh*) 
  -                  XMLPlatformUtils::fgMemoryManager->allocate((length+1) * sizeof(XMLCh));
  -                XMLString::subString(ipVfour, addr, index+1, end);
  -                ArrayJanitor<XMLCh> janitor(ipVfour, XMLPlatformUtils::fgMemoryManager);
  -                int newLength = XMLString::stringLen(ipVfour);
  -                return isWellFormedIPv4Address(ipVfour, newLength);
  -            }
  +                return isWellFormedIPv4Address(addr+index+1, end-index-1);
               else
  -            {
                   return false;
  -            }
           }
       }
       else 
  @@ -1658,12 +1641,8 @@
   
       // The address ends in an IPv4 address, or it is invalid. 
       // scanHexSequence has already made sure that we have the right number of bits. 
  -    XMLCh* ipVfour = 
  -      (XMLCh*) XMLPlatformUtils::fgMemoryManager->allocate((length+1) * sizeof(XMLCh));
  -    XMLString::subString(ipVfour, addr, (counter > prevCount) ? index+1 : index, end);
  -    ArrayJanitor<XMLCh> janitor(ipVfour, XMLPlatformUtils::fgMemoryManager);
  -    int newLength = XMLString::stringLen(ipVfour);
  -    return isWellFormedIPv4Address(ipVfour, newLength);
  +    int shiftCount = (counter > prevCount) ? index+1 : index;
  +    return isWellFormedIPv4Address(addr + shiftCount, end - shiftCount);
   }
   
   //
  @@ -1811,6 +1790,407 @@
   
       // Cap it off in case the last op was not a string copy
       *outPtr = 0;
  +}
  +
  +// NOTE: no check for NULL value of uriStr (caller responsiblilty)
  +bool XMLUri::isValidURI(const XMLUri* const baseURI
  +                       , const XMLCh* const uriStr)
  +{
  +    // get a trimmed version of uriStr
  +    // uriStr will NO LONGER be used in this function.
  +    const XMLCh* trimedUriSpec = uriStr;
  +
  +    while (XMLChar1_0::isWhitespace(*trimedUriSpec))
  +        trimedUriSpec++;
  +
  +    int trimedUriSpecLen = XMLString::stringLen(trimedUriSpec);
  +
  +    while (trimedUriSpecLen) {
  +        if (XMLChar1_0::isWhitespace(trimedUriSpec[trimedUriSpecLen-1]))
  +            trimedUriSpecLen--;
  +        else
  +            break;
  +    }
  +
  +    if (trimedUriSpecLen == 0)
  +    {
  +        if (!baseURI)
  +            return false;
  +        else
  +            return true;
  +    }
  +
  +    int index = 0;
  +    bool foundScheme = false;
  +
  +    // Check for scheme, which must be before `/', '?' or '#'. 
  +    // Also handle names with DOS drive letters ('D:'), 
  +    // so 1-character schemes are not allowed.
  +    int colonIdx = XMLString::indexOf(trimedUriSpec, chColon);
  +    int slashIdx = XMLString::indexOf(trimedUriSpec, chForwardSlash);
  +    int queryIdx = XMLString::indexOf(trimedUriSpec, chQuestion);
  +    int fragmentIdx = XMLString::indexOf(trimedUriSpec, chPound);
  +
  +    if ((colonIdx < 2) ||
  +        (colonIdx > slashIdx && slashIdx != -1) ||
  +        (colonIdx > queryIdx && queryIdx != -1) ||
  +        (colonIdx > fragmentIdx && fragmentIdx != -1))
  +    {
  +        // A standalone base is a valid URI according to spec
  +        if (colonIdx == 0 || (!baseURI && fragmentIdx != 0))
  +            return false;
  +    }
  +    else
  +    {
  +        if (!processScheme(trimedUriSpec, index))
  +            return false;
  +        foundScheme = true;
  +        ++index;
  +    }
  +
  +    // It's an error if we stop here
  +    if (index == trimedUriSpecLen || (foundScheme && (trimedUriSpec[index] == chPound)))
  +        return false;
  +
  +     // two slashes means generic URI syntax, so we get the authority
  +    const XMLCh* authUriSpec = trimedUriSpec +  index;
  +    if (((index+1) < trimedUriSpecLen) &&
  +        XMLString::startsWith(authUriSpec, DOUBLE_SLASH))
  +    {
  +        index += 2;
  +        int startPos = index;
  +
  +        // get authority - everything up to path, query or fragment
  +        XMLCh testChar;
  +        while (index < trimedUriSpecLen)
  +        {
  +            testChar = trimedUriSpec[index];
  +            if (testChar == chForwardSlash ||
  +                testChar == chQuestion     ||
  +                testChar == chPound         )
  +            {
  +                break;
  +            }
  +
  +            index++;
  +        }
  +
  +        // if we found authority, parse it out, otherwise we set the
  +        // host to empty string
  +        if (index > startPos)
  +        {
  +            if (!processAuthority(trimedUriSpec + startPos, index - startPos))
  +                return false;
  +        }
  +    }
  +
  +    // we need to check if index has exceed the lenght or not
  +    if (index < trimedUriSpecLen)
  +    {
  +         if (!processPath(trimedUriSpec + index, trimedUriSpecLen - index, foundScheme))
  +            return false;
  +    }
  +
  +    return true;
  +}
  +
  +bool XMLUri::isWellFormedAddress(const XMLCh* const addrString,
  +                                 const int addrStrLen)
  +{
  +    // Check that we have a non-zero length string.
  +    if (addrStrLen == 0)
  +        return false;
  +        
  +    // Check if the host is a valid IPv6reference.
  +    if (*addrString == chOpenSquare)
  +    {
  +        return isWellFormedIPv6Reference(addrString, addrStrLen);
  +    }
  +
  +    //
  +    // Cannot start with a '.', '-', or end with a '-'.
  +    //
  +    if (*addrString == chPeriod ||
  +        *addrString == chDash ||
  +        addrString[addrStrLen-1] == chDash)
  +        return false;
  +
  +    // rightmost domain label starting with digit indicates IP address
  +    // since top level domain label can only start with an alpha
  +    // see RFC 2396 Section 3.2.2
  +    
  +    int lastPeriodPos = XMLString::lastIndexOf(chPeriod, addrString, addrStrLen);
  +
  +    // if the string ends with "."
  +    // get the second last "."
  +    if (lastPeriodPos + 1 == addrStrLen)
  +    {
  +        lastPeriodPos = XMLString::lastIndexOf(chPeriod, addrString, lastPeriodPos);
  +
  +        if ( XMLString::isDigit(addrString[lastPeriodPos + 1]))
  +                     return false;
  +    }
  +
  +    if (XMLString::isDigit(addrString[lastPeriodPos + 1]))
  +    {
  +        return isWellFormedIPv4Address(addrString, addrStrLen);
  +    } // end of IPv4address
  +    else
  +    {
  +        //
  +        //  hostname      = *( domainlabel "." ) toplabel [ "." ]
  +        //  domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
  +        //  toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
  +        
  +        // RFC 2396 states that hostnames take the form described in 
  +        // RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According
  +        // to RFC 1034, hostnames are limited to 255 characters.
  +        if (addrStrLen > 255) {
  +            return false;
  +        }
  +        
  +        unsigned int labelCharCount = 0;
  +        
  +        // domain labels can contain alphanumerics and '-"
  +        // but must start and end with an alphanumeric
  +        for (int i = 0; i < addrStrLen; i++)
  +        {
  +            if (addrString[i] == chPeriod)
  +            {
  +              if (((i > 0)  &&
  +                   (!XMLString::isAlphaNum(addrString[i-1]))) ||
  +                  ((i + 1 < addrStrLen) &&
  +                   (!XMLString::isAlphaNum(addrString[i+1])))  )
  +                {
  +                    return false;
  +                }
  +                labelCharCount = 0;
  +            }
  +            else if (!XMLString::isAlphaNum(addrString[i]) &&
  +                      addrString[i] != chDash)
  +            {
  +                return false;
  +            }
  +            // RFC 1034: Labels must be 63 characters or less.
  +            else if (++labelCharCount > 63) {
  +                return false;
  +            }
  +        } //for
  +    }
  +
  +    return true;
  +}
  +
  +bool XMLUri::processScheme(const XMLCh* const schemeStr, int& index)
  +{
  +    const XMLCh* tmpPtr = XMLString::findAny(schemeStr, SCHEME_SEPARATORS);
  +
  +    if (tmpPtr) {
  +        index = tmpPtr - schemeStr;
  +        return isConformantSchemeName(schemeStr, index);
  +    }
  +    else {
  +        return false;
  +    }
  +}
  +
  +
  +bool XMLUri::isConformantSchemeName( const XMLCh* const scheme
  +                                   , const int schemeLen)
  +{
  +    if (!XMLString::isAlpha(*scheme))     // first: alpha
  +        return false;
  +
  +    // second onwards: ( alpha | digit | "+" | "-" | "." )
  +    for (int i=1; i<schemeLen; i++)
  +    {
  +        if ( !XMLString::isAlphaNum(scheme[i]) &&
  +             (XMLString::indexOf(SCHEME_CHARACTERS, scheme[i]) == -1))
  +            return false;
  +    }
  +
  +    return true;
  +}
  +
  +bool XMLUri::processAuthority( const XMLCh* const authSpec
  +                             , const int authLen)
  +{
  +    int index = XMLString::indexOf(authSpec, chAt);
  +    int start = 0;
  +
  +    // server = [ [ userinfo "@" ] hostport ]
  +    // userinfo is everything up @,
  +    const XMLCh* userinfo;
  +    int userInfoLen = 0;
  +    if ((index != -1) && (index < authLen))
  +    {
  +        userinfo = authSpec;
  +        userInfoLen = index;
  +        start = index + 1;
  +    }
  +    else
  +    {
  +        userinfo = XMLUni::fgZeroLenString;
  +    }
  +
  +    // hostport = host [ ":" port ]
  +    // host is everything up to ':', or up to 
  +    // and including ']' if followed by ':'.
  +    //
  +    // Search for port boundary.
  +    const XMLCh* host;
  +    int hostLen = 0;
  +    if ((start < authLen) && (authSpec[start] == chOpenSquare))
  +    {
  +     index = XMLString::indexOf(&(authSpec[start]), chCloseSquare);
  +     if ((index != -1) && (index < authLen))
  +     {
  +            // skip the ']'
  +            index = ((start + index + 1) < authLen
  +              && authSpec[start + index + 1] == chColon) ? index+1 : -1;
  +     }
  +    }
  +    else
  +    {
  +        index = XMLString::indexOf(&(authSpec[start]), chColon);
  +        if (index >= authLen)
  +            index = -1;
  +    }
  +
  +    host = &(authSpec[start]);
  +    if (index != -1)
  +    {
  +        hostLen = index;
  +        start += index + 1;  // skip the :
  +    }
  +    else
  +    {
  +        hostLen = authLen - start;
  +        start = authLen;
  +    }
  +
  +    // port is everything after ":"
  +    int port = -1;
  +    if ((hostLen) &&   // non empty host
  +        (index != -1)                    &&   // ":" found
  +        (start < authLen)                     )   // ":" is not the last
  +    {
  +        const XMLCh* portStr = &(authSpec[start]);
  +        if (*portStr)
  +        {
  +            port = 0;
  +            for (int i=0; i<(authLen - start); i++)
  +            {
  +                if (portStr[i] < chDigit_0 || portStr[i] > chDigit_9)
  +                    return false;
  +
  +                port = (port * 10) + (int) (portStr[i] - chDigit_0);              
  +            }
  +        
  +        }
  +    }
  +
  +    // The order is important, do not change
  +    if (!isWellFormedAddress(host, hostLen))
  +        return false;
  +
  +    // check port number
  +    if ((port > 65535) || (port < 0 && port != -1))
  +        return false;
  +
  +    // check userinfo
  +    index = 0;
  +     while (index < userInfoLen)
  +    {
  +        if (isUnreservedCharacter(userinfo[index]) ||
  +            (XMLString::indexOf(USERINFO_CHARACTERS, userinfo[index]) != -1))
  +        {
  +            index++;
  +        }
  +        else if (userinfo[index] == chPercent)               // '%'
  +        {
  +            if (XMLString::isHex(userinfo[index+1]) &&     // 1st hex
  +                XMLString::isHex(userinfo[index+2])  )     // 2nd hex
  +                index +=3;
  +            else
  +                return false;
  +        }
  +        else
  +            return false;
  +    } //while
  +
  +    return true;
  +}
  +
  +bool XMLUri::processPath(const XMLCh* const pathStr,
  +                         const int pathStrLen,
  +                         const bool isSchemePresent)
  +{
  +    if (pathStrLen != 0)
  +    {
  +        int index = 0;
  +        XMLCh testChar = chNull;
  +        bool isOpaque = (!isSchemePresent || *pathStr == chForwardSlash);
  +
  +        // path - everything up to query string or fragment
  +        //
  +        // RFC 2732 only allows '[' and ']' to appear in the opaque part.
  +        while (index < pathStrLen)
  +        {
  +            testChar = pathStr[index];
  +            if (testChar == chQuestion || testChar == chPound)
  +                break;
  +
  +            if (testChar == chPercent)
  +            {
  +                if (index+2 >= pathStrLen ||
  +                    !XMLString::isHex(pathStr[index+1]) ||
  +                    !XMLString::isHex(pathStr[index+2]))
  +                        return false;
  +            }
  +            else if (!isUnreservedCharacter(testChar) &&
  +                     ((isOpaque && !isPathCharacter(testChar)) ||
  +                      (!isOpaque && !isReservedCharacter(testChar))))
  +            {
  +                return false;
  +            }
  +
  +            index++;
  +        }
  +
  +        // query - starts with ? and up to fragment or end
  +        // fragment - starts with #
  +        bool isQuery = (testChar == chQuestion);
  +        if (isQuery || testChar == chPound)
  +        {
  +            index++;
  +            while (index < pathStrLen)
  +            {
  +                testChar = pathStr[index];
  +                if (testChar == chPound && isQuery) {
  +                    isQuery = false;
  +                    index++;
  +                    continue;
  +                }
  +
  +                if (testChar == chPercent)
  +                {
  +                    if (index+2 >= pathStrLen ||
  +                        !XMLString::isHex(pathStr[index+1]) ||
  +                        !XMLString::isHex(pathStr[index+2]))
  +                        return false;
  +                }
  +                else if (!isUnreservedCharacter(testChar) &&
  +                         !isReservedCharacter(testChar))
  +                {
  +                    return false;
  +                }
  +                index++;
  +            }
  +        }
  +    } //if (pathStrLen...)
  +
  +    return true;
   }
   
   /***
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to