tng 2003/01/06 11:43:18 Modified: c/src/xercesc/util XMLUri.hpp XMLURL.cpp XMLURL.hpp Log: New feature StandardUriConformant to force strict standard uri conformance. Revision Changes Path 1.7 +14 -11 xml-xerces/c/src/xercesc/util/XMLUri.hpp Index: XMLUri.hpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLUri.hpp,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- XMLUri.hpp 21 Nov 2002 15:42:39 -0000 1.6 +++ XMLUri.hpp 6 Jan 2003 19:43:18 -0000 1.7 @@ -57,6 +57,9 @@ /* * $Id$ * $Log$ + * Revision 1.7 2003/01/06 19:43:18 tng + * New feature StandardUriConformant to force strict standard uri conformance. + * * Revision 1.6 2002/11/21 15:42:39 gareth * Implemented copy constructor and operator =. Patch by Jennifer Schachter. * @@ -317,6 +320,16 @@ // Miscellaneous methods // ----------------------------------------------------------------------- + /** + * Determine whether a given string contains only URI characters (also + * called "uric" in RFC 2396). uric consist of all reserved + * characters, unreserved characters and escaped characters. + * + * @return true if the string is comprised of uric, false otherwise + */ + static bool isURIString(const XMLCh* const uric); + + private: static const XMLCh RESERVED_CHARACTERS[]; @@ -360,16 +373,6 @@ * @return true if the scheme is conformant, false otherwise */ static void isConformantUserInfo(const XMLCh* const userInfo); - - /** - * Determine whether a given string contains only URI characters (also - * called "uric" in RFC 2396). uric consist of all reserved - * characters, unreserved characters and escaped characters. - * - * @return true if the string is comprised of uric, false otherwise - */ - static bool isURIString(const XMLCh* const uric); - /** * Determine whether a string is syntactically capable of representing * a valid IPv4 address or the domain name of a network host. 1.5 +34 -151 xml-xerces/c/src/xercesc/util/XMLURL.cpp Index: XMLURL.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLURL.cpp,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- XMLURL.cpp 6 Dec 2002 17:05:29 -0000 1.4 +++ XMLURL.cpp 6 Jan 2003 19:43:18 -0000 1.5 @@ -72,6 +72,7 @@ #include <xercesc/util/XMLString.hpp> #include <xercesc/util/XMLUniDefs.hpp> #include <xercesc/util/XMLUni.hpp> +#include <xercesc/util/XMLUri.hpp> XERCES_CPP_NAMESPACE_BEGIN @@ -191,6 +192,7 @@ , fQuery(0) , fUser(0) , fURLText(0) + , fHasInvalidChar(false) { } @@ -206,6 +208,7 @@ , fQuery(0) , fUser(0) , fURLText(0) + , fHasInvalidChar(false) { try { @@ -230,6 +233,7 @@ , fQuery(0) , fUser(0) , fURLText(0) + , fHasInvalidChar(false) { XMLCh* tmpRel = XMLString::transcode(relativeURL); ArrayJanitor<XMLCh> janRel(tmpRel); @@ -256,6 +260,7 @@ , fQuery(0) , fUser(0) , fURLText(0) + , fHasInvalidChar(false) { try { @@ -280,6 +285,7 @@ , fQuery(0) , fUser(0) , fURLText(0) + , fHasInvalidChar(false) { XMLCh* tmpRel = XMLString::transcode(relativeURL); ArrayJanitor<XMLCh> janRel(tmpRel); @@ -306,6 +312,7 @@ , fQuery(0) , fUser(0) , fURLText(0) + , fHasInvalidChar(false) { try { @@ -329,6 +336,7 @@ , fQuery(0) , fUser(0) , fURLText(0) + , fHasInvalidChar(false) { XMLCh* tmpText = XMLString::transcode(urlText); ArrayJanitor<XMLCh> janRel(tmpText); @@ -354,6 +362,7 @@ , fQuery(XMLString::replicate(toCopy.fQuery)) , fUser(XMLString::replicate(toCopy.fUser)) , fURLText(XMLString::replicate(toCopy.fURLText)) + , fHasInvalidChar(toCopy.fHasInvalidChar) { } @@ -503,6 +512,11 @@ } +bool XMLURL::hasInvalidChar() const { + return fHasInvalidChar; +} + + BinInputStream* XMLURL::makeNewStream() const { // @@ -514,38 +528,10 @@ { if (!fHost || !XMLString::compareIString(fHost, XMLUni::fgLocalHostString)) { - // - // We have to play a little trick here. If its really a Windows - // style fully qualified path, we have to toss the leading / - // character. - // + XMLCh* realPath = XMLString::replicate(fPath); ArrayJanitor<XMLCh> basePathName(realPath); - if (*fPath == chForwardSlash) - { - if (XMLString::stringLen(fPath) > 3) - { - if (*(fPath + 2) == chColon) - { - const XMLCh chDrive = *(fPath + 1); - if (((chDrive >= chLatin_A) && (chDrive <= chLatin_Z)) - || ((chDrive >= chLatin_a) && (chDrive <= chLatin_z))) - { - realPath = fPath + 1; - } - } - - // Similarly for UNC paths - if ( *(fPath + 1) == *(fPath + 2) && - (*(fPath + 1) == chForwardSlash || - *(fPath + 1) == chBackSlash) ) - { - realPath = fPath + 1; - } - } - } - // // Need to manually replace any character reference %xx first // HTTP protocol will be done automatically by the netaccessor @@ -835,8 +821,11 @@ } // Its a relative path, so weave them together. - if (baseURL.fPath) - weavePaths(baseURL.fPath); + if (baseURL.fPath) { + XMLCh* temp = XMLPlatformUtils::weavePaths(baseURL.fPath, fPath); + delete [] fPath; + fPath = temp; + } // If we had any original path, then we are done if (hadPath) @@ -860,6 +849,12 @@ if (!*urlText) ThrowXML(MalformedURLException, XMLExcepts::URL_NoProtocolPresent); + // Before we start, check if this urlText contains valid uri characters + if (!XMLUri::isURIString(urlText)) + fHasInvalidChar = true; + else + fHasInvalidChar = false; + // // The first thing we will do is to check for a file name, so that // we don't waste time thinking its a URL. If its in the form x:\ @@ -988,17 +983,17 @@ } else { - // - // http protocol requires two forward slashes - // we didn't get them, so throw an exception - // - if (fProtocol == HTTP) { - ThrowXML + // + // http protocol requires two forward slashes + // we didn't get them, so throw an exception + // + if (fProtocol == HTTP) { + ThrowXML ( MalformedURLException , XMLExcepts::URL_ExpectingTwoSlashes ); - } + } } // @@ -1135,118 +1130,6 @@ } } - -void XMLURL::weavePaths(const XMLCh* const basePart) -{ - // Watch for stupid stuff - if (!basePart) - return; - if (!*basePart) - return; - - // - // Ok, lets start at the end of the base path and work backwards and - // our path part and work forwards. For each leading . we see, we just - // eat it. For each leading .. we see, we eat it and throw away one - // level in the source URL. - // - // If the last character in the base part is a forward slash, back - // up one first before we look for the last slash. - // - const XMLCh* basePtr = basePart + (XMLString::stringLen(basePart) - 1); - if (*basePtr == chForwardSlash) - basePtr--; - - while ((basePtr >= basePart) - && ((*basePtr != chForwardSlash) && (*basePtr != chBackSlash))) - { - basePtr--; - } - - if (basePtr < basePart) - return; - - // Create a buffer as large as both parts - XMLCh* tmpBuf = new XMLCh[XMLString::stringLen(fPath) - + XMLString::stringLen(basePart) - + 2]; - // - // If we have no path part, then copy the base part up to the - // base pointer - // - if (!fPath) - { - XMLCh* bufPtr = tmpBuf; - const XMLCh* tmpPtr = basePart; - while (tmpPtr <= basePtr) - *bufPtr++ = *tmpPtr++; - *bufPtr = 0; - - fPath = tmpBuf; - return; - } - - // After this, make sure the buffer gets handled if we exit early - ArrayJanitor<XMLCh> janBuf(tmpBuf); - - // - // We have some path part, so we need to check to see if we ahve to - // weave any of the parts together. - // - XMLCh* pathPtr = fPath; - while (true) - { - // If it does not start with some period, then we are done - if (*pathPtr != chPeriod) - break; - - unsigned int periodCount = 1; - pathPtr++; - if (*pathPtr == chPeriod) - { - pathPtr++; - periodCount++; - } - - // Has to be followed by a / or \ or the null to mean anything - if ((*pathPtr != chForwardSlash) && (*pathPtr != chBackSlash) - && *pathPtr) - { - break; - } - if (*pathPtr) - pathPtr++; - - // If its one period, just eat it, else move backwards in the base - if (periodCount == 2) - { - basePtr--; - while ((basePtr >= basePart) - && ((*basePtr != chForwardSlash) && (*basePtr != chBackSlash))) - { - basePtr--; - } - - // There are not enough levels to handle all the .. parts - if (basePtr < basePart) - ThrowXML(MalformedURLException, XMLExcepts::URL_BaseUnderflow); - } - } - - // Copy the base part up to the base pointer - XMLCh* bufPtr = tmpBuf; - const XMLCh* tmpPtr = basePart; - while (tmpPtr <= basePtr) - *bufPtr++ = *tmpPtr++; - - // And then copy on the rest of our path - XMLString::copyString(bufPtr, pathPtr); - - // Now delete our path and make the new buffer our path - delete [] fPath; - janBuf.orphan(); - fPath = tmpBuf; -} XERCES_CPP_NAMESPACE_END 1.4 +7 -2 xml-xerces/c/src/xercesc/util/XMLURL.hpp Index: XMLURL.hpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLURL.hpp,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- XMLURL.hpp 4 Nov 2002 15:22:05 -0000 1.3 +++ XMLURL.hpp 6 Jan 2003 19:43:18 -0000 1.4 @@ -177,6 +177,7 @@ // Miscellaneous methods // ----------------------------------------------------------------------- bool isRelative() const; + bool hasInvalidChar() const; BinInputStream* makeNewStream() const; void makeRelativeTo(const XMLCh* const baseURLText); void makeRelativeTo(const XMLURL& baseURL); @@ -193,7 +194,6 @@ ( const XMLCh* const urlText ); - void weavePaths(const XMLCh* const basePart); // ----------------------------------------------------------------------- @@ -231,6 +231,10 @@ // This is a copy of the URL text, after it has been taken apart, // made relative if needed, canonicalized, and then put back // together. Its only created upon demand. + // + // fHasInvalidChar + // This indicates if the URL Text contains invalid characters as per + // RFC 2396 standard. // ----------------------------------------------------------------------- XMLCh* fFragment; XMLCh* fHost; @@ -241,6 +245,7 @@ XMLCh* fQuery; XMLCh* fUser; XMLCh* fURLText; + bool fHasInvalidChar; };
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]