peiyongz 2003/03/15 22:00:44
Modified: c/src/xercesc/framework XMLFormatter.hpp XMLFormatter.cpp
Log:
Bug#17983 Formatter does not escape control characters
Revision Changes Path
1.11 +23 -4 xml-xerces/c/src/xercesc/framework/XMLFormatter.hpp
Index: XMLFormatter.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/framework/XMLFormatter.hpp,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- XMLFormatter.hpp 11 Mar 2003 12:58:36 -0000 1.10
+++ XMLFormatter.hpp 16 Mar 2003 06:00:43 -0000 1.11
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.11 2003/03/16 06:00:43 peiyongz
+ * Bug#17983 Formatter does not escape control characters
+ *
* Revision 1.10 2003/03/11 12:58:36 tng
* Fix compilation error on AIX.
*
@@ -273,6 +276,7 @@
XMLFormatter
(
const XMLCh* const outEncoding
+ , const XMLCh* const docVersion
, XMLFormatTarget* const target
, const EscapeFlags escapeFlags = NoEscapes
, const UnRepFlags unrepFlags = UnRep_Fail
@@ -281,6 +285,7 @@
XMLFormatter
(
const char* const outEncoding
+ , const char* const docVersion
, XMLFormatTarget* const target
, const EscapeFlags escapeFlags = NoEscapes
, const UnRepFlags unrepFlags = UnRep_Fail
@@ -411,9 +416,15 @@
XMLByte * ref,
const XMLCh * stdRef);
- unsigned int handleUnEscapedChars(const XMLCh * srcPtr,
- const unsigned int count,
- const UnRepFlags unrepFlags);
+ const void writeCharRef(const XMLCh &toWrite);
+
+ bool inEscapeList(const XMLFormatter::EscapeFlags escStyle
+ , const XMLCh toCheck);
+
+
+ unsigned int handleUnEscapedChars(const XMLCh * srcPtr,
+ const unsigned int count,
+ const UnRepFlags unrepFlags);
void specialFormat
(
@@ -458,6 +469,11 @@
// These are character refs for the standard char refs, in the
// output encoding. They are faulted in as required, by transcoding
// them from fixed Unicode versions.
+ //
+ // fIsXML11
+ // for performance reason, we do not store the actual version string
+ // and do the string comparison again and again.
+ //
// -----------------------------------------------------------------------
EscapeFlags fEscapeFlags;
XMLCh* fOutEncoding;
@@ -476,6 +492,9 @@
unsigned int fLTLen;
XMLByte* fQuoteRef;
unsigned int fQuoteLen;
+
+ bool fIsXML11;
+
};
@@ -493,7 +512,7 @@
// -----------------------------------------------------------------------
virtual void writeChars
(
- const XMLByte* const toWrite
+ const XMLByte* const toWrite
, const unsigned int count
, XMLFormatter* const formatter
) = 0;
1.9 +110 -53 xml-xerces/c/src/xercesc/framework/XMLFormatter.cpp
Index: XMLFormatter.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/framework/XMLFormatter.cpp,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- XMLFormatter.cpp 7 Mar 2003 21:42:37 -0000 1.8
+++ XMLFormatter.cpp 16 Mar 2003 06:00:43 -0000 1.9
@@ -69,6 +69,9 @@
#include <xercesc/util/TranscodingException.hpp>
#include <xercesc/util/XMLExceptMsgs.hpp>
#include <xercesc/framework/XMLFormatter.hpp>
+#include <xercesc/util/Janitor.hpp>
+#include <xercesc/util/XMLChar.hpp>
+
#include <string.h>
XERCES_CPP_NAMESPACE_BEGIN
@@ -124,8 +127,8 @@
// ---------------------------------------------------------------------------
// Local methods
// ---------------------------------------------------------------------------
-static inline bool inEscapeList(const XMLFormatter::EscapeFlags escStyle
- , const XMLCh toCheck)
+bool XMLFormatter::inEscapeList(const XMLFormatter::EscapeFlags escStyle
+ , const XMLCh toCheck)
{
const XMLCh* escList = gEscapeChars[escStyle];
while (*escList)
@@ -133,7 +136,42 @@
if (*escList++ == toCheck)
return true;
}
- return false;
+
+ /***
+ * XML1.1
+ *
+ * Finally, there is considerable demand to define a standard representation of
+ * arbitrary Unicode characters in XML documents. Therefore, XML 1.1 allows the
+ * use of character references to the control characters #x1 through #x1F,
+ * most of which are forbidden in XML 1.0. For reasons of robustness, however,
+ * these characters still cannot be used directly in documents.
+ * In order to improve the robustness of character encoding detection, the
+ * additional control characters #x7F through #x9F, which were freely allowed in
+ * XML 1.0 documents, now must also appear only as character references.
+ * (Whitespace characters are of course exempt.) The minor sacrifice of backward
+ * compatibility is considered not significant.
+ * Due to potential problems with APIs, #x0 is still forbidden both directly and
+ * as a character reference.
+ *
+ ***/
+ if (fIsXML11)
+ {
+ // for XML11
+ if ( XMLChar1_1::isControlChar(toCheck, 0) &&
+ !XMLChar1_1::isWhitespace(toCheck, 0) )
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ else
+ {
+ return false;
+ }
+
}
@@ -141,24 +179,26 @@
// XMLFormatter: Constructors and Destructor
// ---------------------------------------------------------------------------
XMLFormatter::XMLFormatter( const char* const outEncoding
+ , const char* const docVersion
, XMLFormatTarget* const target
, const EscapeFlags escapeFlags
- , const UnRepFlags unrepFlags) :
- fEscapeFlags(escapeFlags)
+ , const UnRepFlags unrepFlags)
+ : fEscapeFlags(escapeFlags)
, fOutEncoding(0)
, fTarget(target)
, fUnRepFlags(unrepFlags)
, fXCoder(0)
, fAposRef(0)
- , fAmpRef(0)
- , fGTRef(0)
- , fLTRef(0)
- , fQuoteRef(0)
, fAposLen(0)
+ , fAmpRef(0)
, fAmpLen(0)
+ , fGTRef(0)
, fGTLen(0)
+ , fLTRef(0)
, fLTLen(0)
+ , fQuoteRef(0)
, fQuoteLen(0)
+ , fIsXML11(false)
{
// Transcode the encoding string
fOutEncoding = XMLString::transcode(outEncoding);
@@ -183,24 +223,34 @@
, outEncoding
);
}
+
+ XMLCh* const tmpDocVer = XMLString::transcode(docVersion);
+ ArrayJanitor<XMLCh> jname(tmpDocVer);
+ fIsXML11 = XMLString::equals(tmpDocVer, XMLUni::fgVersion1_1);
}
XMLFormatter::XMLFormatter( const XMLCh* const outEncoding
+ , const XMLCh* const docVersion
, XMLFormatTarget* const target
, const EscapeFlags escapeFlags
- , const UnRepFlags unrepFlags) :
- fEscapeFlags(escapeFlags)
+ , const UnRepFlags unrepFlags)
+ : fEscapeFlags(escapeFlags)
, fOutEncoding(0)
, fTarget(target)
, fUnRepFlags(unrepFlags)
- , fXCoder(0)
-
+ , fXCoder(0)
, fAposRef(0)
- , fAmpRef(0)
+ , fAposLen(0)
+ , fAmpRef(0)
+ , fAmpLen(0)
, fGTRef(0)
+ , fGTLen(0)
, fLTRef(0)
+ , fLTLen(0)
, fQuoteRef(0)
+ , fQuoteLen(0)
+ , fIsXML11(false)
{
// Copy the encoding string
fOutEncoding = XMLString::replicate(outEncoding);
@@ -225,6 +275,8 @@
, outEncoding
);
}
+
+ fIsXML11 = XMLString::equals(docVersion, XMLUni::fgVersion1_1);
}
XMLFormatter::~XMLFormatter()
@@ -323,32 +375,33 @@
const XMLByte * theChars;
switch (*srcPtr) {
case chAmpersand :
- theChars = getCharRef(fAmpLen, fAmpRef, gAmpRef);
+ theChars = getCharRef(fAmpLen, fAmpRef, gAmpRef);
fTarget->writeChars(theChars, fAmpLen, this);
break;
case chSingleQuote :
- theChars = getCharRef(fAposLen, fAposRef, gAposRef);
+ theChars = getCharRef(fAposLen, fAposRef, gAposRef);
fTarget->writeChars(theChars, fAposLen, this);
break;
case chDoubleQuote :
- theChars = getCharRef(fQuoteLen, fQuoteRef, gQuoteRef);
+ theChars = getCharRef(fQuoteLen, fQuoteRef, gQuoteRef);
fTarget->writeChars(theChars, fQuoteLen, this);
break;
case chCloseAngle :
- theChars = getCharRef(fGTLen, fGTRef, gGTRef);
+ theChars = getCharRef(fGTLen, fGTRef, gGTRef);
fTarget->writeChars(theChars, fGTLen, this);
break;
case chOpenAngle :
- theChars = getCharRef(fLTLen, fLTRef, gLTRef);
+ theChars = getCharRef(fLTLen, fLTRef, gLTRef);
fTarget->writeChars(theChars, fLTLen, this);
break;
default:
- // <TBD> This is obviously an error
+ // control characters
+ writeCharRef(*srcPtr);
break;
}
srcPtr++;
@@ -357,7 +410,7 @@
}
}
-
+
unsigned int
XMLFormatter::handleUnEscapedChars(const XMLCh * srcPtr,
const unsigned int oCount,
@@ -432,29 +485,52 @@
// ---------------------------------------------------------------------------
// XMLFormatter: Private helper methods
// ---------------------------------------------------------------------------
+const void XMLFormatter::writeCharRef(const XMLCh &toWrite)
+{
+ XMLCh tmpBuf[32];
+ tmpBuf[0] = chAmpersand;
+ tmpBuf[1] = chPound;
+ tmpBuf[2] = chLatin_x;
+
+ // Build a char ref for the current char
+ XMLString::binToText(toWrite, &tmpBuf[3], 8, 16);
+ const unsigned int bufLen = XMLString::stringLen(tmpBuf);
+ tmpBuf[bufLen] = chSemiColon;
+ tmpBuf[bufLen+1] = chNull;
+
+ // write it out
+ formatBuf(tmpBuf
+ , bufLen + 1
+ , XMLFormatter::NoEscapes
+ , XMLFormatter::UnRep_Fail);
+
+}
+
const XMLByte* XMLFormatter::getCharRef(unsigned int & count,
XMLByte * ref,
const XMLCh * stdRef)
{
if (!ref) {
- unsigned int charsEaten;
- const unsigned int outBytes
- = fXCoder->transcodeTo(stdRef, XMLString::stringLen(stdRef),
+
+ unsigned int charsEaten;
+ const unsigned int outBytes =
+ fXCoder->transcodeTo(stdRef, XMLString::stringLen(stdRef),
fTmpBuf, kTmpBufSize, charsEaten,
XMLTranscoder::UnRep_Throw);
- fTmpBuf[outBytes] = 0; fTmpBuf[outBytes + 1] = 0;
- fTmpBuf[outBytes + 2] = 0; fTmpBuf[outBytes + 3] = 0;
-
- ref = new XMLByte[outBytes + 4];
- memcpy(ref, fTmpBuf, outBytes + 4);
- count = outBytes;
- }
+ fTmpBuf[outBytes] = 0;
+ fTmpBuf[outBytes + 1] = 0;
+ fTmpBuf[outBytes + 2] = 0;
+ fTmpBuf[outBytes + 3] = 0;
+
+ ref = new XMLByte[outBytes + 4];
+ memcpy(ref, fTmpBuf, outBytes + 4);
+ count = outBytes;
+ }
return ref;
}
-
void XMLFormatter::specialFormat(const XMLCh* const toFormat
, const unsigned int count
, const EscapeFlags escapeFlags)
@@ -470,12 +546,6 @@
const XMLCh* srcPtr = toFormat;
const XMLCh* endPtr = toFormat + count;
- // Set up the common part of the buffer that we build char refs into
- XMLCh tmpBuf[32];
- tmpBuf[0] = chAmpersand;
- tmpBuf[1] = chPound;
- tmpBuf[2] = chLatin_x;
-
while (srcPtr < endPtr)
{
const XMLCh* tmpPtr = srcPtr;
@@ -510,20 +580,7 @@
//
while (srcPtr < endPtr)
{
- // Build a char ref for the current char
- XMLString::binToText(*srcPtr, &tmpBuf[3], 8, 16);
- const unsigned int bufLen = XMLString::stringLen(tmpBuf);
- tmpBuf[bufLen] = chSemiColon;
- tmpBuf[bufLen+1] = chNull;
-
- // And now call recursively back to our caller to format this
- formatBuf
- (
- tmpBuf
- , bufLen + 1
- , XMLFormatter::NoEscapes
- , XMLFormatter::UnRep_Fail
- );
+ writeCharRef(*srcPtr);
// Move up the source pointer and break out if needed
srcPtr++;
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]