cargilld 2005/03/22 12:43:27
Modified: c/src/xercesc/internal XMLReader.cpp XMLReader.hpp
Log:
Check in Christian's patches for Xercesc-1369 and 1370.
Revision Changes Path
1.29 +188 -8 xml-xerces/c/src/xercesc/internal/XMLReader.cpp
Index: XMLReader.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.cpp,v
retrieving revision 1.28
retrieving revision 1.29
diff -u -r1.28 -r1.29
--- XMLReader.cpp 20 Mar 2005 19:02:45 -0000 1.28
+++ XMLReader.cpp 22 Mar 2005 20:43:27 -0000 1.29
@@ -40,7 +40,7 @@
// not. Breaks out on the first non-whitespace.
//
bool XMLReader::isAllSpaces(const XMLCh* const toCheck
- , const unsigned int count)
+ , const unsigned int count) const
{
const XMLCh* curCh = toCheck;
const XMLCh* endPtr = toCheck + count;
@@ -58,7 +58,7 @@
// not.
//
bool XMLReader::containsWhiteSpace(const XMLCh* const toCheck
- , const unsigned int count)
+ , const unsigned int count) const
{
const XMLCh* curCh = toCheck;
const XMLCh* endPtr = toCheck + count;
@@ -73,7 +73,7 @@
//
// This one is not called terribly often, so call the XMLChar utility
//
-bool XMLReader::isPublicIdChar(const XMLCh toCheck)
+bool XMLReader::isPublicIdChar(const XMLCh toCheck) const
{
if (fXMLVersion == XMLV1_1)
return XMLChar1_1::isPublicIdChar(toCheck);
@@ -704,7 +704,28 @@
// Eat this char
fCharIndex++;
- handleEOL(curCh, false);
+ //
+ // 'curCh' is a whitespace(x20|x9|xD|xA), so we only can have
+ // end-of-line combinations with a leading chCR(xD) or chLF(xA)
+ //
+ // 100000 x20
+ // 001001 x9
+ // 001010 chLF
+ // 001101 chCR
+ // -----------
+ // 000110 == (chCR|chLF) & ~(0x9|0x20)
+ //
+ // if the result of the logical-& operation is
+ // true : 'curCh' must be xA or xD
+ // false : 'curCh' must be x20 or x9
+ //
+ if ( ( curCh & (chCR|chLF) & ~(0x9|0x20) ) == 0 )
+ {
+ fCurCol++;
+ } else
+ {
+ handleEOL(curCh, false);
+ }
// Ok we can add this guy to our buffer
toFill.append(curCh);
@@ -747,7 +768,27 @@
// Eat this char
fCharIndex++;
- handleEOL(curCh, false);
+ //
+ // 'curCh' is not a whitespace(x20|x9|xD|xA), so we only can
+ // have end-of-line combinations with a leading chNEL(x85) or
+ // chLineSeparator(x2028)
+ //
+ // 0010000000101000 chLineSeparator
+ // 0000000010000101 chNEL
+ // ---------------------
+ // 1101111101010010 == ~(chNEL|chLineSeparator)
+ //
+ // if the result of the logical-& operation is
+ // true : 'curCh' can not be chNEL or chLineSeparator
+ // false : 'curCh' can be chNEL or chLineSeparator
+ //
+ if ( curCh & (XMLCh) ~(chNEL|chLineSeparator) )
+ {
+ fCurCol++;
+ } else
+ {
+ handleEOL(curCh, false);
+ }
// Add it to our buffer
toFill.append(curCh);
@@ -813,7 +854,28 @@
// Get the current char out of the buffer and eat it
XMLCh curCh = fCharBuf[fCharIndex++];
- handleEOL(curCh, inDecl);
+ //
+ // 'curCh' is a whitespace(x20|x9|xD|xA), so we only can have
+ // end-of-line combinations with a leading chCR(xD) or chLF(xA)
+ //
+ // 100000 x20
+ // 001001 x9
+ // 001010 chLF
+ // 001101 chCR
+ // -----------
+ // 000110 == (chCR|chLF) & ~(0x9|0x20)
+ //
+ // if the result of the logical-& operation is
+ // true : 'curCh' must be xA or xD
+ // false : 'curCh' must be x20 or x9
+ //
+ if ( ( curCh & (chCR|chLF) & ~(0x9|0x20) ) == 0 )
+ {
+ fCurCol++;
+ } else
+ {
+ handleEOL(curCh, inDecl);
+ }
}
else
@@ -883,7 +945,28 @@
// Eat the character
fCharIndex++;
- handleEOL((XMLCh&)curCh, false);
+ //
+ // 'curCh' is a whitespace(x20|x9|xD|xA), so we only can have
+ // end-of-line combinations with a leading chCR(xD) or chLF(xA)
+ //
+ // 100000 x20
+ // 001001 x9
+ // 001010 chLF
+ // 001101 chCR
+ // -----------
+ // 000110 == (chCR|chLF) & ~(0x9|0x20)
+ //
+ // if the result of the logical-& operation is
+ // true : 'curCh' must be xA or xD
+ // false : 'curCh' must be x20 or x9
+ //
+ if ( ( curCh & (chCR|chLF) & ~(0x9|0x20) ) == 0 )
+ {
+ fCurCol++;
+ } else
+ {
+ handleEOL((XMLCh&)curCh, false);
+ }
return true;
}
@@ -1522,4 +1605,101 @@
return charsDone;
}
+/***
+ *
+ * XML1.1
+ *
+ * 2.11 End-of-Line Handling
+ *
+ * XML parsed entities are often stored in computer files which, for editing
+ * convenience, are organized into lines. These lines are typically separated
+ * by some combination of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
+ *
+ * To simplify the tasks of applications, the XML processor MUST behave as if
+ * it normalized all line breaks in external parsed entities (including the document
+ * entity) on input, before parsing, by translating all of the following to a single
+ * #xA character:
+ *
+ * 1. the two-character sequence #xD #xA
+ * 2. the two-character sequence #xD #x85
+ * 3. the single character #x85
+ * 4. the single character #x2028
+ * 5. any #xD character that is not immediately followed by #xA or #x85.
+ *
+ *
+ ***/
+void XMLReader::handleEOL(XMLCh& curCh, bool inDecl)
+{
+ // 1. the two-character sequence #xD #xA
+ // 2. the two-character sequence #xD #x85
+ // 5. any #xD character that is not immediately followed by #xA or #x85.
+ if (curCh == chCR)
+ {
+ fCurCol = 1;
+ fCurLine++;
+
+ //
+ // If not already internalized, then convert it to an
+ // LF and eat any following LF.
+ //
+ if (fSource == Source_External)
+ {
+ if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
+ {
+ if ( fCharBuf[fCharIndex] == chLF ||
+ ((fCharBuf[fCharIndex] == chNEL) && fNEL) )
+ {
+ fCharIndex++;
+ }
+ }
+ curCh = chLF;
+ }
+ }
+ else if (curCh == chLF)
+ {
+ fCurCol = 1;
+ fCurLine++;
+ }
+ // 3. the single character #x85
+ // 4. the single character #x2028
+ else if (curCh == chNEL || curCh == chLineSeparator)
+ {
+ if (inDecl && fXMLVersion == XMLV1_1)
+ {
+
+ /***
+ * XML1.1
+ *
+ * 2.11 End-of-Line Handling
+ * ...
+ * The characters #x85 and #x2028 cannot be reliably recognized and translated
+ * until an entity's encoding declaration (if present) has been read.
+ * Therefore, it is a fatal error to use them within the XML declaration or
+ * text declaration.
+ *
+ ***/
+ ThrowXMLwithMemMgr1
+ (
+ TranscodingException
+ , XMLExcepts::Reader_NelLsepinDecl
+ , fSystemId
+ , fMemoryManager
+ );
+ }
+
+ if (fNEL && fSource == Source_External)
+ {
+ fCurCol = 1;
+ fCurLine++;
+ curCh = chLF;
+ }
+ }
+ else
+ {
+ fCurCol++;
+ }
+
+ return;
+}
+
XERCES_CPP_NAMESPACE_END
1.21 +67 -119 xml-xerces/c/src/xercesc/internal/XMLReader.hpp
Index: XMLReader.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.hpp,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -r1.20 -r1.21
--- XMLReader.hpp 29 Sep 2004 00:24:01 -0000 1.20
+++ XMLReader.hpp 22 Mar 2005 20:43:27 -0000 1.21
@@ -16,6 +16,9 @@
/*
* $Log$
+ * Revision 1.21 2005/03/22 20:43:27 cargilld
+ * Check in Christian's patches for Xercesc-1369 and 1370.
+ *
* Revision 1.20 2004/09/29 00:24:01 knoaman
* Performance: improve src offset calculation. Patch by Anthony O'Dowd.
*
@@ -219,24 +222,24 @@
(
const XMLCh* const toCheck
, const unsigned int count
- );
+ ) const;
bool containsWhiteSpace
(
const XMLCh* const toCheck
, const unsigned int count
- );
+ ) const;
- bool isXMLLetter(const XMLCh toCheck);
- bool isFirstNameChar(const XMLCh toCheck);
- bool isNameChar(const XMLCh toCheck);
- bool isPlainContentChar(const XMLCh toCheck);
- bool isSpecialStartTagChar(const XMLCh toCheck);
- bool isXMLChar(const XMLCh toCheck);
- bool isWhitespace(const XMLCh toCheck);
- bool isControlChar(const XMLCh toCheck);
- bool isPublicIdChar(const XMLCh toCheck);
+ bool isXMLLetter(const XMLCh toCheck) const;
+ bool isFirstNameChar(const XMLCh toCheck) const;
+ bool isNameChar(const XMLCh toCheck) const;
+ bool isPlainContentChar(const XMLCh toCheck) const;
+ bool isSpecialStartTagChar(const XMLCh toCheck) const;
+ bool isXMLChar(const XMLCh toCheck) const;
+ bool isWhitespace(const XMLCh toCheck) const;
+ bool isControlChar(const XMLCh toCheck) const;
+ bool isPublicIdChar(const XMLCh toCheck) const;
// -----------------------------------------------------------------------
// Constructors and Destructor
@@ -400,7 +403,7 @@
, const unsigned int maxChars
);
- inline void handleEOL
+ void handleEOL
(
XMLCh& curCh
, bool inDecl = false
@@ -590,43 +593,43 @@
// ---------------------------------------------------------------------------
// XMLReader: Public, query methods
// ---------------------------------------------------------------------------
-inline bool XMLReader::isNameChar(const XMLCh toCheck)
+inline bool XMLReader::isNameChar(const XMLCh toCheck) const
{
return ((fgCharCharsTable[toCheck] & gNameCharMask) != 0);
}
-inline bool XMLReader::isPlainContentChar(const XMLCh toCheck)
+inline bool XMLReader::isPlainContentChar(const XMLCh toCheck) const
{
return ((fgCharCharsTable[toCheck] & gPlainContentCharMask) != 0);
}
-inline bool XMLReader::isFirstNameChar(const XMLCh toCheck)
+inline bool XMLReader::isFirstNameChar(const XMLCh toCheck) const
{
return ((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0);
}
-inline bool XMLReader::isSpecialStartTagChar(const XMLCh toCheck)
+inline bool XMLReader::isSpecialStartTagChar(const XMLCh toCheck) const
{
return ((fgCharCharsTable[toCheck] & gSpecialStartTagCharMask) != 0);
}
-inline bool XMLReader::isXMLChar(const XMLCh toCheck)
+inline bool XMLReader::isXMLChar(const XMLCh toCheck) const
{
return ((fgCharCharsTable[toCheck] & gXMLCharMask) != 0);
}
-inline bool XMLReader::isXMLLetter(const XMLCh toCheck)
+inline bool XMLReader::isXMLLetter(const XMLCh toCheck) const
{
return ((fgCharCharsTable[toCheck] & gLetterCharMask) != 0);
}
-inline bool XMLReader::isWhitespace(const XMLCh toCheck)
+inline bool XMLReader::isWhitespace(const XMLCh toCheck) const
{
return ((fgCharCharsTable[toCheck] & gWhitespaceCharMask) != 0);
}
-inline bool XMLReader::isControlChar(const XMLCh toCheck)
+inline bool XMLReader::isControlChar(const XMLCh toCheck) const
{
return ((fgCharCharsTable[toCheck] & gControlCharMask) != 0);
}
@@ -784,7 +787,28 @@
chGotten = fCharBuf[fCharIndex++];
// Handle end of line normalization and line/col member maintenance.
- handleEOL(chGotten, false);
+ //
+ // we can have end-of-line combinations with a leading
+ // chCR(xD), chLF(xA), chNEL(x85), or chLineSeparator(x2028)
+ //
+ // 0000000000001101 chCR
+ // 0000000000001010 chLF
+ // 0000000010000101 chNEL
+ // 0010000000101000 chLineSeparator
+ // -----------------------
+ // 1101111101010000 == ~(chCR|chLF|chNEL|chLineSeparator)
+ //
+ // if the result of the logical-& operation is
+ // true : 'curCh' can not be chCR, chLF, chNEL or chLineSeparator
+ // false : 'curCh' can be chCR, chLF, chNEL or chLineSeparator
+ //
+ if ( chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator) )
+ {
+ fCurCol++;
+ } else
+ {
+ handleEOL(chGotten, false);
+ }
return true;
}
@@ -812,7 +836,28 @@
chGotten = fCharBuf[fCharIndex++];
// Handle end of line normalization and line/col member maintenance.
- handleEOL(chGotten, false);
+ //
+ // we can have end-of-line combinations with a leading
+ // chCR(xD), chLF(xA), chNEL(x85), or chLineSeparator(x2028)
+ //
+ // 0000000000001101 chCR
+ // 0000000000001010 chLF
+ // 0000000010000101 chNEL
+ // 0010000000101000 chLineSeparator
+ // -----------------------
+ // 1101111101010000 == ~(chCR|chLF|chNEL|chLineSeparator)
+ //
+ // if the result of the logical-& operation is
+ // true : 'curCh' can not be chCR, chLF, chNEL or chLineSeparator
+ // false : 'curCh' can be chCR, chLF, chNEL or chLineSeparator
+ //
+ if ( chGotten & (XMLCh) ~(chCR|chLF|chNEL|chLineSeparator) )
+ {
+ fCurCol++;
+ } else
+ {
+ handleEOL(chGotten, false);
+ }
return true;
}
@@ -851,103 +896,6 @@
return true;
}
-/***
- *
- * XML1.1
- *
- * 2.11 End-of-Line Handling
- *
- * XML parsed entities are often stored in computer files which, for editing
- * convenience, are organized into lines. These lines are typically separated
- * by some combination of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
- *
- * To simplify the tasks of applications, the XML processor MUST behave as if
- * it normalized all line breaks in external parsed entities (including the document
- * entity) on input, before parsing, by translating all of the following to a single
- * #xA character:
- *
- * 1. the two-character sequence #xD #xA
- * 2. the two-character sequence #xD #x85
- * 3. the single character #x85
- * 4. the single character #x2028
- * 5. any #xD character that is not immediately followed by #xA or #x85.
- *
- *
- ***/
-inline void XMLReader::handleEOL(XMLCh& curCh, bool inDecl)
-{
- // 1. the two-character sequence #xD #xA
- // 2. the two-character sequence #xD #x85
- // 5. any #xD character that is not immediately followed by #xA or #x85.
- if (curCh == chCR)
- {
- fCurCol = 1;
- fCurLine++;
-
- //
- // If not already internalized, then convert it to an
- // LF and eat any following LF.
- //
- if (fSource == Source_External)
- {
- if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
- {
- if ( fCharBuf[fCharIndex] == chLF ||
- ((fCharBuf[fCharIndex] == chNEL) && fNEL) )
- {
- fCharIndex++;
- }
- }
- curCh = chLF;
- }
- }
- else if (curCh == chLF)
- {
- fCurCol = 1;
- fCurLine++;
- }
- // 3. the single character #x85
- // 4. the single character #x2028
- else if (curCh == chNEL || curCh == chLineSeparator)
- {
- if (inDecl && fXMLVersion == XMLV1_1)
- {
-
- /***
- * XML1.1
- *
- * 2.11 End-of-Line Handling
- * ...
- * The characters #x85 and #x2028 cannot be reliably recognized and translated
- * until an entity's encoding declaration (if present) has been read.
- * Therefore, it is a fatal error to use them within the XML declaration or
- * text declaration.
- *
- ***/
- ThrowXMLwithMemMgr1
- (
- TranscodingException
- , XMLExcepts::Reader_NelLsepinDecl
- , fSystemId
- , fMemoryManager
- );
- }
-
- if (fNEL && fSource == Source_External)
- {
- fCurCol = 1;
- fCurLine++;
- curCh = chLF;
- }
- }
- else
- {
- fCurCol++;
- }
-
- return;
-}
-
XERCES_CPP_NAMESPACE_END
#endif
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]