peiyongz 2004/06/14 08:18:53
Modified: c/src/xercesc/internal ReaderMgr.cpp ReaderMgr.hpp
XMLReader.cpp XMLReader.hpp XMLScanner.cpp
XMLScanner.hpp
Log:
Consolidated End Of Line Handling
Revision Changes Path
1.25 +3 -27 xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp
Index: ReaderMgr.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -r1.24 -r1.25
--- ReaderMgr.cpp 3 Jun 2004 15:38:27 -0000 1.24
+++ ReaderMgr.cpp 14 Jun 2004 15:18:52 -0000 1.25
@@ -286,7 +286,7 @@
}
-bool ReaderMgr::skipPastSpaces()
+bool ReaderMgr::skipPastSpaces(bool inDecl)
{
bool skippedSomething = false;
bool tmpFlag;
@@ -297,31 +297,7 @@
// it hit a non-space, break out. Else we have to pop another entity
// and keep going.
//
- if (fCurReader->skipSpaces(tmpFlag))
- break;
-
- if (tmpFlag)
- skippedSomething = true;
-
- // Try to pop another enitity. If we can't then we are done
- if (!popReader())
- break;
- }
- return (tmpFlag || skippedSomething);
-}
-
-bool ReaderMgr::skipPastSpacesInDecl()
-{
- bool skippedSomething = false;
- bool tmpFlag;
- while (true)
- {
- //
- // Skip all the spaces in the current reader. If it returned because
- // it hit a non-space, break out. Else we have to pop another entity
- // and keep going.
- //
- if (fCurReader->skipSpacesInDecl(tmpFlag))
+ if (fCurReader->skipSpaces(tmpFlag, inDecl))
break;
if (tmpFlag)
1.13 +7 -2 xml-xerces/c/src/xercesc/internal/ReaderMgr.hpp
Index: ReaderMgr.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ReaderMgr.hpp,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- ReaderMgr.hpp 3 Jun 2004 15:38:27 -0000 1.12
+++ ReaderMgr.hpp 14 Jun 2004 15:18:52 -0000 1.13
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.13 2004/06/14 15:18:52 peiyongz
+ * Consolidated End Of Line Handling
+ *
* Revision 1.12 2004/06/03 15:38:27 peiyongz
* XML1.1: The characters #x85 and #x2028 cannot be reliably recognized
* and translated until an entity's encoding declaration (if present) has been
@@ -223,8 +226,7 @@
XMLCh peekNextChar();
bool skipIfQuote(XMLCh& chGotten);
void skipPastChar(const XMLCh toSkip);
- bool skipPastSpaces();
- bool skipPastSpacesInDecl();
+ bool skipPastSpaces(bool inDecl = false);
void skipToChar(const XMLCh toSkipTo);
bool skippedChar(const XMLCh toSkip);
bool skippedSpace();
1.22 +103 -230 xml-xerces/c/src/xercesc/internal/XMLReader.cpp
Index: XMLReader.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.cpp,v
retrieving revision 1.21
retrieving revision 1.22
diff -u -r1.21 -r1.22
--- XMLReader.cpp 4 Jun 2004 14:31:15 -0000 1.21
+++ XMLReader.cpp 14 Jun 2004 15:18:52 -0000 1.22
@@ -72,7 +72,6 @@
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/Janitor.hpp>
-
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
@@ -695,42 +694,7 @@
// Eat this char
fCharIndex++;
- //
- // Ok, we've got some whitespace here. So we have to store
- // it. But we have to normalize it and update the line and
- // column info along the way.
- //
- if (curCh == chCR)
- {
- fCurCol = 1;
- fCurLine++;
-
- //
- // If not already internalized, then convert it to an
- // LF and eat any following LF.
- //
- if (fSource == Source_External)
- {
- if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
- {
- if (fCharBuf[fCharIndex] == chLF
- || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
- fCharIndex++;
- }
- curCh = chLF;
- }
- }
- else if (curCh == chLF
- || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
- {
- curCh = chLF;
- fCurCol = 1;
- fCurLine++;
- }
- else
- {
- fCurCol++;
- }
+ handleEOL(curCh, false);
// Ok we can add this guy to our buffer
toFill.append(curCh);
@@ -773,42 +737,7 @@
// Eat this char
fCharIndex++;
- //
- // Ok, we've got some whitespace here. So we have to store
- // it. But we have to normalize it and update the line and
- // column info along the way.
- //
- if (curCh == chCR)
- {
- fCurCol = 1;
- fCurLine++;
-
- //
- // If not already internalized, then convert it to an
- // LF and eat any following LF.
- //
- if (fSource == Source_External)
- {
- if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
- {
- if (fCharBuf[fCharIndex] == chLF
- || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
- fCharIndex++;
- }
- curCh = chLF;
- }
- }
- else if (curCh == chLF
- || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
- {
- curCh = chLF;
- fCurCol = 1;
- fCurLine++;
- }
- else
- {
- fCurCol++;
- }
+ handleEOL(curCh, false);
// Add it to our buffer
toFill.append(curCh);
@@ -853,89 +782,7 @@
}
-bool XMLReader::skipSpaces(bool& skippedSomething)
-{
- // Remember the current line and column
- XMLSSize_t orgLine = fCurLine;
- XMLSSize_t orgCol = fCurCol;
-
- // We enter a loop where we skip over spaces until we hit the end of
- // this reader or a non-space value. The return indicates whether we
- // hit the non-space (true) or the end (false).
- while (true)
- {
- // Loop through the current chars in the buffer
- while (fCharIndex < fCharsAvail)
- {
- // See if its a white space char. If so, then process it. Else
- // we've hit a non-space and need to return.
- if (isWhitespace(fCharBuf[fCharIndex]))
- {
- // Get the current char out of the buffer and eat it
- XMLCh curCh = fCharBuf[fCharIndex++];
-
- // Ok, we've got some whitespace here. So we have to store
- // it. But we have to normalize it and update the line and
- // column info along the way.
- if (curCh == chCR)
- {
- fCurCol = 1;
- fCurLine++;
-
- // If not already internalized, then convert it to an
- // LF and eat any following LF.
- if (fSource == Source_External)
- {
- if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
- {
- if (fCharBuf[fCharIndex] == chLF
- || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
- fCharIndex++;
- }
- }
- }
- else if (curCh == chLF
- || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
- {
- fCurCol = 1;
- fCurLine++;
- }
- else
- {
- fCurCol++;
- }
- }
- else
- {
- skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
- return true;
- }
- }
-
- // We've eaten up the current buffer, so lets try to reload it. If
- // we don't get anything new, then break out. If we do, then we go
- // back to the top to keep getting spaces.
- if (!refreshCharBuffer())
- break;
- }
-
- // We never hit any non-space and ate up the whole reader
- skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
- return false;
-}
-
-/***
- * XML1.1
- *
- * 2.11 End-of-Line Handling
- * ...
- * The characters #x85 and #x2028 cannot be reliably recognized and translated
- * until an entity's encoding declaration (if present) has been read.
- * Therefore, it is a fatal error to use them within the XML declaration or
- * text declaration.
- *
-***/
-bool XMLReader::skipSpacesInDecl(bool& skippedSomething)
+bool XMLReader::skipSpaces(bool& skippedSomething, bool inDecl)
{
// Remember the current line and column
XMLSSize_t orgLine = fCurLine;
@@ -956,56 +803,8 @@
// Get the current char out of the buffer and eat it
XMLCh curCh = fCharBuf[fCharIndex++];
- // Ok, we've got some whitespace here. So we have to store
- // it. But we have to normalize it and update the line and
- // column info along the way.
- if (curCh == chCR)
- {
- fCurCol = 1;
- fCurLine++;
+ handleEOL(curCh, inDecl);
- // If not already internalized, then convert it to an
- // LF and eat any following LF.
- if (fSource == Source_External)
- {
- if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
- {
- if (fCharBuf[fCharIndex] == chLF
- || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
- fCharIndex++;
- }
- }
- }
- else if (curCh == chLF)
- {
- fCurCol = 1;
- fCurLine++;
- }
- else if (curCh == chNEL || curCh == chLineSeparator)
- {
- if (fXMLVersion == XMLV1_1)
- {
- ThrowXMLwithMemMgr1
- (
- TranscodingException
- , XMLExcepts::Reader_NelLsepinDecl
- , fSystemId
- , fMemoryManager
- );
- }
- else //XMLV1_0
- {
- if (fNEL)
- {
- fCurCol = 1;
- fCurLine++;
- }
- }
- }
- else
- {
- fCurCol++;
- }
}
else
{
@@ -1074,31 +873,8 @@
// Eat the character
fCharIndex++;
- if (curCh == chCR)
- {
- fCurLine++;
- fCurCol = 1;
+ handleEOL((XMLCh&)curCh, false);
- if (fSource == Source_External)
- {
- if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
- {
- if (fCharBuf[fCharIndex] == chLF
- || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
- fCharIndex++;
- }
- }
- }
- else if (curCh == chLF
- || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
- {
- fCurLine++;
- fCurCol = 1;
- }
- else
- {
- fCurCol++;
- }
return true;
}
return false;
@@ -1723,6 +1499,103 @@
fRawBufIndex += bytesEaten;
return charsDone;
+}
+
+/***
+ *
+ * XML1.1
+ *
+ * 2.11 End-of-Line Handling
+ *
+ * XML parsed entities are often stored in computer files which, for editing
+ * convenience, are organized into lines. These lines are typically separated
+ * by some combination of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
+ *
+ * To simplify the tasks of applications, the XML processor MUST behave as if
+ * it normalized all line breaks in external parsed entities (including the document
+ * entity) on input, before parsing, by translating all of the following to a single
+ * #xA character:
+ *
+ * 1. the two-character sequence #xD #xA
+ * 2. the two-character sequence #xD #x85
+ * 3. the single character #x85
+ * 4. the single character #x2028
+ * 5. any #xD character that is not immediately followed by #xA or #x85.
+ *
+ *
+ ***/
+inline void XMLReader::handleEOL(XMLCh& curCh, bool inDecl)
+{
+ // 1. the two-character sequence #xD #xA
+ // 2. the two-character sequence #xD #x85
+ // 5. any #xD character that is not immediately followed by #xA or #x85.
+ if (curCh == chCR)
+ {
+ fCurCol = 1;
+ fCurLine++;
+
+ //
+ // If not already internalized, then convert it to an
+ // LF and eat any following LF.
+ //
+ if (fSource == Source_External)
+ {
+ if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
+ {
+ if ( fCharBuf[fCharIndex] == chLF ||
+ ((fCharBuf[fCharIndex] == chNEL) && fNEL) )
+ {
+ fCharIndex++;
+ }
+ }
+ curCh = chLF;
+ }
+ }
+ else if (curCh == chLF)
+ {
+ fCurCol = 1;
+ fCurLine++;
+ }
+ // 3. the single character #x85
+ // 4. the single character #x2028
+ else if (curCh == chNEL || curCh == chLineSeparator)
+ {
+ if (inDecl && fXMLVersion == XMLV1_1)
+ {
+
+ /***
+ * XML1.1
+ *
+ * 2.11 End-of-Line Handling
+ * ...
+ * The characters #x85 and #x2028 cannot be reliably recognized and translated
+ * until an entity's encoding declaration (if present) has been read.
+ * Therefore, it is a fatal error to use them within the XML declaration or
+ * text declaration.
+ *
+ ***/
+ ThrowXMLwithMemMgr1
+ (
+ TranscodingException
+ , XMLExcepts::Reader_NelLsepinDecl
+ , fSystemId
+ , fMemoryManager
+ );
+ }
+
+ if (fNEL && fSource == Source_External)
+ {
+ fCurCol = 1;
+ fCurLine++;
+ curCh = chLF;
+ }
+ }
+ else
+ {
+ fCurCol++;
+ }
+
+ return;
}
XERCES_CPP_NAMESPACE_END
1.17 +16 -105 xml-xerces/c/src/xercesc/internal/XMLReader.hpp
Index: XMLReader.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.hpp,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- XMLReader.hpp 3 Jun 2004 15:38:27 -0000 1.16
+++ XMLReader.hpp 14 Jun 2004 15:18:53 -0000 1.17
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.17 2004/06/14 15:18:53 peiyongz
+ * Consolidated End Of Line Handling
+ *
* Revision 1.16 2004/06/03 15:38:27 peiyongz
* XML1.1: The characters #x85 and #x2028 cannot be reliably recognized
* and translated until an entity's encoding declaration (if present) has been
@@ -333,8 +336,7 @@
bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
bool peekNextChar(XMLCh& chGotten);
bool skipIfQuote(XMLCh& chGotten);
- bool skipSpaces(bool& skippedSomething);
- bool skipSpacesInDecl(bool& skippedSomething);
+ bool skipSpaces(bool& skippedSomething, bool inDecl = false);
bool skippedChar(const XMLCh toSkip);
bool skippedSpace();
bool skippedString(const XMLCh* const toSkip);
@@ -428,6 +430,11 @@
, const unsigned int maxChars
);
+ inline void handleEOL
+ (
+ XMLCh& curCh
+ , bool inDecl = false
+ );
// -----------------------------------------------------------------------
// Data members
@@ -801,62 +808,8 @@
chGotten = fCharBuf[fCharIndex++];
// Handle end of line normalization and line/col member maintenance.
- if (chGotten == chCR)
- {
- //
- // Do the normalization. We return chLF regardless of which was
- // found. We also eat a chCR followed by an chLF.
- //
- // We only do this if the content being spooled is not already
- // internalized.
- //
- if (fSource == Source_External)
- {
- //
- // See if we have another char left. If not, don't bother.
- // Else, see if its an chLF to eat. If it is, bump the
- // index again.
- //
- if (fCharIndex < fCharsAvail)
- {
- if (fCharBuf[fCharIndex] == chLF
- || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
- fCharIndex++;
- }
- else
- {
- if (refreshCharBuffer())
- {
- if (fCharBuf[fCharIndex] == chLF
- || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
- fCharIndex++;
- }
- }
-
- // And return just an chLF
- chGotten = chLF;
- }
-
- // And handle the line/col stuff
- fCurCol = 1;
- fCurLine++;
- }
- else if (chGotten == chLF
- || ((chGotten == chNEL || chGotten == chLineSeparator) && fNEL))
- {
- chGotten = chLF;
- fCurLine++;
- fCurCol = 1;
- }
- else if (chGotten)
- {
- //
- // Only do this is not a null char. Null chars are not part of the
- // real content. They are just marker characters inserted into
- // the stream.
- //
- fCurCol++;
- }
+ handleEOL(chGotten, false);
+
return true;
}
@@ -883,53 +836,8 @@
chGotten = fCharBuf[fCharIndex++];
// Handle end of line normalization and line/col member maintenance.
- if (chGotten == chCR)
- {
- //
- // Do the normalization. We return chLF regardless of which was
- // found. We also eat a chCR followed by an chLF.
- //
- // We only do this if the content being spooled is not already
- // internalized.
- //
- if (fSource == Source_External)
- {
- //
- // See if we have another char left. If not, don't bother.
- // Else, see if its an chLF to eat. If it is, bump the
- // index again.
- //
- if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
- {
- if (fCharBuf[fCharIndex] == chLF
- || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
- fCharIndex++;
- }
-
- // And return just an chLF
- chGotten = chLF;
- }
-
- // And handle the line/col stuff
- fCurCol = 1;
- fCurLine++;
- }
- else if (chGotten == chLF
- || ((chGotten == chNEL || chGotten == chLineSeparator) && fNEL))
- {
- chGotten = chLF;
- fCurLine++;
- fCurCol = 1;
- }
- else if (chGotten)
- {
- //
- // Only do this is not a null char. Null chars are not part of the
- // real content. They are just marker characters inserted into
- // the stream.
- //
- fCurCol++;
- }
+ handleEOL(chGotten, false);
+
return true;
}
1.67 +6 -6 xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
Index: XMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
retrieving revision 1.66
retrieving revision 1.67
diff -u -r1.66 -r1.67
--- XMLScanner.cpp 3 Jun 2004 15:38:27 -0000 1.66
+++ XMLScanner.cpp 14 Jun 2004 15:18:53 -0000 1.67
@@ -1340,7 +1340,7 @@
while (true)
{
// Skip any spaces
- const unsigned int spaceCount = fReaderMgr.skipPastSpacesInDecl();
+ const unsigned int spaceCount = fReaderMgr.skipPastSpaces(true);
// If we are looking at a question mark, then break out
if (fReaderMgr.lookingAtChar(chQuestion))
@@ -1375,7 +1375,7 @@
// Scan for an equal's sign. If we don't find it, issue an error
// but keep trying to go on.
- if (!scanEq())
+ if (!scanEq(true))
emitError(XMLErrs::ExpectedEqSign);
// Get a quote string into the buffer for the string that we are
@@ -2168,12 +2168,12 @@
// Most equal signs can have white space around them, so this little guy
// just makes the calling code cleaner by eating whitespace.
-bool XMLScanner::scanEq()
+bool XMLScanner::scanEq(bool inDecl)
{
- fReaderMgr.skipPastSpacesInDecl();
+ fReaderMgr.skipPastSpaces(inDecl);
if (fReaderMgr.skippedChar(chEqual))
{
- fReaderMgr.skipPastSpacesInDecl();
+ fReaderMgr.skipPastSpaces(inDecl);
return true;
}
return false;
1.36 +4 -1 xml-xerces/c/src/xercesc/internal/XMLScanner.hpp
Index: XMLScanner.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.hpp,v
retrieving revision 1.35
retrieving revision 1.36
diff -u -r1.35 -r1.36
--- XMLScanner.hpp 13 Apr 2004 18:57:54 -0000 1.35
+++ XMLScanner.hpp 14 Jun 2004 15:18:53 -0000 1.36
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.36 2004/06/14 15:18:53 peiyongz
+ * Consolidated End Of Line Handling
+ *
* Revision 1.35 2004/04/13 18:57:54 peiyongz
* Unrelavant comment removal
*
@@ -734,7 +737,7 @@
// -----------------------------------------------------------------------
bool scanCharRef(XMLCh& toFill, XMLCh& second);
void scanComment();
- bool scanEq();
+ bool scanEq(bool inDecl = false);
void scanMiscellaneous();
void scanPI();
void scanProlog();
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]