tng 2002/12/24 08:11:40
Modified: c/src/xercesc/internal DGXMLScanner.cpp IGXMLScanner2.cpp
SGXMLScanner.cpp WFXMLScanner.cpp XMLScanner.cpp
Log:
For performance reasons, move the character validity check into scanCharRef.
Revision Changes Path
1.3 +73 -103 xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp
Index: DGXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- DGXMLScanner.cpp 20 Dec 2002 22:09:56 -0000 1.2
+++ DGXMLScanner.cpp 24 Dec 2002 16:11:39 -0000 1.3
@@ -2326,23 +2326,13 @@
bool firstNonWS = false;
bool gotLeadingSurrogate = false;
bool escaped;
- bool charref_expanded = false;
while (true)
{
try
{
while(true)
{
- // Get another char. Use second char if one is waiting
- if (secondCh)
- {
- nextCh = secondCh;
- secondCh = 0;
- }
- else
- {
- nextCh = fReaderMgr.getNextChar();
- }
+ nextCh = fReaderMgr.getNextChar();
if (!nextCh)
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
@@ -2372,18 +2362,16 @@
gotLeadingSurrogate = false;
continue;
}
- charref_expanded = true;
}
-
- // Deal with surrogate pairs
- if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
+ else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
+ // Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
- else
+ else
gotLeadingSurrogate = true;
}
else
@@ -2407,22 +2395,17 @@
// Its got to at least be a valid XML character
if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
- // if it was a character reference and is control char,
then it's ok
- if (!(charref_expanded &&
fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
- {
- XMLCh tmpBuf[9];
- XMLString::binToText
- (
- nextCh
- , tmpBuf
- , 8
- , 16
- );
- emitError(XMLErrs::InvalidCharacterInAttrValue,
attrName, tmpBuf);
- }
+ XMLCh tmpBuf[9];
+ XMLString::binToText
+ (
+ nextCh
+ , tmpBuf
+ , 8
+ , 16
+ );
+ emitError(XMLErrs::InvalidCharacterInAttrValue, attrName,
tmpBuf);
}
}
- charref_expanded = false;
gotLeadingSurrogate = false;
}
@@ -2498,6 +2481,9 @@
// Else add it to the buffer
toFill.append(nextCh);
+
+ if (secondCh)
+ toFill.append(secondCh);
}
}
catch(const EndOfEntityException&)
@@ -2704,44 +2690,36 @@
bool escaped = false;
bool gotLeadingSurrogate = false;
bool notDone = true;
- bool charref_expanded = false;
while (notDone)
{
try
{
while (true)
{
- if (secondCh)
+ // Eat through as many plain content characters as possible without
+ // needing special handling. Moving most content characters here,
+ // in this one call, rather than running the overall loop once
+ // per content character, is a speed optimization.
+ if (curState == State_Waiting && !gotLeadingSurrogate)
{
- nextCh = secondCh;
- secondCh = 0;
+ fReaderMgr.movePlainContentChars(toUse);
}
- else
- {
- // Eat through as many plain content characters as possible
without
- // needing special handling. Moving most content characters
here,
- // in this one call, rather than running the overall loop once
- // per content character, is a speed optimization.
- if (curState == State_Waiting && !gotLeadingSurrogate)
- {
- fReaderMgr.movePlainContentChars(toUse);
- }
- // Try to get another char from the source
- // The code from here on down covers all contengencies,
- if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
- {
- // If we were waiting for a trailing surrogate, its an error
- if (gotLeadingSurrogate)
- emitError(XMLErrs::Expected2ndSurrogateChar);
+ // Try to get another char from the source
+ // The code from here on down covers all contengencies,
+ if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
+ {
+ // If we were waiting for a trailing surrogate, its an error
+ if (gotLeadingSurrogate)
+ emitError(XMLErrs::Expected2ndSurrogateChar);
- notDone = false;
- break;
- }
+ notDone = false;
+ break;
}
// Watch for a reference. Note that the escapement mechanism
// is ignored in this content.
+ escaped = false;
if (nextCh == chAmpersand)
{
sendCharData(toUse);
@@ -2754,42 +2732,10 @@
gotLeadingSurrogate = false;
continue;
}
- charref_expanded = true;
- }
- else
- {
- escaped = false;
- }
-
- // Keep the state machine up to date
- if (!escaped)
- {
- if (nextCh == chCloseSquare)
- {
- if (curState == State_Waiting)
- curState = State_GotOne;
- else if (curState == State_GotOne)
- curState = State_GotTwo;
- }
- else if (nextCh == chCloseAngle)
- {
- if (curState == State_GotTwo)
- emitError(XMLErrs::BadSequenceInCharData);
- curState = State_Waiting;
- }
- else
- {
- curState = State_Waiting;
- }
- }
- else
- {
- curState = State_Waiting;
}
-
- // Deal with surrogate pairs
- if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
+ else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
+ // Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
@@ -2819,27 +2765,51 @@
// Make sure the returned char is a valid XML char
if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
- // if it was a character reference and is control char,
then it's ok
- if (!(charref_expanded &&
fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
- {
- XMLCh tmpBuf[9];
- XMLString::binToText
- (
- nextCh
- , tmpBuf
- , 8
- , 16
- );
- emitError(XMLErrs::InvalidCharacter, tmpBuf);
- }
+ XMLCh tmpBuf[9];
+ XMLString::binToText
+ (
+ nextCh
+ , tmpBuf
+ , 8
+ , 16
+ );
+ emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
}
- charref_expanded = false;
gotLeadingSurrogate = false;
}
+ // Keep the state machine up to date
+ if (!escaped)
+ {
+ if (nextCh == chCloseSquare)
+ {
+ if (curState == State_Waiting)
+ curState = State_GotOne;
+ else if (curState == State_GotOne)
+ curState = State_GotTwo;
+ }
+ else if (nextCh == chCloseAngle)
+ {
+ if (curState == State_GotTwo)
+ emitError(XMLErrs::BadSequenceInCharData);
+ curState = State_Waiting;
+ }
+ else
+ {
+ curState = State_Waiting;
+ }
+ }
+ else
+ {
+ curState = State_Waiting;
+ }
+
// Add this char to the buffer
toUse.append(nextCh);
+
+ if (secondCh)
+ toUse.append(secondCh);
}
}
catch(const EndOfEntityException& toCatch)
1.5 +87 -132 xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp
Index: IGXMLScanner2.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- IGXMLScanner2.cpp 20 Dec 2002 22:09:56 -0000 1.4
+++ IGXMLScanner2.cpp 24 Dec 2002 16:11:39 -0000 1.5
@@ -1542,23 +1542,13 @@
XMLCh secondCh = 0;
bool gotLeadingSurrogate = false;
bool escaped;
- bool charref_expanded = false;
while (true)
{
try
{
while(true)
{
- // Get another char. Use second char if one is waiting
- if (secondCh)
- {
- nextCh = secondCh;
- secondCh = 0;
- }
- else
- {
- nextCh = fReaderMgr.getNextChar();
- }
+ nextCh = fReaderMgr.getNextChar();
if (!nextCh)
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
@@ -1589,12 +1579,10 @@
gotLeadingSurrogate = false;
continue;
}
- charref_expanded = true;
}
-
- // Deal with surrogate pairs
- if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
+ else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
+ // Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
@@ -1626,23 +1614,17 @@
// Its got to at least be a valid XML character
else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
- // if it was a character reference and is control char,
then it's ok
- if (!(charref_expanded &&
fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
- {
-
- XMLCh tmpBuf[9];
- XMLString::binToText
- (
- nextCh
- , tmpBuf
- , 8
- , 16
- );
- emitError(XMLErrs::InvalidCharacterInAttrValue,
attrName, tmpBuf);
- }
+ XMLCh tmpBuf[9];
+ XMLString::binToText
+ (
+ nextCh
+ , tmpBuf
+ , 8
+ , 16
+ );
+ emitError(XMLErrs::InvalidCharacterInAttrValue,
attrName, tmpBuf);
}
}
- charref_expanded = false;
gotLeadingSurrogate = false;
}
@@ -1655,6 +1637,9 @@
// Else add it to the buffer
toFill.append(nextCh);
+
+ if (secondCh)
+ toFill.append(secondCh);
}
}
catch(const EndOfEntityException&)
@@ -1705,23 +1690,13 @@
bool firstNonWS = false;
bool gotLeadingSurrogate = false;
bool escaped;
- bool charref_expanded = false;
while (true)
{
try
{
while(true)
{
- // Get another char. Use second char if one is waiting
- if (secondCh)
- {
- nextCh = secondCh;
- secondCh = 0;
- }
- else
- {
- nextCh = fReaderMgr.getNextChar();
- }
+ nextCh = fReaderMgr.getNextChar();
if (!nextCh)
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
@@ -1751,12 +1726,10 @@
gotLeadingSurrogate = false;
continue;
}
- charref_expanded = true;
}
-
- // Deal with surrogate pairs
- if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
+ else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
+ // Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
@@ -1786,22 +1759,17 @@
// Its got to at least be a valid XML character
if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
- // if it was a character reference and is control char,
then it's ok
- if (!(charref_expanded &&
fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
- {
- XMLCh tmpBuf[9];
- XMLString::binToText
- (
- nextCh
- , tmpBuf
- , 8
- , 16
- );
- emitError(XMLErrs::InvalidCharacterInAttrValue,
attrName, tmpBuf);
- }
+ XMLCh tmpBuf[9];
+ XMLString::binToText
+ (
+ nextCh
+ , tmpBuf
+ , 8
+ , 16
+ );
+ emitError(XMLErrs::InvalidCharacterInAttrValue, attrName,
tmpBuf);
}
}
- charref_expanded = false;
gotLeadingSurrogate = false;
}
@@ -1877,6 +1845,9 @@
// Else add it to the buffer
toFill.append(nextCh);
+
+ if (secondCh)
+ toFill.append(secondCh);
}
}
catch(const EndOfEntityException&)
@@ -2093,44 +2064,36 @@
bool escaped = false;
bool gotLeadingSurrogate = false;
bool notDone = true;
- bool charref_expanded = false;
while (notDone)
{
try
{
while (true)
{
- if (secondCh)
+ // Eat through as many plain content characters as possible without
+ // needing special handling. Moving most content characters here,
+ // in this one call, rather than running the overall loop once
+ // per content character, is a speed optimization.
+ if (curState == State_Waiting && !gotLeadingSurrogate)
{
- nextCh = secondCh;
- secondCh = 0;
+ fReaderMgr.movePlainContentChars(toUse);
}
- else
- {
- // Eat through as many plain content characters as possible
without
- // needing special handling. Moving most content characters
here,
- // in this one call, rather than running the overall loop once
- // per content character, is a speed optimization.
- if (curState == State_Waiting && !gotLeadingSurrogate)
- {
- fReaderMgr.movePlainContentChars(toUse);
- }
- // Try to get another char from the source
- // The code from here on down covers all contengencies,
- if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
- {
- // If we were waiting for a trailing surrogate, its an error
- if (gotLeadingSurrogate)
- emitError(XMLErrs::Expected2ndSurrogateChar);
+ // Try to get another char from the source
+ // The code from here on down covers all contengencies,
+ if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
+ {
+ // If we were waiting for a trailing surrogate, its an error
+ if (gotLeadingSurrogate)
+ emitError(XMLErrs::Expected2ndSurrogateChar);
- notDone = false;
- break;
- }
+ notDone = false;
+ break;
}
// Watch for a reference. Note that the escapement mechanism
// is ignored in this content.
+ escaped = false;
if (nextCh == chAmpersand)
{
sendCharData(toUse);
@@ -2143,42 +2106,10 @@
gotLeadingSurrogate = false;
continue;
}
- charref_expanded = true;
}
- else
- {
- escaped = false;
- }
-
- // Keep the state machine up to date
- if (!escaped)
- {
- if (nextCh == chCloseSquare)
- {
- if (curState == State_Waiting)
- curState = State_GotOne;
- else if (curState == State_GotOne)
- curState = State_GotTwo;
- }
- else if (nextCh == chCloseAngle)
- {
- if (curState == State_GotTwo)
- emitError(XMLErrs::BadSequenceInCharData);
- curState = State_Waiting;
- }
- else
- {
- curState = State_Waiting;
- }
- }
- else
- {
- curState = State_Waiting;
- }
-
- // Deal with surrogate pairs
- if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
+ else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
+ // Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
@@ -2208,27 +2139,51 @@
// Make sure the returned char is a valid XML char
if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
- // if it was a character reference and is control char,
then it's ok
- if (!(charref_expanded &&
fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
- {
- XMLCh tmpBuf[9];
- XMLString::binToText
- (
- nextCh
- , tmpBuf
- , 8
- , 16
- );
- emitError(XMLErrs::InvalidCharacter, tmpBuf);
- }
+ XMLCh tmpBuf[9];
+ XMLString::binToText
+ (
+ nextCh
+ , tmpBuf
+ , 8
+ , 16
+ );
+ emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
}
- charref_expanded = false;
gotLeadingSurrogate = false;
}
+ // Keep the state machine up to date
+ if (!escaped)
+ {
+ if (nextCh == chCloseSquare)
+ {
+ if (curState == State_Waiting)
+ curState = State_GotOne;
+ else if (curState == State_GotOne)
+ curState = State_GotTwo;
+ }
+ else if (nextCh == chCloseAngle)
+ {
+ if (curState == State_GotTwo)
+ emitError(XMLErrs::BadSequenceInCharData);
+ curState = State_Waiting;
+ }
+ else
+ {
+ curState = State_Waiting;
+ }
+ }
+ else
+ {
+ curState = State_Waiting;
+ }
+
// Add this char to the buffer
toUse.append(nextCh);
+
+ if (secondCh)
+ toUse.append(secondCh);
}
}
catch(const EndOfEntityException& toCatch)
1.5 +72 -102 xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp
Index: SGXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- SGXMLScanner.cpp 20 Dec 2002 22:09:56 -0000 1.4
+++ SGXMLScanner.cpp 24 Dec 2002 16:11:39 -0000 1.5
@@ -3224,23 +3224,13 @@
XMLCh secondCh = 0;
bool gotLeadingSurrogate = false;
bool escaped;
- bool charref_expanded = false;
while (true)
{
try
{
while(true)
{
- // Get another char. Use second char if one is waiting
- if (secondCh)
- {
- nextCh = secondCh;
- secondCh = 0;
- }
- else
- {
- nextCh = fReaderMgr.getNextChar();
- }
+ nextCh = fReaderMgr.getNextChar();
if (!nextCh)
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
@@ -3271,12 +3261,10 @@
gotLeadingSurrogate = false;
continue;
}
- charref_expanded = true;
}
-
- // Deal with surrogate pairs
- if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
+ else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
+ // Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
@@ -3308,22 +3296,17 @@
// Its got to at least be a valid XML character
else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
- // if it was a character reference and is control char,
then it's ok
- if (!(charref_expanded &&
fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
- {
- XMLCh tmpBuf[9];
- XMLString::binToText
- (
- nextCh
- , tmpBuf
- , 8
- , 16
- );
- emitError(XMLErrs::InvalidCharacterInAttrValue,
attrName, tmpBuf);
- }
+ XMLCh tmpBuf[9];
+ XMLString::binToText
+ (
+ nextCh
+ , tmpBuf
+ , 8
+ , 16
+ );
+ emitError(XMLErrs::InvalidCharacterInAttrValue,
attrName, tmpBuf);
}
}
- charref_expanded = false;
gotLeadingSurrogate = false;
}
@@ -3336,6 +3319,9 @@
// Else add it to the buffer
toFill.append(nextCh);
+
+ if (secondCh)
+ toFill.append(secondCh);
}
}
catch(const EndOfEntityException&)
@@ -3553,44 +3539,36 @@
bool escaped = false;
bool gotLeadingSurrogate = false;
bool notDone = true;
- bool charref_expanded = false;
while (notDone)
{
try
{
while (true)
{
- if (secondCh)
+ // Eat through as many plain content characters as possible without
+ // needing special handling. Moving most content characters here,
+ // in this one call, rather than running the overall loop once
+ // per content character, is a speed optimization.
+ if (curState == State_Waiting && !gotLeadingSurrogate)
{
- nextCh = secondCh;
- secondCh = 0;
+ fReaderMgr.movePlainContentChars(toUse);
}
- else
- {
- // Eat through as many plain content characters as possible
without
- // needing special handling. Moving most content characters
here,
- // in this one call, rather than running the overall loop once
- // per content character, is a speed optimization.
- if (curState == State_Waiting && !gotLeadingSurrogate)
- {
- fReaderMgr.movePlainContentChars(toUse);
- }
- // Try to get another char from the source
- // The code from here on down covers all contengencies,
- if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
- {
- // If we were waiting for a trailing surrogate, its an error
- if (gotLeadingSurrogate)
- emitError(XMLErrs::Expected2ndSurrogateChar);
+ // Try to get another char from the source
+ // The code from here on down covers all contengencies,
+ if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
+ {
+ // If we were waiting for a trailing surrogate, its an error
+ if (gotLeadingSurrogate)
+ emitError(XMLErrs::Expected2ndSurrogateChar);
- notDone = false;
- break;
- }
+ notDone = false;
+ break;
}
// Watch for a reference. Note that the escapement mechanism
// is ignored in this content.
+ escaped = false;
if (nextCh == chAmpersand)
{
sendCharData(toUse);
@@ -3603,42 +3581,10 @@
gotLeadingSurrogate = false;
continue;
}
- charref_expanded = true;
- }
- else
- {
- escaped = false;
- }
-
- // Keep the state machine up to date
- if (!escaped)
- {
- if (nextCh == chCloseSquare)
- {
- if (curState == State_Waiting)
- curState = State_GotOne;
- else if (curState == State_GotOne)
- curState = State_GotTwo;
- }
- else if (nextCh == chCloseAngle)
- {
- if (curState == State_GotTwo)
- emitError(XMLErrs::BadSequenceInCharData);
- curState = State_Waiting;
- }
- else
- {
- curState = State_Waiting;
- }
- }
- else
- {
- curState = State_Waiting;
}
-
- // Deal with surrogate pairs
- if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
+ else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
+ // Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
@@ -3668,27 +3614,51 @@
// Make sure the returned char is a valid XML char
if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
- // if it was a character reference and is control char,
then it's ok
- if (!(charref_expanded &&
fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
- {
- XMLCh tmpBuf[9];
- XMLString::binToText
- (
- nextCh
- , tmpBuf
- , 8
- , 16
- );
- emitError(XMLErrs::InvalidCharacter, tmpBuf);
- }
+ XMLCh tmpBuf[9];
+ XMLString::binToText
+ (
+ nextCh
+ , tmpBuf
+ , 8
+ , 16
+ );
+ emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
}
- charref_expanded = false;
gotLeadingSurrogate = false;
}
+ // Keep the state machine up to date
+ if (!escaped)
+ {
+ if (nextCh == chCloseSquare)
+ {
+ if (curState == State_Waiting)
+ curState = State_GotOne;
+ else if (curState == State_GotOne)
+ curState = State_GotTwo;
+ }
+ else if (nextCh == chCloseAngle)
+ {
+ if (curState == State_GotTwo)
+ emitError(XMLErrs::BadSequenceInCharData);
+ curState = State_Waiting;
+ }
+ else
+ {
+ curState = State_Waiting;
+ }
+ }
+ else
+ {
+ curState = State_Waiting;
+ }
+
// Add this char to the buffer
toUse.append(nextCh);
+
+ if (secondCh)
+ toUse.append(secondCh);
}
}
catch(const EndOfEntityException& toCatch)
1.4 +72 -105 xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp
Index: WFXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- WFXMLScanner.cpp 20 Dec 2002 22:09:56 -0000 1.3
+++ WFXMLScanner.cpp 24 Dec 2002 16:11:39 -0000 1.4
@@ -1522,23 +1522,13 @@
bool firstNonWS = false;
bool gotLeadingSurrogate = false;
bool escaped;
- bool charref_expanded = false;
while (true)
{
try
{
while(true)
{
- // Get another char. Use second char if one is waiting
- if (secondCh)
- {
- nextCh = secondCh;
- secondCh = 0;
- }
- else
- {
- nextCh = fReaderMgr.getNextChar();
- }
+ nextCh = fReaderMgr.getNextChar();
if (!nextCh)
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
@@ -1568,12 +1558,10 @@
gotLeadingSurrogate = false;
continue;
}
- charref_expanded = true;
}
-
- // Deal with surrogate pairs
- if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
+ else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
+ // Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
@@ -1605,30 +1593,24 @@
// Its got to at least be a valid XML character
else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
- // if it was a character reference and is control char,
then it's ok
- if (!(charref_expanded &&
fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
- {
-
- XMLCh tmpBuf[9];
- XMLString::binToText
- (
- nextCh
- , tmpBuf
- , 8
- , 16
- );
- emitError(XMLErrs::InvalidCharacterInAttrValue,
attrName, tmpBuf);
- }
+ XMLCh tmpBuf[9];
+ XMLString::binToText
+ (
+ nextCh
+ , tmpBuf
+ , 8
+ , 16
+ );
+ emitError(XMLErrs::InvalidCharacterInAttrValue, attrName,
tmpBuf);
}
}
- charref_expanded = false;
gotLeadingSurrogate = false;
}
// If its not escaped, then make sure its not a < character, which
// is not allowed in attribute values.
if (!escaped) {
- if (nextCh == chOpenAngle)
+ if (nextCh == chOpenAngle)
emitError(XMLErrs::BracketInAttrValue, attrName);
else if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
nextCh = chSpace;
@@ -1636,6 +1618,9 @@
// Else add it to the buffer
toFill.append(nextCh);
+
+ if (secondCh)
+ toFill.append(secondCh);
}
}
catch(const EndOfEntityException&)
@@ -1807,44 +1792,36 @@
bool escaped = false;
bool gotLeadingSurrogate = false;
bool notDone = true;
- bool charref_expanded = false;
while (notDone)
{
try
{
while (true)
{
- if (secondCh)
+ // Eat through as many plain content characters as possible without
+ // needing special handling. Moving most content characters here,
+ // in this one call, rather than running the overall loop once
+ // per content character, is a speed optimization.
+ if (curState == State_Waiting && !gotLeadingSurrogate)
{
- nextCh = secondCh;
- secondCh = 0;
+ fReaderMgr.movePlainContentChars(toUse);
}
- else
- {
- // Eat through as many plain content characters as possible
without
- // needing special handling. Moving most content characters
here,
- // in this one call, rather than running the overall loop once
- // per content character, is a speed optimization.
- if (curState == State_Waiting && !gotLeadingSurrogate)
- {
- fReaderMgr.movePlainContentChars(toUse);
- }
- // Try to get another char from the source
- // The code from here on down covers all contengencies,
- if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
- {
- // If we were waiting for a trailing surrogate, its an error
- if (gotLeadingSurrogate)
- emitError(XMLErrs::Expected2ndSurrogateChar);
+ // Try to get another char from the source
+ // The code from here on down covers all contengencies,
+ if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
+ {
+ // If we were waiting for a trailing surrogate, its an error
+ if (gotLeadingSurrogate)
+ emitError(XMLErrs::Expected2ndSurrogateChar);
- notDone = false;
- break;
- }
+ notDone = false;
+ break;
}
// Watch for a reference. Note that the escapement mechanism
// is ignored in this content.
+ escaped = false;
if (nextCh == chAmpersand)
{
sendCharData(toUse);
@@ -1857,42 +1834,10 @@
gotLeadingSurrogate = false;
continue;
}
- charref_expanded = true;
- }
- else
- {
- escaped = false;
- }
-
- // Keep the state machine up to date
- if (!escaped)
- {
- if (nextCh == chCloseSquare)
- {
- if (curState == State_Waiting)
- curState = State_GotOne;
- else if (curState == State_GotOne)
- curState = State_GotTwo;
- }
- else if (nextCh == chCloseAngle)
- {
- if (curState == State_GotTwo)
- emitError(XMLErrs::BadSequenceInCharData);
- curState = State_Waiting;
- }
- else
- {
- curState = State_Waiting;
- }
}
- else
- {
- curState = State_Waiting;
- }
-
- // Deal with surrogate pairs
- if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
+ else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
+ // Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
@@ -1924,29 +1869,51 @@
// Its got to at least be a valid XML character
else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
- // if it was a character reference and is control char,
then it's ok
- if (!(charref_expanded &&
fReaderMgr.getCurrentReader()->isControlChar(nextCh)))
- {
-
- XMLCh tmpBuf[9];
- XMLString::binToText
- (
- nextCh
- , tmpBuf
- , 8
- , 16
- );
- emitError(XMLErrs::InvalidCharacter, tmpBuf);
- }
+ XMLCh tmpBuf[9];
+ XMLString::binToText
+ (
+ nextCh
+ , tmpBuf
+ , 8
+ , 16
+ );
+ emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
}
- charref_expanded = false;
gotLeadingSurrogate = false;
}
+ // Keep the state machine up to date
+ if (!escaped)
+ {
+ if (nextCh == chCloseSquare)
+ {
+ if (curState == State_Waiting)
+ curState = State_GotOne;
+ else if (curState == State_GotOne)
+ curState = State_GotTwo;
+ }
+ else if (nextCh == chCloseAngle)
+ {
+ if (curState == State_GotTwo)
+ emitError(XMLErrs::BadSequenceInCharData);
+ curState = State_Waiting;
+ }
+ else
+ {
+ curState = State_Waiting;
+ }
+ }
+ else
+ {
+ curState = State_Waiting;
+ }
// Add this char to the buffer
toUse.append(nextCh);
+
+ if (secondCh)
+ toUse.append(secondCh);
}
}
catch(const EndOfEntityException& toCatch)
1.32 +14 -3 xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
Index: XMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
retrieving revision 1.31
retrieving revision 1.32
diff -u -r1.31 -r1.32
--- XMLScanner.cpp 23 Dec 2002 19:34:37 -0000 1.31
+++ XMLScanner.cpp 24 Dec 2002 16:11:39 -0000 1.32
@@ -1725,16 +1725,27 @@
}
// Return the char (or chars)
- if (value >= 0x10000)
+ // And check if the character expanded is valid or not
+ if (value >= 0x10000 && value <= 0x10FFFF)
{
value -= 0x10000;
toFill = XMLCh((value >> 10) + 0xD800);
second = XMLCh((value & 0x3FF) + 0xDC00);
}
- else
+ else if (value <= 0xFFFD)
{
toFill = XMLCh(value);
second = 0;
+ if (!fReaderMgr.getCurrentReader()->isXMLChar(toFill) &&
!fReaderMgr.getCurrentReader()->isControlChar(toFill)) {
+ // Character reference was not in the valid range
+ emitError(XMLErrs::InvalidCharacterRef);
+ return false;
+ }
+ }
+ else {
+ // Character reference was not in the valid range
+ emitError(XMLErrs::InvalidCharacterRef);
+ return false;
}
return true;
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]