tng 2002/12/24 08:11:40 Modified: c/src/xercesc/internal DGXMLScanner.cpp IGXMLScanner2.cpp SGXMLScanner.cpp WFXMLScanner.cpp XMLScanner.cpp Log: For performance reasons, move the character check to scancharref. Revision Changes Path 1.3 +73 -103 xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp Index: DGXMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- DGXMLScanner.cpp 20 Dec 2002 22:09:56 -0000 1.2 +++ DGXMLScanner.cpp 24 Dec 2002 16:11:39 -0000 1.3 @@ -2326,23 +2326,13 @@ bool firstNonWS = false; bool gotLeadingSurrogate = false; bool escaped; - bool charref_expanded = false; while (true) { try { while(true) { - // Get another char. Use second char if one is waiting - if (secondCh) - { - nextCh = secondCh; - secondCh = 0; - } - else - { - nextCh = fReaderMgr.getNextChar(); - } + nextCh = fReaderMgr.getNextChar(); if (!nextCh) ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF); @@ -2372,18 +2362,16 @@ gotLeadingSurrogate = false; continue; } - charref_expanded = true; } - - // Deal with surrogate pairs - if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) + else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { + // Deal with surrogate pairs // Its a leading surrogate. If we already got one, then // issue an error, else set leading flag to make sure that // we look for a trailing next time. 
if (gotLeadingSurrogate) emitError(XMLErrs::Expected2ndSurrogateChar); - else + else gotLeadingSurrogate = true; } else @@ -2407,22 +2395,17 @@ // Its got to at least be a valid XML character if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) { - // if it was a character reference and is control char, then it's ok - if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh))) - { - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); - } + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); } } - charref_expanded = false; gotLeadingSurrogate = false; } @@ -2498,6 +2481,9 @@ // Else add it to the buffer toFill.append(nextCh); + + if (secondCh) + toFill.append(secondCh); } } catch(const EndOfEntityException&) @@ -2704,44 +2690,36 @@ bool escaped = false; bool gotLeadingSurrogate = false; bool notDone = true; - bool charref_expanded = false; while (notDone) { try { while (true) { - if (secondCh) + // Eat through as many plain content characters as possible without + // needing special handling. Moving most content characters here, + // in this one call, rather than running the overall loop once + // per content character, is a speed optimization. + if (curState == State_Waiting && !gotLeadingSurrogate) { - nextCh = secondCh; - secondCh = 0; + fReaderMgr.movePlainContentChars(toUse); } - else - { - // Eat through as many plain content characters as possible without - // needing special handling. Moving most content characters here, - // in this one call, rather than running the overall loop once - // per content character, is a speed optimization. 
- if (curState == State_Waiting && !gotLeadingSurrogate) - { - fReaderMgr.movePlainContentChars(toUse); - } - // Try to get another char from the source - // The code from here on down covers all contengencies, - if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) - { - // If we were waiting for a trailing surrogate, its an error - if (gotLeadingSurrogate) - emitError(XMLErrs::Expected2ndSurrogateChar); + // Try to get another char from the source + // The code from here on down covers all contengencies, + if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) + { + // If we were waiting for a trailing surrogate, its an error + if (gotLeadingSurrogate) + emitError(XMLErrs::Expected2ndSurrogateChar); - notDone = false; - break; - } + notDone = false; + break; } // Watch for a reference. Note that the escapement mechanism // is ignored in this content. + escaped = false; if (nextCh == chAmpersand) { sendCharData(toUse); @@ -2754,42 +2732,10 @@ gotLeadingSurrogate = false; continue; } - charref_expanded = true; - } - else - { - escaped = false; - } - - // Keep the state machine up to date - if (!escaped) - { - if (nextCh == chCloseSquare) - { - if (curState == State_Waiting) - curState = State_GotOne; - else if (curState == State_GotOne) - curState = State_GotTwo; - } - else if (nextCh == chCloseAngle) - { - if (curState == State_GotTwo) - emitError(XMLErrs::BadSequenceInCharData); - curState = State_Waiting; - } - else - { - curState = State_Waiting; - } - } - else - { - curState = State_Waiting; } - - // Deal with surrogate pairs - if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) + else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { + // Deal with surrogate pairs // Its a leading surrogate. If we already got one, then // issue an error, else set leading flag to make sure that // we look for a trailing next time. 
@@ -2819,27 +2765,51 @@ // Make sure the returned char is a valid XML char if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) { - // if it was a character reference and is control char, then it's ok - if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh))) - { - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - emitError(XMLErrs::InvalidCharacter, tmpBuf); - } + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + emitError(XMLErrs::InvalidCharacter, tmpBuf); } } - charref_expanded = false; gotLeadingSurrogate = false; } + // Keep the state machine up to date + if (!escaped) + { + if (nextCh == chCloseSquare) + { + if (curState == State_Waiting) + curState = State_GotOne; + else if (curState == State_GotOne) + curState = State_GotTwo; + } + else if (nextCh == chCloseAngle) + { + if (curState == State_GotTwo) + emitError(XMLErrs::BadSequenceInCharData); + curState = State_Waiting; + } + else + { + curState = State_Waiting; + } + } + else + { + curState = State_Waiting; + } + // Add this char to the buffer toUse.append(nextCh); + + if (secondCh) + toUse.append(secondCh); } } catch(const EndOfEntityException& toCatch) 1.5 +87 -132 xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp Index: IGXMLScanner2.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- IGXMLScanner2.cpp 20 Dec 2002 22:09:56 -0000 1.4 +++ IGXMLScanner2.cpp 24 Dec 2002 16:11:39 -0000 1.5 @@ -1542,23 +1542,13 @@ XMLCh secondCh = 0; bool gotLeadingSurrogate = false; bool escaped; - bool charref_expanded = false; while (true) { try { while(true) { - // Get another char. 
Use second char if one is waiting - if (secondCh) - { - nextCh = secondCh; - secondCh = 0; - } - else - { - nextCh = fReaderMgr.getNextChar(); - } + nextCh = fReaderMgr.getNextChar(); if (!nextCh) ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF); @@ -1589,12 +1579,10 @@ gotLeadingSurrogate = false; continue; } - charref_expanded = true; } - - // Deal with surrogate pairs - if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) + else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { + // Deal with surrogate pairs // Its a leading surrogate. If we already got one, then // issue an error, else set leading flag to make sure that // we look for a trailing next time. @@ -1626,23 +1614,17 @@ // Its got to at least be a valid XML character else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) { - // if it was a character reference and is control char, then it's ok - if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh))) - { - - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); - } + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); } } - charref_expanded = false; gotLeadingSurrogate = false; } @@ -1655,6 +1637,9 @@ // Else add it to the buffer toFill.append(nextCh); + + if (secondCh) + toFill.append(secondCh); } } catch(const EndOfEntityException&) @@ -1705,23 +1690,13 @@ bool firstNonWS = false; bool gotLeadingSurrogate = false; bool escaped; - bool charref_expanded = false; while (true) { try { while(true) { - // Get another char. 
Use second char if one is waiting - if (secondCh) - { - nextCh = secondCh; - secondCh = 0; - } - else - { - nextCh = fReaderMgr.getNextChar(); - } + nextCh = fReaderMgr.getNextChar(); if (!nextCh) ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF); @@ -1751,12 +1726,10 @@ gotLeadingSurrogate = false; continue; } - charref_expanded = true; } - - // Deal with surrogate pairs - if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) + else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { + // Deal with surrogate pairs // Its a leading surrogate. If we already got one, then // issue an error, else set leading flag to make sure that // we look for a trailing next time. @@ -1786,22 +1759,17 @@ // Its got to at least be a valid XML character if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) { - // if it was a character reference and is control char, then it's ok - if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh))) - { - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); - } + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); } } - charref_expanded = false; gotLeadingSurrogate = false; } @@ -1877,6 +1845,9 @@ // Else add it to the buffer toFill.append(nextCh); + + if (secondCh) + toFill.append(secondCh); } } catch(const EndOfEntityException&) @@ -2093,44 +2064,36 @@ bool escaped = false; bool gotLeadingSurrogate = false; bool notDone = true; - bool charref_expanded = false; while (notDone) { try { while (true) { - if (secondCh) + // Eat through as many plain content characters as possible without + // needing special handling. Moving most content characters here, + // in this one call, rather than running the overall loop once + // per content character, is a speed optimization. 
+ if (curState == State_Waiting && !gotLeadingSurrogate) { - nextCh = secondCh; - secondCh = 0; + fReaderMgr.movePlainContentChars(toUse); } - else - { - // Eat through as many plain content characters as possible without - // needing special handling. Moving most content characters here, - // in this one call, rather than running the overall loop once - // per content character, is a speed optimization. - if (curState == State_Waiting && !gotLeadingSurrogate) - { - fReaderMgr.movePlainContentChars(toUse); - } - // Try to get another char from the source - // The code from here on down covers all contengencies, - if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) - { - // If we were waiting for a trailing surrogate, its an error - if (gotLeadingSurrogate) - emitError(XMLErrs::Expected2ndSurrogateChar); + // Try to get another char from the source + // The code from here on down covers all contengencies, + if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) + { + // If we were waiting for a trailing surrogate, its an error + if (gotLeadingSurrogate) + emitError(XMLErrs::Expected2ndSurrogateChar); - notDone = false; - break; - } + notDone = false; + break; } // Watch for a reference. Note that the escapement mechanism // is ignored in this content. 
+ escaped = false; if (nextCh == chAmpersand) { sendCharData(toUse); @@ -2143,42 +2106,10 @@ gotLeadingSurrogate = false; continue; } - charref_expanded = true; } - else - { - escaped = false; - } - - // Keep the state machine up to date - if (!escaped) - { - if (nextCh == chCloseSquare) - { - if (curState == State_Waiting) - curState = State_GotOne; - else if (curState == State_GotOne) - curState = State_GotTwo; - } - else if (nextCh == chCloseAngle) - { - if (curState == State_GotTwo) - emitError(XMLErrs::BadSequenceInCharData); - curState = State_Waiting; - } - else - { - curState = State_Waiting; - } - } - else - { - curState = State_Waiting; - } - - // Deal with surrogate pairs - if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) + else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { + // Deal with surrogate pairs // Its a leading surrogate. If we already got one, then // issue an error, else set leading flag to make sure that // we look for a trailing next time. @@ -2208,27 +2139,51 @@ // Make sure the returned char is a valid XML char if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) { - // if it was a character reference and is control char, then it's ok - if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh))) - { - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - emitError(XMLErrs::InvalidCharacter, tmpBuf); - } + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + emitError(XMLErrs::InvalidCharacter, tmpBuf); } } - charref_expanded = false; gotLeadingSurrogate = false; } + // Keep the state machine up to date + if (!escaped) + { + if (nextCh == chCloseSquare) + { + if (curState == State_Waiting) + curState = State_GotOne; + else if (curState == State_GotOne) + curState = State_GotTwo; + } + else if (nextCh == chCloseAngle) + { + if (curState == State_GotTwo) + emitError(XMLErrs::BadSequenceInCharData); + curState = State_Waiting; + } + else + { + curState = 
State_Waiting; + } + } + else + { + curState = State_Waiting; + } + // Add this char to the buffer toUse.append(nextCh); + + if (secondCh) + toUse.append(secondCh); } } catch(const EndOfEntityException& toCatch) 1.5 +72 -102 xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp Index: SGXMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- SGXMLScanner.cpp 20 Dec 2002 22:09:56 -0000 1.4 +++ SGXMLScanner.cpp 24 Dec 2002 16:11:39 -0000 1.5 @@ -3224,23 +3224,13 @@ XMLCh secondCh = 0; bool gotLeadingSurrogate = false; bool escaped; - bool charref_expanded = false; while (true) { try { while(true) { - // Get another char. Use second char if one is waiting - if (secondCh) - { - nextCh = secondCh; - secondCh = 0; - } - else - { - nextCh = fReaderMgr.getNextChar(); - } + nextCh = fReaderMgr.getNextChar(); if (!nextCh) ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF); @@ -3271,12 +3261,10 @@ gotLeadingSurrogate = false; continue; } - charref_expanded = true; } - - // Deal with surrogate pairs - if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) + else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { + // Deal with surrogate pairs // Its a leading surrogate. If we already got one, then // issue an error, else set leading flag to make sure that // we look for a trailing next time. 
@@ -3308,22 +3296,17 @@ // Its got to at least be a valid XML character else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) { - // if it was a character reference and is control char, then it's ok - if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh))) - { - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); - } + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); } } - charref_expanded = false; gotLeadingSurrogate = false; } @@ -3336,6 +3319,9 @@ // Else add it to the buffer toFill.append(nextCh); + + if (secondCh) + toFill.append(secondCh); } } catch(const EndOfEntityException&) @@ -3553,44 +3539,36 @@ bool escaped = false; bool gotLeadingSurrogate = false; bool notDone = true; - bool charref_expanded = false; while (notDone) { try { while (true) { - if (secondCh) + // Eat through as many plain content characters as possible without + // needing special handling. Moving most content characters here, + // in this one call, rather than running the overall loop once + // per content character, is a speed optimization. + if (curState == State_Waiting && !gotLeadingSurrogate) { - nextCh = secondCh; - secondCh = 0; + fReaderMgr.movePlainContentChars(toUse); } - else - { - // Eat through as many plain content characters as possible without - // needing special handling. Moving most content characters here, - // in this one call, rather than running the overall loop once - // per content character, is a speed optimization. 
- if (curState == State_Waiting && !gotLeadingSurrogate) - { - fReaderMgr.movePlainContentChars(toUse); - } - // Try to get another char from the source - // The code from here on down covers all contengencies, - if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) - { - // If we were waiting for a trailing surrogate, its an error - if (gotLeadingSurrogate) - emitError(XMLErrs::Expected2ndSurrogateChar); + // Try to get another char from the source + // The code from here on down covers all contengencies, + if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) + { + // If we were waiting for a trailing surrogate, its an error + if (gotLeadingSurrogate) + emitError(XMLErrs::Expected2ndSurrogateChar); - notDone = false; - break; - } + notDone = false; + break; } // Watch for a reference. Note that the escapement mechanism // is ignored in this content. + escaped = false; if (nextCh == chAmpersand) { sendCharData(toUse); @@ -3603,42 +3581,10 @@ gotLeadingSurrogate = false; continue; } - charref_expanded = true; - } - else - { - escaped = false; - } - - // Keep the state machine up to date - if (!escaped) - { - if (nextCh == chCloseSquare) - { - if (curState == State_Waiting) - curState = State_GotOne; - else if (curState == State_GotOne) - curState = State_GotTwo; - } - else if (nextCh == chCloseAngle) - { - if (curState == State_GotTwo) - emitError(XMLErrs::BadSequenceInCharData); - curState = State_Waiting; - } - else - { - curState = State_Waiting; - } - } - else - { - curState = State_Waiting; } - - // Deal with surrogate pairs - if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) + else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { + // Deal with surrogate pairs // Its a leading surrogate. If we already got one, then // issue an error, else set leading flag to make sure that // we look for a trailing next time. 
@@ -3668,27 +3614,51 @@ // Make sure the returned char is a valid XML char if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) { - // if it was a character reference and is control char, then it's ok - if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh))) - { - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - emitError(XMLErrs::InvalidCharacter, tmpBuf); - } + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + emitError(XMLErrs::InvalidCharacter, tmpBuf); } } - charref_expanded = false; gotLeadingSurrogate = false; } + // Keep the state machine up to date + if (!escaped) + { + if (nextCh == chCloseSquare) + { + if (curState == State_Waiting) + curState = State_GotOne; + else if (curState == State_GotOne) + curState = State_GotTwo; + } + else if (nextCh == chCloseAngle) + { + if (curState == State_GotTwo) + emitError(XMLErrs::BadSequenceInCharData); + curState = State_Waiting; + } + else + { + curState = State_Waiting; + } + } + else + { + curState = State_Waiting; + } + // Add this char to the buffer toUse.append(nextCh); + + if (secondCh) + toUse.append(secondCh); } } catch(const EndOfEntityException& toCatch) 1.4 +72 -105 xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp Index: WFXMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- WFXMLScanner.cpp 20 Dec 2002 22:09:56 -0000 1.3 +++ WFXMLScanner.cpp 24 Dec 2002 16:11:39 -0000 1.4 @@ -1522,23 +1522,13 @@ bool firstNonWS = false; bool gotLeadingSurrogate = false; bool escaped; - bool charref_expanded = false; while (true) { try { while(true) { - // Get another char. 
Use second char if one is waiting - if (secondCh) - { - nextCh = secondCh; - secondCh = 0; - } - else - { - nextCh = fReaderMgr.getNextChar(); - } + nextCh = fReaderMgr.getNextChar(); if (!nextCh) ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF); @@ -1568,12 +1558,10 @@ gotLeadingSurrogate = false; continue; } - charref_expanded = true; } - - // Deal with surrogate pairs - if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) + else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { + // Deal with surrogate pairs // Its a leading surrogate. If we already got one, then // issue an error, else set leading flag to make sure that // we look for a trailing next time. @@ -1605,30 +1593,24 @@ // Its got to at least be a valid XML character else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) { - // if it was a character reference and is control char, then it's ok - if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh))) - { - - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); - } + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); } } - charref_expanded = false; gotLeadingSurrogate = false; } // If its not escaped, then make sure its not a < character, which // is not allowed in attribute values. 
if (!escaped) { - if (nextCh == chOpenAngle) + if (nextCh == chOpenAngle) emitError(XMLErrs::BracketInAttrValue, attrName); else if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) nextCh = chSpace; @@ -1636,6 +1618,9 @@ // Else add it to the buffer toFill.append(nextCh); + + if (secondCh) + toFill.append(secondCh); } } catch(const EndOfEntityException&) @@ -1807,44 +1792,36 @@ bool escaped = false; bool gotLeadingSurrogate = false; bool notDone = true; - bool charref_expanded = false; while (notDone) { try { while (true) { - if (secondCh) + // Eat through as many plain content characters as possible without + // needing special handling. Moving most content characters here, + // in this one call, rather than running the overall loop once + // per content character, is a speed optimization. + if (curState == State_Waiting && !gotLeadingSurrogate) { - nextCh = secondCh; - secondCh = 0; + fReaderMgr.movePlainContentChars(toUse); } - else - { - // Eat through as many plain content characters as possible without - // needing special handling. Moving most content characters here, - // in this one call, rather than running the overall loop once - // per content character, is a speed optimization. 
- if (curState == State_Waiting && !gotLeadingSurrogate) - { - fReaderMgr.movePlainContentChars(toUse); - } - // Try to get another char from the source - // The code from here on down covers all contengencies, - if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) - { - // If we were waiting for a trailing surrogate, its an error - if (gotLeadingSurrogate) - emitError(XMLErrs::Expected2ndSurrogateChar); + // Try to get another char from the source + // The code from here on down covers all contengencies, + if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) + { + // If we were waiting for a trailing surrogate, its an error + if (gotLeadingSurrogate) + emitError(XMLErrs::Expected2ndSurrogateChar); - notDone = false; - break; - } + notDone = false; + break; } // Watch for a reference. Note that the escapement mechanism // is ignored in this content. + escaped = false; if (nextCh == chAmpersand) { sendCharData(toUse); @@ -1857,42 +1834,10 @@ gotLeadingSurrogate = false; continue; } - charref_expanded = true; - } - else - { - escaped = false; - } - - // Keep the state machine up to date - if (!escaped) - { - if (nextCh == chCloseSquare) - { - if (curState == State_Waiting) - curState = State_GotOne; - else if (curState == State_GotOne) - curState = State_GotTwo; - } - else if (nextCh == chCloseAngle) - { - if (curState == State_GotTwo) - emitError(XMLErrs::BadSequenceInCharData); - curState = State_Waiting; - } - else - { - curState = State_Waiting; - } } - else - { - curState = State_Waiting; - } - - // Deal with surrogate pairs - if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) + else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { + // Deal with surrogate pairs // Its a leading surrogate. If we already got one, then // issue an error, else set leading flag to make sure that // we look for a trailing next time. 
@@ -1924,29 +1869,51 @@ // Its got to at least be a valid XML character else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) { - // if it was a character reference and is control char, then it's ok - if (!(charref_expanded && fReaderMgr.getCurrentReader()->isControlChar(nextCh))) - { - - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - emitError(XMLErrs::InvalidCharacter, tmpBuf); - } + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + emitError(XMLErrs::InvalidCharacter, tmpBuf); } } - charref_expanded = false; gotLeadingSurrogate = false; } + // Keep the state machine up to date + if (!escaped) + { + if (nextCh == chCloseSquare) + { + if (curState == State_Waiting) + curState = State_GotOne; + else if (curState == State_GotOne) + curState = State_GotTwo; + } + else if (nextCh == chCloseAngle) + { + if (curState == State_GotTwo) + emitError(XMLErrs::BadSequenceInCharData); + curState = State_Waiting; + } + else + { + curState = State_Waiting; + } + } + else + { + curState = State_Waiting; + } // Add this char to the buffer toUse.append(nextCh); + + if (secondCh) + toUse.append(secondCh); } } catch(const EndOfEntityException& toCatch) 1.32 +14 -3 xml-xerces/c/src/xercesc/internal/XMLScanner.cpp Index: XMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v retrieving revision 1.31 retrieving revision 1.32 diff -u -r1.31 -r1.32 --- XMLScanner.cpp 23 Dec 2002 19:34:37 -0000 1.31 +++ XMLScanner.cpp 24 Dec 2002 16:11:39 -0000 1.32 @@ -1725,16 +1725,27 @@ } // Return the char (or chars) - if (value >= 0x10000) + // And check if the character expanded is valid or not + if (value >= 0x10000 && value <= 0x10FFFF) { value -= 0x10000; toFill = XMLCh((value >> 10) + 0xD800); second = XMLCh((value & 0x3FF) + 0xDC00); } - else + else if (value <= 0xFFFD) { toFill = XMLCh(value); second = 0; + if 
(!fReaderMgr.getCurrentReader()->isXMLChar(toFill) && !fReaderMgr.getCurrentReader()->isControlChar(toFill)) { + // Character reference was not in the valid range + emitError(XMLErrs::InvalidCharacterRef); + return false; + } + } + else { + // Character reference was not in the valid range + emitError(XMLErrs::InvalidCharacterRef); + return false; } return true;
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]