tng 2003/02/04 13:21:51
Modified: c/src/xercesc/internal IGXMLScanner2.cpp SGXMLScanner.cpp
Log:
Performance: check the AllCharData charOpts once, instead of for every nextCh in the loop.
Also call normalizeWhiteSpace for CDATA.
Revision Changes Path
1.12 +50 -34 xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp
Index: IGXMLScanner2.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- IGXMLScanner2.cpp 4 Feb 2003 17:15:36 -0000 1.11
+++ IGXMLScanner2.cpp 4 Feb 2003 21:21:50 -0000 1.12
@@ -281,7 +281,7 @@
//we may have set it to invalid already, but this is the first time
we are guarenteed to have the attDef
if(((SchemaAttDef *)(attDef))->getValidity() != PSVIDefs::INVALID)
((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::VALID);
-
+
((SchemaAttDef *)(attDef))->setValidationAttempted(PSVIDefs::FULL);
}
@@ -292,7 +292,7 @@
attDef->setCreateReason(XMLAttDef::JustFaultIn);
}
- bool errorCondition = fValidate && !attDefForWildCard &&
+ bool errorCondition = fValidate && !attDefForWildCard &&
attDef->getCreateReason() == XMLAttDef::JustFaultIn &&
!attDef->getProvided();
if (errorCondition && !skipThisOne && !laxThisOne)
{
@@ -334,7 +334,7 @@
if (attDef->getProvided())
{
emitError
- (
+ (
XMLErrs::AttrAlreadyUsedInSTag
, attDef->getFullName()
, elemDecl->getFullName()
@@ -517,7 +517,7 @@
XMLValid::RequiredAttrNotProvided
, curDef->getFullName()
);
- if(fGrammarType == Grammar::SchemaGrammarType)
+ if(fGrammarType == Grammar::SchemaGrammarType)
((SchemaAttDef
*)(curDef))->setValidity(PSVIDefs::INVALID);
}
else if ((defType == XMLAttDef::Default) ||
@@ -527,7 +527,7 @@
{
// XML 1.0 Section 2.9
// Document is standalone, so attributes must not be
defaulted.
- fValidator->emitError(XMLValid::NoDefAttForStandalone,
curDef->getFullName(), elemDecl->getFullName());
+ fValidator->emitError(XMLValid::NoDefAttForStandalone,
curDef->getFullName(), elemDecl->getFullName());
if(fGrammarType == Grammar::SchemaGrammarType)
((SchemaAttDef
*)(curDef))->setValidity(PSVIDefs::INVALID);
}
@@ -993,7 +993,7 @@
{
// They definitely cannot handle any type of char data
fValidator->emitError(XMLValid::NoCharDataInCM);
- if(fGrammarType == Grammar::SchemaGrammarType)
+ if(fGrammarType == Grammar::SchemaGrammarType)
((SchemaElementDecl
*)topElem->fThisElement)->setValidity(PSVIDefs::INVALID);
}
else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len))
@@ -1086,7 +1086,7 @@
else
{
fValidator->emitError(XMLValid::NoCharDataInCM);
- if(fGrammarType == Grammar::SchemaGrammarType)
+ if(fGrammarType == Grammar::SchemaGrammarType)
((SchemaElementDecl
*)topElem->fThisElement)->setValidity(PSVIDefs::INVALID);
}
}
@@ -1987,6 +1987,11 @@
// characters specially here.
bool emittedError = false;
bool gotLeadingSurrogate = false;
+
+ // Get the character data opts for the current element
+ const ElemStack::StackElem* topElem = fElemStack.topElement();
+ XMLElementDecl::CharDataOpts charOpts =
topElem->fThisElement->getCharDataOpts();
+
while (true)
{
const XMLCh nextCh = fReaderMgr.getNextChar();
@@ -2003,19 +2008,15 @@
// This document is standalone; this ignorable CDATA whitespace is
forbidden.
// XML 1.0, Section 2.9
// And see if the current element is a 'Children' style content model
- const ElemStack::StackElem* topElem = fElemStack.topElement();
if (topElem->fThisElement->isExternal()) {
- // Get the character data opts for the current element
- XMLElementDecl::CharDataOpts charOpts =
topElem->fThisElement->getCharDataOpts();
-
if (charOpts == XMLElementDecl::SpacesOk) // Element Content
{
// Error - standalone should have a value of "no" as whitespace
detected in an
// element type with element content whose element declaration
was external
fValidator->emitError(XMLValid::NoWSForStandalone);
- if(fGrammarType == Grammar::SchemaGrammarType)
+ if(fGrammarType == Grammar::SchemaGrammarType)
((SchemaElementDecl
*)topElem->fThisElement)->setValidity(PSVIDefs::INVALID);
}
}
@@ -2025,16 +2026,47 @@
// sequence.
if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
{
+ // make sure we were not expecting a trailing surrogate.
+ if (gotLeadingSurrogate)
+ emitError(XMLErrs::Expected2ndSurrogateChar);
+
if (fGrammarType == Grammar::SchemaGrammarType) {
+ if (fNormalizeData) {
+ // normalize the character according to schema whitespace facet
+ XMLBufBid bbtemp(&fBufMgr);
+ XMLBuffer& tempBuf = bbtemp.getBuffer();
+
+ DatatypeValidator* tempDV = ((SchemaElementDecl*)
topElem->fThisElement)->getDatatypeValidator();
+ ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV,
bbCData.getRawBuffer(), tempBuf);
+ bbCData.set(tempBuf.getRawBuffer());
+ }
+
+ if (fValidate) {
+
+ // tell the schema validation about the character data for
checkContent later
+ ((SchemaValidator*)
fValidator)->setDatatypeBuffer(bbCData.getRawBuffer());
+
+ if (charOpts != XMLElementDecl::AllCharData)
+ {
+ // They definitely cannot handle any type of char data
+ fValidator->emitError(XMLValid::NoCharDataInCM);
+ ((SchemaElementDecl
*)topElem->fThisElement)->setValidity(PSVIDefs::INVALID);
+ }
+ }
+
if (fMatcherStack->getMatcherCount())
fContent.append(bbCData.getRawBuffer(), bbCData.getLen());
}
+ else {
+ if (fValidate) {
- // tell the schema validation about the character data for checkContent
later
- if (fValidate && fSchemaValidator)
- {
- fSchemaValidator->setDatatypeBuffer(bbCData.getRawBuffer());
+ if (charOpts != XMLElementDecl::AllCharData)
+ {
+ // They definitely cannot handle any type of char data
+ fValidator->emitError(XMLValid::NoCharDataInCM);
+ }
+ }
}
// If we have a doc handler, call it
@@ -2105,22 +2137,6 @@
}
}
- if (fValidate) {
- // And see if the current element is a 'Children' style content model
- const ElemStack::StackElem* topElem = fElemStack.topElement();
-
- // Get the character data opts for the current element
- XMLElementDecl::CharDataOpts charOpts =
topElem->fThisElement->getCharDataOpts();
-
- if (charOpts != XMLElementDecl::AllCharData)
- {
- // They definitely cannot handle any type of char data
- fValidator->emitError(XMLValid::NoCharDataInCM);
- if(fGrammarType == Grammar::SchemaGrammarType)
- ((SchemaElementDecl
*)topElem->fThisElement)->setValidity(PSVIDefs::INVALID);
- }
- }
-
// Add it to the buffer
bbCData.append(nextCh);
}
@@ -2315,7 +2331,7 @@
// element type with element content whose element declaration
was external
//
fValidator->emitError(XMLValid::NoWSForStandalone);
- if(fGrammarType == Grammar::SchemaGrammarType)
+ if(fGrammarType == Grammar::SchemaGrammarType)
((SchemaElementDecl
*)fElemStack.topElement()->fThisElement)->setValidity(PSVIDefs::INVALID);
}
}
1.15 +41 -39 xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp
Index: SGXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- SGXMLScanner.cpp 4 Feb 2003 17:15:36 -0000 1.14
+++ SGXMLScanner.cpp 4 Feb 2003 21:21:50 -0000 1.15
@@ -954,7 +954,7 @@
, topElem->fThisElement->getFormattedContentModel()
);
}
-
+
}
// call matchers and de-activate context
@@ -1209,7 +1209,7 @@
,uriStr
);
errorBeforeElementFound = true;
- }
+ }
else if(errorCondition)
laxBeforeElementFound = true;
@@ -1252,7 +1252,7 @@
);
errorBeforeElementFound = true;
}
- else if(errorCondition)
+ else if(errorCondition)
laxBeforeElementFound = true;
}
@@ -1297,10 +1297,10 @@
, XMLUni::fgZeroLenString
);
errorBeforeElementFound = true;
-
+
}
else if(errorCondition)
- laxBeforeElementFound = true;
+ laxBeforeElementFound = true;
elemDecl = fGrammar->getElemDecl
(
@@ -1336,7 +1336,7 @@
errorBeforeElementFound = true;
}
else if(errorCondition)
- laxBeforeElementFound = true;
+ laxBeforeElementFound = true;
elemDecl = fGrammar->getElemDecl
(
@@ -1414,9 +1414,9 @@
if (!elemDecl->isDeclared()) {
if (laxThisOne) {
fValidate = false;
- fElemStack.setValidationFlag(fValidate);
+ fElemStack.setValidationFlag(fValidate);
}
-
+
if (fValidate)
{
fValidator->emitError
@@ -2096,7 +2096,7 @@
//we may have set it to invalid already, but this is the first time
we are guarenteed to have the attDef
if(((SchemaAttDef *)(attDef))->getValidity() != PSVIDefs::INVALID)
((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::VALID);
-
+
((SchemaAttDef *)(attDef))->setValidationAttempted(PSVIDefs::FULL);
}
@@ -2107,7 +2107,7 @@
attDef->setCreateReason(XMLAttDef::JustFaultIn);
}
- bool errorCondition = fValidate && !attDefForWildCard &&
+ bool errorCondition = fValidate && !attDefForWildCard &&
attDef->getCreateReason() == XMLAttDef::JustFaultIn &&
!attDef->getProvided();
if (errorCondition && !skipThisOne && !laxThisOne)
{
@@ -2384,7 +2384,7 @@
XMLValid::ProhibitedAttributePresent
, curDef->getFullName()
);
- ((SchemaAttDef *)curDef)->setValidity(PSVIDefs::INVALID);
+ ((SchemaAttDef *)curDef)->setValidity(PSVIDefs::INVALID);
}
((SchemaElementDecl
*)elemDecl)->updateValidityFromAttribute((SchemaAttDef *)curDef);
}
@@ -2463,7 +2463,7 @@
// Can't have a standalone document declaration of "yes"
if attribute
// values are subject to normalisation
fValidator->emitError(XMLValid::NoAttNormForStandalone,
attrName);
- ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
+ ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID);
}
nextCh = chSpace;
}
@@ -3489,6 +3489,11 @@
// characters specially here.
bool emittedError = false;
bool gotLeadingSurrogate = false;
+
+ // Get the character data opts for the current element
+ const ElemStack::StackElem* topElem = fElemStack.topElement();
+ XMLElementDecl::CharDataOpts charOpts =
topElem->fThisElement->getCharDataOpts();
+
while (true)
{
const XMLCh nextCh = fReaderMgr.getNextChar();
@@ -3505,13 +3510,8 @@
// This document is standalone; this ignorable CDATA whitespace is
forbidden.
// XML 1.0, Section 2.9
// And see if the current element is a 'Children' style content model
- const ElemStack::StackElem* topElem = fElemStack.topElement();
-
if (topElem->fThisElement->isExternal()) {
- // Get the character data opts for the current element
- XMLElementDecl::CharDataOpts charOpts =
topElem->fThisElement->getCharDataOpts();
-
if (charOpts == XMLElementDecl::SpacesOk) // Element Content
{
// Error - standalone should have a value of "no" as whitespace
detected in an
@@ -3530,16 +3530,33 @@
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
+ if (fValidate) {
+
+ if (fNormalizeData) {
+ // normalize the character according to schema whitespace facet
+ XMLBufBid bbtemp(&fBufMgr);
+ XMLBuffer& tempBuf = bbtemp.getBuffer();
+
+ DatatypeValidator* tempDV = ((SchemaElementDecl*)
topElem->fThisElement)->getDatatypeValidator();
+ ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV,
bbCData.getRawBuffer(), tempBuf);
+ bbCData.set(tempBuf.getRawBuffer());
+ }
+
+ // tell the schema validation about the character data for
checkContent later
+ ((SchemaValidator*)
fValidator)->setDatatypeBuffer(bbCData.getRawBuffer());
+
+ if (charOpts != XMLElementDecl::AllCharData)
+ {
+ // They definitely cannot handle any type of char data
+ fValidator->emitError(XMLValid::NoCharDataInCM);
+ ((SchemaElementDecl
*)topElem->fThisElement)->setValidity(PSVIDefs::INVALID);
+ }
+ }
+
// call all active identity constraints
if (fMatcherStack->getMatcherCount())
fContent.append(bbCData.getRawBuffer(), bbCData.getLen());
- // tell the schema validation about the character data for checkContent
later
- if (fValidate && fSchemaValidator)
- {
- fSchemaValidator->setDatatypeBuffer(bbCData.getRawBuffer());
- }
-
// If we have a doc handler, call it
if (fDocHandler)
{
@@ -3605,21 +3622,6 @@
}
}
gotLeadingSurrogate = false;
- }
- }
-
- if (fValidate) {
- // And see if the current element is a 'Children' style content model
- const ElemStack::StackElem* topElem = fElemStack.topElement();
-
- // Get the character data opts for the current element
- XMLElementDecl::CharDataOpts charOpts =
topElem->fThisElement->getCharDataOpts();
-
- if (charOpts != XMLElementDecl::AllCharData)
- {
- // They definitely cannot handle any type of char data
- fValidator->emitError(XMLValid::NoCharDataInCM);
- ((SchemaElementDecl
*)topElem->fThisElement)->setValidity(PSVIDefs::INVALID);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]