tng 2003/02/04 13:21:51 Modified: c/src/xercesc/internal IGXMLScanner2.cpp SGXMLScanner.cpp Log: Performance: check the AllCharData charOpts once instead of for every nextCh in the loop, and also call normalizeWhiteSpace for CDATA. Revision Changes Path 1.12 +50 -34 xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp Index: IGXMLScanner2.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- IGXMLScanner2.cpp 4 Feb 2003 17:15:36 -0000 1.11 +++ IGXMLScanner2.cpp 4 Feb 2003 21:21:50 -0000 1.12 @@ -281,7 +281,7 @@ //we may have set it to invalid already, but this is the first time we are guarenteed to have the attDef if(((SchemaAttDef *)(attDef))->getValidity() != PSVIDefs::INVALID) ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::VALID); - + ((SchemaAttDef *)(attDef))->setValidationAttempted(PSVIDefs::FULL); } @@ -292,7 +292,7 @@ attDef->setCreateReason(XMLAttDef::JustFaultIn); } - bool errorCondition = fValidate && !attDefForWildCard && + bool errorCondition = fValidate && !attDefForWildCard && attDef->getCreateReason() == XMLAttDef::JustFaultIn && !attDef->getProvided(); if (errorCondition && !skipThisOne && !laxThisOne) { @@ -334,7 +334,7 @@ if (attDef->getProvided()) { emitError - ( + ( XMLErrs::AttrAlreadyUsedInSTag , attDef->getFullName() , elemDecl->getFullName() @@ -517,7 +517,7 @@ XMLValid::RequiredAttrNotProvided , curDef->getFullName() ); - if(fGrammarType == Grammar::SchemaGrammarType) + if(fGrammarType == Grammar::SchemaGrammarType) ((SchemaAttDef *)(curDef))->setValidity(PSVIDefs::INVALID); } else if ((defType == XMLAttDef::Default) || @@ -527,7 +527,7 @@ { // XML 1.0 Section 2.9 // Document is standalone, so attributes must not be defaulted. 
- fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef->getFullName(), elemDecl->getFullName()); + fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef->getFullName(), elemDecl->getFullName()); if(fGrammarType == Grammar::SchemaGrammarType) ((SchemaAttDef *)(curDef))->setValidity(PSVIDefs::INVALID); } @@ -993,7 +993,7 @@ { // They definitely cannot handle any type of char data fValidator->emitError(XMLValid::NoCharDataInCM); - if(fGrammarType == Grammar::SchemaGrammarType) + if(fGrammarType == Grammar::SchemaGrammarType) ((SchemaElementDecl *)topElem->fThisElement)->setValidity(PSVIDefs::INVALID); } else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len)) @@ -1086,7 +1086,7 @@ else { fValidator->emitError(XMLValid::NoCharDataInCM); - if(fGrammarType == Grammar::SchemaGrammarType) + if(fGrammarType == Grammar::SchemaGrammarType) ((SchemaElementDecl *)topElem->fThisElement)->setValidity(PSVIDefs::INVALID); } } @@ -1987,6 +1987,11 @@ // characters specially here. bool emittedError = false; bool gotLeadingSurrogate = false; + + // Get the character data opts for the current element + const ElemStack::StackElem* topElem = fElemStack.topElement(); + XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts(); + while (true) { const XMLCh nextCh = fReaderMgr.getNextChar(); @@ -2003,19 +2008,15 @@ // This document is standalone; this ignorable CDATA whitespace is forbidden. 
// XML 1.0, Section 2.9 // And see if the current element is a 'Children' style content model - const ElemStack::StackElem* topElem = fElemStack.topElement(); if (topElem->fThisElement->isExternal()) { - // Get the character data opts for the current element - XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts(); - if (charOpts == XMLElementDecl::SpacesOk) // Element Content { // Error - standalone should have a value of "no" as whitespace detected in an // element type with element content whose element declaration was external fValidator->emitError(XMLValid::NoWSForStandalone); - if(fGrammarType == Grammar::SchemaGrammarType) + if(fGrammarType == Grammar::SchemaGrammarType) ((SchemaElementDecl *)topElem->fThisElement)->setValidity(PSVIDefs::INVALID); } } @@ -2025,16 +2026,47 @@ // sequence. if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose)) { + // make sure we were not expecting a trailing surrogate. + if (gotLeadingSurrogate) + emitError(XMLErrs::Expected2ndSurrogateChar); + if (fGrammarType == Grammar::SchemaGrammarType) { + if (fNormalizeData) { + // normalize the character according to schema whitespace facet + XMLBufBid bbtemp(&fBufMgr); + XMLBuffer& tempBuf = bbtemp.getBuffer(); + + DatatypeValidator* tempDV = ((SchemaElementDecl*) topElem->fThisElement)->getDatatypeValidator(); + ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, bbCData.getRawBuffer(), tempBuf); + bbCData.set(tempBuf.getRawBuffer()); + } + + if (fValidate) { + + // tell the schema validation about the character data for checkContent later + ((SchemaValidator*) fValidator)->setDatatypeBuffer(bbCData.getRawBuffer()); + + if (charOpts != XMLElementDecl::AllCharData) + { + // They definitely cannot handle any type of char data + fValidator->emitError(XMLValid::NoCharDataInCM); + ((SchemaElementDecl *)topElem->fThisElement)->setValidity(PSVIDefs::INVALID); + } + } + if (fMatcherStack->getMatcherCount()) 
fContent.append(bbCData.getRawBuffer(), bbCData.getLen()); } + else { + if (fValidate) { - // tell the schema validation about the character data for checkContent later - if (fValidate && fSchemaValidator) - { - fSchemaValidator->setDatatypeBuffer(bbCData.getRawBuffer()); + if (charOpts != XMLElementDecl::AllCharData) + { + // They definitely cannot handle any type of char data + fValidator->emitError(XMLValid::NoCharDataInCM); + } + } } // If we have a doc handler, call it @@ -2105,22 +2137,6 @@ } } - if (fValidate) { - // And see if the current element is a 'Children' style content model - const ElemStack::StackElem* topElem = fElemStack.topElement(); - - // Get the character data opts for the current element - XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts(); - - if (charOpts != XMLElementDecl::AllCharData) - { - // They definitely cannot handle any type of char data - fValidator->emitError(XMLValid::NoCharDataInCM); - if(fGrammarType == Grammar::SchemaGrammarType) - ((SchemaElementDecl *)topElem->fThisElement)->setValidity(PSVIDefs::INVALID); - } - } - // Add it to the buffer bbCData.append(nextCh); } @@ -2315,7 +2331,7 @@ // element type with element content whose element declaration was external // fValidator->emitError(XMLValid::NoWSForStandalone); - if(fGrammarType == Grammar::SchemaGrammarType) + if(fGrammarType == Grammar::SchemaGrammarType) ((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->setValidity(PSVIDefs::INVALID); } } 1.15 +41 -39 xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp Index: SGXMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp,v retrieving revision 1.14 retrieving revision 1.15 diff -u -r1.14 -r1.15 --- SGXMLScanner.cpp 4 Feb 2003 17:15:36 -0000 1.14 +++ SGXMLScanner.cpp 4 Feb 2003 21:21:50 -0000 1.15 @@ -954,7 +954,7 @@ , topElem->fThisElement->getFormattedContentModel() ); } - + } // call 
matchers and de-activate context @@ -1209,7 +1209,7 @@ ,uriStr ); errorBeforeElementFound = true; - } + } else if(errorCondition) laxBeforeElementFound = true; @@ -1252,7 +1252,7 @@ ); errorBeforeElementFound = true; } - else if(errorCondition) + else if(errorCondition) laxBeforeElementFound = true; } @@ -1297,10 +1297,10 @@ , XMLUni::fgZeroLenString ); errorBeforeElementFound = true; - + } else if(errorCondition) - laxBeforeElementFound = true; + laxBeforeElementFound = true; elemDecl = fGrammar->getElemDecl ( @@ -1336,7 +1336,7 @@ errorBeforeElementFound = true; } else if(errorCondition) - laxBeforeElementFound = true; + laxBeforeElementFound = true; elemDecl = fGrammar->getElemDecl ( @@ -1414,9 +1414,9 @@ if (!elemDecl->isDeclared()) { if (laxThisOne) { fValidate = false; - fElemStack.setValidationFlag(fValidate); + fElemStack.setValidationFlag(fValidate); } - + if (fValidate) { fValidator->emitError @@ -2096,7 +2096,7 @@ //we may have set it to invalid already, but this is the first time we are guarenteed to have the attDef if(((SchemaAttDef *)(attDef))->getValidity() != PSVIDefs::INVALID) ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::VALID); - + ((SchemaAttDef *)(attDef))->setValidationAttempted(PSVIDefs::FULL); } @@ -2107,7 +2107,7 @@ attDef->setCreateReason(XMLAttDef::JustFaultIn); } - bool errorCondition = fValidate && !attDefForWildCard && + bool errorCondition = fValidate && !attDefForWildCard && attDef->getCreateReason() == XMLAttDef::JustFaultIn && !attDef->getProvided(); if (errorCondition && !skipThisOne && !laxThisOne) { @@ -2384,7 +2384,7 @@ XMLValid::ProhibitedAttributePresent , curDef->getFullName() ); - ((SchemaAttDef *)curDef)->setValidity(PSVIDefs::INVALID); + ((SchemaAttDef *)curDef)->setValidity(PSVIDefs::INVALID); } ((SchemaElementDecl *)elemDecl)->updateValidityFromAttribute((SchemaAttDef *)curDef); } @@ -2463,7 +2463,7 @@ // Can't have a standalone document declaration of "yes" if attribute // values are subject to normalisation 
fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName); - ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID); + ((SchemaAttDef *)attDef)->setValidity(PSVIDefs::INVALID); } nextCh = chSpace; } @@ -3489,6 +3489,11 @@ // characters specially here. bool emittedError = false; bool gotLeadingSurrogate = false; + + // Get the character data opts for the current element + const ElemStack::StackElem* topElem = fElemStack.topElement(); + XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts(); + while (true) { const XMLCh nextCh = fReaderMgr.getNextChar(); @@ -3505,13 +3510,8 @@ // This document is standalone; this ignorable CDATA whitespace is forbidden. // XML 1.0, Section 2.9 // And see if the current element is a 'Children' style content model - const ElemStack::StackElem* topElem = fElemStack.topElement(); - if (topElem->fThisElement->isExternal()) { - // Get the character data opts for the current element - XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts(); - if (charOpts == XMLElementDecl::SpacesOk) // Element Content { // Error - standalone should have a value of "no" as whitespace detected in an @@ -3530,16 +3530,33 @@ if (gotLeadingSurrogate) emitError(XMLErrs::Expected2ndSurrogateChar); + if (fValidate) { + + if (fNormalizeData) { + // normalize the character according to schema whitespace facet + XMLBufBid bbtemp(&fBufMgr); + XMLBuffer& tempBuf = bbtemp.getBuffer(); + + DatatypeValidator* tempDV = ((SchemaElementDecl*) topElem->fThisElement)->getDatatypeValidator(); + ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, bbCData.getRawBuffer(), tempBuf); + bbCData.set(tempBuf.getRawBuffer()); + } + + // tell the schema validation about the character data for checkContent later + ((SchemaValidator*) fValidator)->setDatatypeBuffer(bbCData.getRawBuffer()); + + if (charOpts != XMLElementDecl::AllCharData) + { + // They definitely cannot handle any type of char data + 
fValidator->emitError(XMLValid::NoCharDataInCM); + ((SchemaElementDecl *)topElem->fThisElement)->setValidity(PSVIDefs::INVALID); + } + } + // call all active identity constraints if (fMatcherStack->getMatcherCount()) fContent.append(bbCData.getRawBuffer(), bbCData.getLen()); - // tell the schema validation about the character data for checkContent later - if (fValidate && fSchemaValidator) - { - fSchemaValidator->setDatatypeBuffer(bbCData.getRawBuffer()); - } - // If we have a doc handler, call it if (fDocHandler) { @@ -3605,21 +3622,6 @@ } } gotLeadingSurrogate = false; - } - } - - if (fValidate) { - // And see if the current element is a 'Children' style content model - const ElemStack::StackElem* topElem = fElemStack.topElement(); - - // Get the character data opts for the current element - XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts(); - - if (charOpts != XMLElementDecl::AllCharData) - { - // They definitely cannot handle any type of char data - fValidator->emitError(XMLValid::NoCharDataInCM); - ((SchemaElementDecl *)topElem->fThisElement)->setValidity(PSVIDefs::INVALID); } }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]