knoaman 2003/08/13 19:56:41 Modified: c/src/xercesc/internal IGXMLScanner.cpp IGXMLScanner.hpp IGXMLScanner2.cpp ReaderMgr.cpp ReaderMgr.hpp XMLScanner.cpp Log: Code refactoring to improve performance of validation. Revision Changes Path 1.21 +44 -3 xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp Index: IGXMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp,v retrieving revision 1.20 retrieving revision 1.21 diff -u -r1.20 -r1.21 --- IGXMLScanner.cpp 31 Jul 2003 17:05:03 -0000 1.20 +++ IGXMLScanner.cpp 14 Aug 2003 02:56:41 -0000 1.21 @@ -518,6 +518,9 @@ fValueStoreCache = new (fMemoryManager) ValueStoreCache(fMemoryManager); fFieldActivator = new (fMemoryManager) FieldActivator(fValueStoreCache, fMatcherStack, fMemoryManager); fValueStoreCache->setScanner(this); + + // Create schemaLocation pair info + fLocationPairs = new (fMemoryManager) ValueVectorOf<XMLCh*>(8, fMemoryManager); } void IGXMLScanner::cleanUp() @@ -529,6 +532,7 @@ delete fFieldActivator; delete fMatcherStack; delete fValueStoreCache; + delete fLocationPairs; } // --------------------------------------------------------------------------- @@ -2851,8 +2855,16 @@ } } - fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager); - fGrammarResolver->putGrammar(fDTDGrammar); + fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(XMLUni::fgDTDEntityString); + + if (fDTDGrammar) { + fDTDGrammar->reset(); + } + else { + fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager); + fGrammarResolver->putGrammar(fDTDGrammar); + } + fGrammar = fDTDGrammar; fGrammarType = fGrammar->getGrammarType(); fValidator->setGrammar(fGrammar); @@ -2956,5 +2968,34 @@ return fDTDGrammar; } +// --------------------------------------------------------------------------- +// IGXMLScanner: Helper methods +// --------------------------------------------------------------------------- +void IGXMLScanner::processSchemaLocation(XMLCh* const schemaLoc) +{ + XMLCh* locStr = schemaLoc; + XMLReader* curReader = fReaderMgr.getCurrentReader(); + + fLocationPairs->removeAllElements(); + while (*locStr) + { + do { + if (!curReader->isWhitespace(*locStr)) + break; + + *locStr = chNull; + } while (*++locStr); + + if (*locStr) { + + fLocationPairs->addElement(locStr); + + while (*++locStr) { + if (curReader->isWhitespace(*locStr)) + break; + } + } + } +} XERCES_CPP_NAMESPACE_END 1.9 +6 -0 xml-xerces/c/src/xercesc/internal/IGXMLScanner.hpp Index: IGXMLScanner.hpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.hpp,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- IGXMLScanner.hpp 10 Jul 2003 19:47:23 -0000 1.8 +++ IGXMLScanner.hpp 14 Aug 2003 02:56:41 -0000 1.9 @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.9 2003/08/14 02:56:41 knoaman + * Code refactoring to improve performance of validation. + * * Revision 1.8 2003/07/10 19:47:23 peiyongz * Stateless Grammar: Initialize scanner with grammarResolver, * creating grammar through grammarPool @@ -239,6 +242,7 @@ bool& skipThisOne, bool& laxThisOne); void resizeElemState(); + void processSchemaLocation(XMLCh* const schemaLoc); // ----------------------------------------------------------------------- // Private scanning methods @@ -327,6 +331,7 @@ XPathMatcherStack* fMatcherStack; ValueStoreCache* fValueStoreCache; FieldActivator* fFieldActivator; + ValueVectorOf<XMLCh*>* fLocationPairs; }; inline const XMLCh* IGXMLScanner::getName() const 1.30 +40 -37 xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp Index: IGXMLScanner2.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v retrieving revision 1.29 retrieving revision 1.30 diff -u -r1.29 -r1.30 --- IGXMLScanner2.cpp 31 Jul 2003 17:05:03 -0000 1.29 +++ IGXMLScanner2.cpp 14 Aug 2003 02:56:41 -0000 1.30 @@ -886,8 +886,16 @@ fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar); fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar); - fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager); - fGrammarResolver->putGrammar(fDTDGrammar); + fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(XMLUni::fgDTDEntityString); + + if (!fDTDGrammar) { + + fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager); + fGrammarResolver->putGrammar(fDTDGrammar); + } + else + fDTDGrammar->reset(); + fGrammar = fDTDGrammar; fGrammarType = fGrammar->getGrammarType(); fRootGrammar = 0; @@ -1011,8 +1019,8 @@ if (fValidate) { // Get the raw data we need for the callback - const XMLCh* const rawBuf = toSend.getRawBuffer(); - const unsigned int len = toSend.getLen(); + XMLCh* rawBuf = toSend.getRawBuffer(); + unsigned int len = toSend.getLen(); // And see if the current element is a 'Children' style content model const ElemStack::StackElem* topElem = fElemStack.topElement(); @@ -1045,31 +1053,26 @@ } else { - // The normalized data can only be as large as the - // original size, so this will avoid allocating way - // too much or too little memory. - XMLBuffer toFill(len+1, fMemoryManager); - toFill.set(rawBuf); - if (fNormalizeData) { // normalize the character according to schema whitespace facet XMLBufBid bbtemp(&fBufMgr); XMLBuffer& tempBuf = bbtemp.getBuffer(); DatatypeValidator* tempDV = ((SchemaElementDecl*) topElem->fThisElement)->getDatatypeValidator(); - ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, toFill.getRawBuffer(), tempBuf); - toFill.set(tempBuf.getRawBuffer()); + ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, tempBuf); + rawBuf = tempBuf.getRawBuffer(); + len = tempBuf.getLen(); } // tell the schema validation about the character data for checkContent later - ((SchemaValidator*) fValidator)->setDatatypeBuffer(toFill.getRawBuffer()); + ((SchemaValidator*) fValidator)->setDatatypeBuffer(rawBuf); // call all active identity constraints if (fMatcherStack->getMatcherCount()) - fContent.append(toFill.getRawBuffer(), toFill.getLen()); + fContent.append(rawBuf, len); if (fDocHandler) - fDocHandler->docCharacters(toFill.getRawBuffer(), toFill.getLen(), false); + fDocHandler->docCharacters(rawBuf, len, false); } } } @@ -1087,31 +1090,26 @@ } else { - // The normalized data can only be as large as the - // original size, so this will avoid allocating way - // too much or too little memory. - XMLBuffer toFill(len+1, fMemoryManager); - toFill.set(rawBuf); - if (fNormalizeData) { // normalize the character according to schema whitespace facet XMLBufBid bbtemp(&fBufMgr); XMLBuffer& tempBuf = bbtemp.getBuffer(); DatatypeValidator* tempDV = ((SchemaElementDecl*) topElem->fThisElement)->getDatatypeValidator(); - ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, toFill.getRawBuffer(), tempBuf); - toFill.set(tempBuf.getRawBuffer()); + ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, tempBuf); + rawBuf = tempBuf.getRawBuffer(); + len = tempBuf.getLen(); } // tell the schema validation about the character data for checkContent later - ((SchemaValidator*) fValidator)->setDatatypeBuffer(toFill.getRawBuffer()); + ((SchemaValidator*) fValidator)->setDatatypeBuffer(rawBuf); // call all active identity constraints if (fMatcherStack->getMatcherCount()) - fContent.append(toFill.getRawBuffer(), toFill.getLen()); + fContent.append(rawBuf, len); if (fDocHandler) - fDocHandler->docCharacters(toFill.getRawBuffer(), toFill.getLen(), false); + fDocHandler->docCharacters(rawBuf, len, false); } } else @@ -1247,23 +1245,26 @@ XMLBufBid bbXsi(&fBufMgr); XMLBuffer& fXsiType = bbXsi.getBuffer(); - QName attName(fMemoryManager); - for (index = 0; index < attCount; index++) { // each attribute has the prefix:suffix="value" const KVStringPair* curPair = fRawAttrList->elementAt(index); const XMLCh* rawPtr = curPair->getKey(); + const XMLCh* prefPtr = XMLUni::fgZeroLenString; + int colonInd = XMLString::indexOf(rawPtr, chColon); - attName.setName(rawPtr, fEmptyNamespaceId); - const XMLCh* prefPtr = attName.getPrefix(); + if (colonInd != -1) { + + fURIBuf.set(rawPtr, colonInd); + prefPtr = fURIBuf.getRawBuffer(); + } // if schema URI has been seen, scan for the schema location and uri // and resolve the schema grammar; or scan for schema type if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) { const XMLCh* valuePtr = curPair->getValue(); - const XMLCh* suffPtr = attName.getLocalPart(); + const XMLCh* suffPtr = &rawPtr[colonInd + 1]; if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCACTION)) parseSchemaLocation(valuePtr); @@ -1298,17 +1299,19 @@ void IGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr) { - BaseRefVectorOf<XMLCh>* schemaLocation = XMLString::tokenizeString(schemaLocationStr); - unsigned int size = schemaLocation->size(); + XMLCh* locStr = XMLString::replicate(schemaLocationStr, fMemoryManager); + ArrayJanitor<XMLCh> janLoc(locStr, fMemoryManager); + + processSchemaLocation(locStr); + unsigned int size = fLocationPairs->size(); + if (size % 2 != 0 ) { emitError(XMLErrs::BadSchemaLocation); } else { for(unsigned int i=0; i<size; i=i+2) { - resolveSchemaGrammar(schemaLocation->elementAt(i+1), schemaLocation->elementAt(i)); + resolveSchemaGrammar(fLocationPairs->elementAt(i+1), fLocationPairs->elementAt(i)); } } - - delete schemaLocation; } void IGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri) { 1.16 +1 -12 xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp Index: ReaderMgr.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp,v retrieving revision 1.15 retrieving revision 1.16 diff -u -r1.15 -r1.16 --- ReaderMgr.cpp 18 May 2003 14:02:04 -0000 1.15 +++ ReaderMgr.cpp 14 Aug 2003 02:56:41 -0000 1.16 @@ -789,17 +789,6 @@ } -const XMLReader* ReaderMgr::getCurrentReader() const -{ - return fCurReader; -} - - -XMLReader* ReaderMgr::getCurrentReader() -{ - return fCurReader; -} - unsigned int ReaderMgr::getReaderDepth() const { // If the stack doesn't exist, its obviously zero 1.10 +13 -0 xml-xerces/c/src/xercesc/internal/ReaderMgr.hpp Index: ReaderMgr.hpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ReaderMgr.hpp,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- ReaderMgr.hpp 16 May 2003 21:36:57 -0000 1.9 +++ ReaderMgr.hpp 14 Aug 2003 02:56:41 -0000 1.10 @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.10 2003/08/14 02:56:41 knoaman + * Code refactoring to improve performance of validation. + * * Revision 1.9 2003/05/16 21:36:57 knoaman * Memory manager implementation: Modify constructors to pass in the memory manager. * @@ -385,6 +388,16 @@ inline unsigned int ReaderMgr::getCurrentReaderNum() const { return fCurReader->getReaderNum(); +} + +inline const XMLReader* ReaderMgr::getCurrentReader() const +{ + return fCurReader; +} + +inline XMLReader* ReaderMgr::getCurrentReader() +{ + return fCurReader; } inline bool ReaderMgr::getName(XMLBuffer& toFill) 1.47 +41 -38 xml-xerces/c/src/xercesc/internal/XMLScanner.cpp Index: XMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v retrieving revision 1.46 retrieving revision 1.47 diff -u -r1.46 -r1.47 --- XMLScanner.cpp 10 Jul 2003 19:47:24 -0000 1.46 +++ XMLScanner.cpp 14 Aug 2003 02:56:41 -0000 1.47 @@ -331,27 +331,51 @@ // Create a temporary URL. Since this is the primary document, // it has to be fully qualified. If not, then assume we are just // mistaking a file for a URL. - XMLURL tmpURL(systemId, fMemoryManager); - if (tmpURL.isRelative()) { + XMLURL tmpURL(fMemoryManager); + + if (XMLURL::parse(systemId, tmpURL)) { + + if (tmpURL.isRelative()) { + if (!fStandardUriConformant) + srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager); + else { + // since this is the top of the try/catch, cannot call ThrowXML + // emit the error directly + MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent); + fInException = true; + emitError + ( + XMLErrs::XMLException_Fatal + , e.getType() + , e.getMessage() + ); + return; + } + } + else + { + if (fStandardUriConformant && tmpURL.hasInvalidChar()) { + MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL); + fInException = true; + emitError + ( + XMLErrs::XMLException_Fatal + , e.getType() + , e.getMessage() + ); + return; + } + srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager); + } + } + else { + if (!fStandardUriConformant) srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager); else { // since this is the top of the try/catch, cannot call ThrowXML // emit the error directly - MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent); - fInException = true; - emitError - ( - XMLErrs::XMLException_Fatal - , e.getType() - , e.getMessage() - ); - return; - } - } - else - { - if (fStandardUriConformant && tmpURL.hasInvalidChar()) { + // lazy bypass ... since all MalformedURLException are fatal, no need to check the type MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL); fInException = true; emitError @@ -362,29 +386,8 @@ ); return; } - srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager); } - } - catch(const MalformedURLException& e) - { - if (!fStandardUriConformant) - srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager); - else { - // since this is the top of the try/catch, cannot call ThrowXML - // emit the error directly - // lazy bypass ... since all MalformedURLException are fatal, no need to check the type - fInException = true; - emitError - ( - XMLErrs::XMLException_Fatal - , e.getType() - , e.getMessage() - ); - return; - } - } - catch(const XMLException& excToCatch) { // For any other XMLException,
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]