knoaman 2003/08/13 19:56:41
Modified: c/src/xercesc/internal IGXMLScanner.cpp IGXMLScanner.hpp
IGXMLScanner2.cpp ReaderMgr.cpp ReaderMgr.hpp
XMLScanner.cpp
Log:
Code refactoring to improve performance of validation.
Revision Changes Path
1.21 +44 -3 xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp
Index: IGXMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -r1.20 -r1.21
--- IGXMLScanner.cpp 31 Jul 2003 17:05:03 -0000 1.20
+++ IGXMLScanner.cpp 14 Aug 2003 02:56:41 -0000 1.21
@@ -518,6 +518,9 @@
fValueStoreCache = new (fMemoryManager) ValueStoreCache(fMemoryManager);
fFieldActivator = new (fMemoryManager) FieldActivator(fValueStoreCache,
fMatcherStack, fMemoryManager);
fValueStoreCache->setScanner(this);
+
+ // Create schemaLocation pair info
+ fLocationPairs = new (fMemoryManager) ValueVectorOf<XMLCh*>(8, fMemoryManager);
}
void IGXMLScanner::cleanUp()
@@ -529,6 +532,7 @@
delete fFieldActivator;
delete fMatcherStack;
delete fValueStoreCache;
+ delete fLocationPairs;
}
// ---------------------------------------------------------------------------
@@ -2851,8 +2855,16 @@
}
}
- fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
- fGrammarResolver->putGrammar(fDTDGrammar);
+ fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(XMLUni::fgDTDEntityString);
+
+ if (fDTDGrammar) {
+ fDTDGrammar->reset();
+ }
+ else {
+ fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
+ fGrammarResolver->putGrammar(fDTDGrammar);
+ }
+
fGrammar = fDTDGrammar;
fGrammarType = fGrammar->getGrammarType();
fValidator->setGrammar(fGrammar);
@@ -2956,5 +2968,34 @@
return fDTDGrammar;
}
+// ---------------------------------------------------------------------------
+// IGXMLScanner: Helper methods
+// ---------------------------------------------------------------------------
+void IGXMLScanner::processSchemaLocation(XMLCh* const schemaLoc) // splits schemaLoc into whitespace-separated tokens in place and records a pointer to each token in fLocationPairs
+{
+ XMLCh* locStr = schemaLoc; // cursor walking the caller's writable buffer
+ XMLReader* curReader = fReaderMgr.getCurrentReader(); // used only for its isWhitespace() test; dereferenced below without a null check
+
+ fLocationPairs->removeAllElements(); // drop tokens left over from an earlier call
+ while (*locStr)
+ {
+ do { // overwrite the whitespace run at the cursor with chNull
+ if (!curReader->isWhitespace(*locStr))
+ break;
+
+ *locStr = chNull; // this also terminates the token that ended just before this run
+ } while (*++locStr);
+
+ if (*locStr) { // stopped on a non-whitespace character: a token starts here
+
+ fLocationPairs->addElement(locStr); // stores a pointer into schemaLoc, not a copy of the token
+
+ while (*++locStr) { // advance to the whitespace (or terminator) that ends this token
+ if (curReader->isWhitespace(*locStr))
+ break;
+ }
+ }
+ }
+}
XERCES_CPP_NAMESPACE_END
1.9 +6 -0 xml-xerces/c/src/xercesc/internal/IGXMLScanner.hpp
Index: IGXMLScanner.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.hpp,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- IGXMLScanner.hpp 10 Jul 2003 19:47:23 -0000 1.8
+++ IGXMLScanner.hpp 14 Aug 2003 02:56:41 -0000 1.9
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.9 2003/08/14 02:56:41 knoaman
+ * Code refactoring to improve performance of validation.
+ *
* Revision 1.8 2003/07/10 19:47:23 peiyongz
* Stateless Grammar: Initialize scanner with grammarResolver,
* creating grammar through grammarPool
@@ -239,6 +242,7 @@
bool& skipThisOne,
bool& laxThisOne);
void resizeElemState();
+ void processSchemaLocation(XMLCh* const schemaLoc);
// -----------------------------------------------------------------------
// Private scanning methods
@@ -327,6 +331,7 @@
XPathMatcherStack* fMatcherStack;
ValueStoreCache* fValueStoreCache;
FieldActivator* fFieldActivator;
+ ValueVectorOf<XMLCh*>* fLocationPairs;
};
inline const XMLCh* IGXMLScanner::getName() const
1.30 +40 -37 xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp
Index: IGXMLScanner2.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v
retrieving revision 1.29
retrieving revision 1.30
diff -u -r1.29 -r1.30
--- IGXMLScanner2.cpp 31 Jul 2003 17:05:03 -0000 1.29
+++ IGXMLScanner2.cpp 14 Aug 2003 02:56:41 -0000 1.30
@@ -886,8 +886,16 @@
fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar);
fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar);
- fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
- fGrammarResolver->putGrammar(fDTDGrammar);
+ fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(XMLUni::fgDTDEntityString);
+
+ if (!fDTDGrammar) {
+
+ fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
+ fGrammarResolver->putGrammar(fDTDGrammar);
+ }
+ else
+ fDTDGrammar->reset();
+
fGrammar = fDTDGrammar;
fGrammarType = fGrammar->getGrammarType();
fRootGrammar = 0;
@@ -1011,8 +1019,8 @@
if (fValidate)
{
// Get the raw data we need for the callback
- const XMLCh* const rawBuf = toSend.getRawBuffer();
- const unsigned int len = toSend.getLen();
+ XMLCh* rawBuf = toSend.getRawBuffer();
+ unsigned int len = toSend.getLen();
// And see if the current element is a 'Children' style content model
const ElemStack::StackElem* topElem = fElemStack.topElement();
@@ -1045,31 +1053,26 @@
}
else
{
- // The normalized data can only be as large as the
- // original size, so this will avoid allocating way
- // too much or too little memory.
- XMLBuffer toFill(len+1, fMemoryManager);
- toFill.set(rawBuf);
-
if (fNormalizeData) {
// normalize the character according to schema whitespace facet
XMLBufBid bbtemp(&fBufMgr);
XMLBuffer& tempBuf = bbtemp.getBuffer();
DatatypeValidator* tempDV = ((SchemaElementDecl*) topElem->fThisElement)->getDatatypeValidator();
- ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, toFill.getRawBuffer(), tempBuf);
- toFill.set(tempBuf.getRawBuffer());
+ ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, tempBuf);
+ rawBuf = tempBuf.getRawBuffer();
+ len = tempBuf.getLen();
}
// tell the schema validation about the character data for checkContent later
- ((SchemaValidator*) fValidator)->setDatatypeBuffer(toFill.getRawBuffer());
+ ((SchemaValidator*) fValidator)->setDatatypeBuffer(rawBuf);
// call all active identity constraints
if (fMatcherStack->getMatcherCount())
- fContent.append(toFill.getRawBuffer(), toFill.getLen());
+ fContent.append(rawBuf, len);
if (fDocHandler)
- fDocHandler->docCharacters(toFill.getRawBuffer(), toFill.getLen(), false);
+ fDocHandler->docCharacters(rawBuf, len, false);
}
}
}
@@ -1087,31 +1090,26 @@
}
else
{
- // The normalized data can only be as large as the
- // original size, so this will avoid allocating way
- // too much or too little memory.
- XMLBuffer toFill(len+1, fMemoryManager);
- toFill.set(rawBuf);
-
if (fNormalizeData) {
// normalize the character according to schema whitespace facet
XMLBufBid bbtemp(&fBufMgr);
XMLBuffer& tempBuf = bbtemp.getBuffer();
DatatypeValidator* tempDV = ((SchemaElementDecl*) topElem->fThisElement)->getDatatypeValidator();
- ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, toFill.getRawBuffer(), tempBuf);
- toFill.set(tempBuf.getRawBuffer());
+ ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, tempBuf);
+ rawBuf = tempBuf.getRawBuffer();
+ len = tempBuf.getLen();
}
// tell the schema validation about the character data for checkContent later
- ((SchemaValidator*) fValidator)->setDatatypeBuffer(toFill.getRawBuffer());
+ ((SchemaValidator*) fValidator)->setDatatypeBuffer(rawBuf);
// call all active identity constraints
if (fMatcherStack->getMatcherCount())
- fContent.append(toFill.getRawBuffer(), toFill.getLen());
+ fContent.append(rawBuf, len);
if (fDocHandler)
- fDocHandler->docCharacters(toFill.getRawBuffer(), toFill.getLen(), false);
+ fDocHandler->docCharacters(rawBuf, len, false);
}
}
else
@@ -1247,23 +1245,26 @@
XMLBufBid bbXsi(&fBufMgr);
XMLBuffer& fXsiType = bbXsi.getBuffer();
- QName attName(fMemoryManager);
-
for (index = 0; index < attCount; index++)
{
// each attribute has the prefix:suffix="value"
const KVStringPair* curPair = fRawAttrList->elementAt(index);
const XMLCh* rawPtr = curPair->getKey();
+ const XMLCh* prefPtr = XMLUni::fgZeroLenString;
+ int colonInd = XMLString::indexOf(rawPtr, chColon);
- attName.setName(rawPtr, fEmptyNamespaceId);
- const XMLCh* prefPtr = attName.getPrefix();
+ if (colonInd != -1) {
+
+ fURIBuf.set(rawPtr, colonInd);
+ prefPtr = fURIBuf.getRawBuffer();
+ }
// if schema URI has been seen, scan for the schema location and uri
// and resolve the schema grammar; or scan for schema type
if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) {
const XMLCh* valuePtr = curPair->getValue();
- const XMLCh* suffPtr = attName.getLocalPart();
+ const XMLCh* suffPtr = &rawPtr[colonInd + 1];
if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCACTION))
parseSchemaLocation(valuePtr);
@@ -1298,17 +1299,19 @@
void IGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr)
{
- BaseRefVectorOf<XMLCh>* schemaLocation = XMLString::tokenizeString(schemaLocationStr);
- unsigned int size = schemaLocation->size();
+ XMLCh* locStr = XMLString::replicate(schemaLocationStr, fMemoryManager);
+ ArrayJanitor<XMLCh> janLoc(locStr, fMemoryManager);
+
+ processSchemaLocation(locStr);
+ unsigned int size = fLocationPairs->size();
+
if (size % 2 != 0 ) {
emitError(XMLErrs::BadSchemaLocation);
} else {
for(unsigned int i=0; i<size; i=i+2) {
- resolveSchemaGrammar(schemaLocation->elementAt(i+1), schemaLocation->elementAt(i));
+ resolveSchemaGrammar(fLocationPairs->elementAt(i+1), fLocationPairs->elementAt(i));
}
}
-
- delete schemaLocation;
}
void IGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri) {
1.16 +1 -12 xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp
Index: ReaderMgr.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- ReaderMgr.cpp 18 May 2003 14:02:04 -0000 1.15
+++ ReaderMgr.cpp 14 Aug 2003 02:56:41 -0000 1.16
@@ -789,17 +789,6 @@
}
-const XMLReader* ReaderMgr::getCurrentReader() const
-{
- return fCurReader;
-}
-
-
-XMLReader* ReaderMgr::getCurrentReader()
-{
- return fCurReader;
-}
-
unsigned int ReaderMgr::getReaderDepth() const
{
// If the stack doesn't exist, its obviously zero
1.10 +13 -0 xml-xerces/c/src/xercesc/internal/ReaderMgr.hpp
Index: ReaderMgr.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ReaderMgr.hpp,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- ReaderMgr.hpp 16 May 2003 21:36:57 -0000 1.9
+++ ReaderMgr.hpp 14 Aug 2003 02:56:41 -0000 1.10
@@ -56,6 +56,9 @@
/*
* $Log$
+ * Revision 1.10 2003/08/14 02:56:41 knoaman
+ * Code refactoring to improve performance of validation.
+ *
* Revision 1.9 2003/05/16 21:36:57 knoaman
* Memory manager implementation: Modify constructors to pass in the memory manager.
*
@@ -385,6 +388,16 @@
inline unsigned int ReaderMgr::getCurrentReaderNum() const
{
return fCurReader->getReaderNum();
+}
+
+inline const XMLReader* ReaderMgr::getCurrentReader() const // read-only accessor; this revision moves it from ReaderMgr.cpp into the header as an inline function
+{
+ return fCurReader; // hands back the fCurReader member as-is; no null check is performed here
+}
+
+inline XMLReader* ReaderMgr::getCurrentReader() // non-const overload; this revision moves it from ReaderMgr.cpp into the header as an inline function
+{
+ return fCurReader; // hands back the fCurReader member as-is; no null check is performed here
}
inline bool ReaderMgr::getName(XMLBuffer& toFill)
1.47 +41 -38 xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
Index: XMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
retrieving revision 1.46
retrieving revision 1.47
diff -u -r1.46 -r1.47
--- XMLScanner.cpp 10 Jul 2003 19:47:24 -0000 1.46
+++ XMLScanner.cpp 14 Aug 2003 02:56:41 -0000 1.47
@@ -331,27 +331,51 @@
// Create a temporary URL. Since this is the primary document,
// it has to be fully qualified. If not, then assume we are just
// mistaking a file for a URL.
- XMLURL tmpURL(systemId, fMemoryManager);
- if (tmpURL.isRelative()) {
+ XMLURL tmpURL(fMemoryManager);
+
+ if (XMLURL::parse(systemId, tmpURL)) {
+
+ if (tmpURL.isRelative()) {
+ if (!fStandardUriConformant)
+ srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
+ else {
+ // since this is the top of the try/catch, cannot call ThrowXML
+ // emit the error directly
+ MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent);
+ fInException = true;
+ emitError
+ (
+ XMLErrs::XMLException_Fatal
+ , e.getType()
+ , e.getMessage()
+ );
+ return;
+ }
+ }
+ else
+ {
+ if (fStandardUriConformant && tmpURL.hasInvalidChar()) {
+ MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL);
+ fInException = true;
+ emitError
+ (
+ XMLErrs::XMLException_Fatal
+ , e.getType()
+ , e.getMessage()
+ );
+ return;
+ }
+ srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);
+ }
+ }
+ else {
+
if (!fStandardUriConformant)
srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
else {
// since this is the top of the try/catch, cannot call ThrowXML
// emit the error directly
- MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent);
- fInException = true;
- emitError
- (
- XMLErrs::XMLException_Fatal
- , e.getType()
- , e.getMessage()
- );
- return;
- }
- }
- else
- {
- if (fStandardUriConformant && tmpURL.hasInvalidChar()) {
+ // lazy bypass ... since all MalformedURLException are fatal, no need to check the type
MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL);
fInException = true;
emitError
@@ -362,29 +386,8 @@
);
return;
}
- srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);
}
-
}
- catch(const MalformedURLException& e)
- {
- if (!fStandardUriConformant)
- srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
- else {
- // since this is the top of the try/catch, cannot call ThrowXML
- // emit the error directly
- // lazy bypass ... since all MalformedURLException are fatal, no need to check the type
- fInException = true;
- emitError
- (
- XMLErrs::XMLException_Fatal
- , e.getType()
- , e.getMessage()
- );
- return;
- }
- }
-
catch(const XMLException& excToCatch)
{
// For any other XMLException,
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]