peiyongz 2004/09/28 14:27:38 Modified: c/src/xercesc/internal XMLScanner.hpp XMLScanner.cpp WFXMLScanner.cpp IGXMLScanner2.cpp DGXMLScanner.cpp Log: Optimized duplicated attributes checking for large number of attributes Revision Changes Path 1.40 +34 -0 xml-xerces/c/src/xercesc/internal/XMLScanner.hpp Index: XMLScanner.hpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.hpp,v retrieving revision 1.39 retrieving revision 1.40 diff -u -r1.39 -r1.40 --- XMLScanner.hpp 28 Sep 2004 02:14:13 -0000 1.39 +++ XMLScanner.hpp 28 Sep 2004 21:27:38 -0000 1.40 @@ -16,6 +16,9 @@ /* * $Log$ + * Revision 1.40 2004/09/28 21:27:38 peiyongz + * Optimized duplicated attributes checking for large number of attributes + * * Revision 1.39 2004/09/28 02:14:13 cargilld * Add support for validating annotations. * @@ -731,6 +734,13 @@ void resetUIntPool(); void recreateUIntPool(); + inline + void setAttrDupChkRegistry + ( + const unsigned int &attrNumber + , bool &toUseHashTable + ); + // ----------------------------------------------------------------------- // Data members // @@ -986,6 +996,7 @@ XMLUInt32 fScannerId; XMLUInt32 fSequenceId; RefVectorOf<XMLAttr>* fAttrList; + RefHash2KeysTableOf<XMLAttr>* fAttrDupChkRegistry; XMLDocumentHandler* fDocHandler; DocTypeHandler* fDocTypeHandler; XMLEntityHandler* fEntityHandler; @@ -1519,6 +1530,29 @@ fValidationContext->clearIdRefList(); fValidationContext->setEntityDeclPool(0); fEntityDeclPoolRetrieved = false; +} + +inline void XMLScanner::setAttrDupChkRegistry(const unsigned int &attrNumber + , bool &toUseHashTable) +{ + // once the attribute exceed 20, we use hash table to check duplication + if (attrNumber > 20) + { + toUseHashTable = true; + + if (!fAttrDupChkRegistry) + { + fAttrDupChkRegistry = new (fMemoryManager) RefHash2KeysTableOf<XMLAttr> + ( + 2*attrNumber+1, false, new (fMemoryManager)HashXMLCh(), fMemoryManager + ); + } + else + { + fAttrDupChkRegistry->removeAll(); + } + } + } XERCES_CPP_NAMESPACE_END 1.71 +4 -1 xml-xerces/c/src/xercesc/internal/XMLScanner.cpp Index: XMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v retrieving revision 1.70 retrieving revision 1.71 diff -u -r1.70 -r1.71 --- XMLScanner.cpp 28 Sep 2004 02:14:13 -0000 1.70 +++ XMLScanner.cpp 28 Sep 2004 21:27:38 -0000 1.71 @@ -157,6 +157,7 @@ , fScannerId(0) , fSequenceId(0) , fAttrList(0) + , fAttrDupChkRegistry(0) , fDocHandler(0) , fDocTypeHandler(0) , fEntityHandler(0) @@ -237,6 +238,7 @@ , fScannerId(0) , fSequenceId(0) , fAttrList(0) + , fAttrDupChkRegistry(0) , fDocHandler(docHandler) , fDocTypeHandler(docTypeHandler) , fEntityHandler(entityHandler) @@ -279,6 +281,7 @@ XMLScanner::~XMLScanner() { delete fAttrList; + delete fAttrDupChkRegistry; delete fValidationContext; fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName; fMemoryManager->deallocate(fExternalSchemaLocation);//delete [] fExternalSchemaLocation; 1.25 +33 -8 xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp Index: WFXMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/WFXMLScanner.cpp,v retrieving revision 1.24 retrieving revision 1.25 diff -u -r1.24 -r1.25 --- WFXMLScanner.cpp 8 Sep 2004 13:56:13 -0000 1.24 +++ WFXMLScanner.cpp 28 Sep 2004 21:27:38 -0000 1.25 @@ -1457,6 +1457,13 @@ } if(attCount) { + + // + // Decide if to use hash table to do duplicate checking + // + bool toUseHashTable = false; + setAttrDupChkRegistry(attCount, toUseHashTable); + // check for duplicate namespace attributes: // by checking for qualified names with the same local part and with prefixes // which have been bound to namespace names that are identical. @@ -1464,17 +1471,35 @@ XMLAttr* curAtt; for (unsigned int attrIndex=0; attrIndex < attCount-1; attrIndex++) { loopAttr = fAttrList->elementAt(attrIndex); - for (unsigned int curAttrIndex = attrIndex+1; curAttrIndex < attCount; curAttrIndex++) { - curAtt = fAttrList->elementAt(curAttrIndex); - if (curAtt->getURIId() == loopAttr->getURIId() && - XMLString::equals(curAtt->getName(), loopAttr->getName())) { - emitError - ( - XMLErrs::AttrAlreadyUsedInSTag + + if (!toUseHashTable) + { + for (unsigned int curAttrIndex = attrIndex+1; curAttrIndex < attCount; curAttrIndex++) { + curAtt = fAttrList->elementAt(curAttrIndex); + if (curAtt->getURIId() == loopAttr->getURIId() && + XMLString::equals(curAtt->getName(), loopAttr->getName())) { + emitError + ( + XMLErrs::AttrAlreadyUsedInSTag , curAtt->getName() , elemDecl->getFullName() + ); + } + } + } + else + { + if (fAttrDupChkRegistry->containsKey((void*)loopAttr->getName(), loopAttr->getURIId())) + { + emitError + ( + XMLErrs::AttrAlreadyUsedInSTag + , loopAttr->getName() + , elemDecl->getFullName() ); } + + fAttrDupChkRegistry->put((void*)loopAttr->getName(), loopAttr->getURIId(), loopAttr); } } } 1.72 +38 -7 xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp Index: IGXMLScanner2.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v retrieving revision 1.71 retrieving revision 1.72 diff -u -r1.71 -r1.72 --- IGXMLScanner2.cpp 26 Sep 2004 18:23:50 -0000 1.71 +++ IGXMLScanner2.cpp 28 Sep 2004 21:27:38 -0000 1.72 @@ -110,6 +110,15 @@ XMLBufBid bbNormal(&fBufMgr); XMLBuffer& normBuf = bbNormal.getBuffer(); + // + // Decide if to use hash table to do duplicate checking + // + bool toUseHashTable = false; + if (fGrammarType == Grammar::DTDGrammarType) + { + setAttrDupChkRegistry(attCount, toUseHashTable); + } + // Loop through our explicitly provided attributes, which are in the raw // scanned form, and build up XMLAttr objects. unsigned int index; @@ -617,16 +626,32 @@ // by checking for qualified names with the same local part and with prefixes // which have been bound to namespace names that are identical. if (fGrammarType == Grammar::DTDGrammarType) { - for (unsigned int attrIndex=0; attrIndex < retCount; attrIndex++) { - curAttr = toFill.elementAt(attrIndex); - if (uriId == curAttr->getURIId() && - XMLString::equals(suffPtr, curAttr->getName())) { + if (!toUseHashTable) + { + for (unsigned int attrIndex=0; attrIndex < retCount; attrIndex++) { + curAttr = toFill.elementAt(attrIndex); + if (uriId == curAttr->getURIId() && + XMLString::equals(suffPtr, curAttr->getName())) { + emitError + ( + + XMLErrs::AttrAlreadyUsedInSTag + , curAttr->getName() + , elemDecl->getFullName() + ); + } + } + } + else + { + if (fAttrDupChkRegistry->containsKey((void*)suffPtr, uriId)) + { emitError - ( + ( XMLErrs::AttrAlreadyUsedInSTag , curAttr->getName() , elemDecl->getFullName() - ); + ); } } } @@ -658,6 +683,12 @@ ); curAttr->setSpecified(true); } + + if (toUseHashTable) + { + fAttrDupChkRegistry->put((void*)suffPtr, uriId, curAttr); + } + if(psviAttr) psviAttr->setValue(curAttr->getValue()); 1.54 +32 -7 xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp Index: DGXMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp,v retrieving revision 1.53 retrieving revision 1.54 diff -u -r1.53 -r1.54 --- DGXMLScanner.cpp 20 Sep 2004 15:00:49 -0000 1.53 +++ DGXMLScanner.cpp 28 Sep 2004 21:27:38 -0000 1.54 @@ -2378,6 +2378,13 @@ void DGXMLScanner::scanAttrListforNameSpaces(RefVectorOf<XMLAttr>* theAttrList, int attCount, XMLElementDecl* elemDecl) { + + // + // Decide if to use hash table to do duplicate checking + // + bool toUseHashTable = false; + setAttrDupChkRegistry((unsigned int&)attCount, toUseHashTable); + // Make an initial pass through the list and find any xmlns attributes or // schema attributes. // When we find one, send it off to be used to update the element stack's @@ -2412,17 +2419,35 @@ // by checking for qualified names with the same local part and with prefixes // which have been bound to namespace names that are identical. XMLAttr* loopAttr; - for (int attrIndex=0; attrIndex < index; attrIndex++) { - loopAttr = theAttrList->elementAt(attrIndex); - if (loopAttr->getURIId() == curAttr->getURIId() && - XMLString::equals(loopAttr->getName(), curAttr->getName())) { + + if (!toUseHashTable) + { + for (int attrIndex=0; attrIndex < index; attrIndex++) { + loopAttr = theAttrList->elementAt(attrIndex); + if (loopAttr->getURIId() == curAttr->getURIId() && + XMLString::equals(loopAttr->getName(), curAttr->getName())) { + emitError + ( + XMLErrs::AttrAlreadyUsedInSTag + , curAttr->getName() + , elemDecl->getFullName() + ); + } + } + } + else + { + if (fAttrDupChkRegistry->containsKey((void*)curAttr->getName(), curAttr->getURIId())) + { emitError - ( + ( XMLErrs::AttrAlreadyUsedInSTag , curAttr->getName() , elemDecl->getFullName() - ); + ); } + + fAttrDupChkRegistry->put((void*)curAttr->getName(), curAttr->getURIId(), curAttr); } } }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]