neilg 2004/06/02 12:58:10 Modified: c/src/xercesc/internal DGXMLScanner.cpp IGXMLScanner.cpp SGXMLScanner.cpp ElemStack.cpp ElemStack.hpp Log: Fix bug where scanners would accept malformed tags of the form <p:a xmlns:p="b" xmlns:q="b"></q:a> when namespace processing was enabled. This also opened the way for some end-tag scanning performance improvements. Revision Changes Path 1.48 +10 -24 xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp Index: DGXMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/DGXMLScanner.cpp,v retrieving revision 1.47 retrieving revision 1.48 diff -u -r1.47 -r1.48 --- DGXMLScanner.cpp 27 May 2004 16:33:07 -0000 1.47 +++ DGXMLScanner.cpp 2 Jun 2004 19:58:09 -0000 1.48 @@ -645,44 +645,30 @@ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager); } - // After the </ is the element QName, so get a name from the input - if (!fReaderMgr.getName(fQNameBuf)) - { - // It failed so we can't really do anything with it - emitError(XMLErrs::ExpectedElementName); - fReaderMgr.skipPastChar(chCloseAngle); - return; - } - - // Resolve element name uri if needed - unsigned int uriId = fEmptyNamespaceId; - const ElemStack::StackElem* topElem = fElemStack.topElement(); - if (fDoNamespaces) - { - uriId = resolvePrefix - ( - topElem->fThisElement->getElementName()->getPrefix() - , ElemStack::Mode_Element - ); - } + // Pop the stack of the element we are supposed to be ending. Remember + // that we don't own this. The stack just keeps them and reuses them. + unsigned int uriId = (fDoNamespaces) + ? fElemStack.getCurrentURI() : fEmptyNamespaceId; // Pop the stack of the element we are supposed to be ending. Remember // that we don't own this. The stack just keeps them and reuses them. - fElemStack.popTop(); + const ElemStack::StackElem* topElem = fElemStack.popTop(); + XMLElementDecl *tempElement = topElem->fThisElement; // See if it was the root element, to avoid multiple calls below const bool isRoot = fElemStack.isEmpty(); // Make sure that its the end of the element that we expect - if (!XMLString::equals(topElem->fThisElement->getFullName(), fQNameBuf.getRawBuffer())) + if (!fReaderMgr.skippedString(tempElement->getFullName())) { emitError ( XMLErrs::ExpectedEndOfTagX - , topElem->fThisElement->getFullName() + , tempElement->getFullName() ); + fReaderMgr.skipPastChar(chCloseAngle); + return; } - // Make sure we are back on the same reader as where we started if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum()) 1.70 +46 -59 xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp Index: IGXMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp,v retrieving revision 1.69 retrieving revision 1.70 diff -u -r1.69 -r1.70 --- IGXMLScanner.cpp 1 Jun 2004 16:48:13 -0000 1.69 +++ IGXMLScanner.cpp 2 Jun 2004 19:58:09 -0000 1.70 @@ -976,68 +976,45 @@ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager); } - // After the </ is the element QName, so get a name from the input - if (!fReaderMgr.getName(fQNameBuf)) + // Pop the stack of the element we are supposed to be ending. Remember + // that we don't own this. The stack just keeps them and reuses them. + unsigned int uriId = (fDoNamespaces) + ? fElemStack.getCurrentURI() : fEmptyNamespaceId; + + // these get initialized below + const ElemStack::StackElem* topElem = 0; + XMLElementDecl *tempElement = 0; + XMLCh *elemName = 0; + + // Make sure that its the end of the element that we expect + // special case for schema validation, whose element decls, + // obviously don't contain prefix information + if(fGrammarType == Grammar::SchemaGrammarType) + { + elemName = fElemStack.getCurrentSchemaElemName(); + topElem = fElemStack.popTop(); + tempElement = topElem->fThisElement; + } + else { - // It failed so we can't really do anything with it - emitError(XMLErrs::ExpectedElementName); - fReaderMgr.skipPastChar(chCloseAngle); - //REVISIT: Do we restore PSVI information? - return; + topElem = fElemStack.popTop(); + tempElement = topElem->fThisElement; + elemName = (XMLCh *)tempElement->getFullName(); } - - unsigned int uriId = fEmptyNamespaceId; - int prefixColonPos = -1; - if (fDoNamespaces) + if (!fReaderMgr.skippedString(elemName)) { - uriId = resolveQName + emitError ( - fQNameBuf.getRawBuffer() - , fPrefixBuf - , ElemStack::Mode_Element - , prefixColonPos + XMLErrs::ExpectedEndOfTagX + , elemName ); + fReaderMgr.skipPastChar(chCloseAngle); + return; } - // Pop the stack of the element we are supposed to be ending. Remember - // that we don't own this. The stack just keeps them and reuses them. - // - // NOTE: We CANNOT do this until we've resolved the element name because - // the element stack top contains the prefix to URI mappings for this - // element. - unsigned int topUri = fElemStack.getCurrentURI(); - const ElemStack::StackElem* topElem = fElemStack.popTop(); - // See if it was the root element, to avoid multiple calls below const bool isRoot = fElemStack.isEmpty(); - // Make sure that its the end of the element that we expect - XMLElementDecl* tempElement = topElem->fThisElement; - if (fDoNamespaces && fGrammarType == Grammar::SchemaGrammarType) { - - // reset error occurred - fPSVIElemContext.fErrorOccurred = fErrorStack->pop(); - const XMLCh* rawNameBuf = fQNameBuf.getRawBuffer(); - if ((topUri != uriId) || (!XMLString::equals(tempElement->getBaseName(), &rawNameBuf[prefixColonPos + 1]))) - { - emitError - ( - XMLErrs::ExpectedEndOfTagX - , topElem->fThisElement->getFullName() - ); - } - } - else { - if (!XMLString::equals(tempElement->getFullName(), fQNameBuf.getRawBuffer())) - { - emitError - ( - XMLErrs::ExpectedEndOfTagX - , topElem->fThisElement->getFullName() - ); - } - } - // Make sure we are back on the same reader as where we started if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialTagMarkupError); @@ -1057,6 +1034,8 @@ if (fGrammarType == Grammar::SchemaGrammarType) { + // reset error occurred + fPSVIElemContext.fErrorOccurred = fErrorStack->pop(); if (fValidate && topElem->fThisElement->isDeclared()) { fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo(); @@ -2372,7 +2351,6 @@ const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1]; const XMLCh* original_uriStr = fGrammar->getTargetNamespace(); - unsigned orgGrammarUri = fURIStringPool->getId(original_uriStr); // REVISIT: since all this code only really // makes sense for schemas, why can DTD validation theoretically pass @@ -2380,12 +2358,11 @@ if (uriId != fEmptyNamespaceId) { // Check in current grammar before switching if necessary - const XMLCh *rawQName = fQNameBuf.getRawBuffer(); elemDecl = fGrammar->getElemDecl ( uriId , nameRawBuf - , rawQName + , qnameRawBuf , currentScope ); // may have not been declared; must look everywhere: @@ -2393,14 +2370,14 @@ if(fGrammarType == Grammar::DTDGrammarType) { // should never occur in practice - elemDecl = fDTDElemNonDeclPool->getByKey(rawQName); + elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf); } else if (fGrammarType == Grammar::SchemaGrammarType) { elemDecl = fSchemaElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope); } - if (!elemDecl && (orgGrammarUri != uriId)) { + if (!elemDecl && ( fURIStringPool->getId(original_uriStr) != uriId)) { // not found, switch to the specified grammar const XMLCh* uriStr = getURIText(uriId); bool errorCondition = !switchGrammar(uriStr) && fValidate; @@ -2524,7 +2501,12 @@ // before we made grammars stateless: elemDecl = fSchemaElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope); } - if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) { + // this is initialized correctly only if there is + // no element decl. The other uses in this scope will only + // be encountered if there continues to be no element decl--which + // implies that this will have been initialized correctly. + unsigned orgGrammarUri = fEmptyNamespaceId; + if (!elemDecl && (orgGrammarUri = fURIStringPool->getId(original_uriStr)) != fEmptyNamespaceId) { //not found, switch grammar and try globalNS bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate; if (errorCondition && !laxThisOne) @@ -2753,6 +2735,10 @@ if (fGrammarType == Grammar::SchemaGrammarType) { + // squirrel away the element's QName, so that we can do an efficient + // end-tag match + fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer()); + ComplexTypeInfo* typeinfo = (fValidate) ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo() : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo(); @@ -3527,3 +3513,4 @@ } XERCES_CPP_NAMESPACE_END + 1.83 +27 -41 xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp Index: SGXMLScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/SGXMLScanner.cpp,v retrieving revision 1.82 retrieving revision 1.83 diff -u -r1.82 -r1.83 --- SGXMLScanner.cpp 27 May 2004 16:33:07 -0000 1.82 +++ SGXMLScanner.cpp 2 Jun 2004 19:58:09 -0000 1.83 @@ -904,53 +904,31 @@ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager); } - // After the </ is the element QName, so get a name from the input - if (!fReaderMgr.getName(fQNameBuf)) - { - // It failed so we can't really do anything with it - emitError(XMLErrs::ExpectedElementName); - fReaderMgr.skipPastChar(chCloseAngle); - return; - } - - int prefixColonPos = -1; - unsigned int uriId = resolveQName - ( - fQNameBuf.getRawBuffer() - , fPrefixBuf - , ElemStack::Mode_Element - , prefixColonPos - ); - // Pop the stack of the element we are supposed to be ending. Remember // that we don't own this. The stack just keeps them and reuses them. - // - // NOTE: We CANNOT do this until we've resolved the element name because - // the element stack top contains the prefix to URI mappings for this - // element. - unsigned int topUri = fElemStack.getCurrentURI(); - const ElemStack::StackElem* topElem = fElemStack.popTop(); - - // See if it was the root element, to avoid multiple calls below - const bool isRoot = fElemStack.isEmpty(); + unsigned int uriId = (fDoNamespaces) + ? fElemStack.getCurrentURI() : fEmptyNamespaceId; // Make sure that its the end of the element that we expect - XMLElementDecl* tempElement = topElem->fThisElement; - const XMLCh* rawNameBuf = fQNameBuf.getRawBuffer(); - - // reset error occurred - fPSVIElemContext.fErrorOccurred = fErrorStack->pop(); - - if ((topUri != uriId) || - (!XMLString::equals(tempElement->getBaseName(), &rawNameBuf[prefixColonPos + 1]))) + XMLCh *elemName = fElemStack.getCurrentSchemaElemName(); + const ElemStack::StackElem* topElem = fElemStack.popTop(); + XMLElementDecl *tempElement = topElem->fThisElement; + if (!fReaderMgr.skippedString(elemName)) { emitError ( XMLErrs::ExpectedEndOfTagX - , topElem->fThisElement->getFullName() + , elemName ); + fReaderMgr.skipPastChar(chCloseAngle); + return; } + // See if it was the root element, to avoid multiple calls below + const bool isRoot = fElemStack.isEmpty(); + + fPSVIElemContext.fErrorOccurred = fErrorStack->pop(); + // Make sure we are back on the same reader as where we started if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialTagMarkupError); @@ -1202,10 +1180,11 @@ // First we have to do the rawest attribute scan. We don't do any // normalization of them at all, since we don't know yet what type they // might be (since we need the element decl in order to do that.) + const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer(); bool isEmpty; unsigned int attCount = rawAttrScan ( - fQNameBuf.getRawBuffer() + qnameRawBuf , *fRawAttrList , isEmpty ); @@ -1275,7 +1254,6 @@ // the element decl for this element. We have now update the prefix to // namespace map so we should get the correct element now. int prefixColonPos = -1; - const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer(); unsigned int uriId = resolveQName ( qnameRawBuf @@ -1302,7 +1280,6 @@ bool laxBeforeElementFound = false; const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1]; const XMLCh* original_uriStr = fGrammar->getTargetNamespace(); - unsigned orgGrammarUri = fURIStringPool->getId(original_uriStr); if (uriId != fEmptyNamespaceId) { @@ -1320,7 +1297,7 @@ // before we made grammars stateless: elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope); } - if (!elemDecl && (orgGrammarUri != uriId)) { + if (!elemDecl && ( fURIStringPool->getId(original_uriStr) != uriId)) { // not found, switch to the specified grammar const XMLCh* uriStr = getURIText(uriId); bool errorCondition = !switchGrammar(uriStr) && fValidate; @@ -1422,7 +1399,12 @@ // before we made grammars stateless: elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope); } - if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) { + // this is initialized correctly only if there is + // no element decl. The other uses in this scope will only + // be encountered if there continues to be no element decl--which + // implies that this will have been initialized correctly. + unsigned orgGrammarUri = fEmptyNamespaceId; + if (!elemDecl && (orgGrammarUri = fURIStringPool->getId(original_uriStr)) != fEmptyNamespaceId) { //not found, switch grammar and try globalNS bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate; if (errorCondition && !laxThisOne) @@ -1625,6 +1607,10 @@ if (((SchemaValidator*) fValidator)->getErrorOccurred()) fPSVIElemContext.fErrorOccurred = true; } + + // squirrel away the element's QName, so that we can do an efficient + // end-tag match + fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer()); ComplexTypeInfo* typeinfo = (fValidate) ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo() 1.13 +10 -1 xml-xerces/c/src/xercesc/internal/ElemStack.cpp Index: ElemStack.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ElemStack.cpp,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- ElemStack.cpp 27 Apr 2004 19:17:52 -0000 1.12 +++ ElemStack.cpp 2 Jun 2004 19:58:10 -0000 1.13 @@ -56,6 +56,12 @@ /* * $Log$ + * Revision 1.13 2004/06/02 19:58:10 neilg + * Fix bug where scanners would accept malformed tags of the form + * <p:a xmlns:p="b" xmlns:q="b"></q:a> when namespace processing was + * enabled. This also opened the way for some end-tag scanning + * performance improvements. + * * Revision 1.12 2004/04/27 19:17:52 peiyongz * XML1.0-3rd VC: element content(children) dont allow white space from * EntityRef/CharRef @@ -209,6 +215,7 @@ fMemoryManager->deallocate(fStack[stackInd]->fChildren);//delete [] fStack[stackInd]->fChildren; fMemoryManager->deallocate(fStack[stackInd]->fMap);//delete [] fStack[stackInd]->fMap; + fMemoryManager->deallocate(fStack[stackInd]->fSchemaElemName); delete fStack[stackInd]; } @@ -235,6 +242,8 @@ fStack[fStackTop]->fChildren = 0; fStack[fStackTop]->fMapCapacity = 0; fStack[fStackTop]->fMap = 0; + fStack[fStackTop]->fSchemaElemName = 0; + fStack[fStackTop]->fSchemaElemNameMaxLen = 0; } // Set up the new top row @@ -271,6 +280,8 @@ fStack[fStackTop]->fChildren = 0; fStack[fStackTop]->fMapCapacity = 0; fStack[fStackTop]->fMap = 0; + fStack[fStackTop]->fSchemaElemName = 0; + fStack[fStackTop]->fSchemaElemNameMaxLen = 0; } // Set up the new top row 1.10 +30 -1 xml-xerces/c/src/xercesc/internal/ElemStack.hpp Index: ElemStack.hpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ElemStack.hpp,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- ElemStack.hpp 27 Apr 2004 19:17:52 -0000 1.9 +++ ElemStack.hpp 2 Jun 2004 19:58:10 -0000 1.10 @@ -56,6 +56,12 @@ /* * $Log$ + * Revision 1.10 2004/06/02 19:58:10 neilg + * Fix bug where scanners would accept malformed tags of the form + * <p:a xmlns:p="b" xmlns:q="b"></q:a> when namespace processing was + * enabled. This also opened the way for some end-tag scanning + * performance improvements. + * * Revision 1.9 2004/04/27 19:17:52 peiyongz * XML1.0-3rd VC: element content(children) dont allow white space from * EntityRef/CharRef @@ -221,6 +227,8 @@ int fCurrentScope; Grammar* fCurrentGrammar; unsigned int fCurrentURI; + XMLCh * fSchemaElemName; + unsigned int fSchemaElemNameMaxLen; }; enum MapModes @@ -270,6 +278,9 @@ void setCurrentURI(unsigned int uri); unsigned int getCurrentURI(); + inline void setCurrentSchemaElemName(const XMLCh * const schemaElemName); + inline XMLCh *getCurrentSchemaElemName(); + // ----------------------------------------------------------------------- // Prefix map methods // ----------------------------------------------------------------------- @@ -584,6 +595,26 @@ { fStack[fStackTop-1]->fReferenceEscaped = true; return; +} + +inline void ElemStack::setCurrentSchemaElemName(const XMLCh * const schemaElemName) +{ + unsigned int schemaElemNameLen = XMLString::stringLen(schemaElemName); + unsigned int stackPos = fStackTop-1; + + if(fStack[stackPos]->fSchemaElemNameMaxLen <= schemaElemNameLen) + { + XMLCh *tempStr = fStack[stackPos]->fSchemaElemName; + fStack[stackPos]->fSchemaElemNameMaxLen = schemaElemNameLen << 1; + fStack[stackPos]->fSchemaElemName = (XMLCh *)fMemoryManager->allocate((fStack[stackPos]->fSchemaElemNameMaxLen)*sizeof(XMLCh)); + fMemoryManager->deallocate(tempStr); + } + XMLString::copyString(fStack[stackPos]->fSchemaElemName, schemaElemName); +} + +inline XMLCh *ElemStack::getCurrentSchemaElemName() +{ + return fStack[fStackTop-1]->fSchemaElemName; } inline int ElemStack::getCurrentScope()
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]