internal IGXMLScanner.cpp IGXMLScanner.hpp IGXMLScanner2.cpp ReaderMgr.cpp ReaderMgr.hpp XMLScanner.cpp

knoaman Thu, 14 Aug 2003 11:42:04 -0700

knoaman     2003/08/13 19:56:41

  Modified:    c/src/xercesc/internal IGXMLScanner.cpp IGXMLScanner.hpp
                        IGXMLScanner2.cpp ReaderMgr.cpp ReaderMgr.hpp
                        XMLScanner.cpp
  Log:
  Code refactoring to improve performance of validation.
  
  Revision  Changes    Path
  1.21      +44 -3     xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp
  
  Index: IGXMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.cpp,v
  retrieving revision 1.20
  retrieving revision 1.21
  diff -u -r1.20 -r1.21
  --- IGXMLScanner.cpp  31 Jul 2003 17:05:03 -0000      1.20
  +++ IGXMLScanner.cpp  14 Aug 2003 02:56:41 -0000      1.21
  @@ -518,6 +518,9 @@
       fValueStoreCache = new (fMemoryManager) ValueStoreCache(fMemoryManager);
       fFieldActivator = new (fMemoryManager) FieldActivator(fValueStoreCache, fMatcherStack, fMemoryManager);
       fValueStoreCache->setScanner(this);
  +
  +    // Create schemaLocation pair info
  +    fLocationPairs = new (fMemoryManager) ValueVectorOf<XMLCh*>(8, fMemoryManager);
   }
   
   void IGXMLScanner::cleanUp()
  @@ -529,6 +532,7 @@
       delete fFieldActivator;
       delete fMatcherStack;
       delete fValueStoreCache;
  +    delete fLocationPairs;
   }
   
   // ---------------------------------------------------------------------------
  @@ -2851,8 +2855,16 @@
           }
       }
   
  -    fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
  -    fGrammarResolver->putGrammar(fDTDGrammar);
  +    fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(XMLUni::fgDTDEntityString);
  +
  +    if (fDTDGrammar) {
  +        fDTDGrammar->reset();
  +    }
  +    else {
  +        fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
  +        fGrammarResolver->putGrammar(fDTDGrammar);
  +    }
  +
       fGrammar = fDTDGrammar;
       fGrammarType = fGrammar->getGrammarType();
       fValidator->setGrammar(fGrammar);
  @@ -2956,5 +2968,34 @@
       return fDTDGrammar;
   }
   
  +// ---------------------------------------------------------------------------
  +//  IGXMLScanner: Helper methods
  +// ---------------------------------------------------------------------------
  +void IGXMLScanner::processSchemaLocation(XMLCh* const schemaLoc)
  +{
  +    XMLCh* locStr = schemaLoc;
  +    XMLReader* curReader = fReaderMgr.getCurrentReader();
  +
  +    fLocationPairs->removeAllElements();
  +    while (*locStr)
  +    {
  +        do {
  +            if (!curReader->isWhitespace(*locStr))
  +               break;
  +
  +            *locStr = chNull;
  +        } while (*++locStr);
  +
  +        if (*locStr) {
  +            
  +            fLocationPairs->addElement(locStr);
  +
  +            while (*++locStr) {
  +                if (curReader->isWhitespace(*locStr))
  +                    break;
  +            }
  +        }
  +    }
  +}
   
   XERCES_CPP_NAMESPACE_END
  
  
  
  1.9       +6 -0      xml-xerces/c/src/xercesc/internal/IGXMLScanner.hpp
  
  Index: IGXMLScanner.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner.hpp,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- IGXMLScanner.hpp  10 Jul 2003 19:47:23 -0000      1.8
  +++ IGXMLScanner.hpp  14 Aug 2003 02:56:41 -0000      1.9
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.9  2003/08/14 02:56:41  knoaman
  + * Code refactoring to improve performance of validation.
  + *
    * Revision 1.8  2003/07/10 19:47:23  peiyongz
    * Stateless Grammar: Initialize scanner with grammarResolver,
    *                                creating grammar through grammarPool
  @@ -239,6 +242,7 @@
                                   bool& skipThisOne,
                                   bool& laxThisOne);
       void resizeElemState();
  +    void processSchemaLocation(XMLCh* const schemaLoc);
   
       // -----------------------------------------------------------------------
       //  Private scanning methods
  @@ -327,6 +331,7 @@
       XPathMatcherStack*          fMatcherStack;
       ValueStoreCache*            fValueStoreCache;
       FieldActivator*             fFieldActivator;
  +    ValueVectorOf<XMLCh*>*      fLocationPairs;
   };
   
   inline const XMLCh* IGXMLScanner::getName() const
  
  
  
  1.30      +40 -37    xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp
  
  Index: IGXMLScanner2.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/IGXMLScanner2.cpp,v
  retrieving revision 1.29
  retrieving revision 1.30
  diff -u -r1.29 -r1.30
  --- IGXMLScanner2.cpp 31 Jul 2003 17:05:03 -0000      1.29
  +++ IGXMLScanner2.cpp 14 Aug 2003 02:56:41 -0000      1.30
  @@ -886,8 +886,16 @@
       fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar);
       fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar);
   
  -    fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
  -    fGrammarResolver->putGrammar(fDTDGrammar);
  +    fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(XMLUni::fgDTDEntityString);
  +     
  +    if (!fDTDGrammar) {
  +
  +        fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
  +        fGrammarResolver->putGrammar(fDTDGrammar);
  +    }
  +    else
  +        fDTDGrammar->reset();
  +
       fGrammar = fDTDGrammar;
       fGrammarType = fGrammar->getGrammarType();
       fRootGrammar = 0;
  @@ -1011,8 +1019,8 @@
       if (fValidate)
       {
           // Get the raw data we need for the callback
  -        const XMLCh* const rawBuf = toSend.getRawBuffer();
  -        const unsigned int len = toSend.getLen();
  +        XMLCh* rawBuf = toSend.getRawBuffer();
  +        unsigned int len = toSend.getLen();
   
           // And see if the current element is a 'Children' style content model
           const ElemStack::StackElem* topElem = fElemStack.topElement();
  @@ -1045,31 +1053,26 @@
                   }
                   else
                   {
  -                    // The normalized data can only be as large as the
  -                    // original size, so this will avoid allocating way
  -                    // too much or too little memory.
  -                    XMLBuffer toFill(len+1, fMemoryManager);
  -                    toFill.set(rawBuf);
  -
                       if (fNormalizeData) {
                           // normalize the character according to schema whitespace facet
                           XMLBufBid bbtemp(&fBufMgr);
                           XMLBuffer& tempBuf = bbtemp.getBuffer();
   
                           DatatypeValidator* tempDV = ((SchemaElementDecl*) topElem->fThisElement)->getDatatypeValidator();
  -                        ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, toFill.getRawBuffer(),  tempBuf);
  -                        toFill.set(tempBuf.getRawBuffer());
  +                        ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf,  tempBuf);
  +                        rawBuf = tempBuf.getRawBuffer();
  +                        len = tempBuf.getLen();
                       }
   
                       // tell the schema validation about the character data for checkContent later
  -                    ((SchemaValidator*) fValidator)->setDatatypeBuffer(toFill.getRawBuffer());
  +                    ((SchemaValidator*) fValidator)->setDatatypeBuffer(rawBuf);
   
                       // call all active identity constraints
                       if (fMatcherStack->getMatcherCount())
  -                        fContent.append(toFill.getRawBuffer(), toFill.getLen());
  +                        fContent.append(rawBuf, len);
   
                       if (fDocHandler)
  -                        fDocHandler->docCharacters(toFill.getRawBuffer(), toFill.getLen(), false);
  +                        fDocHandler->docCharacters(rawBuf, len, false);
                   }
               }
           }
  @@ -1087,31 +1090,26 @@
                   }
                   else
                   {
  -                    // The normalized data can only be as large as the
  -                    // original size, so this will avoid allocating way
  -                    // too much or too little memory.
  -                    XMLBuffer toFill(len+1, fMemoryManager);
  -                    toFill.set(rawBuf);
  -
                       if (fNormalizeData) {
                           // normalize the character according to schema whitespace facet
                           XMLBufBid bbtemp(&fBufMgr);
                           XMLBuffer& tempBuf = bbtemp.getBuffer();
   
                           DatatypeValidator* tempDV = ((SchemaElementDecl*) topElem->fThisElement)->getDatatypeValidator();
  -                        ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, toFill.getRawBuffer(),  tempBuf);
  -                        toFill.set(tempBuf.getRawBuffer());
  +                        ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf,  tempBuf);
  +                        rawBuf = tempBuf.getRawBuffer();
  +                        len = tempBuf.getLen();
                       }
   
                       // tell the schema validation about the character data for checkContent later
  -                    ((SchemaValidator*) fValidator)->setDatatypeBuffer(toFill.getRawBuffer());
  +                    ((SchemaValidator*) fValidator)->setDatatypeBuffer(rawBuf);
   
                       // call all active identity constraints
                       if (fMatcherStack->getMatcherCount())
  -                        fContent.append(toFill.getRawBuffer(), toFill.getLen());
  +                        fContent.append(rawBuf, len);
   
                       if (fDocHandler)
  -                        fDocHandler->docCharacters(toFill.getRawBuffer(), toFill.getLen(), false);
  +                        fDocHandler->docCharacters(rawBuf, len, false);
                   }
               }
               else
  @@ -1247,23 +1245,26 @@
           XMLBufBid bbXsi(&fBufMgr);
           XMLBuffer& fXsiType = bbXsi.getBuffer();
   
  -        QName attName(fMemoryManager);
  -
           for (index = 0; index < attCount; index++)
           {
               // each attribute has the prefix:suffix="value"
               const KVStringPair* curPair = fRawAttrList->elementAt(index);
               const XMLCh* rawPtr = curPair->getKey();
  +            const XMLCh* prefPtr = XMLUni::fgZeroLenString;
  +            int   colonInd = XMLString::indexOf(rawPtr, chColon);
   
  -            attName.setName(rawPtr, fEmptyNamespaceId);
  -            const XMLCh* prefPtr = attName.getPrefix();
  +            if (colonInd != -1) {
  +
  +                fURIBuf.set(rawPtr, colonInd);
  +                prefPtr = fURIBuf.getRawBuffer();
  +            }
   
               // if schema URI has been seen, scan for the schema location and uri
               // and resolve the schema grammar; or scan for schema type
               if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) {
   
                   const XMLCh* valuePtr = curPair->getValue();
  -                const XMLCh* suffPtr = attName.getLocalPart();
  +                const XMLCh* suffPtr = &rawPtr[colonInd + 1];
   
                   if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCACTION))
                       parseSchemaLocation(valuePtr);
  @@ -1298,17 +1299,19 @@
   
   void IGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr)
   {
  -    BaseRefVectorOf<XMLCh>* schemaLocation = XMLString::tokenizeString(schemaLocationStr);
  -    unsigned int size = schemaLocation->size();
  +    XMLCh* locStr = XMLString::replicate(schemaLocationStr, fMemoryManager);
  +    ArrayJanitor<XMLCh> janLoc(locStr, fMemoryManager);
  +
  +    processSchemaLocation(locStr);
  +    unsigned int size = fLocationPairs->size();
  +
       if (size % 2 != 0 ) {
           emitError(XMLErrs::BadSchemaLocation);
       } else {
           for(unsigned int i=0; i<size; i=i+2) {
  -            resolveSchemaGrammar(schemaLocation->elementAt(i+1), schemaLocation->elementAt(i));
  +            resolveSchemaGrammar(fLocationPairs->elementAt(i+1), fLocationPairs->elementAt(i));
           }
       }
  -
  -    delete schemaLocation;
   }
   
   void IGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri) {
  
  
  
  1.16      +1 -12     xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp
  
  Index: ReaderMgr.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -r1.15 -r1.16
  --- ReaderMgr.cpp     18 May 2003 14:02:04 -0000      1.15
  +++ ReaderMgr.cpp     14 Aug 2003 02:56:41 -0000      1.16
  @@ -789,17 +789,6 @@
   }
   
   
  -const XMLReader* ReaderMgr::getCurrentReader() const
  -{
  -    return fCurReader;
  -}
  -
  -
  -XMLReader* ReaderMgr::getCurrentReader()
  -{
  -    return fCurReader;
  -}
  -
   unsigned int ReaderMgr::getReaderDepth() const
   {
       // If the stack doesn't exist, its obviously zero
  
  
  
  1.10      +13 -0     xml-xerces/c/src/xercesc/internal/ReaderMgr.hpp
  
  Index: ReaderMgr.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ReaderMgr.hpp,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- ReaderMgr.hpp     16 May 2003 21:36:57 -0000      1.9
  +++ ReaderMgr.hpp     14 Aug 2003 02:56:41 -0000      1.10
  @@ -56,6 +56,9 @@
   
   /*
    * $Log$
  + * Revision 1.10  2003/08/14 02:56:41  knoaman
  + * Code refactoring to improve performance of validation.
  + *
    * Revision 1.9  2003/05/16 21:36:57  knoaman
    * Memory manager implementation: Modify constructors to pass in the memory manager.
    *
  @@ -385,6 +388,16 @@
   inline unsigned int ReaderMgr::getCurrentReaderNum() const
   {
       return fCurReader->getReaderNum();
  +}
  +
  +inline const XMLReader* ReaderMgr::getCurrentReader() const
  +{
  +    return fCurReader;
  +}
  +
  +inline XMLReader* ReaderMgr::getCurrentReader()
  +{
  +    return fCurReader;
   }
   
   inline bool ReaderMgr::getName(XMLBuffer& toFill)
  
  
  
  1.47      +41 -38    xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
  
  Index: XMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
  retrieving revision 1.46
  retrieving revision 1.47
  diff -u -r1.46 -r1.47
  --- XMLScanner.cpp    10 Jul 2003 19:47:24 -0000      1.46
  +++ XMLScanner.cpp    14 Aug 2003 02:56:41 -0000      1.47
  @@ -331,27 +331,51 @@
           //  Create a temporary URL. Since this is the primary document,
           //  it has to be fully qualified. If not, then assume we are just
           //  mistaking a file for a URL.
  -        XMLURL tmpURL(systemId, fMemoryManager);
  -        if (tmpURL.isRelative()) {
  +        XMLURL tmpURL(fMemoryManager);
  +
  +        if (XMLURL::parse(systemId, tmpURL)) {
  +
  +            if (tmpURL.isRelative()) {
  +                if (!fStandardUriConformant)
  +                    srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
  +                else {
  +                    // since this is the top of the try/catch, cannot call ThrowXML
  +                    // emit the error directly
  +                    MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent);
  +                    fInException = true;
  +                    emitError
  +                    (
  +                        XMLErrs::XMLException_Fatal
  +                        , e.getType()
  +                        , e.getMessage()
  +                    );
  +                    return;
  +                }
  +            }
  +            else
  +            {
  +                if (fStandardUriConformant && tmpURL.hasInvalidChar()) {
  +                    MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL);
  +                    fInException = true;
  +                    emitError
  +                    (
  +                        XMLErrs::XMLException_Fatal
  +                        , e.getType()
  +                        , e.getMessage()
  +                    );
  +                    return;
  +                }
  +                srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);
  +            }
  +        }
  +        else {
  +
               if (!fStandardUriConformant)
                   srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
               else {
                   // since this is the top of the try/catch, cannot call ThrowXML
                   // emit the error directly
  -                MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent);
  -                fInException = true;
  -                emitError
  -                (
  -                    XMLErrs::XMLException_Fatal
  -                    , e.getType()
  -                    , e.getMessage()
  -                );
  -                return;
  -            }
  -        }
  -        else
  -        {
  -            if (fStandardUriConformant && tmpURL.hasInvalidChar()) {
  +                // lazy bypass ... since all MalformedURLException are fatal, no need to check the type
                   MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL);
                   fInException = true;
                   emitError
  @@ -362,29 +386,8 @@
                   );
                   return;
               }
  -            srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);
           }
  -
       }
  -    catch(const MalformedURLException& e)
  -    {
  -        if (!fStandardUriConformant)
  -            srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
  -        else {
  -            // since this is the top of the try/catch, cannot call ThrowXML
  -            // emit the error directly
  -            // lazy bypass ... since all MalformedURLException are fatal, no need to check the type
  -            fInException = true;
  -            emitError
  -            (
  -                XMLErrs::XMLException_Fatal
  -                , e.getType()
  -                , e.getMessage()
  -            );
  -            return;
  -        }
  -    }
  -
       catch(const XMLException& excToCatch)
       {
           //  For any other XMLException,


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: xml-xerces/c/src/xercesc/internal IGXMLScanner.cpp IGXMLScanner.hpp IGXMLScanner2.cpp ReaderMgr.cpp ReaderMgr.hpp XMLScanner.cpp

Reply via email to