http://nagoya.apache.org/bugzilla/show_bug.cgi?id=2365

*** shadow/2365 Wed Jun 27 11:44:12 2001
--- shadow/2365.tmp.7542        Wed Jun 27 11:44:12 2001
***************
*** 0 ****
--- 1,115 ----
+ +============================================================================+
+ | Huge performance problem with the parser in XMLScanner::sendCharData()     |
+ +----------------------------------------------------------------------------+
+ |        Bug #: 2365                        Product: Xerces-C                |
+ |       Status: NEW                         Version: 1.5                     |
+ |   Resolution:                            Platform: All                     |
+ |     Severity: Critical                 OS/Version: Other                   |
+ |     Priority: Other                     Component: Miscellaneous           |
+ +----------------------------------------------------------------------------+
+ |  Assigned To: [EMAIL PROTECTED]                                  |
+ |  Reported By: [EMAIL PROTECTED]                                    |
+ |      CC list: Cc:                                                          |
+ +----------------------------------------------------------------------------+
+ |          URL:                                                              |
+ +============================================================================+
+ |                              DESCRIPTION                                   |
+ XMLScanner::sendCharData() creates an XMLBuffer upon entering the function.  The
+ constructor for XMLBuffer allocates a 1K block of memory.  Since the buffer is
+ only used in one section of the code, scoping it to that section will
+ drastically improve performance.  A patch is included below.
+ 
+ I have also included a patch for XMLBuffer that allows an initial capacity to be
+ specified at construction.  This should make processing small amounts of data
+ cheaper, since a buffer smaller than the default size can be requested.
+ 
+ Note that this patch subsumes the patch I posted for bug 2363.
+ 
+ cvs diff XMLScanner2.cpp (in directory V:\xml-xerces\c\src\internal\)
+ Index: XMLScanner2.cpp
+ ===================================================================
+ RCS file: /home/cvs/xml-xerces/c/src/internal/XMLScanner2.cpp,v
+ retrieving revision 1.38
+ diff -r1.38 XMLScanner2.cpp
+ 843,847d842
+ <     // Get the raw data we need for the callback
+ <     const XMLCh* rawBuf = toSend.getRawBuffer();
+ <     const unsigned int len = toSend.getLen();
+ <     XMLBuffer toFill;
+ < 
+ 855,856c850,852
+ <         // See if all the text is whitespace
+ <         const bool isSpaces = XMLReader::isAllSpaces(rawBuf, len);
+ ---
+ >         // Get the raw data we need for the callback
+ >         const XMLCh* const rawBuf = toSend.getRawBuffer();
+ >         const unsigned int len = toSend.getLen();
+ 869c865
+ <          else if (isSpaces)
+ ---
+ >          else if (XMLReader::isAllSpaces(rawBuf, len))
+ 893c889
+ <                 if (fGrammar->getGrammarType() == Grammar::SchemaGrammarType)
+ ---
+ >                 if (fDocHandler)
+ 895,899c891,900
+ <                     // normalize the character according to schema whitespace 
+ facet
+ <                     DatatypeValidator* tempDV = ((SchemaElementDecl*) topElem-
+ >fThisElement)->getDatatypeValidator();
+ <                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace
+ (tempDV, rawBuf, toFill);
+ <                     rawBuf = toFill.getRawBuffer();
+ <                 }
+ ---
+ >                     if (fGrammar->getGrammarType() != 
+ Grammar::SchemaGrammarType)
+ >                     {
+ >                         fDocHandler->docCharacters(rawBuf, len, false);
+ >                     }
+ >                     else
+ >                     {
+ >                         // The normalized data can only be as large as the 
+ >                         // original size, so this will avoid allocating way
+ >                         // too much or too little memory.
+ >                         XMLBuffer toFill(toSend.getLen());
+ 901,902c902,908
+ <                 if (fDocHandler)
+ <                     fDocHandler->docCharacters(rawBuf, len, false);
+ ---
+ >                         // normalize the character according to schema 
+ whitespace facet
+ >                         DatatypeValidator* tempDV = ((SchemaElementDecl*) 
+ topElem->fThisElement)->getDatatypeValidator();
+ >                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace
+ (tempDV, rawBuf, toFill);
+ > 
+ >                         fDocHandler->docCharacters(toFill.getRawBuffer(), 
+ toFill.getLen(), false);
+ >                     }
+ >                 }
+ 914c920
+ <             fDocHandler->docCharacters(rawBuf, len, false);
+ ---
+ >             fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen
+ (), false);
+ 
+ *****CVS exited normally with code 1*****
+ 
+ 
+ cvs diff XMLBuffer.hpp (in directory V:\xml-xerces\c\src\framework\)
+ Index: XMLBuffer.hpp
+ ===================================================================
+ RCS file: /home/cvs/xml-xerces/c/src/framework/XMLBuffer.hpp,v
+ retrieving revision 1.5
+ diff -r1.5 XMLBuffer.hpp
+ 106c106
+ <     XMLBuffer() :
+ ---
+ >     XMLBuffer(int capacity = 1023) :
+ 110c110
+ <         , fCapacity(1023)
+ ---
+ >         , fCapacity(capacity)
+ 
+ *****CVS exited normally with code 1*****
\ No newline at end of file

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to