Hi All,

I am using DOMBuilder to parse a 10 MB file and it is terribly slow. Could
anybody suggest how to improve performance? James Clark's SP parser used to
take no more than 30 seconds to parse this file. DOMBuilder takes around 20
minutes to parse the same file if I override the startElement function as
shown below.

Normally DOMBuilder is also efficient, taking just 30 seconds if I do not
override the startElement function. I am wondering why overriding
startElement adds another 19 and a half minutes of parsing time, when the
override only stores node location information in the node user data.

Am I doing something wrong in the overridden startElement function below?

class EsDOMBuilderImpl : public DOMBuilderImpl 

void EsDOMBuilderImpl::startElement
    (
        const   XMLElementDecl&         elemDecl
        , const unsigned int            urlId
        , const XMLCh* const            elemPrefix
        , const RefVectorOf<XMLAttr>&   attrList
        , const unsigned int            attrCount
        , const bool                    isEmpty
        , const bool                    isRoot
    )
{
        const XMLCh* const                systemId = 0;
        
AbstractDOMParser::startElement(elemDecl,urlId,elemPrefix,attrList,attrCount
,isEmpty,isRoot);
        XMLScanner* sc = this->getScanner();
        const Locator* loc = sc->getLocator();

        int lineNum = loc->getLineNumber();
        int colNum = loc->getColumnNumber();
        int offset = sc->getSrcOffset();
        DOMNode* node = this->getCurrentNode();
        DOMLocator* location = new  DOMLocatorImpl(lineNum, colNum,
getCurrentNode(), systemId);
        location->setOffset(offset);
        
        node->setUserData(locXmlStr_,location,0);

        DOMLocator* vlocator = (DOMLocator*)node->getUserData(locXmlStr_);
        return;
}

I create the parser in the following way:
/**
 * Returns the cached DOMBuilder, creating and configuring it on first use.
 *
 * On the first call this registers the custom DOM implementation source,
 * looks up an implementation supporting the "LS" feature, creates a
 * synchronous builder from it and sets the parser features below.
 *
 * @return the cached parser, or NULL if no "LS" implementation was found.
 */
DOMBuilder* XmlDocumentTree::parserInstance()
{
    if (parserInstance_ != NULL)
        return parserInstance_;

    static const XMLCh gLS[] = { chLatin_L, chLatin_S, chNull };

    // The C-style casts are kept from the original: the exact class
    // relationships are declared outside this snippet.
    DOMImplementationRegistry::addSource(
        (DOMImplementationSource*)EsDOMImplementationImpl::getDOMImplementationImpl());

    DOMImplementation* impl =
        DOMImplementationRegistry::getDOMImplementation(gLS);
    if (impl == NULL)
        return NULL;    // nothing to build from; the original dereferenced NULL here

    // it returns EsDOMBuilderImpl class pointer.
    parserInstance_ = ((DOMImplementationLS*)impl)->createDOMBuilder(
        DOMImplementationLS::MODE_SYNCHRONOUS, 0);

    // 'impl' is intentionally NOT deleted. It was obtained from the registry
    // that the implementation was registered with just above, so destroying
    // it here would leave the registry holding a dangling pointer.
    // NOTE(review): presumably 'impl' is the same singleton returned by
    // getDOMImplementationImpl() -- confirm its ownership.

    parserInstance_->setFeature(XMLUni::fgDOMValidation, true);
    parserInstance_->setFeature(XMLUni::fgDOMNamespaces, false);
    //parserInstance_->setFeature(X("namespaces"), true);

    parserInstance_->setFeature(XMLUni::fgDOMEntities, false);
    // startElement reads the scanner's source offset, so offset calculation
    // is switched on.
    parserInstance_->setFeature(XMLUni::fgXercesCalculateSrcOfs, true);

    ((EsDOMBuilderImpl*)parserInstance_)->setIncludeIgnorableWhitespace(false);

    return parserInstance_;
}

I use it in the following way:

                // Initialize the XML platform and the application before the
                // parser is requested.
                XMLPlatformUtils::Initialize();
                app->init();
                parser = parserInstance();

                // Install the error handler on the parser.
                errHandler_ = new EsDOMErrorHandler(warn);
                parserInstance_->setErrorHandler(errHandler_);

                // Install the entity resolver, with 'env' added to its
                // search path.
                entHandler_ = new EsXml4cDomEntityResolver();
                entHandler_->addDirToPath(env);
                parserInstance_->setEntityResolver(entHandler_);

                DOMDocument* doc = parser->parseURI(fname.c_str());
                // Guard against a failed parse: without a document there is
                // no root element to fetch.
                node = (doc != 0) ? doc->getDocumentElement() : 0;

Thanks & Regards
Vikas Agrawal

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to