/* This function loads an XML file and divides it into smaller XML documents
of 4 KB each, processing them one at a time so that memory consumption does
not grow. However, when I parsed an XML file of 380 MB it consumed all the
memory and ran out: I got a message in the notification area saying that I
had run out of virtual memory. Please look at the code. I know it is quite
long, but please help me out with this; I don't know what to release().
None of the example programs use the DOMElement/DOMNodeIterator/parser
objects in a loop. */
void load()
throw( std::runtime_error )
{
std::cout<<"in load"<<std::endl;
DWORD dw;
DWORD dwErrorCount=0;
LPDWORD lpFileSize;
HANDLE hSourceFile;
HANDLE hDestFile;
HANDLE hTempFile;
DWORD dwBytesRead=0, dwBytesWritten=0, dwBufSize=BUFSIZE;
char szDestFile[MAX_PATH];
const char *szSourceFile=InXMLFile_.c_str();
char szTempName[MAX_PATH];
char lpPathBuffer[BUFSIZE];
char buffer[BUFSIZE];
char lbuffer[BUFSIZE];
char lEndLineBuffer[BUFSIZE];
int i=0,j=0,k=0;
int found=0;
int EventType;
char szLineEndings[3];
szLineEndings[0]=13;
szLineEndings[1]=10;
szLineEndings[2]=0;
char szStartLine[42]="<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
strcat_s(szStartLine,sizeof(szStartLine),szLineEndings);
char szEventlistTag[14]="<eventlist>";
strcat_s(szEventlistTag,sizeof(szEventlistTag),szLineEndings);
char szEventlistEndTag[]="</eventlist>";
xercesc::DOMDocumentType* outXmlDoctype = NULL;
xercesc::DOMDocument* outXmlDoc = NULL;
xercesc::DOMNode *pNode = NULL;
xercesc::DOMImplementation *pImplement = NULL;
// these two are needed to display DOM output.
xercesc::DOMWriter *pSerializer = NULL;
xercesc::XMLFormatTarget *pTarget = new
LocalFileFormatTarget(dsOutXMLFile_.asXMLString());;
// get a serializer, an instance of DOMWriter (the "LS" stands for
load-save).
pImplement =
DOMImplementationRegistry::getDOMImplementation(XercesString("LS"));
pSerializer = ( (DOMImplementationLS*)pImplement )->createDOMWriter();
// set user specified end of line sequence and output encoding
pSerializer->setNewLine( XercesString("\n") );
// set feature if the serializer supports the feature/mode
if ( pSerializer->canSetFeature(XMLUni::fgDOMWRTSplitCdataSections, false)
)
pSerializer->setFeature(XMLUni::fgDOMWRTSplitCdataSections, false);
if ( pSerializer->canSetFeature(XMLUni::fgDOMWRTDiscardDefaultContent,
false) )
pSerializer->setFeature(XMLUni::fgDOMWRTDiscardDefaultContent, false);
if ( pSerializer->canSetFeature(XMLUni::fgDOMWRTFormatPrettyPrint, false)
)
pSerializer->setFeature(XMLUni::fgDOMWRTFormatPrettyPrint, false);
if ( pSerializer->canSetFeature(XMLUni::fgDOMWRTBOM, false) )
pSerializer->setFeature(XMLUni::fgDOMWRTBOM, false);
// create a document
outXmlDoctype = pImplement->createDocumentType(XercesString("LS"
),NULL,NULL);
//creates a root tag called <Product>
outXmlDoc = pImplement->createDocument( NULL, XercesString("Product"),NULL
);
if (outXmlDoc)
{
// output related nodes are prefixed with "svg"
// to distinguish them from input nodes.
outXmlDoc->setEncoding( XercesString("UTF-8") );
outXmlDoc->setStandalone(true);
outXmlDoc->setVersion( XercesString("1.0") );
DOMElement *pRoot = outXmlDoc->getDocumentElement();
DOMAttr* attrName = outXmlDoc->createAttribute( XercesString("name")
);
attrName->setValue( XercesString("Name") );
pRoot->setAttributeNode(attrName);
DOMAttr* attrVersion= outXmlDoc->createAttribute( XercesString("version")
);
attrVersion->setValue( XercesString("Version") );
pRoot->setAttributeNode(attrVersion);
// Create a root element
DOMElement* pFiles = outXmlDoc->createElement(XercesString("Files") );
pRoot->appendChild(pFiles);
DOMElement* pAddedFiles=outXmlDoc->createElement(XercesString("Added"));
pFiles->appendChild(pAddedFiles);
DOMElement*
pUpdatedFiles=outXmlDoc->createElement(XercesString("Updated"));
pFiles->appendChild(pUpdatedFiles);
DOMElement*
pDeletedFiles=outXmlDoc->createElement(XercesString("Deleted"));
pFiles->appendChild(pDeletedFiles);
DOMElement* pDirs = outXmlDoc->createElement( XercesString("Dirs") );
pRoot->appendChild(pDirs);
DOMElement* pAddedDirs = outXmlDoc->createElement( XercesString("Added")
);
pDirs->appendChild(pAddedDirs);
DOMElement* pUpdatedDirs = outXmlDoc->createElement(
XercesString("Updated") );
pDirs->appendChild(pUpdatedDirs);
DOMElement* pDeletedDirs = outXmlDoc->createElement(
XercesString("Deleted") );
pDirs->appendChild(pDeletedDirs);
DOMElement* pRegKey = outXmlDoc->createElement( XercesString("RegistryKey")
);
pRoot->appendChild(pRegKey);
DOMElement* pAddedRegKey = outXmlDoc->createElement(
XercesString("Added") );
pRegKey->appendChild(pAddedRegKey);
DOMElement* pUpdatedRegKey = outXmlDoc->createElement(
XercesString("Updated") );
pRegKey->appendChild(pUpdatedRegKey);
DOMElement* pDeletedRegKey = outXmlDoc->createElement(
XercesString("Deleted") );
pRegKey->appendChild(pDeletedRegKey);
DOMElement* pRegValue = outXmlDoc->createElement(
XercesString("RegistryValue") );
pRoot->appendChild(pRegValue);
DOMElement* pAddedRegValue = outXmlDoc->createElement(
XercesString("Added") );
pRegValue->appendChild(pAddedRegValue);
DOMElement* pUpdatedRegValue = outXmlDoc->createElement(
XercesString("Updated") );
pRegValue->appendChild(pUpdatedRegValue);
DOMElement* pDeletedRegValue = outXmlDoc->createElement(
XercesString("Deleted") );
pRegValue->appendChild(pDeletedRegValue);
DOMElement* pServices = outXmlDoc->createElement( XercesString("Services")
);
pRoot->appendChild(pServices);
DOMElement* pAddedServices = outXmlDoc->createElement(
XercesString("Added") );
pServices->appendChild(pAddedServices);
DOMElement* pUpdatedServices = outXmlDoc->createElement(
XercesString("Updated") );
pServices->appendChild(pUpdatedServices);
DOMElement* pDeletedServices = outXmlDoc->createElement(
XercesString("Deleted") );
pServices->appendChild(pDeletedServices);
parser_.setValidationScheme( xercesc::XercesDOMParser::Val_Never ) ;
parser_.setDoNamespaces( false ) ;
parser_.setDoSchema( false ) ;
parser_.setLoadExternalDTD( false ) ;
hSourceFile = CreateFile(szSourceFile, // file name
GENERIC_READ, // open for reading
FILE_SHARE_READ, // Enables subsequent open
operations on an object to request read access
NULL, // default security
OPEN_EXISTING, // existing file only
FILE_ATTRIBUTE_NORMAL, // normal file
NULL); // no template
if (hSourceFile == INVALID_HANDLE_VALUE)
{
std::cout<<"Error: "<<GetLastError()<<std::endl<<"Could not open source
File"<<std::endl;
return;
}
GetTempPath(dwBufSize, // length of the buffer
lpPathBuffer); // buffer for path
// Create a temporary file.
GetTempFileName(lpPathBuffer, // directory for temp files
"NEW", // temp file name prefix
0, // create unique name
szTempName); // buffer for name
XercesString src(szTempName);
std::cout<<"szTempName: "<<szTempName<<std::endl;
do
{
i=0;
ReadFile(hSourceFile, buffer, BUFSIZE,&dwBytesRead, NULL);
found=0;
do
{
k=GetLine(buffer+i, lbuffer,&i,dwBytesRead);
i+=k;
if(i>dwBytesRead)
{
SetFilePointer(hSourceFile,-k,NULL,FILE_CURRENT);
break;
}
if(strstr(lbuffer,TEXT("<eventlist>")))
{
found=1;
}
}while((i<dwBytesRead)&&(found==0));
}while((dwBytesRead==BUFSIZE)&&(found==0));
SetFilePointer(hSourceFile,-(BUFSIZE-i),NULL,FILE_CURRENT);
do{
{
LocalFileInputSource source(src);
hTempFile = CreateFile((LPTSTR) szTempName, // file name
GENERIC_READ | GENERIC_WRITE, // open for read/write
FILE_SHARE_READ, // Enables subsequent open operations on
an object to request read access
NULL, // default security
CREATE_ALWAYS, // overwrite existing file
FILE_ATTRIBUTE_NORMAL, // normal file
NULL); // no template
if (hTempFile == INVALID_HANDLE_VALUE)
{
std::cout<<"Error: "<<GetLastError()<<std::endl<<"Could not open temp
File"<<std::endl;
return;
}
WriteFile(hTempFile,szStartLine,sizeof(szStartLine)-2,&dwBytesWritten,NULL);
WriteFile(hTempFile,szEventlistTag,sizeof(szEventlistTag)-1,&dwBytesWritten,NULL);
i=0;
ReadFile(hSourceFile, buffer, BUFSIZE,&dwBytesRead, NULL);
do
{
j=0;
k=GetCurEvent(buffer+i,lbuffer,&j,&i,&dwBytesRead);
i+=k;
if((i>dwBytesRead)||(j==1))
{
k=dwBytesRead-(i-k);
SetFilePointer(hSourceFile,-k,NULL,FILE_CURRENT);
break;
}
if(j==-1)
{
break;
}
WriteFile(hTempFile,lbuffer,k,&dwBytesWritten,NULL);
}while(i<BUFSIZE);
WriteFile(hTempFile,szEventlistEndTag,sizeof(szEventlistEndTag)-1,&dwBytesWritten,NULL);
CloseHandle(hTempFile);
//Event Handle functions here...
//
try{
parser_.parse(source);
dwErrorCount+=parser_.getErrorCount();
//std::cout<<"parser_getErrorCount()
"<<parser_.getErrorCount()<<std::endl;
if(!(parser_.getErrorCount()))
{
xercesc::DOMDocument* xmlDoc = parser_.getDocument() ;
xercesc::DOMElement* elementConfig = xmlDoc->getDocumentElement() ;
if( NULL == elementConfig ){
throw( std::runtime_error( "empty XML document" ) ) ;
}
xercesc::DOMNodeList* children = elementConfig->getElementsByTagName
(tags_.TAG_event.asXMLString());
const XMLSize_t nodeCount = children->getLength() ;
for( XMLSize_t ix = 0 ; ix < nodeCount ; ++ix ){
xercesc::DOMNode* currentNode = children->item( ix ) ;
if( NULL == currentNode ){
// null node...
continue ;
}
if( xercesc::DOMNode::ELEMENT_NODE != currentNode->getNodeType() ){
// not an element node -> not of interest here
continue ;
}
xercesc::DOMElement* currentElement = (xercesc::DOMElement*)(
currentNode ) ;
EventType=handleEvent( currentElement ) ;
switch(EventType)
{
case 10:
{
DOMNodeIterator* iterator =
outXmlDoc->createNodeIterator(pAddedFiles, DOMNodeFilter::SHOW_TEXT, NULL,
true);
DOMNode* pCurrent = NULL;
// use the tree walker to print out the text nodes.
for ( pCurrent = iterator->nextNode();pCurrent != 0; pCurrent =
iterator->nextNode())
{
char *strValue = XMLString::transcode( pCurrent->getNodeValue() );
if(!strcmp(strValue,Path_.c_str()))
{
EventType=0;
}
//XMLString::release(&strValue);
}
if(EventType)
{
DOMElement* pPath = outXmlDoc->createElement( XercesString("Path")
);
pAddedFiles->appendChild(pPath);
DOMText* pPathnode=outXmlDoc->createTextNode(
XercesString(Path_.c_str()) );
pPath->appendChild(pPathnode);
}
iterator->release();
break;
}//case 10;
case 11:
{
DOMNodeIterator* iterator=
outXmlDoc->createNodeIterator(pAddedFiles, DOMNodeFilter::SHOW_TEXT, NULL,
true);
DOMNodeIterator* uiterator =
outXmlDoc->createNodeIterator(pUpdatedFiles, DOMNodeFilter::SHOW_TEXT, NULL,
true);
DOMNode* pCurrent = NULL;
// use the tree walker to print out the text nodes.
for ( pCurrent = iterator->nextNode();pCurrent != 0; pCurrent =
iterator->nextNode())
{
char *strValue = XMLString::transcode( pCurrent->getNodeValue() );
if(!strcmp(strValue,Path_.c_str()))
{
EventType=0;
}
//XMLString::release(&strValue);
}
if(EventType){
for ( pCurrent = uiterator->nextNode();pCurrent != 0; pCurrent =
uiterator->nextNode())
{
char *strValue = XMLString::transcode( pCurrent->getNodeValue() );
if(!strcmp(strValue,Path_.c_str()))
{
EventType=0;
}
//XMLString::release(&strValue);
}}
if(EventType)
{
//XMLString::release(&strValue);
DOMElement* pPath = outXmlDoc->createElement( XercesString("Path")
);
pUpdatedFiles->appendChild(pPath);
DOMText* pPathnode=outXmlDoc->createTextNode(
XercesString(Path_.c_str()) );
pPath->appendChild(pPathnode);
}
uiterator->release();
iterator->release();
break;
}// case(11)...
case(12):
{
DOMElement* pPath = outXmlDoc->createElement( XercesString("Path")
);
pDeletedFiles->appendChild(pPath);
DOMText* pPathnode=outXmlDoc->createTextNode(
XercesString(Path_.c_str()) );
pPath->appendChild(pPathnode);
break;
}
case (20):
{
DOMNodeIterator* iterator =
outXmlDoc->createNodeIterator(pAddedDirs, DOMNodeFilter::SHOW_TEXT, NULL,
true);
DOMNode* pCurrent = NULL;
// use the tree walker to print out the text nodes.
for ( pCurrent = iterator->nextNode();pCurrent != 0; pCurrent =
iterator->nextNode())
{
char *strValue = XMLString::transcode( pCurrent->getNodeValue() );
if(!strcmp(strValue,Path_.c_str()))
{
EventType=0;
}
//XMLString::release(&strValue);
}
if(EventType)
{
DOMElement* pPath = outXmlDoc->createElement( XercesString("Path")
);
pAddedDirs->appendChild(pPath);
DOMText* pPathnode=outXmlDoc->createTextNode(
XercesString(Path_.c_str()) );
pPath->appendChild(pPathnode);
}
iterator->release();
break;
}
case (21):
{
break;
}
case (22):
{
DOMElement* pPath = outXmlDoc->createElement( XercesString("Path")
);
pDeletedDirs->appendChild(pPath);
DOMText* pPathnode=outXmlDoc->createTextNode(
XercesString(Path_.c_str()) );
pPath->appendChild(pPathnode);
break;
}
case (30):
{
DOMNodeIterator* iterator =
outXmlDoc->createNodeIterator(pAddedRegKey, DOMNodeFilter::SHOW_TEXT, NULL,
true);
DOMNode* pCurrent = NULL;
// use the tree walker to print out the text nodes.
for ( pCurrent = iterator->nextNode();pCurrent != 0; pCurrent =
iterator->nextNode())
{
char *strValue = XMLString::transcode( pCurrent->getNodeValue() );
if(!strcmp(strValue,Path_.c_str()))
{
EventType=0;
}
//XMLString::release(&strValue);
}
if(EventType)
{
DOMElement* pPath = outXmlDoc->createElement( XercesString("Path")
);
pAddedRegKey->appendChild(pPath);
DOMText* pPathnode=outXmlDoc->createTextNode(
XercesString(Path_.c_str()) );
pPath->appendChild(pPathnode);
}
iterator->release();
break;
}
}//end of switch
}//for loop..
xmlDoc->release();
//std::cin>>dwBytesWritten;
}
}
catch( xercesc::XMLException& e ){
// believe it or not, XMLException is not
// a parent class of DOMException
char* message = xercesc::XMLString::transcode( e.getMessage() ) ;
std::ostringstream buf ;
buf << "Error parsing file: "
<< message
<< std::flush;
xercesc::XMLString::release( &message ) ;
throw( std::runtime_error( buf.str() ) ) ;
}
catch( const xercesc::DOMException& e ){
char* message = xercesc::XMLString::transcode( e.getMessage() ) ;
std::ostringstream buf ;
buf << "Encountered DOM Exception: "
<< message<< std::flush;
xercesc::XMLString::release( &message ) ;
throw( std::runtime_error( buf.str() ) ) ;
}
if((j==-1))
{
break;
}
}
DOMRange* range = outXmlDoc->createRange();
range->release();
parser_.resetDocument();
}while(dwBytesRead==BUFSIZE);
//Here should be write file...
pSerializer->writeNode(pTarget, *outXmlDoc);
if(dwErrorCount)
{
std::cout<<"Number of errors in file = "<<dwErrorCount<<std::endl<<":-(";
}
else
{
std::cout<<"No Errors found in file ;-)"<<std::endl;
std::cout<<"Number of Files Created= "<<CreateFileCount<<std::endl;
std::cout<<"Number of Files Deleted= "<<DeleteFileCount<<std::endl;
std::cout<<"Number of Folders Created= "<<CreateFolderCount<<std::endl;
}
//std::cin>>dwBytesWritten;
}//if (outXmlFile)...end
delete pTarget;
delete pSerializer;
delete outXmlDoc;
delete outXmlDoctype;
DeleteFile(szTempName);
CloseHandle(hSourceFile);
return ;
} // load()
--
Regards
Sree Harsha Vardhana S.N
"When you want something, all the universe conspires in helping you to
achieve it."