That was it - and that makes complete sense. I forgot about by value vs. by reference - doh! Thanks for helping me out with that - much appreciated!
-C On Wed, Mar 26, 2014 at 10:17 AM, Alberto Massari <albertomass...@tiscali.it > wrote: > The str() method of the std::stringstream class returns an object by > value, so the MemBufInputSource is looking at deleted memory, and the > parser will read garbage. > Try changing the code to do > > > //create an input source for the XML parser > std::string buff = sbuffer.str(); > xercesc::MemBufInputSource XMLBuf((const XMLByte*)(buff.c_str()), > buff.size(), "InputXML"); > > Alberto > > Il 26/03/14 16:56, tenspd137 . ha scritto: > > Hi all - >> >> I have a situation where I have to process bad XML - i.e., like the >> following: >> (saved as test_metadata.xml) >> >> <?xml version="1.0" encoding="utf-8"?> >> <MyTest> >> <Version>4354</Version> >> </MyTest> >> <RemoveMe> >> <?xml version="1.0" encoding="utf-8"?> >> <Metadata> >> <Version>4356</Version> >> <a>4928</a> >> <b>6400</b> >> </Metadata> >> </RemoveMe> >> >> it is like 2 XML files in one. What I am doing is reading the file into a >> stringstream buffer, taking only the contents between the remove me tag, >> and treating it as my memory buffer to be parsed. I keep getting invalid >> multibyte sequence errors. I am running a UTF8 enabled system, and am >> pretty sure that I can save UTF8 files. I have tried the second file I >> made with emacs and printed it with DOMPrint: >> >> saved as test2_metadata.xml >> >> <?xml version="1.0" encoding="utf-8"?> >> <MyTest> >> <Version>4354</Version> >> <Id>1</Id> >> </MyTest> >> >> but my own code still gives me an invalid multibyte sequence in the first >> line. 
>> >> My code is: >> >> #include <fstream> >> #include <string> >> #include <sstream> >> #include <iostream> >> >> //stuff to parse XML >> #include <xercesc/parsers/XercesDOMParser.hpp> >> #include <xercesc/dom/DOM.hpp> >> #include <xercesc/sax/HandlerBase.hpp> >> #include <xercesc/framework/MemBufInputSource.hpp> >> #include <xercesc/util/XMLString.hpp> >> #include <xercesc/util/PlatformUtils.hpp> >> #include <xercesc/util/XercesDefs.hpp> >> >> class XmlDomErrorHandler : public xercesc::HandlerBase >> { >> public: >> void fatalError(const xercesc::SAXParseException &exc) { >> printf("Fatal parsing error at line %d, col %d\n", >> (int)exc.getLineNumber(), (int)exc.getColumnNumber()); >> char* msg = xercesc::XMLString::transcode( exc.getMessage() ); >> printf("%s\n", msg); >> xercesc::XMLString::release( &msg ); >> exit(-1); >> } >> }; >> >> int main(int argc, char* argv[]) >> { >> std::ifstream metadata(argv[1]); >> std::string line; >> std::string startLine("<RemoveMe>\n"); >> std::string stopLine("</RemoveMe>\n"); >> std::stringstream sbuffer; >> xercesc::XercesDOMParser* parser; >> xercesc::ErrorHandler* errorHandler; >> >> try { xercesc::XMLPlatformUtils::Initialize(); } >> catch (const xercesc::XMLException& toCatch) >> { >> char* message = xercesc::XMLString::transcode(toCatch.getMessage()); >> std::cout << "Error during Xerces initalization! :" << std::endl << >> message << std::endl; >> xercesc::XMLString::release(&message); >> return 1; >> } >> >> { >> std::cout << "Init good..." 
<< std::endl; >> >> parser = new xercesc::XercesDOMParser(); >> errorHandler = (xercesc::ErrorHandler*) new XmlDomErrorHandler(); >> parser->setErrorHandler(errorHandler); >> //int count = 0; >> //while( line != stopLine ) >> //{ >> // std::getline(metadata, line); >> //} >> >> >> //read the reast of the file into a buffer >> >> sbuffer << metadata.rdbuf(); >> >> metadata.close(); >> >> std::cout << "BEGIN BUFFER DUMP" << std::endl; >> >> std::cout << sbuffer.str() << std::endl; >> >> std::size_t start = sbuffer.str().find(startLine) + >> startLine.length(); >> std::size_t stop = sbuffer.str().find(stopLine); >> std::size_t length = stop-start; >> >> >> if (start != std::string::npos && stop != std::string::npos) >> sbuffer.str(sbuffer.str().substr(start,length)); >> >> std::cout << "Start:" << start << std::endl << "Stop:" << stop << >> std::endl << "Length:" << length << std::endl; >> std::cout << "Second Buffer Dump" << std::endl; >> std::cout << sbuffer.str() << "END" << std::endl; >> >> //create an input source for the XML parser >> xercesc::MemBufInputSource XMLBuf((const >> XMLByte*)(sbuffer.str().c_str()), sbuffer.str().size(), "InputXML"); >> >> >> //XMLBuf.setEncoding(xercesc::XMLString::transcode("LATIN1")); >> >> parser->parse(XMLBuf); >> >> std::cout << "Shouldn't make it here..." 
<< std::endl; >> xercesc::DOMElement* docRootNode; >> xercesc::DOMDocument* doc; >> xercesc::DOMNodeIterator* walker; >> >> doc = parser->getDocument(); >> docRootNode = doc->getDocumentElement(); >> >> walker = >> doc->createNodeIterator(docRootNode,xercesc::DOMNodeFilter::SHOW_ELEMENT, >> NULL,true); >> >> //Walk the XML >> xercesc::DOMNode * CurrentNode = NULL; >> std::string thisNodeName; >> std::string parentNodeName; >> >> for (CurrentNode = walker->nextNode(); CurrentNode !=0; CurrentNode = >> walker->nextNode()) >> { >> thisNodeName = >> xercesc::XMLString::transcode(CurrentNode->getNodeName()); >> parentNodeName = >> xercesc::XMLString::transcode(CurrentNode->getParentNode()-> >> getNodeName()); >> >> std::cout << thisNodeName << std::endl; >> std::cout << parentNodeName << std::endl; >> } >> } >> >> xercesc::XMLPlatformUtils::Terminate(); >> >> return 0; >> >> } >> >> compiled with: >> >> g++ test.cpp -o test `pkg-config xerces-c --cflags --libs` >> >> Just run it with ./test <file> >> >> Can anyone help me figure out what I am doing wrong? I know that >> processing text in C++ can be tricky, and I am probably missing something >> stupid, but it seems like this should be do-able. >> >> Thanks for any help in advance. >> >> -C >> >> >