That was it - and that makes complete sense.  I forgot about by value vs.
by reference - doh!  Thanks for helping me out with that - much appreciated!

-C


On Wed, Mar 26, 2014 at 10:17 AM, Alberto Massari <albertomass...@tiscali.it
> wrote:

> The str() method of the std::stringstream class returns a std::string by
> value, i.e. a temporary that is destroyed at the end of the statement, so
> the MemBufInputSource is left pointing at freed memory, and the parser
> will read garbage.
> Try changing the code to do
>
>
>     //create an input source for the XML parser
>     std::string buff = sbuffer.str();
>     xercesc::MemBufInputSource XMLBuf((const XMLByte*)(buff.c_str()),
> buff.size(), "InputXML");
>
> Alberto
>
> Il 26/03/14 16:56, tenspd137 . ha scritto:
>
>  Hi all -
>>
>> I have a situation where I have to process bad XML - i.e. like the
>> following:
>>   (saved as test_metadata.xml)
>>
>> <?xml version="1.0" encoding="utf-8"?>
>> <MyTest>
>> <Version>4354</Version>
>> </MyTest>
>> <RemoveMe>
>> <?xml version="1.0" encoding="utf-8"?>
>> <Metadata>
>> <Version>4356</Version>
>> <a>4928</a>
>> <b>6400</b>
>> </Metadata>
>> </RemoveMe>
>>
>> It is like 2 XML files in one.  What I am doing is reading the file into a
>> stringstream buffer, taking only the contents between the RemoveMe tags,
>> and treating it as my memory buffer to be parsed.  I keep getting invalid
>> multibyte sequence errors.  I am running a UTF8 enabled system, and am
>> pretty sure that I can save UTF8 files.  I have tried the second file I
>> made with emacs and printed it with DOMPrint:
>>
>> saved as test2_metadata.xml
>>
>> <?xml version="1.0" encoding="utf-8"?>
>> <MyTest>
>> <Version>4354</Version>
>> <Id>1</Id>
>> </MyTest>
>>
>> but my own code still gives me an invalid multibyte sequence in the first
>> line.
>>
>> My code is:
>>
>> #include <fstream>
>> #include <string>
>> #include <sstream>
>> #include <iostream>
>>
>> //stuff to parse XML
>> #include <xercesc/parsers/XercesDOMParser.hpp>
>> #include <xercesc/dom/DOM.hpp>
>> #include <xercesc/sax/HandlerBase.hpp>
>> #include <xercesc/framework/MemBufInputSource.hpp>
>> #include <xercesc/util/XMLString.hpp>
>> #include <xercesc/util/PlatformUtils.hpp>
>> #include <xercesc/util/XercesDefs.hpp>
>>
>> class XmlDomErrorHandler : public xercesc::HandlerBase
>> {
>>    public:
>>      void fatalError(const xercesc::SAXParseException &exc) {
>>          printf("Fatal parsing error at line %d, col %d\n",
>> (int)exc.getLineNumber(), (int)exc.getColumnNumber());
>>          char* msg = xercesc::XMLString::transcode( exc.getMessage() );
>>      printf("%s\n", msg);
>>          xercesc::XMLString::release( &msg );
>>      exit(-1);
>>      }
>> };
>>
>> int main(int argc, char* argv[])
>> {
>>      std::ifstream metadata(argv[1]);
>>      std::string line;
>>      std::string startLine("<RemoveMe>\n");
>>      std::string stopLine("</RemoveMe>\n");
>>      std::stringstream sbuffer;
>>      xercesc::XercesDOMParser* parser;
>>      xercesc::ErrorHandler* errorHandler;
>>
>>      try { xercesc::XMLPlatformUtils::Initialize(); }
>>      catch (const xercesc::XMLException& toCatch)
>>      {
>>      char* message = xercesc::XMLString::transcode(toCatch.getMessage());
>>      std::cout << "Error during Xerces initalization! :" << std::endl <<
>> message << std::endl;
>>      xercesc::XMLString::release(&message);
>>      return 1;
>>      }
>>
>>      {
>>      std::cout << "Init good..." << std::endl;
>>
>>      parser = new xercesc::XercesDOMParser();
>>      errorHandler = (xercesc::ErrorHandler*) new XmlDomErrorHandler();
>>      parser->setErrorHandler(errorHandler);
>>      //int count = 0;
>>      //while( line != stopLine )
>>      //{
>>      //    std::getline(metadata, line);
>>      //}
>>
>>
>>      //read the rest of the file into a buffer
>>
>>      sbuffer << metadata.rdbuf();
>>
>>      metadata.close();
>>
>>      std::cout << "BEGIN BUFFER DUMP" << std::endl;
>>
>>      std::cout << sbuffer.str() << std::endl;
>>
>>      std::size_t start = sbuffer.str().find(startLine) +
>> startLine.length();
>>      std::size_t stop  = sbuffer.str().find(stopLine);
>>      std::size_t length = stop-start;
>>
>>
>>      if (start != std::string::npos && stop != std::string::npos)
>>          sbuffer.str(sbuffer.str().substr(start,length));
>>
>>      std::cout << "Start:" << start << std::endl << "Stop:" << stop <<
>> std::endl << "Length:" << length << std::endl;
>>      std::cout << "Second Buffer Dump" << std::endl;
>>      std::cout << sbuffer.str() << "END" << std::endl;
>>
>>      //create an input source for the XML parser
>>      xercesc::MemBufInputSource XMLBuf((const
>> XMLByte*)(sbuffer.str().c_str()), sbuffer.str().size(), "InputXML");
>>
>>
>>      //XMLBuf.setEncoding(xercesc::XMLString::transcode("LATIN1"));
>>
>>      parser->parse(XMLBuf);
>>
>>      std::cout << "Shouldn't make it here..." << std::endl;
>>      xercesc::DOMElement* docRootNode;
>>      xercesc::DOMDocument* doc;
>>      xercesc::DOMNodeIterator* walker;
>>
>>      doc = parser->getDocument();
>>      docRootNode = doc->getDocumentElement();
>>
>>      walker =
>> doc->createNodeIterator(docRootNode,xercesc::DOMNodeFilter::SHOW_ELEMENT,
>> NULL,true);
>>
>>      //Walk the XML
>>      xercesc::DOMNode * CurrentNode = NULL;
>>      std::string thisNodeName;
>>      std::string parentNodeName;
>>
>>      for (CurrentNode = walker->nextNode(); CurrentNode !=0; CurrentNode =
>> walker->nextNode())
>>      {
>>          thisNodeName =
>> xercesc::XMLString::transcode(CurrentNode->getNodeName());
>>          parentNodeName =
>> xercesc::XMLString::transcode(CurrentNode->getParentNode()->
>> getNodeName());
>>
>>          std::cout << thisNodeName << std::endl;
>>          std::cout << parentNodeName << std::endl;
>>      }
>>      }
>>
>>      xercesc::XMLPlatformUtils::Terminate();
>>
>>      return 0;
>>
>> }
>>
>> compiled with:
>>
>> g++ test.cpp -o test `pkg-config xerces-c --cflags --libs`
>>
>> Just run it with ./test <file>
>>
>> Can anyone help me figure out what I am doing wrong?  I know that
>> processing text in C++ can be tricky, and I am probably missing something
>> stupid, but it seems like this should be do-able.
>>
>> Thanks for any help in advance.
>>
>> -C
>>
>>
>

Reply via email to