... or you can use Evert's serializer classes (often posted here). Adib.
Keyur Dalal wrote: > No, you can either use the DOMPrint sample provided with the source or write > your own > "serializer". I believe the Java API has a built-in serializer. > > Keyur Dalal > > ----- Original Message ----- > From: "Arun Ramdas" <[EMAIL PROTECTED]> > To: <[EMAIL PROTECTED]> > Sent: Tuesday, February 26, 2002 6:40 PM > Subject: DOM to String Conversion > > > >>Is there any API provided by Xerces-c, to convert the DOM tree into a >> > string > >>buffer? >> >>Thanks >>Arun >> >> >> >>--------------------------------------------------------------------- >>To unsubscribe, e-mail: [EMAIL PROTECTED] >>For additional commands, e-mail: [EMAIL PROTECTED] >> >> > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: [EMAIL PROTECTED] > For additional commands, e-mail: [EMAIL PROTECTED] > >
//--------------------------------------------------------------------------- // // File: DOMSerializer.cpp // Description: Class to serialize a DOM to a stream. Largely taken from // the Xerces-c DOMPrint example. // Works with Xerces-C 1.4 // // Rev: $Id: DOMSerializer.cpp,v 1.1 2001/04/05 13:04:39 evert Exp $ // Created: 2001/03/30 // Author: Evert Haasdijk // mail: [EMAIL PROTECTED] // //--------------------------------------------------------------------------- /* * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * */ // --------------------------------------------------------------------------- // Includes // --------------------------------------------------------------------------- #include "DOMSerializer.hpp" #include <util/PlatformUtils.hpp> #include <util/XMLString.hpp> #include <util/XMLUniDefs.hpp> #include <framework/XMLFormatter.hpp> #include <util/TranscodingException.hpp> #include <dom/DOM_DOMException.hpp> #include <dom/DOM.hpp> #include <dom/DOMString.hpp> #include <string> #include <ostream> #include <stdlib> // --------------------------------------------------------------------------- // DOMSerializerFormatTarget methods // --------------------------------------------------------------------------- DOMSerializerFormatTarget::DOMSerializerFormatTarget(std::ostream& os) : os_(os) {}; DOMSerializerFormatTarget::~DOMSerializerFormatTarget() {}; void DOMSerializerFormatTarget::writeChars(const XMLByte* const toWrite, const unsigned int count, XMLFormatter * const formatter) { // Surprisingly, Solaris was the only platform on which // required the char* cast to print out the string correctly. 
// Without the cast, it was printing the pointer value in hex. // Quite annoying, considering every other platform printed // the string with the explicit cast to char* below. os_.write((char *) toWrite, (int) count); }; std::ostream& DOMSerializerFormatTarget::stream(void) { return os_; }; // --------------------------------------------------------------------------- // Static DOMSerializer members // // Note: This is the 'safe' way to do these strings. If you compiler supports // L"" style strings, and portability is not a concern, you can use // those types constants directly. // --------------------------------------------------------------------------- const XMLCh DOMSerializer::gEndElement[] = // </ { chOpenAngle, chForwardSlash, chNull }; const XMLCh DOMSerializer::gEndPI[] = // ?> { chQuestion, chCloseAngle, chNull }; const XMLCh DOMSerializer::gStartPI[] = // <? { chOpenAngle, chQuestion, chNull }; const XMLCh DOMSerializer::gXMLDecl1[] = // <xml version=" { chOpenAngle, chQuestion, chLatin_x, chLatin_m, chLatin_l , chSpace, chLatin_v, chLatin_e, chLatin_r, chLatin_s, chLatin_i , chLatin_o, chLatin_n, chEqual, chDoubleQuote, chNull }; const XMLCh DOMSerializer::gXMLDecl2[] = // " encoding=" { chDoubleQuote, chSpace, chLatin_e, chLatin_n, chLatin_c , chLatin_o, chLatin_d, chLatin_i, chLatin_n, chLatin_g, chEqual , chDoubleQuote, chNull }; const XMLCh DOMSerializer::gXMLDecl3[] = // " standalone=" { chDoubleQuote, chSpace, chLatin_s, chLatin_t, chLatin_a , chLatin_n, chLatin_d, chLatin_a, chLatin_l, chLatin_o , chLatin_n, chLatin_e, chEqual, chDoubleQuote, chNull }; const XMLCh DOMSerializer::gXMLDecl4[] = // "?>\r\n { chDoubleQuote, chQuestion, chCloseAngle , chCR, chLF, chNull }; const XMLCh DOMSerializer::gStartCDATA[] = // <![CDATA[ { chOpenAngle, chBang, chOpenSquare, chLatin_C, chLatin_D, chLatin_A, chLatin_T, chLatin_A, chOpenSquare, chNull }; const XMLCh DOMSerializer::gEndCDATA[] = // ]]> { chCloseSquare, chCloseSquare, chCloseAngle, chNull }; const 
XMLCh DOMSerializer::gStartComment[] = // <!-- { chOpenAngle, chBang, chDash, chDash, chNull }; const XMLCh DOMSerializer::gEndComment[] = // --> { chDash, chDash, chCloseAngle, chNull }; const XMLCh DOMSerializer::gStartDoctype[] = // <!DOCTYPE { chOpenAngle, chBang, chLatin_D, chLatin_O, chLatin_C, chLatin_T, chLatin_Y, chLatin_P, chLatin_E, chSpace, chNull }; const XMLCh DOMSerializer::gPublic[] = // PUBLIC " { chLatin_P, chLatin_U, chLatin_B, chLatin_L, chLatin_I, chLatin_C, chSpace, chDoubleQuote, chNull }; const XMLCh DOMSerializer::gSystem[] = // SYSTEM " { chLatin_S, chLatin_Y, chLatin_S, chLatin_T, chLatin_E, chLatin_M, chSpace, chDoubleQuote, chNull }; const XMLCh DOMSerializer::gStartEntity[] = // <!ENTITY { chOpenAngle, chBang, chLatin_E, chLatin_N, chLatin_T, chLatin_I, chLatin_T, chLatin_Y, chSpace, chNull }; const XMLCh DOMSerializer::gNotation[] = // NDATA " { chLatin_N, chLatin_D, chLatin_A, chLatin_T, chLatin_A, chSpace, chDoubleQuote, chNull }; const std::string DOMSerializer::defaultEncoding_( "UTF-8" ); // --------------------------------------------------------------------------- // DOMSerializer member functions // --------------------------------------------------------------------------- DOMSerializer::DOMSerializer(const std::string& encoding) { encoding_ = encoding; formatter_ = 0; } void DOMSerializer::serialize(DOMSerializerFormatTarget* target, const DOM_Node& node) { assert(target); // // convert encoding_ to XMLCh[] // DOMString encName = DOMString::transcode( encoding_.c_str() ); formatter_ = new XMLFormatter(encName.rawBuffer(), target, XMLFormatter::NoEscapes, XMLFormatter::UnRep_CharRef); doSerialize(target->stream(), node); delete formatter_; formatter_ = 0; } std::ostream& DOMSerializer::doSerialize( std::ostream& target, const DOM_Node& toWrite) { // Get the name and value out for convenience DOMString nodeName = toWrite.getNodeName(); DOMString nodeValue = toWrite.getNodeValue(); unsigned long lent = nodeValue.length(); 
switch (toWrite.getNodeType()) { case DOM_Node::TEXT_NODE: { formatter_->formatBuf(nodeValue.rawBuffer(), lent, XMLFormatter::CharEscapes); break; } case DOM_Node::PROCESSING_INSTRUCTION_NODE : { *formatter_ << XMLFormatter::NoEscapes << gStartPI << nodeName; if (lent > 0) { *formatter_ << chSpace << nodeValue; } *formatter_ << XMLFormatter::NoEscapes << gEndPI; break; } case DOM_Node::DOCUMENT_NODE : { DOM_Node child = toWrite.getFirstChild(); while( child != 0) { doSerialize(target, child); // target << child << std::endl; child = child.getNextSibling(); } break; } case DOM_Node::ELEMENT_NODE : { // The name has to be representable without any escapes *formatter_ << XMLFormatter::NoEscapes << chOpenAngle << nodeName; // Output the element start tag. // Output any attributes on this element DOM_NamedNodeMap attributes = toWrite.getAttributes(); int attrCount = attributes.getLength(); for (int i = 0; i < attrCount; i++) { DOM_Node attribute = attributes.item(i); // // Again the name has to be completely representable. But the // attribute can have refs and requires the attribute style // escaping. // *formatter_ << XMLFormatter::NoEscapes << chSpace << attribute.getNodeName() << chEqual << chDoubleQuote << XMLFormatter::AttrEscapes << attribute.getNodeValue() << XMLFormatter::NoEscapes << chDoubleQuote; } // // Test for the presence of children, which includes both // text content and nested elements. // DOM_Node child = toWrite.getFirstChild(); if (child != 0) { // There are children. Close start-tag, and output children. // No escapes are legal here *formatter_ << XMLFormatter::NoEscapes << chCloseAngle; while( child != 0) { doSerialize(target, child); // target << child; child = child.getNextSibling(); } // // Done with children. Output the end tag. // *formatter_ << XMLFormatter::NoEscapes << gEndElement << nodeName << chCloseAngle; } else { // // There were no children. Output the short form close of // the element start tag, making it an empty-element tag. 
// *formatter_ << XMLFormatter::NoEscapes << chForwardSlash << chCloseAngle; } break; } case DOM_Node::ENTITY_REFERENCE_NODE: { DOM_Node child; #if 0 for (child = toWrite.getFirstChild(); child != 0; child = child.getNextSibling()) { doSerialize(target, child); // target << child; } #else // // Instead of printing the refernece tree // we'd output the actual text as it appeared in the xml file. // This would be the case when -e option was chosen // *formatter_ << XMLFormatter::NoEscapes << chAmpersand << nodeName << chSemiColon; #endif break; } case DOM_Node::CDATA_SECTION_NODE: { *formatter_ << XMLFormatter::NoEscapes << gStartCDATA << nodeValue << gEndCDATA; break; } case DOM_Node::COMMENT_NODE: { *formatter_ << XMLFormatter::NoEscapes << gStartComment << nodeValue << gEndComment; break; } case DOM_Node::DOCUMENT_TYPE_NODE: { DOM_DocumentType doctype = (DOM_DocumentType &)toWrite;; *formatter_ << XMLFormatter::NoEscapes << gStartDoctype << nodeName; DOMString id = doctype.getPublicId(); if (id != 0) { *formatter_ << XMLFormatter::NoEscapes << chSpace << gPublic << id << chDoubleQuote; id = doctype.getSystemId(); if (id != 0) { *formatter_ << XMLFormatter::NoEscapes << chSpace << chDoubleQuote << id << chDoubleQuote; } } else { id = doctype.getSystemId(); if (id != 0) { *formatter_ << XMLFormatter::NoEscapes << chSpace << gSystem << id << chDoubleQuote; } } id = doctype.getInternalSubset(); if (id !=0) *formatter_ << XMLFormatter::NoEscapes << chOpenSquare << id << chCloseSquare; *formatter_ << XMLFormatter::NoEscapes << chCloseAngle; break; } case DOM_Node::ENTITY_NODE: { *formatter_ << XMLFormatter::NoEscapes << gStartEntity << nodeName; DOMString id = ((DOM_Entity &)toWrite).getPublicId(); if (id != 0) *formatter_ << XMLFormatter::NoEscapes << gPublic << id << chDoubleQuote; id = ((DOM_Entity &)toWrite).getSystemId(); if (id != 0) *formatter_ << XMLFormatter::NoEscapes << gSystem << id << chDoubleQuote; id = ((DOM_Entity &)toWrite).getNotationName(); if (id != 
0) *formatter_ << XMLFormatter::NoEscapes << gNotation << id << chDoubleQuote; *formatter_ << XMLFormatter::NoEscapes << chCloseAngle << chCR << chLF; break; } case DOM_Node::XML_DECL_NODE: { DOMString str; *formatter_ << gXMLDecl1 << ((DOM_XMLDecl &)toWrite).getVersion(); *formatter_ << gXMLDecl2 << formatter_->getEncodingName(); str = ((DOM_XMLDecl &)toWrite).getStandalone(); if (str != 0) *formatter_ << gXMLDecl3 << str; *formatter_ << gXMLDecl4; break; } default: { throw std::runtime_error("Unrecognized node type"); // << (long)toWrite.getNodeType() << endl; } } return target; } // --------------------------------------------------------------------------- // ostream << DOMString // --------------------------------------------------------------------------- std::ostream& operator<< (std::ostream& target, const DOMString& s) { std::auto_ptr<char> p( s.transcode() ); return target << p.get(); } // --------------------------------------------------------------------------- XMLFormatter& operator<< (XMLFormatter& strm, const DOMString& s) { unsigned int lent = s.length(); if (lent == 0) { return strm; } // else std::auto_ptr<XMLCh> buf( new XMLCh[lent + 1] ); XMLString::copyNString(buf.get(), s.rawBuffer(), lent); buf.get()[lent] = 0; return strm << buf.get(); } // --------------------------------------------------------------------------- std::ostream& operator<< (std::ostream& s, const DOM_Node& node) { // // create serializer, use default encoding (UTF-8) // DOMSerializer serializer; // // assign s to format target // std::auto_ptr<DOMSerializerFormatTarget> target ( new DOMSerializerFormatTarget(s) ); // // let the serializer do its thing // serializer.serialize(target.get(), node); return s; }
//--------------------------------------------------------------------------- // // File: DOMSerializer.h // Description: Class to serialize a DOM to a stream. Largely taken from // the Xerces-c DOMPrint example // Works with Xerces-C 1.4 // // Rev: $Id: DOMSerializer.h,v 1.1 2001/04/05 13:04:39 evert Exp $ // Created: 2001/03/30 // Author: Evert Haasdijk // mail: [EMAIL PROTECTED] // //--------------------------------------------------------------------------- #ifndef DOMSerializerH #define DOMSerializerH //--------------------------------------------------------------------------- /* * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * */ // --------------------------------------------------------------------------- // Includes // --------------------------------------------------------------------------- #include <util/PlatformUtils.hpp> #include <util/XMLString.hpp> #include <util/XMLUniDefs.hpp> #include <framework/XMLFormatter.hpp> #include <util/TranscodingException.hpp> #include <dom/DOM_DOMException.hpp> #include <dom/DOM.hpp> #include <dom/DOMString.hpp> #include <string> #include <ostream> #include <stdlib> // --------------------------------------------------------------------------- /// XMLFormatTarget-derived class for use with the DOMSerializer. class DOMSerializerFormatTarget : public XMLFormatTarget { public: /** Constructor. @param os reference to the stream to write to. 
*/ DOMSerializerFormatTarget(std::ostream& os); /// Destructor ~DOMSerializerFormatTarget(); /// Implementations of the format target interface void writeChars(const XMLByte* const toWrite, const unsigned int count, XMLFormatter * const formatter); std::ostream& stream(void); private: //@name: Unimplemented methods. //@{ /// copy constructor DOMSerializerFormatTarget(const DOMSerializerFormatTarget& other); /// assignement operator void operator=(const DOMSerializerFormatTarget& rhs); //@} /// reference to the stream to write to std::ostream& os_; }; // --------------------------------------------------------------------------- /** Serializes an in-memory DOM to a stream. */ class DOMSerializer { public: /// Default encoding for output - 'UTF-8' static const std::string defaultEncoding_; /** Constructor. @param encoding const reference to a string denoting the encoding. Defaults to 'UTF-8'. */ DOMSerializer(const std::string& encoding = defaultEncoding_); /// serialize the node and its descendants to the target void serialize(DOMSerializerFormatTarget* target, const DOM_Node& node); private: /** Stream out a DOM node, and, recursively, all of its children. This function is the heart of writing a DOM tree out as XML source. Give it a document node and it will do the whole thing. 
*/ std::ostream& doSerialize(std::ostream& target, const DOM_Node& toWrite); static const XMLCh gEndElement[]; static const XMLCh gEndPI[]; static const XMLCh gStartPI[]; static const XMLCh gXMLDecl1[]; static const XMLCh gXMLDecl2[]; static const XMLCh gXMLDecl3[]; static const XMLCh gXMLDecl4[]; static const XMLCh gStartCDATA[]; static const XMLCh gEndCDATA[]; static const XMLCh gStartComment[]; static const XMLCh gEndComment[]; static const XMLCh gStartDoctype[]; static const XMLCh gPublic[]; static const XMLCh gSystem[]; static const XMLCh gStartEntity[]; static const XMLCh gNotation[]; XMLFormatter* formatter_; std::string encoding_; }; // --------------------------------------------------------------------------- /** ostream << DOM_Node Serialize a DOM Node to stream s. Uses UTF-8 encoding. */ std::ostream& operator<< (std::ostream& s, const DOM_Node& node); // --------------------------------------------------------------------------- /** ostream << DOMString Stream out a DOM string. Doing this requires that we first transcode to char * form in the default code page for the system. */ std::ostream& operator<< (std::ostream& target, const DOMString& s); XMLFormatter& operator<< (XMLFormatter& strm, const DOMString& s); #endif
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
