... or you can use Evert's serializer classes (often posted here)

Adib.

Keyur Dalal wrote:
> No, you can either use the DOMPrint sample provided with the source or write
> your own
> "serializer". I believe the Java API has a built-in serializer.
> 
> Keyur Dalal
> 
> ----- Original Message -----
> From: "Arun Ramdas" <[EMAIL PROTECTED]>
> To: <[EMAIL PROTECTED]>
> Sent: Tuesday, February 26, 2002 6:40 PM
> Subject: DOM to String Conversion
> 
> 
> 
>>Is there any API provided by Xerces-c, to convert the DOM tree into a
>>
> string
> 
>>buffer?
>>
>>Thanks
>>Arun
>>
>>
>>
>>---------------------------------------------------------------------
>>To unsubscribe, e-mail: [EMAIL PROTECTED]
>>For additional commands, e-mail: [EMAIL PROTECTED]
>>
>>
> 
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: [EMAIL PROTECTED]
> For additional commands, e-mail: [EMAIL PROTECTED]
> 
> 


//---------------------------------------------------------------------------
//
//     File:         DOMSerializer.cpp
//     Description:  Class to serialize a DOM to a stream. Largely taken from
//                   the Xerces-c DOMPrint example.
//                   Works with Xerces-C 1.4
//
//     Rev:          $Id: DOMSerializer.cpp,v 1.1 2001/04/05 13:04:39 evert Exp $
//     Created:      2001/03/30
//     Author:       Evert Haasdijk
//     mail:         [EMAIL PROTECTED]
//
//---------------------------------------------------------------------------

/*
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 */

// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include "DOMSerializer.hpp"

#include <util/PlatformUtils.hpp>
#include <util/XMLString.hpp>
#include <util/XMLUniDefs.hpp>
#include <framework/XMLFormatter.hpp>
#include <util/TranscodingException.hpp>
#include <dom/DOM_DOMException.hpp>
#include <dom/DOM.hpp>
#include <dom/DOMString.hpp>

#include <cassert>    // assert
#include <memory>     // std::auto_ptr
#include <ostream>
#include <stdexcept>  // std::runtime_error
// NOTE(review): <stdlib> is not a standard header name (the standard ones are
// <cstdlib> / <stdlib.h>); kept as-is - confirm which toolchain provides it.
#include <stdlib>
#include <string>

// ---------------------------------------------------------------------------
//  DOMSerializerFormatTarget methods
// ---------------------------------------------------------------------------

/// Bind the format target to the stream it will write to. Only the
/// reference is stored; the stream itself is not copied.
DOMSerializerFormatTarget::DOMSerializerFormatTarget(std::ostream& os)
  : os_(os)
{
}

/// Destructor. Has nothing to release: the stream is held by reference.
DOMSerializerFormatTarget::~DOMSerializerFormatTarget()
{
}

/**
  Write a run of formatter output bytes to the wrapped stream.

  @param toWrite   pointer to the first byte to write
  @param count     number of bytes to write
  @param formatter the formatter handing over the bytes; not used here
*/
void DOMSerializerFormatTarget::writeChars(const XMLByte* const toWrite,
                                      const unsigned int count,
                                      XMLFormatter * const formatter)
{
  // std::ostream::write() wants a const char* and a std::streamsize, so the
  // XMLByte pointer is reinterpreted (keeping its constness) and the length
  // is converted to the stream's own size type.
  os_.write(reinterpret_cast<const char*>(toWrite),
            static_cast<std::streamsize>(count));
}

/// Give access to the stream this target writes to.
std::ostream& DOMSerializerFormatTarget::stream()
{
  return os_;
}

// ---------------------------------------------------------------------------
//  Static DOMSerializer members
//
//  Note: This is the 'safe' way to do these strings. If your compiler supports
//        L"" style strings, and portability is not a concern, you can use
//        those types of constants directly.
// ---------------------------------------------------------------------------
// Markup fragments used by doSerialize(). Each table is a chNull-terminated
// XMLCh string; the trailing comment shows the text it spells out.
const XMLCh  DOMSerializer::gEndElement[] =       // </
{
  chOpenAngle, chForwardSlash, chNull
};
const XMLCh  DOMSerializer::gEndPI[] =            // ?>
{
  chQuestion, chCloseAngle, chNull
};
const XMLCh  DOMSerializer::gStartPI[] =          // <?
{
  chOpenAngle, chQuestion, chNull
};
const XMLCh  DOMSerializer::gXMLDecl1[] =         // <?xml version="
{
        chOpenAngle, chQuestion, chLatin_x, chLatin_m, chLatin_l
    ,   chSpace, chLatin_v, chLatin_e, chLatin_r, chLatin_s, chLatin_i
    ,   chLatin_o, chLatin_n, chEqual, chDoubleQuote, chNull
};
// Closes the preceding quoted value, then opens the encoding attribute.
const XMLCh  DOMSerializer::gXMLDecl2[] =         // " encoding="
{
        chDoubleQuote, chSpace, chLatin_e, chLatin_n, chLatin_c
    ,   chLatin_o, chLatin_d, chLatin_i, chLatin_n, chLatin_g, chEqual
    ,   chDoubleQuote, chNull
};
// Closes the preceding quoted value, then opens the standalone attribute.
const XMLCh  DOMSerializer::gXMLDecl3[] =         // " standalone="
{
        chDoubleQuote, chSpace, chLatin_s, chLatin_t, chLatin_a
    ,   chLatin_n, chLatin_d, chLatin_a, chLatin_l, chLatin_o
    ,   chLatin_n, chLatin_e, chEqual, chDoubleQuote, chNull
};
// Closes the last quoted value and the declaration, followed by CR LF.
const XMLCh  DOMSerializer::gXMLDecl4[] =         // "?>\r\n
{
        chDoubleQuote, chQuestion, chCloseAngle
    ,   chCR, chLF, chNull
};
const XMLCh  DOMSerializer::gStartCDATA[] =       // <![CDATA[
{
        chOpenAngle, chBang, chOpenSquare, chLatin_C, chLatin_D,
        chLatin_A, chLatin_T, chLatin_A, chOpenSquare, chNull
};
const XMLCh  DOMSerializer::gEndCDATA[] =         // ]]>
{
    chCloseSquare, chCloseSquare, chCloseAngle, chNull
};
const XMLCh  DOMSerializer::gStartComment[] =     // <!--
{
    chOpenAngle, chBang, chDash, chDash, chNull
};
const XMLCh  DOMSerializer::gEndComment[] =       // -->
{
    chDash, chDash, chCloseAngle, chNull
};
// Includes the trailing space that separates the keyword from the name.
const XMLCh  DOMSerializer::gStartDoctype[] =     // <!DOCTYPE
{
    chOpenAngle, chBang, chLatin_D, chLatin_O, chLatin_C, chLatin_T,
    chLatin_Y, chLatin_P, chLatin_E, chSpace, chNull
};
// Ends with the opening quote of the identifier; there is no leading space.
const XMLCh  DOMSerializer::gPublic[] =           // PUBLIC "
{
    chLatin_P, chLatin_U, chLatin_B, chLatin_L, chLatin_I,
    chLatin_C, chSpace, chDoubleQuote, chNull
};
// Ends with the opening quote of the identifier; there is no leading space.
const XMLCh  DOMSerializer::gSystem[] =           // SYSTEM "
{
    chLatin_S, chLatin_Y, chLatin_S, chLatin_T, chLatin_E,
    chLatin_M, chSpace, chDoubleQuote, chNull
};
// Includes the trailing space that separates the keyword from the name.
const XMLCh  DOMSerializer::gStartEntity[] =      // <!ENTITY
{
    chOpenAngle, chBang, chLatin_E, chLatin_N, chLatin_T, chLatin_I,
    chLatin_T, chLatin_Y, chSpace, chNull
};
// Ends with an opening double quote; there is no leading space.
const XMLCh  DOMSerializer::gNotation[] =         // NDATA "
{
    chLatin_N, chLatin_D, chLatin_A, chLatin_T, chLatin_A,
    chSpace, chDoubleQuote, chNull
};

// Name of the output encoding used when the caller does not specify one.
const std::string DOMSerializer::defaultEncoding_("UTF-8");

// ---------------------------------------------------------------------------
//  DOMSerializer member functions
// ---------------------------------------------------------------------------
/**
  Construct a serializer that will produce output in the given encoding.

  No formatter is created here; formatter_ starts out null and is only
  set up for the duration of a serialize() call.

  @param encoding name of the output encoding
*/
DOMSerializer::DOMSerializer(const std::string& encoding)
  : formatter_(0),
    encoding_(encoding)
{
}

/**
  Serialize a node and all of its descendants to the given format target.

  A fresh XMLFormatter is created for the call and destroyed again before
  returning, also when doSerialize() throws.

  @param target format target to write to; must not be null
  @param node   root of the subtree to serialize
*/
void DOMSerializer::serialize(DOMSerializerFormatTarget* target,
                              const DOM_Node& node)
{
  assert(target);

  //
  // Convert encoding_ to a null-terminated XMLCh string. XMLFormatter
  // expects a terminated encoding name, which DOMString::rawBuffer() does
  // not guarantee, so XMLString::transcode() is used instead. The returned
  // buffer is owned by us and released with delete [].
  //
  XMLCh* encName = XMLString::transcode( encoding_.c_str() );

  try
  {
    formatter_ = new XMLFormatter(encName,
                                  target,
                                  XMLFormatter::NoEscapes,
                                  XMLFormatter::UnRep_CharRef);

    doSerialize(target->stream(), node);
  }
  catch (...)
  {
    // do not leak the formatter or the encoding name on failure
    delete formatter_;
    formatter_ = 0;
    delete [] encName;
    throw;
  }

  delete formatter_;
  formatter_ = 0;
  delete [] encName;
}

std::ostream& DOMSerializer::doSerialize( std::ostream& target,
                                          const DOM_Node& toWrite)
{
    // Get the name and value out for convenience
    DOMString   nodeName = toWrite.getNodeName();
    DOMString   nodeValue = toWrite.getNodeValue();
    unsigned long lent = nodeValue.length();

    switch (toWrite.getNodeType())
    {
        case DOM_Node::TEXT_NODE:
        {
            formatter_->formatBuf(nodeValue.rawBuffer(), 
                                  lent, XMLFormatter::CharEscapes);
            break;
        }

        case DOM_Node::PROCESSING_INSTRUCTION_NODE :
        {
            *formatter_ << XMLFormatter::NoEscapes << gStartPI  << nodeName;
            if (lent > 0)
            {
                *formatter_ << chSpace << nodeValue;
            }
            *formatter_ << XMLFormatter::NoEscapes << gEndPI;
            break;
        }

        case DOM_Node::DOCUMENT_NODE :
        {

            DOM_Node child = toWrite.getFirstChild();
            while( child != 0)
            {
                doSerialize(target, child);
//                target << child << std::endl;
                child = child.getNextSibling();
            }
            break;
        }


        case DOM_Node::ELEMENT_NODE :
        {
            // The name has to be representable without any escapes
            *formatter_  << XMLFormatter::NoEscapes
                         << chOpenAngle << nodeName;

            // Output the element start tag.

            // Output any attributes on this element
            DOM_NamedNodeMap attributes = toWrite.getAttributes();
            int attrCount = attributes.getLength();
            for (int i = 0; i < attrCount; i++)
            {
                DOM_Node  attribute = attributes.item(i);

                //
                //  Again the name has to be completely representable. But the
                //  attribute can have refs and requires the attribute style
                //  escaping.
                //
                *formatter_  << XMLFormatter::NoEscapes
                             << chSpace << attribute.getNodeName()
                             << chEqual << chDoubleQuote
                             << XMLFormatter::AttrEscapes
                             << attribute.getNodeValue()
                             << XMLFormatter::NoEscapes
                             << chDoubleQuote;
            }

            //
            //  Test for the presence of children, which includes both
            //  text content and nested elements.
            //
            DOM_Node child = toWrite.getFirstChild();
            if (child != 0)
            {
                // There are children. Close start-tag, and output children.
                // No escapes are legal here
                *formatter_ << XMLFormatter::NoEscapes << chCloseAngle;

                while( child != 0)
                {
                    doSerialize(target, child);
//                target << child;
                    child = child.getNextSibling();
                }

                //
                // Done with children.  Output the end tag.
                //
                *formatter_ << XMLFormatter::NoEscapes << gEndElement
                            << nodeName << chCloseAngle;
            }
            else
            {
                //
                //  There were no children. Output the short form close of
                //  the element start tag, making it an empty-element tag.
                //
                *formatter_ << XMLFormatter::NoEscapes << chForwardSlash << 
chCloseAngle;
            }
            break;
        }
        
        
        case DOM_Node::ENTITY_REFERENCE_NODE:
            {
                DOM_Node child;
#if 0
                for (child = toWrite.getFirstChild();
                child != 0;
                child = child.getNextSibling())
                {
                    doSerialize(target, child);
//                target << child;
                }
#else
                //
                // Instead of printing the refernece tree 
                // we'd output the actual text as it appeared in the xml file.
                // This would be the case when -e option was chosen
                //
                    *formatter_ << XMLFormatter::NoEscapes << chAmpersand
                        << nodeName << chSemiColon;
#endif
                break;
            }
            
            
        case DOM_Node::CDATA_SECTION_NODE:
            {
            *formatter_ << XMLFormatter::NoEscapes << gStartCDATA
                        << nodeValue << gEndCDATA;
            break;
        }

        
        case DOM_Node::COMMENT_NODE:
        {
            *formatter_ << XMLFormatter::NoEscapes << gStartComment
                        << nodeValue << gEndComment;
            break;
        }

        
        case DOM_Node::DOCUMENT_TYPE_NODE:
        {
            DOM_DocumentType doctype = (DOM_DocumentType &)toWrite;;

            *formatter_ << XMLFormatter::NoEscapes  << gStartDoctype
                        << nodeName;
 
            DOMString id = doctype.getPublicId();
            if (id != 0)
            {
                *formatter_ << XMLFormatter::NoEscapes << chSpace << gPublic
                    << id << chDoubleQuote;
                id = doctype.getSystemId();
                if (id != 0)
                {
                    *formatter_ << XMLFormatter::NoEscapes << chSpace 
                       << chDoubleQuote << id << chDoubleQuote;
                }
            }
            else
            {
                id = doctype.getSystemId();
                if (id != 0)
                {
                    *formatter_ << XMLFormatter::NoEscapes << chSpace << gSystem
                        << id << chDoubleQuote;
                }
            }
            
            id = doctype.getInternalSubset(); 
            if (id !=0)
                *formatter_ << XMLFormatter::NoEscapes << chOpenSquare
                            << id << chCloseSquare;

            *formatter_ << XMLFormatter::NoEscapes << chCloseAngle;
            break;
        }
        
        
        case DOM_Node::ENTITY_NODE:
        {
            *formatter_ << XMLFormatter::NoEscapes << gStartEntity
                        << nodeName;

            DOMString id = ((DOM_Entity &)toWrite).getPublicId();
            if (id != 0)
                *formatter_ << XMLFormatter::NoEscapes << gPublic
                            << id << chDoubleQuote;

            id = ((DOM_Entity &)toWrite).getSystemId();
            if (id != 0)
                *formatter_ << XMLFormatter::NoEscapes << gSystem
                            << id << chDoubleQuote;
            
            id = ((DOM_Entity &)toWrite).getNotationName();
            if (id != 0)
                *formatter_ << XMLFormatter::NoEscapes << gNotation
                            << id << chDoubleQuote;

            *formatter_ << XMLFormatter::NoEscapes << chCloseAngle << chCR << chLF;

            break;
        }
        
        
        case DOM_Node::XML_DECL_NODE:
        {
            DOMString  str;

            *formatter_ << gXMLDecl1 << ((DOM_XMLDecl &)toWrite).getVersion();

            *formatter_ << gXMLDecl2 << formatter_->getEncodingName();
            
            str = ((DOM_XMLDecl &)toWrite).getStandalone();
            if (str != 0)
                *formatter_ << gXMLDecl3 << str;
            
            *formatter_ << gXMLDecl4;

            break;
        }

        default:
        {
          throw std::runtime_error("Unrecognized node type");
          //       << (long)toWrite.getNodeType() << endl;
        }
    }

    return target;
}



// ---------------------------------------------------------------------------
//  ostream << DOMString
// ---------------------------------------------------------------------------
std::ostream& operator<< (std::ostream& target, const DOMString& s)
{
    std::auto_ptr<char> p( s.transcode() );
    return target << p.get();
}

// ---------------------------------------------------------------------------
/**
  Write a DOM string to an XMLFormatter, using the formatter's current
  escape settings. An empty string produces no output.

  @param strm formatter to write to
  @param s    string to write
  @return strm
*/
XMLFormatter& operator<< (XMLFormatter& strm, const DOMString& s)
{
  const unsigned int lent = s.length();

  if (lent > 0)
  {
    // formatBuf() takes an explicit length, so the raw buffer can be
    // handed over directly without making a terminated copy first.
    strm.formatBuf(s.rawBuffer(), lent);
  }

  return strm;
}
                   
// ---------------------------------------------------------------------------
/**
  Serialize a DOM node and its descendants to a stream, using a
  default-constructed DOMSerializer.

  @param s    stream to write to
  @param node root of the subtree to write
  @return s
*/
std::ostream& operator<< (std::ostream& s, const DOM_Node& node)
{
  // serializer with the default encoding
  DOMSerializer serializer;

  // format target writing to s; it only has to live for this call
  DOMSerializerFormatTarget target(s);

  // let the serializer do its thing
  serializer.serialize(&target, node);

  return s;
}

//---------------------------------------------------------------------------
//
//     File:         DOMSerializer.h
//     Description:  Class to serialize a DOM to a stream. Largely taken from
//                   the Xerces-c DOMPrint example
//                   Works with Xerces-C 1.4
//
//     Rev:          $Id: DOMSerializer.h,v 1.1 2001/04/05 13:04:39 evert Exp $
//     Created:      2001/03/30
//     Author:       Evert Haasdijk
//     mail:         [EMAIL PROTECTED]
//
//---------------------------------------------------------------------------
#ifndef DOMSerializerH
#define DOMSerializerH
//---------------------------------------------------------------------------

/*
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 */

// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------

#include <util/PlatformUtils.hpp>
#include <util/XMLString.hpp>
#include <util/XMLUniDefs.hpp>
#include <framework/XMLFormatter.hpp>
#include <util/TranscodingException.hpp>
#include <dom/DOM_DOMException.hpp>
#include <dom/DOM.hpp>
#include <dom/DOMString.hpp>

#include <string>
#include <ostream>
#include <stdlib>


// ---------------------------------------------------------------------------
/**
  XMLFormatTarget-derived class for use with the DOMSerializer. It passes
  the bytes it receives on to a std::ostream.
*/
class DOMSerializerFormatTarget : public XMLFormatTarget
{
public:
    /**
      Constructor.

      @param os reference to the stream to write to; only the reference
                is kept.
    */
    DOMSerializerFormatTarget(std::ostream& os);

    /// Destructor.
    ~DOMSerializerFormatTarget();

    /**
      Implementation of the format target interface.

      @param toWrite   the bytes to write
      @param count     the number of bytes to write
      @param formatter the formatter handing over the bytes
    */
    void writeChars(const XMLByte* const toWrite,
                    const unsigned int count,
                    XMLFormatter * const formatter);

    /// The stream this target writes to.
    std::ostream& stream();

private:
    /// Reference to the stream to write to.
    std::ostream& os_;

    //@name:  Unimplemented methods (declared private to prevent copying).
    //@{
    /// Copy constructor.
    DOMSerializerFormatTarget(const DOMSerializerFormatTarget& other);
    /// Assignment operator.
    void operator=(const DOMSerializerFormatTarget& rhs);
    //@}
};

// ---------------------------------------------------------------------------

/**
  Writes an in-memory DOM (sub)tree to a stream as XML source.
*/
class DOMSerializer
{
public:
    /// Encoding used for output when none is given - 'UTF-8'.
    static const std::string defaultEncoding_;

    /**
      Constructor.

      @param encoding name of the output encoding. Defaults to
             defaultEncoding_.
    */
    DOMSerializer(const std::string& encoding = defaultEncoding_);

    /**
      Serialize the node and its descendants to the target.

      @param target the format target to write to
      @param node   the root of the subtree to write
    */
    void serialize(DOMSerializerFormatTarget* target, const DOM_Node& node);

private:
    /**
      Stream out a DOM node, and, recursively, all of its children. This
      function is the heart of writing a DOM tree out as XML source. Give it
      a document node and it will do the whole thing.

      @param target  stream passed along the recursion and returned
      @param toWrite the node to write
    */
    std::ostream& doSerialize(std::ostream& target, const DOM_Node& toWrite);

    // Markup fragments used while writing.
    static const XMLCh  gEndElement[];
    static const XMLCh  gEndPI[];
    static const XMLCh  gStartPI[];
    static const XMLCh  gXMLDecl1[];
    static const XMLCh  gXMLDecl2[];
    static const XMLCh  gXMLDecl3[];
    static const XMLCh  gXMLDecl4[];
    static const XMLCh  gStartCDATA[];
    static const XMLCh  gEndCDATA[];
    static const XMLCh  gStartComment[];
    static const XMLCh  gEndComment[];
    static const XMLCh  gStartDoctype[];
    static const XMLCh  gPublic[];
    static const XMLCh  gSystem[];
    static const XMLCh  gStartEntity[];
    static const XMLCh  gNotation[];

    /// Formatter that doSerialize() writes through.
    XMLFormatter* formatter_;

    /// Name of the output encoding.
    std::string encoding_;
};

// ---------------------------------------------------------------------------
/**
  ostream << DOM_Node

  Serialize a DOM Node to stream s. Uses UTF-8 encoding.
*/
std::ostream& operator<< (std::ostream& s, const DOM_Node& node);


// ---------------------------------------------------------------------------
/**
  ostream << DOMString

  Stream out a DOM string. Doing this requires that we first transcode
  to char * form in the default code page for the system.
*/
std::ostream& operator<< (std::ostream& target, const DOMString& s);

/**
  XMLFormatter << DOMString

  Write a DOM string to an XMLFormatter. An empty string produces no
  output.
*/
XMLFormatter& operator<< (XMLFormatter& strm, const DOMString& s);

#endif

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to