serialize BaseSerializer.java ElementState.java HTMLEntities.res HTMLSerializer.java HTMLdtd.java Makefile OutputFormat.java Serializer.java XHTMLSerializer.java XMLSerializer.java

Assaf Arkin 23 Nov 1999 22:30:17 -0000

arkin       99/11/23 14:29:53


  Added:       java/src/org/apache/xml/serialize BaseSerializer.java
                        ElementState.java HTMLEntities.res
                        HTMLSerializer.java HTMLdtd.java Makefile
                        OutputFormat.java Serializer.java
                        XHTMLSerializer.java XMLSerializer.java
  Log:
  First checkin.
  
  Revision  Changes    Path
  1.1                  
xml-xerces/java/src/org/apache/xml/serialize/BaseSerializer.java
  
  Index: BaseSerializer.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:  
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written 
   *    permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  
  package org.apache.xml.serialize;
  
  
  import java.io.*;
  import java.util.Vector;
  import java.util.Hashtable;
  import java.util.StringTokenizer;
  
  import org.w3c.dom.*;
  import org.xml.sax.DocumentHandler;
  import org.xml.sax.DTDHandler;
  import org.xml.sax.Locator;
  import org.xml.sax.SAXException;
  import org.xml.sax.misc.LexicalHandler;
  import org.xml.sax.misc.DeclHandler;
  
  
  /**
   * Base class for a serializer supporting both DOM and SAX pretty
   * serializing of XML/HTML/XHTML documents. Derived classes perform
   * the method-specific serializing, this class provides the common
   * serializing mechanisms.
   * <p>
   * The serializer must be initialized with the proper writer and
   * output format before it can be used by calling {@link #init}.
   * The serializer can be reused any number of times, but cannot
   * be used concurrently by two threads.
   * <p>
   * If an output stream is used, the encoding is taken from the
   * output format (defaults to <tt>UTF8</tt>). If a writer is
   * used, make sure the writer uses the same encoding (if applies)
   * as specified in the output format.
   * <p>
   * The serializer supports both DOM and SAX. DOM serializing is done
   * by calling {@link #serialize} and SAX serializing is done by
   * firing SAX events and using the serializer as a document handler.
   * This also applies to derived classes.
   * <p>
   * If an I/O exception occurs while serializing, the serializer
   * will not throw an exception directly, but only throw it
   * at the end of serializing (either DOM or SAX's {@link
   * org.xml.sax.DocumentHandler#endDocument}).
   * <p>
   * For elements that are not specified as whitespace preserving,
   * the serializer will potentially break long text lines at space
   * boundaries, indent lines, and serialize elements on separate
   * lines. Line terminators will be regarded as spaces, and
   * spaces at beginning of line will be stripped.
   * <p>
   * When indenting, the serializer is capable of detecting seemingly
   * element content, and serializing these elements indented on separate
   * lines. An element is serialized indented when it is the first or
   * last child of an element, or immediate following or preceding
   * another element.
   * 
   *
   * @version
   * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
   * @see Serializer
   * @see XMLSerializer
   */
  abstract class BaseSerializer
      extends Serializer
      implements DocumentHandler, LexicalHandler, DTDHandler, DeclHandler
  {
  
  
      /**
       * Identifies the last printable character in the Unicode range
       * that is supported by the encoding used with this serializer.
       * For 8-bit encodings this will be either 0x7E or 0xFF.
       * For 16-bit encodings this will be 0xFFFF. Characters that are
       * not printable will be escaped using character references.
       */
      private int              _lastPrintable = 0x7E;
  
  
      /**
       * The output format associated with this serializer. This will never
       * be a null reference. If no format was passed to the constructor,
       * the default one for this document type will be used. The format
       * object is never changed by the serializer.
       */
      protected OutputFormat  _format;
  
  
      /**
       * The writer to which the document is written.
       */
      private Writer          _writer;
  
  
      /**
       * Holds the currently accumulating text line. This buffer will constantly
       * be reused by deleting its contents instead of reallocating it.
       */
      private StringBuffer    _line;
  
  
      /**
       * Holds the currently accumulating text that follows {@link #_line}.
       * When the end of the part is identified by a call to
       * {@link #printSpace} or {@link #breakLine}, this part is added
       * to the accumulated line.
       */
      private StringBuffer    _text;
  
  
      /**
       * Counts how many white spaces come between the accumulated line and the
       * current accumulated text. Multiple spaces at the end of a line
       * will not be printed.
       */
      private int             _spaces;
  
  
      /**
       * Holds the indentation for the current line that is now accumulating in
       * memory and will be sent for printing shortly.
       */
      private int             _thisIndent;
      
      
      /**
       * Holds the indentation for the next line to be printed. After this
       * line is printed, {@link #_nextIndent} is assigned to
       * {@link #_thisIndent}.
       */
      private int             _nextIndent;
  
  
      /**
       * Holds the exception thrown by the serializer.  Exceptions do not cause
       * the serializer to quit, but are held and one is thrown at the end.
       */
      protected IOException   _exception;
  
  
      /**
       * Holds an array of all element states that have been entered.
       * The array is automatically resized. When leaving an element,
       * its state is not removed but reused when later returning
       * to the same nesting level.
       */
      private ElementState[]  _elementStates = new ElementState[ 5 ];
  
  
      /**
       * The index of the next state to place in the array,
       * or one plus the index of the current state. When zero,
       * we are in no state.
       */
      private int             _elementStateCount;
  
  
      /**
       * Vector holding comments and PIs that come before the root
       * element (even after it), see {@link #serializePreRoot}.
       */
      private Vector          _preRoot;
  
  
      /**
       * If the document has been started (header serialized), this
       * flag is set to true so it's not started twice.
       */
      protected boolean       _started;
  
  
      /**
       * The DTD writer. When we switch to DTD mode, all output is
       * accumulated in this DTD writer. When we switch out of it,
       * the output is obtained as a string. Must not be reset to
       * null until we're done with the document.
       */
      private StringWriter    _dtdWriter;
  
  
      /**
       * Holds a reference to the document writer while we are
       * in DTD mode.
       */
      private Writer          _docWriter;
  
      
  
  
      //--------------------------------//
      // Constructor and initialization //
      //--------------------------------//
  
  
      /**
       * Protected constructor can only be used by derived classes.
       * Must initialize the serializer before serializing any document,
       * see {@link #init}.
       */
      protected BaseSerializer()
      {
          // Fill every slot of the initial state array up front; each
          // slot gets its own ElementState instance.
          for ( int slot = 0 ; slot < _elementStates.length ; ++slot ) {
              _elementStates[ slot ] = new ElementState();
          }
      }
  
  
      /**
       * Initialize the serializer with the specified writer and output format.
       * Must be called before calling any of the serialize methods.
       *
       * @param writer The writer to use
       * @param format The output format
       */
      public synchronized void init( Writer writer, OutputFormat format )
      {
          // Validate both arguments before touching any field, so that a
          // rejected call leaves the serializer exactly as it was.
          if ( format == null )
              throw new NullPointerException( "Argument 'format' is null." );
          if ( writer == null )
              throw new NullPointerException( "Argument 'writer' is null." );

          _format = format;
          // All output is written through a buffering wrapper around the
          // caller's writer.
          _writer = new BufferedWriter( writer );

          // Determine the last printable character based on the output format
          _lastPrintable = _format.getLastPrintable();

          // Reset all per-document state so the serializer can be reused
          // for another run.
          _line = new StringBuffer( 80 );
          _text = new StringBuffer( 20 );
          _spaces = 0;
          _thisIndent = _nextIndent = 0;
          _exception = null;
          _elementStateCount = 0;
          _started = false;
          _dtdWriter = null;
      }
  
  
      /**
       * Initialize the serializer with the specified output stream and
       * output format.
       * Must be called before calling any of the serialize methods.
       *
       * @param output The output stream to use
       * @param format The output format
       * @throws UnsupportedEncodingException The encoding specified
       *   in the output format is not supported
       */
      public synchronized void init( OutputStream output, OutputFormat format )
          throws UnsupportedEncodingException
      {
          String charsetName;

          // Fall back to ASCII when the output format names no encoding.
          if ( format.getEncoding() == null )
              charsetName = "ASCII";
          else
              charsetName = format.getEncoding();

          // Wrap the stream in a writer for that encoding and delegate
          // to the writer-based initializer.
          Writer encoded = new OutputStreamWriter( output, charsetName );
          init( encoded, format );
      }
  
  
      //-------------------------------//
      // DOM document serializing methods //
      //-------------------------------//
  
  
      /**
       * Serializes the DOM element using the previously specified
       * writer and output format. Throws an exception only if
       * an I/O exception occured while serializing.
       *
       * @param elem The element to serialize
       * @throws IOException An I/O exception occured while
       *   serializing
       */
      public void serialize( Element elem )
          throws IOException
      {
          try {
              startDocument();
          } catch ( SAXException ignored ) {
              // NOTE(review): a SAXException raised by startDocument() is
              // dropped here; only _exception is reported to the caller.
          }

          serializeNode( elem );
          flush();

          // Report an I/O failure recorded during serializing, if any.
          if ( _exception == null )
              return;
          throw _exception;
      }
  
  
      /**
       * Serializes the DOM document using the previously specified
       * writer and output format. Throws an exception only if
       * an I/O exception occured while serializing.
       *
       * @param doc The document to serialize
       * @throws IOException An I/O exception occured while
       *   serializing
       */
      public void serialize( Document doc )
          throws IOException
      {
          try {
              startDocument();
          } catch ( SAXException ignored ) {
              // NOTE(review): a SAXException raised by startDocument() is
              // dropped here; only _exception is reported to the caller.
          }

          serializeNode( doc );
          // Emit any comments/PIs still queued in the pre-root vector.
          serializePreRoot();
          flush();

          // Report an I/O failure recorded during serializing, if any.
          if ( _exception == null )
              return;
          throw _exception;
      }
  
  
      //---------------------------------------//
      // SAX document handler serializing methods //
      //---------------------------------------//
  
  
      /**
       * SAX event for character data. Copies the given range into a
       * string and forwards it to the three-argument form with both the
       * CDATA and unescaped flags off.
       *
       * @param chars The character array
       * @param start Offset of the first character
       * @param length Number of characters
       */
      public void characters( char[] chars, int start, int length )
      {
          String text = new String( chars, start, length );

          characters( text, false, false );
      }
  
  
      /**
       * SAX event for ignorable whitespace. The current element is always
       * marked as having content; the whitespace itself is only emitted
       * when the output format asks for indenting.
       *
       * @param chars The character array
       * @param start Offset of the first character
       * @param length Number of characters
       */
      public void ignorableWhitespace( char[] chars, int start, int length )
      {
          int pos;
          int remaining;

          content();

          // Without indenting, ignorable whitespace is dropped entirely.
          if ( ! _format.getIndenting() )
              return;

          // The whitespace is the indentation, so cancel the serializer's
          // own indentation for this line to avoid indenting twice.
          _thisIndent = 0;
          pos = start;
          remaining = length;
          while ( remaining > 0 ) {
              char ch = chars[ pos ];

              if ( ch == '\n' || ch == '\r' )
                  breakLine();
              else
                  _text.append( ch );
              ++pos;
              --remaining;
          }
      }
  
  
      /**
       * SAX event for a processing instruction. Builds the textual form
       * <tt>&lt;?target code?&gt;</tt>, cutting target and code at the
       * first <tt>?&gt;</tt> they contain. When there is no current
       * element the text is queued in the pre-root vector, otherwise it
       * is printed indented with whitespace preserved.
       *
       * @param target The processing instruction target
       * @param code The processing instruction data, may be null
       */
      public void processingInstruction( String target, String code )
      {
          ElementState current;
          StringBuffer pi;
          int          cut;

          current = content();
          pi = new StringBuffer( 40 );

          // A '?>' inside the target or the code would terminate the PI
          // early, so only the part before it is kept.
          cut = target.indexOf( "?>" );
          pi.append( "<?" );
          pi.append( cut >= 0 ? target.substring( 0, cut ) : target );
          if ( code != null ) {
              pi.append( ' ' );
              cut = code.indexOf( "?>" );
              pi.append( cut >= 0 ? code.substring( 0, cut ) : code );
          }
          pi.append( "?>" );

          // Outside of any element: hold the PI back in the pre-root
          // vector instead of printing it now.
          if ( current == null ) {
              if ( _preRoot == null )
                  _preRoot = new Vector();
              _preRoot.addElement( pi.toString() );
              return;
          }

          indent();
          printText( pi, true );
          unindent();
      }
  
  
      /**
       * SAX lexical event for a comment. Copies the given range into a
       * string and forwards it to the string form.
       *
       * @param chars The character array
       * @param start Offset of the first character
       * @param length Number of characters
       */
      public void comment( char[] chars, int start, int length )
      {
          String text = new String( chars, start, length );

          comment( text );
      }
  
  
      /**
       * Serializes a comment. Builds <tt>&lt;!--text--&gt;</tt>, cutting
       * the text at the first <tt>--&gt;</tt> it contains. When there is
       * no current element the comment is queued in the pre-root vector,
       * otherwise it is printed indented without preserving whitespace.
       *
       * @param text The comment text
       */
      public void comment( String text )
      {
          ElementState current;
          StringBuffer markup;
          int          cut;

          current = content();
          markup = new StringBuffer( 40 );

          // A '-->' inside the text would close the comment early, so
          // only the part before it is kept.
          cut = text.indexOf( "-->" );
          markup.append( "<!--" );
          markup.append( cut >= 0 ? text.substring( 0, cut ) : text );
          markup.append( "-->" );

          // Outside of any element: hold the comment back in the
          // pre-root vector instead of printing it now.
          if ( current == null ) {
              if ( _preRoot == null )
                  _preRoot = new Vector();
              _preRoot.addElement( markup.toString() );
              return;
          }

          indent();
          printText( markup, false );
          unindent();
      }
  
  
      /**
       * SAX lexical event marking the start of a CDATA section. Sets the
       * <tt>cdata</tt> flag on the current element state; does nothing
       * when there is no current element.
       */
      public void startCDATA()
      {
          ElementState current = getElementState();

          if ( current == null )
              return;
          current.cdata = true;
      }
  
  
      /**
       * SAX lexical event marking the end of a CDATA section. Clears the
       * <tt>cdata</tt> flag on the current element state; does nothing
       * when there is no current element.
       */
      public void endCDATA()
      {
          ElementState current = getElementState();

          if ( current == null )
              return;
          current.cdata = false;
      }
  
  
      /**
       * Called at the end of the document to wrap it up.
       * Will flush the output stream and throw an exception
       * if any I/O error occured while serializing.
       *
       * @throws SAXException An I/O exception occured during
       *  serializing
       */
      public void endDocument()
          throws SAXException
      {
          // Emit whatever is still queued in the pre-root vector.
          serializePreRoot();
          // Push buffered output out to the underlying writer.
          flush();

          // An I/O failure recorded while serializing is reported now,
          // wrapped in a SAXException.
          if ( _exception == null )
              return;
          throw new SAXException( _exception );
      }
  
  
      /**
       * SAX lexical event marking the start of an entity. Currently a
       * no-op: nothing is printed and no state is changed.
       *
       * @param name The entity name (unused)
       */
      public void startEntity( String name )
      {
        // ??? (original author's marker: handling not yet decided)
      }
  
  
      /**
       * SAX lexical event marking the end of an entity. Currently a
       * no-op: nothing is printed and no state is changed.
       *
       * @param name The entity name (unused)
       */
      public void endEntity( String name )
      {
        // ??? (original author's marker: handling not yet decided)
      }
  
  
      /**
       * SAX event supplying a document locator. The locator is ignored.
       *
       * @param locator The document locator (unused)
       */
      public void setDocumentLocator( Locator locator )
      {
        // Nothing to do
      }
  
  
      //---------------------------------------//
      // SAX DTD/Decl handler serializing methods //
      //---------------------------------------//
  
  
      /**
       * SAX lexical event marking the start of the DTD. Switches the
       * serializer into DTD mode and stores the public and system
       * identifiers in the output format.
       *
       * @param name The document type name (unused here)
       * @param publicId The public identifier, passed to the output format
       * @param systemId The system identifier, passed to the output format
       */
      public void startDTD( String name, String publicId, String systemId )
      {
        enterDTD();
        // For the moment this simply overrides any settings performed
        // on the output format.
        // NOTE(review): this mutates the shared OutputFormat object,
        // including with null identifiers when the caller passes null.
        _format.setDoctype( publicId, systemId );
      }
  
  
      /**
       * SAX lexical event marking the end of the DTD. Does nothing; DTD
       * mode is left through {@link #leaveDTD}, not from here.
       */
      public void endDTD()
      {
        // Nothing to do here, all the magic occurs in startDocument(String).
      }
  
  
      /**
       * SAX declaration event for an element type. Prints
       * <tt>&lt;!ELEMENT name model&gt;</tt> into the DTD, followed by a
       * line break when indenting.
       *
       * @param name The element type name
       * @param model The content model, printed as given
       */
      public void elementDecl( String name, String model )
      {
          StringBuffer decl;

          enterDTD();
          decl = new StringBuffer( "<!ELEMENT " );
          decl.append( name ).append( ' ' ).append( model ).append( '>' );
          printText( decl.toString() );
          if ( _format.getIndenting() ) {
              breakLine();
          }
      }
  
  
      /**
       * SAX declaration event for an attribute. Prints
       * <tt>&lt;!ATTLIST eName aName type [valueDefault] ["value"]&gt;</tt>
       * into the DTD, followed by a line break when indenting. The
       * value, when present, is escaped and quoted.
       *
       * @param eName The element type name
       * @param aName The attribute name
       * @param type The attribute type, printed as given
       * @param valueDefault The default declaration, omitted when null
       * @param value The default value, omitted when null
       */
      public void attributeDecl( String eName, String aName, String type,
                                 String valueDefault, String value )
      {
          StringBuffer decl;

          enterDTD();
          decl = new StringBuffer( 40 );
          decl.append( "<!ATTLIST " );
          decl.append( eName );
          decl.append( ' ' );
          decl.append( aName );
          decl.append( ' ' );
          decl.append( type );
          if ( valueDefault != null ) {
              decl.append( ' ' );
              decl.append( valueDefault );
          }
          if ( value != null ) {
              decl.append( " \"" );
              decl.append( escape( value ) );
              decl.append( '"' );
          }
          decl.append( '>' );

          printText( decl.toString() );
          if ( _format.getIndenting() ) {
              breakLine();
          }
      }
  
  
      /**
       * SAX declaration event for an internal entity. Prints
       * <tt>&lt;!ENTITY name "value"&gt;</tt> into the DTD with the value
       * escaped, followed by a line break when indenting.
       *
       * @param name The entity name
       * @param value The replacement text
       */
      public void internalEntityDecl( String name, String value )
      {
          String declaration;

          enterDTD();
          declaration = "<!ENTITY " + name + " \"" + escape( value ) + "\">";
          printText( declaration );
          if ( _format.getIndenting() ) {
              breakLine();
          }
      }
  
  
      /**
       * SAX declaration event for an external parsed entity. Printed
       * through {@link #unparsedEntityDecl} with no notation name.
       *
       * @param name The entity name
       * @param publicId The public identifier, may be null
       * @param systemId The system identifier
       */
      public void externalEntityDecl( String name, String publicId, String systemId )
      {
          final String noNotation = null;

          enterDTD();
          unparsedEntityDecl( name, publicId, systemId, noNotation );
      }
  
  
      /**
       * SAX DTD event for an external entity declaration. Prints
       * <tt>&lt;!ENTITY name PUBLIC publicId systemId&gt;</tt> when a
       * public identifier is given and
       * <tt>&lt;!ENTITY name SYSTEM systemId&gt;</tt> otherwise, with an
       * <tt>NDATA</tt> clause when a notation name is given. A line
       * break follows when indenting.
       *
       * @param name The entity name
       * @param publicId The public identifier, may be null
       * @param systemId The system identifier
       * @param notationName The notation name, or null for a parsed entity
       */
      public void unparsedEntityDecl( String name, String publicId,
                                      String systemId, String notationName )
      {
          enterDTD();
          // The PUBLIC form carries both identifiers; the SYSTEM form is
          // used only when there is no public identifier. This is the
          // same branching notationDecl uses.
          if ( publicId != null ) {
              printText( "<!ENTITY " + name + " PUBLIC " );
              printDoctypeURL( publicId );
              printText( " " );
              printDoctypeURL( systemId );
          } else {
              printText( "<!ENTITY " + name + " SYSTEM " );
              // NOTE(review): systemId is passed on unchecked; confirm
              // printDoctypeURL tolerates null for entities that have
              // no external identifier.
              printDoctypeURL( systemId );
          }
          if ( notationName != null )
              printText( " NDATA " + notationName );
          printText( ">" );
          if ( _format.getIndenting() )
              breakLine();
      }
  
  
      /**
       * SAX DTD event for a notation declaration. Prints the
       * <tt>SYSTEM</tt> form when there is no public identifier, and the
       * <tt>PUBLIC</tt> form otherwise, adding the system identifier to
       * the latter only when it is not null. A line break follows when
       * indenting.
       *
       * @param name The notation name
       * @param publicId The public identifier, may be null
       * @param systemId The system identifier, may be null when a public
       *   identifier is given
       */
      public void notationDecl( String name, String publicId, String systemId )
      {
          enterDTD();

          if ( publicId == null ) {
              printText( "<!NOTATION " + name + " SYSTEM " );
              printDoctypeURL( systemId );
          } else {
              printText( "<!NOTATION " + name + " PUBLIC " );
              printDoctypeURL( publicId );
              if ( systemId != null ) {
                  printText( "  " );
                  printDoctypeURL( systemId );
              }
          }

          printText( ">" );
          if ( _format.getIndenting() ) {
              breakLine();
          }
      }
  
  
      /**
       * Called by any of the DTD handlers to enter DTD mode.
       * Once entered, all output will be accumulated in a string
       * that can be printed as part of the document's DTD.
       * This method may be called any number of times but will only
       * have an effect the first time it's called. To exit DTD state
       * and get the accumulated DTD, call {@link #leaveDTD}.
       */
      protected void enterDTD()
      {
          // A DTD writer already exists: DTD mode was entered before
          // (and possibly left again), so there is nothing to do.
          if ( _dtdWriter != null )
              return;

          // Move pending text into the line and flush it to the current
          // writer before output is redirected.
          _line.append( _text );
          _text = new StringBuffer( 20 );
          flushLine();

          // Remember the document writer and redirect output into an
          // in-memory writer that collects the DTD.
          _dtdWriter = new StringWriter();
          _docWriter = _writer;
          _writer = _dtdWriter;
      }
  
  
      /**
       * Called by the root element to leave DTD mode and if any
       * DTD parts were printed, will return a string with their
       * textual content.
       */
      protected String leaveDTD()
      {
          // Not currently writing into the DTD writer: nothing to leave.
          if ( _writer != _dtdWriter )
              return null;

          // Move pending text into the line and flush it into the DTD
          // writer before switching back.
          _line.append( _text );
          _text = new StringBuffer( 20 );
          flushLine();

          // Restore the document writer and hand back what was collected.
          _writer = _docWriter;
          return _dtdWriter.toString();
      }
  
  
      //------------------------------------------//
      // Generic node serializing methods methods //
      //------------------------------------------//
  
  
      /**
       * Serialize the DOM node. This method is shared across XML, HTML and
       * XHTML serializers and the differences are masked out in a separate
       * {@link #serializeElement}.
       *
       * @param node The node to serialize
       * @see #serializeElement
       */
      protected void serializeNode( Node node )
      {
        // Based on the node type call the suitable SAX handler.
        // Only comments entities and documents which are not
        // handled by SAX are serialized directly.
          switch ( node.getNodeType() ) {
        case Node.TEXT_NODE :
            // Plain text: neither forced CDATA nor unescaped.
            characters( node.getNodeValue(), false, false );
            break;
  
        case Node.CDATA_SECTION_NODE :
            // CDATA section: request CDATA output.
            characters( node.getNodeValue(), true, false );
            break;
  
        case Node.COMMENT_NODE :
            comment( node.getNodeValue() );
            break;
  
        case Node.ENTITY_REFERENCE_NODE :
            // Entity reference printed directly in text, do not break or pause.
            content();
            printText( '&' + node.getNodeName() + ';' );
            break;
  
        case Node.PROCESSING_INSTRUCTION_NODE :
            processingInstruction( node.getNodeName(), node.getNodeValue() );
            break;
  
        case Node.ELEMENT_NODE :
            // Element serializing is method-specific (XML/HTML/XHTML) and
            // is implemented by the derived class.
            serializeElement( (Element) node );
            break;
  
        case Node.DOCUMENT_NODE :
            DocumentType docType;
            NamedNodeMap map;
            Entity       entity;
            Notation     notation;
            int          i;
         
            // If there is a document type, use the SAX events to
            // serialize it.
            docType = ( (Document) node ).getDoctype();
            if ( docType != null ) {
                // NOTE(review): both identifiers are passed as null, and
                // startDTD forwards them to _format.setDoctype(), so any
                // doctype configured on the output format is replaced.
                startDTD( docType.getName(), null, null );
                map = docType.getEntities();
                if ( map != null ) {
                    // Every entity in the map is declared through
                    // unparsedEntityDecl, whatever its kind.
                    for ( i = 0 ; i < map.getLength() ; ++i ) {
                        entity = (Entity) map.item( i );
                        unparsedEntityDecl( entity.getNodeName(), 
entity.getPublicId(),
                                    entity.getSystemId(), 
entity.getNotationName() );
                    }
                }
                map = docType.getNotations();
                if ( map != null ) {
                    for ( i = 0 ; i < map.getLength() ; ++i ) {
                        notation = (Notation) map.item( i );
                        notationDecl( notation.getNodeName(), 
notation.getPublicId(), notation.getSystemId() );
                    }
                }
                endDTD();
            }
            // !! Fall through
            // (deliberate: the document's children are serialized by the
            // document-fragment case below)
        case Node.DOCUMENT_FRAGMENT_NODE : {
            Node         child;
            
            // By definition this will happen if the node is a document,
            // document fragment, etc. Just serialize its contents. It will
            // work well for other nodes that we do not know how to serialize.
            child = node.getFirstChild();
            while ( child != null ) {
                serializeNode( child );
                child = child.getNextSibling();
            }
            break;
        }
  
        default:
            // Any other node type produces no output.
            break;
        }
      }
  
  
      /**
       * Must be called by a method about to print any type of content.
       * If the element was just opened, the opening tag is closed and
       * will be matched to a closing tag. Returns the current element
       * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
       *
       * @return The current element state
       */    
      protected ElementState content()
      {
          ElementState current = getElementState();

          // No current element: there is no tag to close.
          if ( current == null )
              return null;

          // First content inside this element: finish the still-open
          // start tag and remember the element is no longer empty.
          if ( current.empty ) {
              printText( ">" );
              current.empty = false;
          }

          // Whatever is printed next, it does not directly follow a
          // child element; the one content type for which it does
          // sets the flag itself.
          current.afterElement = false;
          return current;
      }
  
  
      /**
       * Called to print the text contents in the prevailing element format.
       * Since this method is capable of printing text as CDATA, it is used
       * for that purpose as well. White space handling is determined by the
       * current element state. In addition, the output format can dictate
       * whether the text is printed as CDATA or unescaped.
       *
       * @param text The text to print
       * @param cdata True if should print as CDATA
       * @param unescaped True if should print unescaped
       */
      protected void characters( String text, boolean cdata, boolean unescaped )
      {
          ElementState state;
          int          saveIndent;

          state = content();
          // Check if text should be print as CDATA section or unescaped
          // based on elements listed in the output format (the element
          // state) or whether we are inside a CDATA section or entity.
          // The caller's flags are only ever widened by the element
          // state, never overridden, and a missing state (text outside
          // of any element) leaves them untouched.
          if ( state != null ) {
              cdata = cdata || state.cdata;
              unescaped = unescaped || state.unescaped;
          }

          if ( cdata ) {
              StringBuffer buffer;
              int          index;

              // Print a CDATA section. The text is not escaped, but ']]>'
              // appearing in the code must be identified and dealt with:
              // the section is closed after ']]' and a new one is opened
              // starting with '>', so the terminator never appears inside
              // a section.
              // The contents of a text node is considered space preserving.
              buffer = new StringBuffer( text.length() );
              index = text.indexOf( "]]>" );
              while ( index >= 0 ) {
                  buffer.append( "<![CDATA[" );
                  buffer.append( text.substring( 0, index + 2 ) );
                  buffer.append( "]]>" );
                  text = text.substring( index + 2 );
                  index = text.indexOf( "]]>" );
              }
              buffer.append( "<![CDATA[" ).append( text ).append( "]]>" );
              saveIndent = _nextIndent;
              _nextIndent = 0;
              printText( buffer, true );
              _nextIndent = saveIndent;

          } else if ( unescaped ) {
              // If the text node of this element should be printed
              // unescaped, then cancel indentation and print it
              // directly without escaping.
              saveIndent = _nextIndent;
              _nextIndent = 0;
              printText( text, true );
              _nextIndent = saveIndent;

          } else if ( state != null && state.preserveSpace ) {
              // If preserving space then hold off indentation so no
              // excessive spaces are printed at line breaks, escape
              // the text content without replacing spaces and print
              // the text breaking only at line breaks.
              saveIndent = _nextIndent;
              _nextIndent = 0;
              printText( escape( text ), true );
              _nextIndent = saveIndent;

          } else {
              // This is the last, but the most common case of
              // printing without preserving spaces. If indentation was
              // requested, line will wrap at space boundaries.
              // All whitespaces will print as space characters.
              printText( escape( text ), false );
          }
      }
  
  
      /**
       * Returns the suitable entity reference for this character value,
       * or null if no such entity exists. Calling this method with
       * <tt>'&amp;'</tt> will return <tt>"&amp;amp;"</tt>.
       * Implemented by the derived, method-specific serializers.
       *
       * @param ch Character value
       * @return Character entity name, or null
       */
      protected abstract String getEntityRef( char ch );
  
  
      /**
       * Called to serialize the DOM element. The element is serialized
       * based on the serializer's method (XML, HTML, XHTML).
       *
       * @param elem The element to serialize
       */
      protected abstract void serializeElement( Element elem );
  
  
      /**
       * Comments and PIs cannot be serialized before the root element,
       * because the root element serializes the document type, which
       * generally comes first. Instead such PIs and comments are
       * accumulated inside a vector and serialized by calling this
       * method. Will be called when the root element is serialized
       * and when the document finished serializing.
       */
      protected void serializePreRoot()
      {
          // Nothing was ever queued.
          if ( _preRoot == null )
              return;

          // Print each queued entry on its own line, whitespace
          // preserved, then empty the queue so nothing prints twice.
          int index = 0;
          while ( index < _preRoot.size() ) {
              String entry = (String) _preRoot.elementAt( index );

              printText( entry, true );
              breakLine();
              ++index;
          }
          _preRoot.removeAllElements();
      }
  
  
      //---------------------------------------------//
      // Text pretty printing and formatting methods //
      //---------------------------------------------//
  
  
      /**
       * Called to print additional text. Each time this method is called
       * it accumulates more text. When a space is printed ({@link
       * #printSpace}) all the accumulated text becomes one part and is
       * added to the accumulated line. When a line is long enough, it can
       * be broken at its text boundary.
       *
       * @param text The text to print
       */
      protected final void printText( String text )
      {
        // Add this text to the accumulated text which will not be
        // printed until the next space break. No whitespace handling
        // is applied here; the text is appended as is.
        _text.append( text );
      }
  
  
      /**
       * Appends a range of characters to the accumulated text, without
       * any whitespace handling.
       * <p>
       * NOTE(review): the third argument is handed to
       * <tt>StringBuffer.append(char[], int, int)</tt>, which treats it
       * as a character count, not an end index, despite the parameter
       * name. Confirm what callers pass.
       *
       * @param chars The character array
       * @param start Offset of the first character to append
       * @param end Passed through as the number of characters to append
       */
      protected final void printText( char[] chars, int start, int end )
      {
        _text.append( chars, start, end );
      }
  
  
      /**
       * Called to print additional text with whitespace handling.
       * If spaces are preserved, the text is printed as if by calling
       * {@link #printText(String)} with a call to {@link #breakLine}
       * for each new line. If spaces are not preserved, the text is
       * broken at space boundaries if longer than the line width;
       * multiple spaces are printed as such, but spaces at beginning
       * of line are removed.
       *
       * @param text The text to print
       * @param preserveSpace Space preserving flag
       */
      protected final void printText( String text, boolean preserveSpace )
      {
          final int length = text.length();

          for ( int pos = 0 ; pos < length ; ++pos ) {
              char current = text.charAt( pos );

              if ( preserveSpace ) {
                  // Verbatim mode: every character is kept as written and
                  // spaces are never collapsed or used as break points.
                  // Only a line terminator character forces a line break
                  // (one break per '\n' or '\r' character).
                  if ( current == '\n' || current == '\r' )
                      breakLine();
                  else
                      _text.append( current );
              } else {
                  // Wrapping mode: any whitespace character, line
                  // terminators included, only separates two parts, and
                  // the line may be broken between parts.
                  switch ( current ) {
                  case ' ':
                  case '\f':
                  case '\t':
                  case '\n':
                  case '\r':
                      printSpace();
                      break;
                  default:
                      _text.append( current );
                      break;
                  }
              }
          }
      }
  
  
      protected final void printText( StringBuffer text, boolean preserveSpace )
      {
          int pos = 0;

          if ( preserveSpace ) {
              // Verbatim mode: copy the characters untouched and turn each
              // line terminator character into a forced line break. Spaces
              // are neither collapsed nor used to wrap the line.
              while ( pos < text.length() ) {
                  char current = text.charAt( pos++ );
                  boolean terminator = ( current == '\n' || current == '\r' );

                  if ( terminator )
                      breakLine();
                  else
                      _text.append( current );
              }
              return;
          }

          // Wrapping mode: whitespace of any kind (line terminators
          // included) ends the current part; everything else extends it.
          // The buffer length is re-read on every pass, as the buffer is
          // mutable.
          while ( pos < text.length() ) {
              char current = text.charAt( pos++ );
              boolean separator = ( current == ' ' || current == '\f' || current == '\t' ||
                                    current == '\n' || current == '\r' );

              if ( separator )
                  printSpace();
              else
                  _text.append( current );
          }
      }
  
  
      /**
       * Called to print a single space between text parts that may be
       * broken into separate lines. Must not be called to print a space
       * when preserving spaces. The text accumulated so far with {@link
       * #printText} will be added to the accumulated line, and a space
       * separator will be counted. If the line accumulated so far is
       * long enough, it will be printed.
       */
      protected final void printSpace()
      {
          // Bookkeeping used below:
          //   _line   - the parts already committed to the current line
          //   _spaces - separators counted since the last committed part
          //   _text   - the part currently being built by printText()

          if ( _text.length() == 0 ) {
              // Nothing pending, so this is simply one more space in a row.
              ++_spaces;
              return;
          }

          // Would the line, once the pending part is added, exceed the
          // configured width? The width counts the indentation, the
          // committed line, the separators and the pending part. A line
          // width of zero or less switches wrapping off.
          if ( _format.getLineWidth() > 0 &&
               _thisIndent + _line.length() + _spaces + _text.length() > _format.getLineWidth() ) {
              flushLine();
              try {
                  _writer.write( _format.getLineSeparator() );
              } catch ( IOException failure ) {
                  // Not thrown from here: only the first I/O failure is
                  // remembered in _exception.
                  if ( _exception == null )
                      _exception = failure;
              }
          }

          // Commit the separators counted so far (leaving _spaces at zero)
          // followed by the pending part, then start a fresh part.
          for ( ; _spaces > 0 ; --_spaces )
              _line.append( ' ' );
          _line.append( _text );
          _text = new StringBuffer( 20 );

          // Finally count the space that triggered this call.
          ++_spaces;
      }
  
  
      /**
       * Called to print a line consisting of the text accumulated so
       * far. This is equivalent to calling {@link #printSpace} but
       * forcing the line to print and starting a new line ({@link
       * #printSpace} will only start a new line if the current line
       * is long enough).
       */
      protected final void breakLine()
      {
          boolean pendingPart = _text.length() > 0;

          if ( pendingPart ) {
              // Same commit step as printSpace(): counted separators
              // first, then the pending part, then a fresh part buffer.
              for ( ; _spaces > 0 ; --_spaces )
                  _line.append( ' ' );
              _line.append( _text );
              _text = new StringBuffer( 20 );
          }

          // Unlike printSpace() the line is always emitted and always
          // followed by a line separator, whatever its length.
          flushLine();
          try {
              _writer.write( _format.getLineSeparator() );
          } catch ( IOException failure ) {
              // Not thrown from here: only the first I/O failure is
              // remembered in _exception.
              if ( _exception == null )
                  _exception = failure;
          }
      }
  
  
      /**
       * Flushes the line accumulated so far to the writer and get ready
       * to accumulate the next line. This method is called by {@link
       * #printText} and {@link #printSpace} when the accumulated line plus
       * accumulated text are too long to fit on a given line. At the end of
       * this method {@link #_line} is empty and {@link #_spaces} is zero.
       */
      private void flushLine()
      {
          // With no committed line there is nothing to write; in that case
          // no state (indentation, space count) is touched at all.
          if ( _line.length() == 0 )
              return;

          try {
              if ( _format.getIndenting() ) {
                  // Never let the indentation take more than half of the
                  // line width (when a positive width is configured).
                  int margin = _thisIndent;
                  if ( ( 2 * margin ) > _format.getLineWidth() && _format.getLineWidth() > 0 )
                      margin = _format.getLineWidth() / 2;

                  for ( int column = 0 ; column < margin ; ++column )
                      _writer.write( ' ' );
              }

              // Separators counted after the last part are dropped, not
              // written, and the next line uses the next indentation.
              _spaces = 0;
              _thisIndent = _nextIndent;

              _writer.write( _line.toString() );
              _line = new StringBuffer( 40 );
          } catch ( IOException failure ) {
              // Not thrown from here: only the first I/O failure is
              // remembered in _exception.
              if ( _exception == null )
                  _exception = failure;
          }
      }
  
  
      /**
       * Flush the output stream. Must be called when done printing
       * the document, otherwise some text might be buffered.
       */
      public void flush()
      {
          // Push out whatever is still buffered in the pending part and
          // the current line; breakLine() also ends it with a separator.
          breakLine();

          // Then make the underlying writer deliver its own buffer.
          try {
              _writer.flush();
          } catch ( IOException failure ) {
              // Not thrown from here: only the first I/O failure is
              // remembered in _exception.
              if ( _exception == null )
                  _exception = failure;
          }
      }
  
  
      /**
       * Increment the indentation for the next line.
       */
      protected void indent()
      {
        // Only the indentation of the *next* line grows by one step of
        // the format's indent size; flushLine() copies _nextIndent into
        // _thisIndent once the current line has been written.
        _nextIndent += _format.getIndent();
      }
  
  
      /**
       * Decrement the indentation for the next line.
       */
      protected void unindent()
      {
          // Step back by one indent size, but never past column zero.
          _nextIndent -= _format.getIndent();
          if ( _nextIndent < 0 )
              _nextIndent = 0;

          // When nothing at all is pending (no committed line, no counted
          // spaces, no part being built) the line about to be started is
          // already the de-indented one, so apply the new level at once.
          boolean nothingPending = ( _line.length() + _spaces + _text.length() ) == 0;
          if ( nothingPending )
              _thisIndent = _nextIndent;
      }
  
  
      /**
       * Print a document type public or system identifier URL.
       * Encapsulates the URL in double quotes, escapes non-printing
       * characters and prints it as if by {@link #printText}.
       *
       * @param url The document type url to print
       */
      protected void printDoctypeURL( String url )
      {
          // Prints the identifier between double quotes into the pending
          // text part. Printable ASCII other than the double quote is
          // copied unchanged; the double quote, control characters, DEL
          // and all non-ASCII characters are written as %HH escapes of
          // their UTF-8 bytes, which is the escaping XML 1.0 prescribes
          // for system identifiers.
          //
          // Every escape is exactly two hex digits per byte. Printing the
          // raw character value in hex would give "%9" for a tab and
          // "%20ac" for the euro sign, neither of which can be decoded.
          int[]   bytes = new int[ 4 ];
          int     count;
          int     code;
          int     i;
          int     j;
          char    ch;
          char    low;

          _text.append( '"' );
          for ( i = 0 ; i < url.length() ; ++i ) {
              ch = url.charAt( i );
              if ( ch != '"' && ch >= 0x20 && ch < 0x7F ) {
                  _text.append( ch );
                  continue;
              }

              // A high surrogate followed by a low surrogate stands for
              // one supplementary character; combine them and consume
              // both. An unpaired surrogate is encoded as it stands.
              code = ch;
              if ( ch >= 0xD800 && ch <= 0xDBFF && i + 1 < url.length() ) {
                  low = url.charAt( i + 1 );
                  if ( low >= 0xDC00 && low <= 0xDFFF ) {
                      code = 0x10000 + ( ( ch - 0xD800 ) << 10 ) + ( low - 0xDC00 );
                      ++i;
                  }
              }

              // UTF-8 encode the character into one to four bytes.
              if ( code < 0x80 ) {
                  bytes[ 0 ] = code;
                  count = 1;
              } else if ( code < 0x800 ) {
                  bytes[ 0 ] = 0xC0 | ( code >> 6 );
                  bytes[ 1 ] = 0x80 | ( code & 0x3F );
                  count = 2;
              } else if ( code < 0x10000 ) {
                  bytes[ 0 ] = 0xE0 | ( code >> 12 );
                  bytes[ 1 ] = 0x80 | ( ( code >> 6 ) & 0x3F );
                  bytes[ 2 ] = 0x80 | ( code & 0x3F );
                  count = 3;
              } else {
                  bytes[ 0 ] = 0xF0 | ( code >> 18 );
                  bytes[ 1 ] = 0x80 | ( ( code >> 12 ) & 0x3F );
                  bytes[ 2 ] = 0x80 | ( ( code >> 6 ) & 0x3F );
                  bytes[ 3 ] = 0x80 | ( code & 0x3F );
                  count = 4;
              }

              // Two hex digits per byte, zero padded.
              for ( j = 0 ; j < count ; ++j ) {
                  _text.append( '%' );
                  _text.append( Character.forDigit( bytes[ j ] >> 4, 16 ) );
                  _text.append( Character.forDigit( bytes[ j ] & 0xF, 16 ) );
              }
          }
          _text.append( '"' );
      }
  
  
      /**
       * Escapes a string so it may be printed as text content or attribute
       * value. Non printable characters are escaped using character references.
       * Where the format specifies a default entity reference, that reference
       * is used (e.g. <tt>&amp;lt;</tt>).
       *
       * @param source The string to escape
       * @return The escaped string
       */
      protected String escape( String source )
      {
          StringBuffer    result;
          int             i;
          int             code;
          char            ch;
          char            low;
          String          charRef;

          result = new StringBuffer( source.length() );
          for ( i = 0 ; i < source.length() ; ++i )  {
              ch = source.charAt( i );
              // If the character is not printable, print as character reference.
              // Non printables are below ASCII space but not tab or line
              // terminator, ASCII delete (0x7F), or above the last printable
              // character. (The delete test used to read 0xF7, which is the
              // division sign, so DEL slipped through unescaped.)
              if ( ( ch < ' ' && ch != '\t' && ch != '\n' && ch != '\r' ) ||
                   ch > _lastPrintable || ch == 0x7F ) {
                  // A surrogate pair stands for one supplementary character
                  // and must become a single reference to that character;
                  // references to the two halves would not be well-formed.
                  code = ch;
                  if ( ch >= 0xD800 && ch <= 0xDBFF && i + 1 < source.length() ) {
                      low = source.charAt( i + 1 );
                      if ( low >= 0xDC00 && low <= 0xDFFF ) {
                          code = 0x10000 + ( ( ch - 0xD800 ) << 10 ) + ( low - 0xDC00 );
                          ++i;
                      }
                  }
                  result.append( "&#" ).append( Integer.toString( code ) ).append( ';' );
              } else {
                  // If there is a suitable entity reference for this
                  // character, print it. The list of available entity
                  // references is almost but not identical between
                  // XML and HTML.
                  charRef = getEntityRef( ch );
                  if ( charRef == null )
                      result.append( ch );
                  else
                      result.append( '&' ).append( charRef ).append( ';' );
              }
          }
          return result.toString();
      }
  
  
      //--------------------------------//
      // Element state handling methods //
      //--------------------------------//
  
  
      /**
       * Return the state of the current element, or null
       * if not within any element (e.g. before entering
       * root element).
       *
       * @return Current element state, or null
       */
      protected ElementState getElementState()
      {
          // The innermost open element sits at the top of the state stack.
          if ( _elementStateCount != 0 )
              return _elementStates[ _elementStateCount - 1 ];

          // Empty stack: not inside any element.
          return null;
      }
  
  
      /**
       * Enter a new element state for the specified element.
       * Tag name and space preserving is specified, element
       * state is initially empty.
       *
       * @return The element state that was just entered (never null)
       */
      protected ElementState enterElementState( String tagName, boolean preserveSpace )
      {
          ElementState state;

          if ( _elementStateCount == _elementStates.length ) {
              ElementState[] newStates;
              int            i;

              // Need to create a larger array of states.
              // This does not happen often, unless the document
              // is really deep. The existing state objects are kept
              // and the new slots are filled with fresh ones.
              newStates = new ElementState[ _elementStates.length + 5 ];
              System.arraycopy( _elementStates, 0, newStates, 0, _elementStates.length );
              _elementStates = newStates;
              for ( i = _elementStateCount ; i < _elementStates.length ; ++i )
                  _elementStates[ i ] = new ElementState();
          }

          // State objects are pooled and reused for every element entered
          // at this depth, so every field has to be reset here. Otherwise
          // the cdata/unescaped flags set for an earlier element at the
          // same depth would silently carry over to this one.
          state = _elementStates[ _elementStateCount ];
          state.tagName = tagName;
          state.preserveSpace = preserveSpace;
          state.empty = true;
          state.afterElement = false;
          state.cdata = false;
          state.unescaped = false;
          ++_elementStateCount;
          return state;
      }
  
  
      /**
       * Leave the current element state and return to the
       * state of the parent element, or no state if this
       * is the root element.
       *
       * @return Previous element state, or null
       */
      protected ElementState leaveElementState()
      {
          // Not inside any element: nothing to pop.
          if ( _elementStateCount < 1 )
              return null;

          // Pop the current element.
          --_elementStateCount;

          // If that was the outermost element there is no parent state.
          if ( _elementStateCount == 0 )
              return null;

          // Otherwise the parent is now on top of the stack.
          return _elementStates[ _elementStateCount - 1 ];
      }
  
  
  }
  
  
  
  1.1                  
xml-xerces/java/src/org/apache/xml/serialize/ElementState.java
  
  Index: ElementState.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:  
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written 
   *    permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  
  package org.apache.xml.serialize;
  
  
  /**
   * Holds the state of the currently serialized element.
   *
   *
   * @version
   * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
   * @see BaseSerializer
   */
  class ElementState
  {


      // ---- identity ----

      /**
       * Tag name of the element this state describes.
       */
      String tagName;


      // ---- progress of serializing the element's content ----

      /**
       * True while nothing has been serialized inside the element yet;
       * turns false as soon as its first content is serialized.
       */
      boolean empty;


      /**
       * True if the node serialized last inside this element was
       * itself an element.
       */
      boolean afterElement;


      // ---- how the element's text content is written ----

      /**
       * True if whitespace inside the element is to be preserved.
       */
      boolean preserveSpace;


      /**
       * True if the element's text content is to be written as a
       * CDATA section.
       */
      boolean cdata;


      /**
       * True if the element's text content is to be written as raw,
       * unescaped characters.
       */
      boolean unescaped;


  }
  
  
  
  1.1                  
xml-xerces/java/src/org/apache/xml/serialize/HTMLEntities.res
  
  Index: HTMLEntities.res
  ===================================================================
  # $Id: HTMLEntities.res,v 1.1 1999/11/23 22:29:52 arkin Exp $
  #
  # @version $Revision: 1.1 $ $Date: 1999/11/23 22:29:52 $
  # @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
  #
  # Character entity references for markup-significant characters
  #
  quot 34
  amp 38
  lt 60
  gt 62
  nbsp 160
  #
  # Character entity references for ISO 8859-1 characters
  #
  iexcl 161
  cent 162
  pound 163
  curren 164
  yen 165
  brvbar 166
  sect 167
  uml 168
  copy 169
  ordf 170
  laquo 171
  not 172
  shy 173
  reg 174
  macr 175
  deg 176
  plusmn 177
  sup2 178
  sup3 179
  acute 180
  micro 181
  para 182
  middot 183
  cedil 184
  sup1 185
  ordm 186
  raquo 187
  frac14 188
  frac12 189
  frac34 190
  iquest 191
  Agrave 192
  Aacute 193
  Acirc 194
  Atilde 195
  Auml 196
  Aring 197
  AElig 198
  Ccedil 199
  Egrave 200
  Eacute 201
  Ecirc 202
  Euml 203
  Igrave 204
  Iacute 205
  Icirc 206
  Iuml 207
  ETH 208
  Ntilde 209
  Ograve 210
  Oacute 211
  Ocirc 212
  Otilde 213
  Ouml 214
  times 215
  Oslash 216
  Ugrave 217
  Uacute 218
  Ucirc 219
  Uuml 220
  Yacute 221
  THORN 222
  szlig 223
  agrave 224
  aacute 225
  acirc 226
  atilde 227
  auml 228
  aring 229
  aelig 230
  ccedil 231
  egrave 232
  eacute 233
  ecirc 234
  euml 235
  igrave 236
  iacute 237
  icirc 238
  iuml 239
  eth 240
  ntilde 241
  ograve 242
  oacute 243
  ocirc 244
  otilde 245
  ouml 246
  divide 247
  oslash 248
  ugrave 249
  uacute 250
  ucirc 251
  uuml 252
  yacute 253
  thorn 254
  yuml 255
  #
  # Character entity references for symbols, mathematical symbols, and Greek letters
  #
  # Latin Extended
  fnof 402
  #
  # Greek
  Alpha 913
  Beta 914
  Gamma 915
  Delta 916
  Epsilon 917
  Zeta 918
  Eta 919
  Theta 920
  Iota 921
  Kappa 922
  Lambda 923
  Mu 924
  Nu 925
  Xi 926
  Omicron 927
  Pi 928
  Rho 929
  Sigma 931
  Tau 932
  Upsilon 933
  Phi 934
  Chi 935
  Psi 936
  Omega 937
  alpha 945
  beta 946
  gamma 947
  delta 948
  epsilon 949
  zeta 950
  eta 951
  theta 952
  iota 953
  kappa 954
  lambda 955
  mu 956
  nu 957
  xi 958
  omicron 959
  pi 960
  rho 961
  sigmaf 962
  sigma 963
  tau 964
  upsilon 965
  phi 966
  chi 967
  psi 968
  omega 969
  thetasym 977
  upsih 978
  piv 982
  #
  # General Punctuation
  bull 8226
  hellip 8230
  prime 8242
  Prime 8243
  oline 8254
  frasl 8260
  #
  # Letterlike Symbols
  weierp 8472
  image 8465
  real 8476
  trade 8482
  alefsym 8501
  #
  # Arrows
  larr 8592
  uarr 8593
  rarr 8594
  darr 8595
  harr 8596
  crarr 8629
  lArr 8656
  uArr 8657
  rArr 8658
  dArr 8659
  hArr 8660
  #
  # Mathematical Operators
  forall 8704
  part 8706
  exist 8707
  empty 8709
  nabla 8711
  isin 8712
  notin 8713
  ni 8715
  prod 8719
  sum 8721
  minus 8722
  lowast 8727
  radic 8730
  prop 8733
  infin 8734
  ang 8736
  and 8743
  or 8744
  cap 8745
  cup 8746
  int 8747
  there4 8756
  sim 8764
  cong 8773
  asymp 8776
  ne 8800
  equiv 8801
  le 8804
  ge 8805
  sub 8834
  sup 8835
  nsub 8836
  sube 8838
  supe 8839
  oplus 8853
  otimes 8855
  perp 8869
  sdot 8901
  #
  # Miscellaneous Technical
  lceil 8968
  rceil 8969
  lfloor 8970
  rfloor 8971
  lang 9001
  rang 9002
  #
  # Geometric Shapes
  loz 9674
  #
  # Miscellaneous Symbols
  spades 9824
  clubs 9827
  hearts 9829
  diams 9830
  #
  # Character entity references for internationalization characters
  #
  # Latin Extended-A
  OElig 338
  oelig 339
  Scaron 352
  scaron 353
  Yuml 376
  #
  # Spacing Modifier Letters
  circ 710
  tilde 732
  #
  # General Punctuation
  ensp 8194
  emsp 8195
  thinsp 8201
  zwnj 8204
  zwj 8205
  lrm 8206
  rlm 8207
  ndash 8211
  mdash 8212
  lsquo 8216
  rsquo 8217
  sbquo 8218
  ldquo 8220
  rdquo 8221
  bdquo 8222
  dagger 8224
  Dagger 8225
  permil 8240
  lsaquo 8249
  rsaquo 8250
  euro 8364
  
  
  1.1                  
xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java
  
  Index: HTMLSerializer.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:  
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written 
   *    permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  
  package org.apache.xml.serialize;
  
  
  import java.io.IOException;
  import java.io.UnsupportedEncodingException;
  import java.io.OutputStream;
  import java.io.Writer;
  import java.util.Locale;
  
  import org.w3c.dom.Element;
  import org.w3c.dom.Attr;
  import org.w3c.dom.Node;
  import org.w3c.dom.NamedNodeMap;
  import org.xml.sax.DocumentHandler;
  import org.xml.sax.AttributeList;
  
  
  /**
   * Implements an HTML/XHTML serializer supporting both DOM and SAX
   * pretty serializing. HTML/XHTML mode is determined in the
   * constructor.  For usage instructions see {@link Serializer}.
   * <p>
   * If an output stream is used, the encoding is taken from the
   * output format (defaults to <tt>UTF8</tt>). If a writer is
   * used, make sure the writer uses the same encoding (if applies)
   * as specified in the output format.
   * <p>
   * The serializer supports both DOM and SAX. DOM serializing is done
   * by calling {@link #serialize} and SAX serializing is done by firing
   * SAX events and using the serializer as a document handler.
   * <p>
   * If an I/O exception occurs while serializing, the serializer
   * will not throw an exception directly, but only throw it
   * at the end of serializing (either DOM or SAX's {@link
   * org.xml.sax.DocumentHandler#endDocument}).
   * <p>
   * For elements that are not specified as whitespace preserving,
   * the serializer will potentially break long text lines at space
   * boundaries, indent lines, and serialize elements on separate
   * lines. Line terminators will be regarded as spaces, and
   * spaces at beginning of line will be stripped.
   * <p>
   * XHTML is slightly different than HTML:
   * <ul>
   * <li>Element/attribute names are lower case and case matters
   * <li>Attributes must specify value, even if empty string
   * <li>Empty elements must have '/' in empty tag
   * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
   * </ul>
   *
   *
   * @version
   * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
   * @see Serializer
   */
  public class HTMLSerializer
      extends BaseSerializer
  {
  
  
      /**
       * True if serializing in XHTML format. Kept per instance: the flag
       * is assigned by the constructor, so a shared (static) flag would
       * let the most recently constructed serializer switch every other
       * serializer between HTML and XHTML output.
       */
      private boolean _xhtml;
  
  
  
  
      /**
       * Constructs a new HTML/XHTML serializer depending on the value of
       * <tt>xhtml</tt>. The serializer cannot be used without calling
       * {@link #init} first.
       *
       * @param xhtml True if XHTML serializing
       */
      protected HTMLSerializer( boolean xhtml )
      {
          // The superclass no-argument constructor runs implicitly first;
          // all that is left to do is remember the requested output mode.
          _xhtml = xhtml;
      }
  
  
      /**
       * Constructs a new serializer. The serializer cannot be used without
       * calling {@link #init} first.
       */
      public HTMLSerializer()
      {
        // Plain HTML mode: delegate with the xhtml flag switched off.
        this( false );
      }
  
  
      /**
       * Constructs a new serializer that writes to the specified writer
       * using the specified output format. If <tt>format</tt> is null,
       * will use a default output format.
       *
       * @param writer The writer to use
       * @param format The output format to use, null for the default
       */
      public HTMLSerializer( Writer writer, OutputFormat format )
      {
          // Plain HTML mode.
          this( false );

          // Fall back to a default HTML output format when none is given,
          // then bind the serializer to the writer.
          init( writer, format != null
                        ? format
                        : new OutputFormat( OutputFormat.METHOD_HTML, null, false ) );
      }
  
  
      /**
       * Constructs a new serializer that writes to the specified output
       * stream using the specified output format. If <tt>format</tt>
       * is null, will use a default output format.
       *
       * @param output The output stream to use
       * @param format The output format to use, null for the default
       */
      public HTMLSerializer( OutputStream output, OutputFormat format )
      {
        this( false );
        if ( format == null )
            format = new OutputFormat( OutputFormat.METHOD_HTML, null, false );
        try {
            init( output, format );
        } catch ( UnsupportedEncodingException except ) {
            // Should never happend, we use UTF8 by default
        }
      }
  
  
      //------------------------------------------//
      // SAX document handler serializing methods //
      //------------------------------------------//
  
  
      public void startDocument()
      {
        // Intentionally empty: nothing is written at the start of an
        // HTML/XHTML document, because a browser might not respond
        // well to an <?xml ...?> declaration.
      }
  
  
      public void startElement( String tagName, AttributeList attrs )
      {
          int          i;
          boolean      preserveSpace;
          ElementState state;
          String       name;
          String       value;

          state = getElementState();
          if ( state == null ) {
              // If this is the root element handle it differently.
              // If the first root element in the document, serialize
              // the document's DOCTYPE. Space preserving defaults
              // to that of the output format.
              if ( ! _started )
                  startDocument( tagName );
              preserveSpace = _format.getPreserveSpace();
          } else {
              // For any other element, if first in parent, then
              // close parent's opening tag and use the parent's
              // space preserving.
              if ( state.empty )
                  printText( ">" );
              preserveSpace = state.preserveSpace;
              // Indent this element on a new line if the first
              // content of the parent element or immediately
              // following an element.
              if ( _format.getIndenting() && ! state.preserveSpace &&
                   ( state.empty || state.afterElement ) )
                  breakLine();
          }
          // Do not change the current element state yet.
          // This only happens in endElement().

          // XHTML: element names are lower case, DOM will be different.
          // Markup names are folded with a fixed locale: with the default
          // locale a Turkish system would turn "TITLE" into "t\u0131tle"
          // (dotless i) and produce an unknown tag.
          if ( _xhtml )
              printText( '<' + tagName.toLowerCase( Locale.ENGLISH ) );
          else
              printText( '<' + tagName );
          indent();

          // For each attribute serialize its name and value as one part,
          // separated with a space so the element can be broken on
          // multiple lines. Attribute names are lower-cased in both modes.
          if ( attrs != null ) {
              for ( i = 0 ; i < attrs.getLength() ; ++i ) {
                  printSpace();
                  name = attrs.getName( i ).toLowerCase( Locale.ENGLISH );
                  value = attrs.getValue( i );
                  if ( value != null )
                      printText( name + "=\"" + escape( value ) + '"' );
                  else if ( _xhtml )
                      // XHTML: print empty string for null values.
                      printText( name + "=\"\"" );
                  else
                      // HTML: null values print as attribute name, no value.
                      printText( name );
              }
          }
          if ( HTMLdtd.isPreserveSpace( tagName ) )
              preserveSpace = true;

          // Now it's time to enter a new element state
          // with the tag name and space preserving.
          // We still do not change the parent's element state.
          enterElementState( tagName, preserveSpace );
      }
  
  
      /**
       * SAX callback that closes the current element. An element that
       * received no content is terminated in place (<tt> /&gt;</tt> for
       * XHTML, <tt>&gt;</tt> for HTML); otherwise a closing tag is written,
       * except for HTML elements that only print their opening tag.
       * The parent's state, if any, is then marked as non-empty and
       * "after element"; when the root element is closed the output is
       * flushed.
       * <p>
       * The current element state is dereferenced without a null check, so
       * this must be paired with a preceding start of the same element.
       *
       * @param tagName The element tag name
       */
      public void endElement( String tagName )
      {
        ElementState state;

        // Works much like content() with additions for closing
        // an element. Note the different checks for the closed
        // element's state and the parent element's state.
        unindent();
        state = getElementState();
        if ( state.empty ) {
            if ( _xhtml )
                printText( " />" );
            else
                printText( ">" );
        } else {
            // XHTML: element names are lower case, DOM will be different.
            // Lower-casing uses a fixed locale so the tag name does not
            // depend on the JVM default locale.
            // HTML: some elements do not print closing tag (e.g. LI)
            if ( _xhtml )
                printText( "</" + tagName.toLowerCase( java.util.Locale.ENGLISH ) + ">" );
            else if ( ! HTMLdtd.isOnlyOpening( tagName ) ) {
                // This element is not empty and that last content was
                // another element, so print a line break before this
                // element's closing tag.
                // [keith] Provided this is not an anchor.
                if ( ! tagName.equalsIgnoreCase( "A" ) && _format.getIndenting() &&
                     ! state.preserveSpace && state.afterElement )
                    breakLine();
                printText( "</" + tagName + ">" );
            }
        }
        // Leave the element state and update that of the parent
        // (if we're not root) to not empty and after element.
        state = leaveElementState();
        if ( state != null ) {
            state.afterElement = true;
            state.empty = false;
        } else {
            // [keith] If we're done printing the document but don't
            // get to call endDocument(), the buffer should be flushed.
            flush();
        }
      }
  
  
      //------------------------------------------//
      // Generic node serializing methods         //
      //------------------------------------------//
  
  
      /**
       * Called to serialize the document's DOCTYPE by the root element.
       * The document type declaration must name the root element,
       * but the root element is only known when that element is serialized,
       * and not at the start of the document.
       * <p>
       * This method will check if it has not been called before
       * ({@link #_started}), will serialize the document type declaration,
       * and will serialize all pre-root comments and PIs that were
       * accumulated in the document (see {@link #serializePreRoot}).
       * Pre-root will be serialized even if this is not the first root
       * element of the document.
       * <p>
       * The name written in the declaration is fixed: <tt>HTML</tt> for
       * HTML output and lower case <tt>html</tt> for XHTML output, matching
       * the lower case element names that XHTML output uses.
       *
       * @param rootTagName The root element tag name (currently not used
       *  when writing the declaration)
       */
      protected void startDocument( String rootTagName )
      {
        String       publicId;
        String       systemId;
        String       doctypeName;

        // Not supported in HTML/XHTML, but we still have to switch
        // out of DTD mode.
        leaveDTD();
        if ( ! _started ) {
            // If the public and system identifiers were not specified
            // in the output format, use the appropriate ones for HTML
            // or XHTML.
            publicId = _format.getDoctypePublic();
            systemId = _format.getDoctypeSystem();
            if ( publicId == null && systemId == null ) {
                if ( _xhtml ) {
                    publicId = OutputFormat.DOCTYPE_XHTML_PUBLIC;
                    systemId = OutputFormat.DOCTYPE_XHTML_SYSTEM;
                } else {
                    publicId = OutputFormat.DOCTYPE_HTML_PUBLIC;
                    systemId = OutputFormat.DOCTYPE_HTML_SYSTEM;
                }
            }

            // XHTML is case sensitive and its root element is printed
            // in lower case, so the declared name must be lower case too.
            // Both names have the same length, which keeps the system
            // identifier continuation line below aligned.
            doctypeName = _xhtml ? "html" : "HTML";

            // XHTML: If public identifier and system identifier
            //  specified, print them, else print just system identifier
            // HTML: If public identifier specified, print it with
            //  system identifier, if specified.
            if ( publicId != null && ( ! _xhtml || systemId != null )  ) {
                printText( "<!DOCTYPE " + doctypeName + " PUBLIC " );
                printDoctypeURL( publicId );
                if ( systemId != null ) {
                    if ( _format.getIndenting() ) {
                        breakLine();
                        printText( "                      " );
                    }
                    printDoctypeURL( systemId );
                }
                printText( ">" );
                breakLine();
            } else if ( systemId != null ) {
                printText( "<!DOCTYPE " + doctypeName + " SYSTEM " );
                printDoctypeURL( systemId );
                printText( ">" );
                breakLine();
            }
        }

        _started = true;
        // Always serialize these, even if not the first root element.
        serializePreRoot();
      }
  
  
      /**
       * Called to serialize a DOM element. Equivalent to calling [EMAIL 
PROTECTED]
       * #startElement}, [EMAIL PROTECTED] #endElement} and serializing 
everything
       * inbetween, but better optimized.
       */
      protected void serializeElement( Element elem )
      {
        Attr         attr;
        NamedNodeMap attrMap;
        int          i;
        Node         child;
        ElementState state;
        boolean      preserveSpace;
        String       name;
        String       value;
  
        state = getElementState();
        if ( state == null ) {
            // If this is the root element handle it differently.
            // If the first root element in the document, serialize
            // the document's DOCTYPE. Space preserving defaults
            // to that of the output format.
            if ( ! _started )
                startDocument( elem.getTagName() );
            preserveSpace = _format.getPreserveSpace();
        } else {
            // For any other element, if first in parent, then
            // close parent's opening tag and use the parnet's
            // space preserving.
            if ( state.empty )
                printText( ">" );
            preserveSpace = state.preserveSpace;
            // Indent this element on a new line if the first
            // content of the parent element or immediately
            // following an element.
            if ( _format.getIndenting() && ! state.preserveSpace &&
                 ( state.empty || state.afterElement ) )
                breakLine();
        }
        // Do not change the current element state yet.
        // This only happens in endElement().
  
        // XHTML: element names are lower case, DOM will be different
        if ( _xhtml )
            printText( '<' + elem.getTagName().toLowerCase() );
        else
            printText( '<' + elem.getTagName() );
        indent();
  
        // Lookup the element's attribute, but only print specified
        // attributes. (Unspecified attributes are derived from the DTD.
        // For each attribute print it's name and value as one part,
        // separated with a space so the element can be broken on
        // multiple lines.
        attrMap = elem.getAttributes();
        if ( attrMap != null ) {
            for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
                attr = (Attr) attrMap.item( i );
                name = attr.getName().toLowerCase();
                value = attr.getValue();
                if ( attr.getSpecified() ) {
                    printSpace();
                    if ( _xhtml ) {
                        // XHTML: print empty string for null values.
                        if ( value == null )
                            printText( name + "=\"\"" );
                        else
                            printText( name + "=\"" + escape( value ) + '"' );
                    } else {
                        // HTML: Non values print as attribute name, no value.
                        if ( value == null )
                            printText( name );
                        else
                            printText( name + "=\"" + escape( value ) + '"' );
                    }
                }
            }
        }
        if ( HTMLdtd.isPreserveSpace( elem.getTagName() ) )
            preserveSpace = true;
        
        // If element has children, or if element is not an empty tag,
        // serialize an opening tag.
        if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( elem.getTagName() ) 
) {
            // Enter an element state, and serialize the children
            // one by one. Finally, end the element.
            enterElementState( elem.getTagName(), preserveSpace );
            child = elem.getFirstChild();
            while ( child != null ) {
                serializeNode( child );
                child = child.getNextSibling();
            }
            endElement( elem.getTagName() );
        } else {
            unindent();
            // XHTML: Close empty tag with ' />' so it's XML and HTML 
compatible.
            // HTML: Empty tags are defined as such in DTD no in document.
            if ( _xhtml )
                printText( " />" );
            else
                printText( ">" );
            if ( state != null ) {
                // After element but parent element is no longer empty.
                state.afterElement = true;
                state.empty = false;
            }
        }
      }
  
  
      /**
       * Serializes character data, overriding the requested mode when the
       * enclosing element is SCRIPT or STYLE (matched ignoring case):
       * XHTML output forces the text into a CDATA section, HTML output
       * prints it unescaped. Any other content is passed to the
       * superclass with the caller's flags untouched.
       *
       * @param text The text to serialize
       * @param cdata Requested CDATA mode, passed on for ordinary elements
       * @param unescaped Requested unescaped mode, passed on for ordinary
       *  elements
       */
      protected void characters( String text, boolean cdata, boolean unescaped )
      {
        ElementState current;
        boolean      rawText;

        // content() yields the state of the element receiving this text.
        current = content();
        rawText = false;
        if ( current != null ) {
            if ( current.tagName.equalsIgnoreCase( "SCRIPT" ) )
                rawText = true;
            else if ( current.tagName.equalsIgnoreCase( "STYLE" ) )
                rawText = true;
        }

        // Ordinary element (or no element): honour the caller's flags.
        if ( ! rawText ) {
            super.characters( text, cdata, unescaped );
            return;
        }

        // SCRIPT/STYLE: CDATA for XHTML, unescaped text for HTML.
        if ( _xhtml )
            super.characters( text, true, false );
        else
            super.characters( text, false, true );
      }
  
  
      /**
       * Returns the HTML character reference name for a character by
       * delegating to <tt>HTMLdtd.fromChar</tt>, which returns null when
       * no reference is defined for that character.
       *
       * @param ch The character to look up
       * @return The reference name, or null
       */
      protected String getEntityRef( char ch )
      {
          return HTMLdtd.fromChar( ch );
      }
  
  
  }
  
  
  
  
  
  1.1                  xml-xerces/java/src/org/apache/xml/serialize/HTMLdtd.java
  
  Index: HTMLdtd.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:  
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written 
   *    permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  
  package org.apache.xml.serialize;
  
  
  import java.io.InputStream;
  import java.io.InputStreamReader;
  import java.io.BufferedReader;
  import java.util.Hashtable;
  
  
  /**
   * Utility class for accessing information specific to HTML documents.
   * The HTML DTD is expressed as three utility function groups. Two methods
   * allow for checking whether an element requires an open tag on printing
   * ({@link #isEmptyTag}) or on parsing ({@link #isOptionalClosing}).
   * <P>
   * Two other methods translate character references from name to value and
   * from value to name. A small entities resource is loaded into memory the
   * first time any of these methods is called for fast and efficient access.
   * <P>
   * Element names are matched without regard to case, using a fixed locale
   * so that lookups do not depend on the JVM default locale.
   *
   *
   * @version
   * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
   */
  final class HTMLdtd
  {


      /**
       * Table of reverse character reference mapping. Character codes are held
       * as single-character strings, mapped to their reference name.
       * Null until the entities resource has been loaded successfully.
       */
      private static Hashtable        _byChar;


      /**
       * Table of entity name to value mapping. Entities are held as strings,
       * character references as <TT>Character</TT> objects.
       * Null until the entities resource has been loaded successfully.
       */
      private static Hashtable        _byName;


      /**
       * Locates the HTML entities file that is loaded upon initialization.
       * This file is a resource loaded with the default class loader.
       */
      private static final String     ENTITIES_RESOURCE = "HTMLEntities.res";


      /**
       * Holds element definitions: upper case element name mapped to an
       * <TT>Integer</TT> holding the element's flags.
       */
      private static Hashtable        _elemDefs;


      /**
       * Element is empty.
       */
      private static final int EMPTY        = 0x0001;

      /**
       * Element contains element content only.
       */
      private static final int ELEM_CONTENT = 0x0002;

      /**
       * Element preserve spaces.
       */
      private static final int PRESERVE     = 0x0004;

      /**
       * Optional closing tag.
       */
      private static final int OPT_CLOSING  = 0x0008;

      /**
       * Only opening tag should be printed.
       */
      private static final int ONLY_OPENING = 0x0010;

      /**
       * Allowed to appear in head.
       */
      private static final int ALLOWED_HEAD = 0x0020;

      /**
       * When opened, closes P.
       */
      private static final int CLOSE_P      = 0x0040;

      /**
       * When opened, closes DD or DT.
       */
      private static final int CLOSE_DD_DT  = 0x0080;

      /**
       * When opened, closes itself.
       */
      private static final int CLOSE_SELF   = 0x0100;


      /**
       * When opened, closes another table section.
       */
      private static final int CLOSE_TABLE  = 0x0200;

      /**
       * When opened, closes TH or TD.
       */
      private static final int CLOSE_TH_TD  = 0x0400;




      /**
       * Returns true if element is declared to be empty. HTML elements are
       * defined as empty in the DTD, not by the document syntax.
       * 
       * @param tagName The element tag name (case insensitive)
       * @return True if element is empty
       */
      public static boolean isEmptyTag( String tagName )
      {
          // BR AREA LINK IMG PARAM HR INPUT COL BASE META BASEFONT ISINDEX
          return isElement( tagName, EMPTY );
      }


      /**
       * Returns true if element is declared to have element content.
       * Whitespaces appearing inside element content will be ignored,
       * other text will simply report an error.
       * 
       * @param tagName The element tag name (case insensitive)
       * @return True if element content
       */
      public static boolean isElementContent( String tagName )
      {
          // DL OL UL SELECT OPTGROUP TABLE THEAD TFOOT TBODY COLGROUP TR
          // HEAD HTML
          return isElement( tagName, ELEM_CONTENT );
      }


      /**
       * Returns true if element's textual contents preserves spaces.
       * This applies to PRE, TEXTAREA, SCRIPT and STYLE; all other HTML
       * elements do not preserve space.
       * 
       * @param tagName The element tag name (case insensitive)
       * @return True if element's text content preserves spaces
       */
      public static boolean isPreserveSpace( String tagName )
      {
          // PRE TEXTAREA SCRIPT STYLE
          return isElement( tagName, PRESERVE );
      }


      /**
       * Returns true if element's closing tag is optional and need not
       * exist. An error will not be reported for such elements if they
       * are not closed. For example, <tt>LI</tt> is most often not closed.
       *
       * @param tagName The element tag name (case insensitive)
       * @return True if closing tag implied
       */
      public static boolean isOptionalClosing( String tagName )
      {
          // BODY HEAD HTML P DT DD LI OPTION THEAD TFOOT TBODY TR COLGROUP
          // TH TD FRAME
          return isElement( tagName, OPT_CLOSING );
      }


      /**
       * Returns true if element's closing tag is generally not printed.
       * For example, <tt>LI</tt> should not print the closing tag.
       *
       * @param tagName The element tag name (case insensitive)
       * @return True if only opening tag should be printed
       */
      public static boolean isOnlyOpening( String tagName )
      {
          // DT DD LI OPTION
          return isElement( tagName, ONLY_OPENING );
      }


      /**
       * Returns true if the opening of one element (<tt>tagName</tt>) implies
       * the closing of another open element (<tt>openTag</tt>). For example,
       * every opening <tt>LI</tt> will close the previously open <tt>LI</tt>,
       * and every opening <tt>BODY</tt> will close the previously open
       * <tt>HEAD</tt>.
       *
       * @param tagName The newly opened element
       * @param openTag The already opened element
       * @return True if closing tag closes opening tag
       */    
      public static boolean isClosing( String tagName, String openTag )
      {
          // BODY (closing HTML, end of document)
          // HEAD (BODY, closing HTML, end of document)
          // HEAD is closed by anything that may not appear inside it.
          if ( openTag.equalsIgnoreCase( "HEAD" ) )
              return ! isElement( tagName, ALLOWED_HEAD );
          // P (P, H1-H6, UL, OL, DL, PRE, DIV, BLOCKQUOTE, FORM, HR, TABLE,
          // ADDRESS, FIELDSET, closing BODY, closing HTML, end of document)
          if ( openTag.equalsIgnoreCase( "P" ) )
              return isElement( tagName, CLOSE_P );
          // DT (DD)
          // DD (DT, closing DL)
          if ( openTag.equalsIgnoreCase( "DT" ) || openTag.equalsIgnoreCase( "DD" ) )
              return isElement( tagName, CLOSE_DD_DT );
          // LI (LI, closing UL/OL)
          // OPTION (OPTION, OPTGROUP closing or opening, closing SELECT)
          if ( openTag.equalsIgnoreCase( "LI" ) || openTag.equalsIgnoreCase( "OPTION" ) )
              return isElement( tagName, CLOSE_SELF );
          // THEAD (TFOOT, TBODY, TR, closing TABLE)
          // TFOOT (TBODY, TR, closing TABLE)
          // TBODY (TBODY, closing TABLE)
          // COLGROUP (THEAD, TBODY, TR, closing TABLE)
          // TR (TR, closing THEAD, TFOOT, TBODY, TABLE)
          if ( openTag.equalsIgnoreCase( "THEAD" ) || openTag.equalsIgnoreCase( "TFOOT" ) ||
               openTag.equalsIgnoreCase( "TBODY" ) || openTag.equalsIgnoreCase( "TR" ) ||
               openTag.equalsIgnoreCase( "COLGROUP" ) )
              return isElement( tagName, CLOSE_TABLE );
          // TH (TD, TH, closing TR)
          // TD (TD, TH, closing TR)
          if ( openTag.equalsIgnoreCase( "TH" ) || openTag.equalsIgnoreCase( "TD" ) )
              return isElement( tagName, CLOSE_TH_TD );
          return false;
      }


      /**
       * Returns the value of an HTML character reference by its name. If the
       * reference is not found or was not defined as a character reference,
       * returns EOF (-1).
       *
       * @param name Name of character reference
       * @return Character code or EOF (-1)
       * @throws RuntimeException If the entities resource cannot be loaded
       */
      public static int charFromName( String name )
      {
          Object    value;

          initialize();
          value = _byName.get( name );
          // instanceof is false for null, so a missing name yields -1 too.
          if ( value instanceof Character )
              return ( (Character) value ).charValue();
          else
              return -1;
      }


      /**
       * Returns the name of an HTML character reference based on its character
       * value. Only valid for entities defined from character references. If no
       * such character value was defined, return null.
       *
       * @param value Character value of entity
       * @return Entity's name or null
       * @throws RuntimeException If the entities resource cannot be loaded
       */
      public static String fromChar( char value )
      {
          initialize();
          return (String) _byChar.get( String.valueOf( value ) );
      }


      /**
       * Initialize upon first access. Will load all the HTML character
       * references into a list that is accessible by name or character value
       * and is optimized for character substitution. This method may be
       * called any number of times but will load the resource only once.
       * <P>
       * The tables are built locally and only published once the whole
       * resource has been read, so a failed load does not leave empty or
       * partial tables behind: the next call simply tries again. The method
       * is synchronized so that concurrent first calls cannot observe one
       * table without the other.
       *
       * @throws RuntimeException If the resource is missing or malformed
       */
      private static synchronized void initialize()
      {
          InputStream     is = null;
          BufferedReader  reader;
          Hashtable       byName;
          Hashtable       byChar;
          int             index;
          String          name;
          String          value;
          int             code;
          String          line;

          // Make sure not to initialize twice.
          if ( _byName != null )
              return;
          try
          {
              byName = new Hashtable();
              byChar = new Hashtable();
              is = HTMLdtd.class.getResourceAsStream( ENTITIES_RESOURCE );
              if ( is == null )
                  throw new RuntimeException( "The resource [" +
                                              ENTITIES_RESOURCE + "] could not be found." );
              reader = new BufferedReader( new InputStreamReader( is ) );
              for ( line = reader.readLine() ; line != null ; line = reader.readLine() )
              {
                  // Skip blank lines and comment lines.
                  if ( line.length() == 0 || line.charAt( 0 ) == '#' )
                      continue;
                  // Each entry is "name code [anything else]".
                  index = line.indexOf( ' ' );
                  if ( index > 1 )
                  {
                      name = line.substring( 0, index );
                      ++index;
                      if ( index < line.length() )
                      {
                          value = line.substring( index );
                          index = value.indexOf( ' ' );
                          if ( index > 0 )
                              value = value.substring( 0, index );
                          code = Integer.parseInt( value );
                          defineEntity( byName, byChar, name, (char) code );
                      }
                  }
              }
              // Publish only complete tables; _byName doubles as the
              // "already initialized" guard so it is assigned last.
              _byChar = byChar;
              _byName = byName;
          }
          catch ( Exception except )
          {
              throw new RuntimeException( "The resource [" + ENTITIES_RESOURCE
                                          + "] could not load: " +
                                          except.toString() );
          }
          finally
          {
              if ( is != null )
              {
                  try
                  {
                      is.close();
                  }
                  catch ( Exception except )
                  {
                      // Nothing useful can be done if closing fails; the
                      // outcome of the load itself is already decided.
                  }
              }
          }
      }


      /**
       * Defines a new character reference in the given tables. The
       * reference's name and value are supplied. Nothing happens if the
       * character reference is already defined.
       * <P>
       * Unlike internal entities, character references are a string to single
       * character mapping. They are used to map non-ASCII characters both on
       * parsing and printing, primarily for HTML documents. '&amp;amp;' is an
       * example of a character reference.
       *
       * @param byName The name to character table being built
       * @param byChar The character to name table being built
       * @param name The entity's name
       * @param value The entity's value
       */
      private static void defineEntity( Hashtable byName, Hashtable byChar,
                                        String name, char value )
      {
          if ( byName.get( name ) == null )
          {
              byName.put( name, new Character( value ) );
              byChar.put( String.valueOf( value ), name );
          }
      }


      /**
       * Registers an element and its flags in the element definitions table.
       *
       * @param name The element name (upper case)
       * @param flags Combination of the element flags defined in this class
       */
      private static void defineElement( String name, int flags )
      {
          _elemDefs.put( name, new Integer( flags ) );
      }


      /**
       * Returns true if the named element is defined and has the given flag
       * set. The name is upper cased with a fixed locale before lookup, so
       * the result does not depend on the JVM default locale (with a Turkish
       * default locale "img" would otherwise not match "IMG").
       *
       * @param name The element name (case insensitive)
       * @param flag The flag to test for
       * @return True if the element is defined with that flag
       */
      private static boolean isElement( String name, int flag )
      {
          Integer flags;

          flags = (Integer) _elemDefs.get( name.toUpperCase( java.util.Locale.ENGLISH ) );
          if ( flags == null )
              return false;
          else
              return ( ( flags.intValue() & flag ) != 0 );
      }


      static
      {
          _elemDefs = new Hashtable();
          defineElement( "ADDRESS", CLOSE_P );
          defineElement( "AREA", EMPTY );
          defineElement( "BASE", EMPTY );
          defineElement( "BASEFONT", EMPTY );
          defineElement( "BLOCKQUOTE", CLOSE_P );
          defineElement( "BODY", OPT_CLOSING );
          defineElement( "BR", EMPTY );
          defineElement( "COL", EMPTY );
          defineElement( "COLGROUP", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
          defineElement( "DD", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT );
          defineElement( "DIV", CLOSE_P );
          defineElement( "DL", ELEM_CONTENT | CLOSE_P );
          defineElement( "DT", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT );
          defineElement( "FIELDSET", CLOSE_P );
          defineElement( "FORM", CLOSE_P );
          defineElement( "FRAME", OPT_CLOSING );
          defineElement( "H1", CLOSE_P );
          defineElement( "H2", CLOSE_P );
          defineElement( "H3", CLOSE_P );
          defineElement( "H4", CLOSE_P );
          defineElement( "H5", CLOSE_P );
          defineElement( "H6", CLOSE_P );
          defineElement( "HEAD", ELEM_CONTENT | OPT_CLOSING );
          defineElement( "HR", EMPTY | CLOSE_P );
          defineElement( "HTML", ELEM_CONTENT | OPT_CLOSING );
          defineElement( "IMG", EMPTY );
          defineElement( "INPUT", EMPTY );
          defineElement( "ISINDEX", EMPTY | ALLOWED_HEAD );
          defineElement( "LI", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF );
          defineElement( "LINK", EMPTY | ALLOWED_HEAD );
          defineElement( "META", EMPTY | ALLOWED_HEAD );
          defineElement( "OL", ELEM_CONTENT | CLOSE_P );
          defineElement( "OPTGROUP", ELEM_CONTENT );
          defineElement( "OPTION", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF );
          defineElement( "P", OPT_CLOSING | CLOSE_P | CLOSE_SELF );
          defineElement( "PARAM", EMPTY );
          defineElement( "PRE", PRESERVE | CLOSE_P );
          defineElement( "SCRIPT", ALLOWED_HEAD | PRESERVE );
          defineElement( "SELECT", ELEM_CONTENT );
          defineElement( "STYLE", ALLOWED_HEAD | PRESERVE );
          defineElement( "TABLE", ELEM_CONTENT | CLOSE_P );
          defineElement( "TBODY", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
          defineElement( "TD", OPT_CLOSING | CLOSE_TH_TD );
          defineElement( "TEXTAREA", PRESERVE );
          defineElement( "TFOOT", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
          defineElement( "TH", OPT_CLOSING | CLOSE_TH_TD );
          defineElement( "THEAD", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
          defineElement( "TITLE", ALLOWED_HEAD );
          defineElement( "TR", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
          defineElement( "UL", ELEM_CONTENT | CLOSE_P );
      }



  }
  
  
  
  
  1.1                  xml-xerces/java/src/org/apache/xml/serialize/Makefile
  
  Index: Makefile
  ===================================================================
  include ../../../../../src/Makefile.incl
  # Makefile for directory ./org/apache/xml/serialize
  #
  # This makefile depends on the following environment variables
  # already being defined:
  #
  #   JAVAC   Java compiler (with options)
  #   RM      Host delete file command (with options)
  #
  # In addition, the CLASSPATH environment variable must
  # include the absolute path of the base source directory.
  
  # Class files produced from the sources in this directory.
  TARGETS=\
        OutputFormat.class\
        Serializer.class\
        BaseSerializer.class\
        XMLSerializer.class\
        HTMLSerializer.class\
        XHTMLSerializer.class\
        ElementState.class\
        HTMLdtd.class
  
  # Default goal: "dirs" has no recipe here, "compile" builds every target.
  all: dirs compile
  
  dirs:
  
  compile: ${TARGETS}
  
  # Clear the built-in suffix list, then register only .class and .java.
  .SUFFIXES:
  
  .SUFFIXES: .class .java
  
  # Suffix rule: compile a .java source into its .class with ${JAVAC}, then
  # touch the classfiles_updated marker in the top-level src directory.
  .java.class:
        ${JAVAC} $<
  
        touch ../../../../../src/classfiles_updated
  
  # Remove the class files in this directory.
  clean:
        ${RM} *.class
  
  
  
  
  1.1                  
xml-xerces/java/src/org/apache/xml/serialize/OutputFormat.java
  
  Index: OutputFormat.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:  
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written 
   *    permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  
  package org.apache.xml.serialize;
  
  
  import java.util.Hashtable;
  
  import org.w3c.dom.Document;
  import org.w3c.dom.DocumentType;
  import org.w3c.dom.Node;
  // XXX  Delayed until the HTML DOM is introduced into the code base
  // import org.w3c.dom.html.HTMLDocument;
  
  
  /**
   * Specifies an output format to control the serializer. Based on the
   * XSLT specification for output format, plus additional parameters.
   * Used to select the suitable serializer and determine how the
   * document should be formatted on output.
   * <p>
   * The two interesting constructors are:
   * <ul>
   * <li>{@link #OutputFormat(String,String,boolean)} creates a format
   *  for the specified method (XML, HTML, etc), encoding and indentation
   * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
   *  compatible with the document type (XML, HTML), encoding and indentation
   * </ul>
   * 
   *
   * @version
   * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
   *         <a href="mailto:[EMAIL PROTECTED]">Keith Visco</a>
   * @see Serializer
   */
  public class OutputFormat
  {


      /**
       * If indentation is turned on, the default indentation
       * level is 4.
       *
       * @see #setIndenting(boolean)
       */
      public static final int DEFAULT_INDENT = 4;


      /**
       * The default encoding for Web documents is UTF8.
       *
       * @see #getEncoding()
       */
      public static final String DEFAULT_ENCODING = "UTF8";


      /**
       * The default line width at which to break long lines
       * when indenting. This is set to 72.
       */
      public static final int DEFAULT_LINE_WIDTH = 72;


      /**
       * The output method for XML documents.
       */
      public static final String METHOD_XML = "xml";


      /**
       * The output method for HTML documents.
       */
      public static final String METHOD_HTML = "html";


      /**
       * The output method for HTML documents as XHTML.
       */
      public static final String METHOD_XHTML = "xhtml";


      /**
       * The output method for text documents.
       */
      public static final String METHOD_TEXT = "text";


      /**
       * Line separator for Unix systems (<tt>\n</tt>).
       */
      public static final String LINE_SEPARATOR_UNIX = "\n";


      /**
       * Line separator for Windows systems (<tt>\r\n</tt>).
       */
      public static final String LINE_SEPARATOR_WIN = "\r\n";


      /**
       * Line separator for Macintosh systems (<tt>\r</tt>).
       */
      public static final String LINE_SEPARATOR_MAC = "\r";


      /**
       * Line separator for the Web (<tt>\n</tt>).
       */
      public static final String LINE_SEPARATOR_WEB = "\n";


      /**
       * Public identifier for HTML document type.
       */
      public static final String DOCTYPE_HTML_PUBLIC = "-//W3C//DTD HTML 4.0//EN";


      /**
       * System identifier for HTML document type.
       * <p>
       * NOTE(review): this is the same value as
       * {@link #DOCTYPE_XHTML_SYSTEM} -- confirm that is intended.
       */
      public static final String DOCTYPE_HTML_SYSTEM =
          "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";


      /**
       * Public identifier for XHTML document type.
       */
      public static final String DOCTYPE_XHTML_PUBLIC = "-//W3C//DTD XHTML 1.0 Strict//EN";


      /**
       * System identifier for XHTML document type.
       */
      public static final String DOCTYPE_XHTML_SYSTEM =
          "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";


      /**
       * Holds the output method specified for this document,
       * or null if no method was specified.
       */
      private String _method;


      /**
       * Specifies the version of the output method.
       */
      private String _version;


      /**
       * The indentation level, or zero if no indentation
       * was requested.
       */
      private int _indent = 0;


      /**
       * The encoding to use, if an input stream is used.
       * Initially {@link #DEFAULT_ENCODING}.
       */
      private String _encoding = DEFAULT_ENCODING;


      /**
       * The specified media type or null.
       */
      private String _mediaType;


      /**
       * The specified document type system identifier, or null.
       */
      private String _doctypeSystem;


      /**
       * The specified document type public identifier, or null.
       */
      private String _doctypePublic;


      /**
       * True if the XML declaration should be omitted.
       */
      private boolean _omitXmlDeclaration = false;


      /**
       * True if the document type should be marked as standalone.
       */
      private boolean _standalone = false;


      /**
       * List of element tag names whose text node children must
       * be output as CDATA.
       */
      private String[] _cdataElements;


      /**
       * List of element tag names whose text node children must
       * be output unescaped.
       */
      private String[] _nonEscapingElements;


      /**
       * The selected line separator.
       */
      private String _lineSeparator = LINE_SEPARATOR_WEB;


      /**
       * The line width at which to wrap long lines when indenting.
       */
      private int _lineWidth = DEFAULT_LINE_WIDTH;


      /**
       * True if spaces should be preserved in elements that do not
       * specify otherwise, or specify the default behavior.
       */
      private boolean _preserve = false;


      /**
       * Constructs a new output format with the default values.
       */
      public OutputFormat()
      {
      }


      /**
       * Constructs a new output format with the default values for
       * the specified method and encoding. If <tt>indenting</tt>
       * is true, the document will be pretty printed with the default
       * indentation level and default line wrapping.
       *
       * @param method The specified output method
       * @param encoding The specified encoding
       * @param indenting True for pretty printing
       * @see #setEncoding
       * @see #setIndenting
       * @see #setMethod
       */
      public OutputFormat( String method, String encoding, boolean indenting )
      {
          setMethod( method );
          setEncoding( encoding );
          setIndenting( indenting );
      }


      /**
       * Constructs a new output format with the proper method,
       * document type identifiers and media type for the specified
       * document.
       *
       * @param doc The document to output
       * @see #whichMethod
       */
      public OutputFormat( Document doc )
      {
          setMethod( whichMethod( doc ) );
          setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) );
          setMediaType( whichMediaType( getMethod() ) );
      }


      /**
       * Constructs a new output format with the proper method,
       * document type identifiers and media type for the specified
       * document, and with the specified encoding. If <tt>indenting</tt>
       * is true, the document will be pretty printed with the default
       * indentation level and default line wrapping.
       *
       * @param doc The document to output
       * @param encoding The specified encoding
       * @param indenting True for pretty printing
       * @see #setEncoding
       * @see #setIndenting
       * @see #whichMethod
       */
      public OutputFormat( Document doc, String encoding, boolean indenting )
      {
          this( doc );
          setEncoding( encoding );
          setIndenting( indenting );
      }


      /**
       * Returns the method specified for this output format.
       * Typically the method will be <tt>xml</tt>, <tt>html</tt>
       * or <tt>text</tt>, but it might be other values.
       * If no method was specified, null will be returned
       * and the most suitable method will be determined for
       * the document by calling {@link #whichMethod}.
       *
       * @return The specified output method, or null
       */
      public String getMethod()
      {
          return _method;
      }


      /**
       * Sets the method for this output format.
       *
       * @see #getMethod
       * @param method The output method, or null
       */
      public void setMethod( String method )
      {
          _method = method;
      }


      /**
       * Returns the version for this output method.
       * If no version was specified, will return null
       * and the default version number will be used.
       * If the serializer does not support that particular
       * version, it should default to a supported version.
       *
       * @return The specified method version, or null
       */
      public String getVersion()
      {
          return _version;
      }


      /**
       * Sets the version for this output method.
       * For XML the value would be "1.0", for HTML
       * it would be "4.0".
       *
       * @see #getVersion
       * @param version The output method version, or null
       */
      public void setVersion( String version )
      {
          _version = version;
      }


      /**
       * Returns the indentation specified. If no indentation
       * was specified, zero is returned and the document
       * should not be indented.
       *
       * @return The indentation or zero
       * @see #setIndenting
       */
      public int getIndent()
      {
          return _indent;
      }


      /**
       * Returns true if indentation was specified.
       *
       * @return True if the indentation level is greater than zero
       */
      public boolean getIndenting()
      {
          return ( _indent > 0 );
      }


      /**
       * Sets the indentation. The document will not be
       * indented if the indentation is set to zero.
       * Negative values are treated as zero.
       * Calling {@link #setIndenting} will reset this
       * value to zero (off) or the default (on).
       *
       * @param indent The indentation, or zero
       */
      public void setIndent( int indent )
      {
          _indent = ( indent < 0 ) ? 0 : indent;
      }


      /**
       * Sets the indentation on and off. When set on, the default
       * indentation level and default line wrapping is used
       * (see {@link #DEFAULT_INDENT} and {@link #DEFAULT_LINE_WIDTH}).
       * When set off, both the indentation and the line width are
       * reset to zero.
       * To specify a different indentation level or line wrapping,
       * use {@link #setIndent} and {@link #setLineWidth}.
       *
       * @param on True if indentation should be on
       */
      public void setIndenting( boolean on )
      {
          if ( on ) {
              _indent = DEFAULT_INDENT;
              _lineWidth = DEFAULT_LINE_WIDTH;
          } else {
              _indent = 0;
              _lineWidth = 0;
          }
      }


      /**
       * Returns the specified encoding. If no encoding was
       * specified, the default is "UTF8".
       *
       * @return The encoding
       */
      public String getEncoding()
      {
          return _encoding;
      }


      /**
       * Sets the encoding for this output method. The value is
       * stored as given, including null.
       * Make sure the encoding is compatible with the one
       * used by the {@link java.io.Writer}.
       *
       * @see #getEncoding
       * @param encoding The encoding, or null
       */
      public void setEncoding( String encoding )
      {
          _encoding = encoding;
      }


      /**
       * Returns the specified media type, or null.
       * To determine the media type based on the
       * document type, use {@link #whichMediaType}.
       *
       * @return The specified media type, or null
       */
      public String getMediaType()
      {
          return _mediaType;
      }


      /**
       * Sets the media type.
       *
       * @see #getMediaType
       * @param mediaType The specified media type
       */
      public void setMediaType( String mediaType )
      {
          _mediaType = mediaType;
      }


      /**
       * Sets the document type public and system identifiers.
       * No <tt>DOCTYPE</tt> will be serialized if both identifiers
       * are null. A system identifier is required if a public
       * identifier is specified.
       *
       * @param publicId The public identifier, or null
       * @param systemId The system identifier, or null
       */
      public void setDoctype( String publicId, String systemId )
      {
          _doctypePublic = publicId;
          _doctypeSystem = systemId;
      }


      /**
       * Returns the specified document type public identifier,
       * or null.
       *
       * @return The public identifier, or null
       */
      public String getDoctypePublic()
      {
          return _doctypePublic;
      }


      /**
       * Returns the specified document type system identifier,
       * or null.
       *
       * @return The system identifier, or null
       */
      public String getDoctypeSystem()
      {
          return _doctypeSystem;
      }


      /**
       * Returns true if the XML document declaration should
       * be omitted. The default is false.
       *
       * @return True if the XML declaration should be omitted
       */
      public boolean getOmitXMLDeclaration()
      {
          return _omitXmlDeclaration;
      }


      /**
       * Sets XML declaration omitting on and off.
       *
       * @param omit True if XML declaration should be omitted
       */
      public void setOmitXMLDeclaration( boolean omit )
      {
          _omitXmlDeclaration = omit;
      }


      /**
       * Returns true if the document type is standalone.
       * The default is false.
       *
       * @return True if the document is marked standalone
       */
      public boolean getStandalone()
      {
          return _standalone;
      }


      /**
       * Sets document DTD standalone. The public and system
       * identifiers must be null for the document to be
       * serialized as standalone.
       *
       * @param standalone True if document DTD is standalone
       */
      public void setStandalone( boolean standalone )
      {
          _standalone = standalone;
      }


      /**
       * Returns a list of all the elements whose text node children
       * should be output as CDATA, or null if no such elements were
       * specified. The array is the one passed to
       * {@link #setCDataElements}, not a copy.
       *
       * @return The CDATA element tag names, or null
       */
      public String[] getCDataElements()
      {
          return _cdataElements;
      }


      /**
       * Returns true if the text node children of the given element
       * should be output as CDATA. The comparison is case sensitive.
       *
       * @param tagName The element's tag name
       * @return True if should serialize as CDATA
       */
      public boolean isCDataElement( String tagName )
      {
          return contains( _cdataElements, tagName );
      }


      /**
       * Sets the list of elements for which text node children
       * should be output as CDATA.
       *
       * @param cdataElements List of CDATA element tag names
       */
      public void setCDataElements( String[] cdataElements )
      {
          _cdataElements = cdataElements;
      }


      /**
       * Returns a list of all the elements whose text node children
       * should be output unescaped (no character references), or null
       * if no such elements were specified. The array is the one passed
       * to {@link #setNonEscapingElements}, not a copy.
       *
       * @return The unescaped element tag names, or null
       */
      public String[] getNonEscapingElements()
      {
          return _nonEscapingElements;
      }


      /**
       * Returns true if the text node children of the given element
       * should be output unescaped. The comparison is case sensitive.
       *
       * @param tagName The element's tag name
       * @return True if should serialize unescaped
       */
      public boolean isNonEscapingElement( String tagName )
      {
          return contains( _nonEscapingElements, tagName );
      }


      /**
       * Sets the list of elements for which text node children
       * should be output unescaped (no character references).
       *
       * @param nonEscapingElements List of unescaped element tag names
       */
      public void setNonEscapingElements( String[] nonEscapingElements )
      {
          _nonEscapingElements = nonEscapingElements;
      }


      /**
       * Returns a specific line separator to use. The default is the
       * Web line separator (<tt>\n</tt>). A string is returned to
       * support double codes (CR + LF).
       *
       * @return The specified line separator
       */
      public String getLineSeparator()
      {
          return _lineSeparator;
      }


      /**
       * Sets the line separator. The default is the Web line separator
       * (<tt>\n</tt>), which is also used when null is passed.
       * The machine's line separator can be obtained
       * from the system property <tt>line.separator</tt>, but is only
       * useful if the document is edited on machines of the same type.
       * For general documents, use the Web line separator.
       *
       * @param lineSeparator The specified line separator, or null
       *   for the Web line separator
       */
      public void setLineSeparator( String lineSeparator )
      {
          _lineSeparator = ( lineSeparator == null ) ? LINE_SEPARATOR_WEB : lineSeparator;
      }


      /**
       * Returns true if the default behavior for this format is to
       * preserve spaces. All elements that do not specify otherwise
       * or specify the default behavior will be formatted based on
       * this rule. All elements that specify space preserving will
       * always preserve space.
       *
       * @return True if spaces are preserved by default
       */
      public boolean getPreserveSpace()
      {
          return _preserve;
      }


      /**
       * Sets space preserving as the default behavior. The default is
       * space stripping and all elements that do not specify otherwise
       * or use the default value will not preserve spaces.
       *
       * @param preserve True if spaces should be preserved
       */
      public void setPreserveSpace( boolean preserve )
      {
          _preserve = preserve;
      }


      /**
       * Return the selected line width for breaking up long lines.
       * When indenting, and only when indenting, long lines will be
       * broken at space boundaries based on this line width.
       * No line wrapping occurs if this value is zero.
       *
       * @return The line width, or zero for no wrapping
       */
      public int getLineWidth()
      {
          return _lineWidth;
      }


      /**
       * Sets the line width. If zero or negative then no line
       * wrapping will occur. Calling {@link #setIndenting} will
       * reset this value to zero (off) or the default (on).
       *
       * @param lineWidth The line width to use, zero for no wrapping
       * @see #getLineWidth
       * @see #setIndenting
       */
      public void setLineWidth( int lineWidth )
      {
          _lineWidth = ( lineWidth <= 0 ) ? 0 : lineWidth;
      }


      /**
       * Returns the last printable character based on the selected
       * encoding. Control characters and non-printable characters
       * are always printed as character references.
       *
       * @return 0xFF if the encoding is "ASCII" (ignoring case),
       *   0xFFFF for any other encoding or no encoding
       */
      public char getLastPrintable()
      {
          String encoding = getEncoding();

          // NOTE(review): 7-bit ASCII ends at 0x7F, yet 0xFF is returned
          // here -- confirm what callers expect before changing the value.
          if ( encoding != null && encoding.equalsIgnoreCase( "ASCII" ) )
              return 0xFF;
          return 0xFFFF;
      }


      /**
       * Determine the output method for the specified document.
       * If the root element is 'html' (in any letter case) and all
       * text nodes preceding the root element are all whitespace,
       * then the method is said to be <tt>html</tt>. Otherwise the
       * method is <tt>xml</tt>.
       * <p>
       * XXX The check for documents that are an instance of
       * <tt>org.w3c.dom.html.HTMLDocument</tt> is delayed until the
       * HTML DOM is introduced into the code base.
       *
       * @param doc The document to check
       * @return The suitable method
       */
      public static String whichMethod( Document doc )
      {
          Node node;

          // Walk the document's children up to the root element.
          for ( node = doc.getFirstChild() ; node != null ; node = node.getNextSibling() ) {
              switch ( node.getNodeType() ) {
              case Node.ELEMENT_NODE:
                  // The root element decides: html or anything else.
                  if ( node.getNodeName().equalsIgnoreCase( "html" ) )
                      return METHOD_HTML;
                  return METHOD_XML;
              case Node.TEXT_NODE:
                  // Text other than whitespace before the root element
                  // means this is definitely xml.
                  if ( ! isWhitespace( node.getNodeValue() ) )
                      return METHOD_XML;
                  break;
              default:
                  // Other node kinds do not affect the decision.
                  break;
              }
          }
          // No root element found, the method is xml.
          return METHOD_XML;
      }


      /**
       * Returns the document type public identifier
       * specified for this document, or null.
       * <p>
       * Currently always returns null:
       * XXX reading the identifier from the document's doctype is
       * delayed until DOM Level 2 is introduced into the code base;
       * XXX returning {@link #DOCTYPE_XHTML_PUBLIC} for an HTMLDocument
       * is delayed until the HTML DOM is introduced into the code base.
       *
       * @param doc The document to check
       * @return Always null in this revision
       */
      public static String whichDoctypePublic( Document doc )
      {
          return null;
      }


      /**
       * Returns the document type system identifier
       * specified for this document, or null.
       * <p>
       * Currently always returns null:
       * XXX reading the identifier from the document's doctype is
       * delayed until DOM Level 2 is introduced into the code base;
       * XXX returning {@link #DOCTYPE_XHTML_SYSTEM} for an HTMLDocument
       * is delayed until the HTML DOM is introduced into the code base.
       *
       * @param doc The document to check
       * @return Always null in this revision
       */
      public static String whichDoctypeSystem( Document doc )
      {
          return null;
      }


      /**
       * Returns the suitable media format for a document
       * output with the specified method.
       *
       * @param method The output method, compared ignoring case;
       *   may be null
       * @return <tt>text/xml</tt>, <tt>text/html</tt> or
       *   <tt>text/plain</tt> for the xml, html and text methods,
       *   null for any other method or a null method
       */
      public static String whichMediaType( String method )
      {
          // getMethod() is documented to return null when no method was
          // specified, so a null argument must not fail here.
          if ( method == null )
              return null;
          if ( method.equalsIgnoreCase( METHOD_XML ) )
              return "text/xml";
          if ( method.equalsIgnoreCase( METHOD_HTML ) )
              return "text/html";
          if ( method.equalsIgnoreCase( METHOD_TEXT ) )
              return "text/plain";
          return null;
      }


      /**
       * Returns true if the list holds an entry equal to the tag name.
       * A null list and null entries never match.
       */
      private static boolean contains( String[] names, String tagName )
      {
          int i;

          if ( names == null )
              return false;
          for ( i = 0 ; i < names.length ; ++i )
              if ( names[ i ] != null && names[ i ].equals( tagName ) )
                  return true;
          return false;
      }


      /**
       * Returns true if the text consists only of space, tab, line feed
       * and carriage return characters. Null and empty text qualify.
       */
      private static boolean isWhitespace( String text )
      {
          int  i;
          char ch;

          if ( text == null )
              return true;
          for ( i = 0 ; i < text.length() ; ++i ) {
              ch = text.charAt( i );
              if ( ch != 0x20 && ch != 0x0A && ch != 0x09 && ch != 0x0D )
                  return false;
          }
          return true;
      }


  }
  
  
  
  
  1.1                  
xml-xerces/java/src/org/apache/xml/serialize/Serializer.java
  
  Index: Serializer.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:  
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written 
   *    permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  
  package org.apache.xml.serialize;
  
  
  import java.io.Writer;
  import java.io.OutputStream;
  import java.io.IOException;
  import java.io.UnsupportedEncodingException;
  
  import org.w3c.dom.Element;
  import org.w3c.dom.Document;
  import org.xml.sax.DocumentHandler;
  
  
  /**
   * Interface for a DOM serializer implementation, factory for DOM and SAX
   * serializers, and static methods for serializing DOM documents.
   * <p>
   * To serialize a document using SAX events, create a compatible serializer
   * using {@link #makeSAXSerializer} and pass it around as a {@link
   * DocumentHandler}. If an I/O error occurs while serializing, it will
   * be thrown by {@link DocumentHandler#endDocument}. The SAX serializer
   * may also be used as {@link DTDHandler}, {@link DeclHandler} and
   * {@link LexicalHandler}.
   * <p>
   * To serialize a DOM document or DOM element, create a compatible
   * serializer using {@link #makeSerializer} and call its {@link
   * #serialize(Document)} or {@link #serialize(Element)} methods.
   * Both methods would produce a full XML document; to serialize only
   * a portion of the document use {@link OutputFormat#setOmitXMLDeclaration}
   * and specify no document type.
   * <p>
   * The convenience method {@link #serialize(Document,Writer,OutputFormat)}
   * creates a serializer and calls {@link #serialize(Document)} on that
   * serializer.
   * <p>
   * The [EMAIL PROTECTED] OutputFormat} dictates what underlying serialized is 
used
   * to serialize the document based on the specified method. If the output
   * format or method are missing, the default is an XML serializer with
   * UTF8 encoding and now indentation.
   * 
   *
   * @version
   * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
   * @see DocumentHandler
   * @see OutputFormat
   */
  public abstract class Serializer
  {
  
  
      /**
       * Serialized the DOM element. Throws an exception only if
       * an I/O exception occured while serializing.
       *
       * @param elem The element to serialize
       * @throws IOException An I/O exception occured while
       *   serializing
       */
      public abstract void serialize( Element elem )
          throws IOException;
  
  
      /**
       * Serializes the DOM document. Throws an exception only if
       * an I/O exception occured while serializing.
       *
       * @param doc The document to serialize
       * @throws IOException An I/O exception occured while
       *   serializing
       */
      public abstract void serialize( Document doc )
          throws IOException;
  
  
      /**
       * Creates a compatible serialized for the specified writer
       * and output format. If the output format is missing,
       * the default is an XML format with UTF8 encoding.
       *
       * @param writer The writer
       * @param format The output format
       * @return A compatible serializer
       */
      public static Serializer makeSerializer( Writer writer, OutputFormat 
format )
      {
        BaseSerializer serializer;
  
        serializer = makeBaseSerializer( format );
        serializer.init( writer, format );
        return serializer;
      }
  
  
      /**
       * Creates a compatible serializer for the specified output stream
       * and output format. If the output format is missing, the default
       * is an XML format with UTF8 encoding.
       *
       * @param output The output stream
       * @param format The output format
       * @return A compatible serializer
       * @throws UnsupportedEncodingException Encoding specified
       *   in the output format is not supported
       */
      public static Serializer makeSerializer( OutputStream output, 
OutputFormat format )
          throws UnsupportedEncodingException
      {
        BaseSerializer serializer;
  
        serializer = makeBaseSerializer( format );
        serializer.init( output, format );
        return serializer;
      }
  
  
      /**
       * Creates a compatible SAX serializer for the specified writer
       * and output format. If the output format is missing, the default
       * is an XML format with UTF8 encoding.
       *
       * @param writer The writer
       * @param format The output format
       * @return A compatible SAX serializer
       */
      public static DocumentHandler makeSAXSerializer( Writer writer, 
OutputFormat format )
      {
        BaseSerializer serializer;
  
        serializer = makeBaseSerializer( format );
        serializer.init( writer, format );
        return serializer;
      }
  
  
      /**
       * Creates a compatible SAX serializer for the specified output stream
       * and output format. If the output format is missing, the default
       * is an XML format with UTF8 encoding.
       *
       * @param output The output stream
       * @param format The output format
       * @return A compatible SAX serializer
       * @throws UnsupportedEncodingException Encoding specified
       *   in the output format is not supported
       */
      public static DocumentHandler makeSAXSerializer( OutputStream output, 
OutputFormat format )
          throws UnsupportedEncodingException
      {
        BaseSerializer serializer;
  
        serializer = makeBaseSerializer( format );
        serializer.init( output, format );
        return serializer;
      }
  
  
      /**
       * Convenience method serializes the specified document to
       * the writer using the specified output format.
       * <p>
       * Equivalent to calling [EMAIL PROTECTED] #serialize(Document)} on
       * a compatible DOM serializer.
       *
       * @param doc The document to serialize
       * @param writer The writer
       * @param format The output format
       * @throws IOException An I/O exception occured while serializing
       * @throws UnsupportedEncodingException Encoding specified
       *   in the output format is not supported
       */
      public static void serialize( Document doc, Writer writer, OutputFormat 
format )
          throws IOException
      {
        BaseSerializer serializer;
  
        if ( format == null )
            format = new OutputFormat( doc );
        serializer = makeBaseSerializer( format );
        serializer.init( writer, format );
        serializer.serialize( doc );
      }
  
  
      /**
       * Convenience method serializes the specified document to
       * the output stream using the specified output format.
       * <p>
       * Equivalent to calling [EMAIL PROTECTED] #serialize(Document)} on
       * a compatible DOM serializer.
       *
       * @param doc The document to serialize
       * @param output The output stream
       * @param format The output format
       * @throws IOException An I/O exception occured while serializing
       */
      public static void serialize( Document doc, OutputStream output, 
OutputFormat format )
          throws UnsupportedEncodingException, IOException
      {
        BaseSerializer serializer;
  
        if ( format == null )
            format = new OutputFormat( doc );
        serializer = makeBaseSerializer( format );
        serializer.init( output, format );
        serializer.serialize( doc );
      }
  
  
      private static BaseSerializer makeBaseSerializer( OutputFormat format )
      {
        BaseSerializer serializer;
  
        if ( format == null ) {
            format = new OutputFormat( "xml", "UTF8", false );
            serializer = new XMLSerializer();
        } else {
            if ( format.getMethod().equalsIgnoreCase( "html" ) )
                serializer = new XHTMLSerializer();
            else
            if ( format.getMethod().equalsIgnoreCase( "xhtml" ) )
                serializer = new HTMLSerializer();
            else
                serializer = new XMLSerializer();
        }
        return serializer;
      }
  
  
  }
  
  
  
  
  
  
  
  
  1.1                  
xml-xerces/java/src/org/apache/xml/serialize/XHTMLSerializer.java
  
  Index: XHTMLSerializer.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:  
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written 
   *    permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  
  package org.apache.xml.serialize;
  
  
  import java.io.OutputStream;
  import java.io.Writer;
  import java.io.UnsupportedEncodingException;
  
  
  /**
   * Implements an XHTML serializer supporting both DOM and SAX
   * pretty serializing. For usage instructions see either {@link
   * Serializer} or {@link BaseSerializer}.
   *
   *
   * @version
   * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
   * @see Serializer
   */
  public final class XHTMLSerializer
      extends HTMLSerializer
  {


      /**
       * Constructs a new serializer. The serializer cannot be used without
       * calling {@link #init} first.
       */
      public XHTMLSerializer()
      {
          super( true );
      }


      /**
       * Constructs a new serializer that writes to the specified writer
       * using the specified output format. If <tt>format</tt> is null,
       * will use a default output format.
       *
       * @param writer The writer to use
       * @param format The output format to use, null for the default
       */
      public XHTMLSerializer( Writer writer, OutputFormat format )
      {
          super( true );
          if ( format == null )
              format = new OutputFormat( OutputFormat.METHOD_XHTML, null, false );
          init( writer, format );
      }


      /**
       * Constructs a new serializer that writes to the specified output
       * stream using the specified output format. If <tt>format</tt>
       * is null, will use a default output format.
       *
       * @param output The output stream to use
       * @param format The output format to use, null for the default
       * @throws IllegalArgumentException The encoding named by the
       *   output format is not supported
       */
      public XHTMLSerializer( OutputStream output, OutputFormat format )
      {
          super( true );
          if ( format == null )
              format = new OutputFormat( OutputFormat.METHOD_XHTML, null, false );
          try {
              init( output, format );
          } catch ( UnsupportedEncodingException except ) {
              // A caller-supplied format may name an encoding that is not
              // available. Report it instead of silently handing back a
              // serializer that was never initialized.
              throw new IllegalArgumentException( "Unsupported output encoding '" +
                                                  format.getEncoding() + "': " +
                                                  except.getMessage() );
          }
      }


  }
  
  
  
  1.1                  
xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java
  
  Index: XMLSerializer.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:  
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written 
   *    permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  
  package org.apache.xml.serialize;
  
  
  import java.io.IOException;
  import java.io.UnsupportedEncodingException;
  import java.io.OutputStream;
  import java.io.Writer;
  
  import org.w3c.dom.*;
  import org.xml.sax.DocumentHandler;
  import org.xml.sax.AttributeList;
  
  
  /**
   * Implements an XML serializer supporting both DOM and SAX pretty
   * serializing. For usage instructions see [EMAIL PROTECTED] Serializer}.
   * <p>
   * If an output stream is used, the encoding is taken from the
   * output format (defaults to <tt>UTF8</tt>). If a writer is
   * used, make sure the writer uses the same encoding (if applies)
   * as specified in the output format.
   * <p>
   * The serializer supports both DOM and SAX. DOM serializing is done
   * by calling [EMAIL PROTECTED] #serialize} and SAX serializing is done by 
firing
   * SAX events and using the serializer as a document handler.
   * <p>
   * If an I/O exception occurs while serializing, the serializer
   * will not throw an exception directly, but only throw it
   * at the end of serializing (either DOM or SAX's [EMAIL PROTECTED]
   * org.xml.sax.DocumentHandler#endDocument}.
   * <p>
   * For elements that are not specified as whitespace preserving,
   * the serializer will potentially break long text lines at space
   * boundaries, indent lines, and serialize elements on separate
   * lines. Line terminators will be regarded as spaces, and
   * spaces at beginning of line will be stripped.
   *
   *
   * @version
   * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
   * @see Serializer
   */
  public final class XMLSerializer
      extends BaseSerializer
  {
  
  
      /**
       * Constructs a new serializer. The serializer cannot be used without
       * calling [EMAIL PROTECTED] #init} first.
       */
      public XMLSerializer()
      {
        super();
      }
  
  
      /**
       * Constructs a new serializer that writes to the specified writer
       * using the specified output format. If <tt>format</tt> is null,
       * will use a default output format.
       *
       * @param writer The writer to use
       * @param format The output format to use, null for the default
       */
      public XMLSerializer( Writer writer, OutputFormat format )
      {
        super();
        if ( format == null )
            format = new OutputFormat( OutputFormat.METHOD_XML, null, false );
        init( writer, format );
      }
  
  
      /**
       * Constructs a new serializer that writes to the specified output
       * stream using the specified output format. If <tt>format</tt>
       * is null, will use a default output format.
       *
       * @param output The output stream to use
       * @param format The output format to use, null for the default
       */
      public XMLSerializer( OutputStream output, OutputFormat format )
      {
        super();
        if ( format == null )
            format = new OutputFormat( OutputFormat.METHOD_XML, null, false );
        try {
            init( output, format );
        } catch ( UnsupportedEncodingException except ) {
            // Should never happend, we use UTF8 by default
        }
      }
  
  
      //------------------------------------------//
      // SAX document handler serializing methods //
      //------------------------------000---------//
  
  
      public void startDocument()
      {
        // Nothing to do here. All the magic happens in startDocument(String)
      }
  
  
      public void startElement( String tagName, AttributeList attrs )
      {
        int          i;
        boolean      preserveSpace;
        ElementState state;
        String       name;
        String       value;
  
        state = getElementState();
        if ( state == null ) {
            // If this is the root element handle it differently.
            // If the first root element in the document, serialize
            // the document's DOCTYPE. Space preserving defaults
            // to that of the output format.
            if ( ! _started )
                startDocument( tagName );
            preserveSpace = _format.getPreserveSpace();
        } else {
            // For any other element, if first in parent, then
            // close parent's opening tag and use the parnet's
            // space preserving.
            if ( state.empty )
                printText( ">" );
            preserveSpace = state.preserveSpace;
            // Indent this element on a new line if the first
            // content of the parent element or immediately
            // following an element.
            if ( _format.getIndenting() && ! state.preserveSpace &&
                 ( state.empty || state.afterElement ) )
                breakLine();
        }
        // Do not change the current element state yet.
        // This only happens in endElement().
  
        printText( '<' + tagName );
        indent();
  
        // For each attribute print it's name and value as one part,
        // separated with a space so the element can be broken on
        // multiple lines.
        if ( attrs != null ) {
            for ( i = 0 ; i < attrs.getLength() ; ++i ) {
                printSpace();
                name = attrs.getName( i );
                value = attrs.getValue( i );
                if ( value == null )
                    value = "";
                printText( name + "=\"" + escape( value ) + '"' );
                
                // If the attribute xml:space exists, determine whether
                // to preserve spaces in this and child nodes based on
                // its value.
                if ( name.equals( "xml:space" ) ) {
                    if ( value.equals( "preserve" ) )
                        preserveSpace = true;
                    else
                        preserveSpace = _format.getPreserveSpace();
                }
            }
        }
        // Now it's time to enter a new element state
        // with the tag name and space preserving.
        // We still do not change the curent element state.
        state = enterElementState( tagName, preserveSpace );
        state.cdata = _format.isCDataElement( tagName );
        state.unescaped = _format.isNonEscapingElement( tagName );
      }
  
  
      public void endElement( String tagName )
      {
        ElementState state;
  
        // Works much like content() with additions for closing
        // an element. Note the different checks for the closed
        // element's state and the parent element's state.
        unindent();
        state = getElementState();
        if ( state.empty ) {
            printText( "/>" );
        } else {
            // This element is not empty and that last content was
            // another element, so print a line break before that
            // last element and this element's closing tag.
            if ( _format.getIndenting() && ! state.preserveSpace &&  
state.afterElement )
                breakLine();
            printText( "</" + tagName + ">" );
        }
        // Leave the element state and update that of the parent
        // (if we're not root) to not empty and after element.
        state = leaveElementState();
        if ( state != null ) {
            state.afterElement = true;
            state.empty = false;
        } else {
            // [keith] If we're done printing the document but don't
            // get to call endDocument(), the buffer should be flushed.
            flush();
        }
      }
  
  
  
      //------------------------------------------//
      // Generic node serializing methods methods //
      //------------------------------------------//
  
  
      /**
       * Called to serialize the document's DOCTYPE by the root element.
       * The document type declaration must name the root element,
       * but the root element is only known when that element is serialized,
       * and not at the start of the document.
       * <p>
       * This method will check if it has not been called before ([EMAIL 
PROTECTED] #_started}),
       * will serialize the document type declaration, and will serialize all
       * pre-root comments and PIs that were accumulated in the document
       * (see [EMAIL PROTECTED] #serializePreRoot}). Pre-root will be 
serialized even if
       * this is not the first root element of the document.
       */
      protected void startDocument( String rootTagName )
      {
        int    i;
        String dtd;
  
        dtd = leaveDTD();
        if ( ! _started ) {
  
            if ( ! _format.getOmitXMLDeclaration() ) {
                StringBuffer    buffer;
                
                // Serialize the document declaration appreaing at the head
                // of very XML document (unless asked not to).
                buffer = new StringBuffer( "<?xml version=\"" );
                if ( _format.getVersion() != null )
                    buffer.append( _format.getVersion() );
                else
                    buffer.append( "1.0" );
                buffer.append( '"' );
                if ( _format.getEncoding() != null ) {
                    buffer.append( " encoding=\"" );
                    buffer.append( _format.getEncoding() );
                    buffer.append( '"' );
                }
                if ( _format.getStandalone() && _format.getDoctypeSystem() == 
null &&
                     _format.getDoctypePublic() == null )
                    buffer.append( " standalone=\"yes\"" );
                buffer.append( "?>" );
                printText( buffer.toString() );
                breakLine();
            }
  
            if ( _format.getDoctypeSystem() != null ) {
                // System identifier must be specified to print DOCTYPE.
                // If public identifier is specified print 'PUBLIC
                // <public> <system>', if not, print 'SYSTEM <system>'.
                printText( "<!DOCTYPE " );
                printText( rootTagName );
                if ( _format.getDoctypePublic() != null ) {
                    printText( " PUBLIC " );
                    printDoctypeURL( _format.getDoctypePublic() );
                    if ( _format.getIndenting() ) {
                        breakLine();
                        for ( i = 0 ; i < 18 + rootTagName.length() ; ++i )
                            printText( " " );
                    }
                    printDoctypeURL( _format.getDoctypeSystem() );
                }
                else {
                    printText( " SYSTEM " );
                    printDoctypeURL( _format.getDoctypeSystem() );
                }
  
                // If we accumulated any DTD contents while printing.
                // this would be the place to print it.
                if ( dtd != null && dtd.length() > 0 ) {
                    printText( " [" );
                    indent();
                    if ( _format.getIndenting() )
                        breakLine();
                    printText( dtd, true );
                    unindent();
                    printText( "]" );
                }
  
                printText( ">" );
                breakLine();
            }
        }
        _started = true;
        // Always serialize these, even if not te first root element.
        serializePreRoot();
      }
  
  
      /**
       * Called to serialize a DOM element. Equivalent to calling [EMAIL 
PROTECTED]
       * #startElement}, [EMAIL PROTECTED] #endElement} and serializing 
everything
       * inbetween, but better optimized.
       */
      protected void serializeElement( Element elem )
      {
        Attr         attr;
        NamedNodeMap attrMap;
        int          i;
        Node         child;
        ElementState state;
        boolean      preserveSpace;
        String       name;
        String       value;
  
        state = getElementState();
        if ( state == null ) {
            // If this is the root element handle it differently.
            // If the first root element in the document, serialize
            // the document's DOCTYPE. Space preserving defaults
            // to that of the output format.
            if ( ! _started )
                startDocument( elem.getTagName() );
            preserveSpace = _format.getPreserveSpace();
        } else {
            // For any other element, if first in parent, then
            // close parent's opening tag and use the parnet's
            // space preserving.
            if ( state.empty )
                printText( ">" );
            preserveSpace = state.preserveSpace;
            // Indent this element on a new line if the first
            // content of the parent element or immediately
            // following an element.
            if ( _format.getIndenting() && ! state.preserveSpace &&
                 ( state.empty || state.afterElement ) )
                breakLine();
        }
        // Do not change the current element state yet.
        // This only happens in endElement().
  
        printText( '<' + elem.getTagName() );
        indent();
  
        // Lookup the element's attribute, but only print specified
        // attributes. (Unspecified attributes are derived from the DTD.
        // For each attribute print it's name and value as one part,
        // separated with a space so the element can be broken on
        // multiple lines.
        attrMap = elem.getAttributes();
        if ( attrMap != null ) {
            for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
                attr = (Attr) attrMap.item( i );
                name = attr.getName();
                value = attr.getValue();
                if ( value == null )
                    value = "";
                if ( attr.getSpecified() ) {
                    printSpace();
                    printText( name + "=\"" + escape( value ) + '"' );
                }
                // If the attribute xml:space exists, determine whether
                // to preserve spaces in this and child nodes based on
                // its value.
                if ( name.equals( "xml:space" ) ) {
                    if ( value.equals( "preserve" ) )
                        preserveSpace = true;
                    else
                        preserveSpace = _format.getPreserveSpace();             
    
                }
            }
        }
  
        // If element has children, then serialize them, otherwise
        // serialize en empty tag.
        if ( elem.hasChildNodes() ) {
            // Enter an element state, and serialize the children
            // one by one. Finally, end the element.
            state = enterElementState( elem.getTagName(), preserveSpace );
            state.cdata = _format.isCDataElement( elem.getTagName() );
            state.unescaped = _format.isNonEscapingElement( elem.getTagName() );
            child = elem.getFirstChild();
            while ( child != null ) {
                serializeNode( child );
                child = child.getNextSibling();
            }
            endElement( elem.getTagName() );
        } else {
            unindent();
            printText( "/>" );
            if ( state != null ) {
                // After element but parent element is no longer empty.
                state.afterElement = true;
                state.empty = false;
            }
        }
      }
  
  
      protected String getEntityRef( char ch )
      {
        // Encode special XML characters into the equivalent character 
references.
        // These five are defined by default for all XML documents.
          switch ( ch ) {
        case '<':
            return "lt";
        case '>':
            return "gt";
        case '"':
            return "quot";
        case '\'':
            return "apos";
        case '&':
            return "amp";
          }
          return null;
      }
  
  
  }

cvs commit: xml-xerces/java/src/org/apache/xml/serialize BaseSerializer.java ElementState.java HTMLEntities.res HTMLSerializer.java HTMLdtd.java Makefile OutputFormat.java Serializer.java XHTMLSerializer.java XMLSerializer.java

Reply via email to