arkin 99/11/23 14:29:53
Added: java/src/org/apache/xml/serialize BaseSerializer.java
ElementState.java HTMLEntities.res
HTMLSerializer.java HTMLdtd.java Makefile
OutputFormat.java Serializer.java
XHTMLSerializer.java XMLSerializer.java
Log:
First checkin.
Revision Changes Path
1.1
xml-xerces/java/src/org/apache/xml/serialize/BaseSerializer.java
Index: BaseSerializer.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xml.serialize;
import java.io.*;
import java.util.Vector;
import java.util.Hashtable;
import java.util.StringTokenizer;
import org.w3c.dom.*;
import org.xml.sax.DocumentHandler;
import org.xml.sax.DTDHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.misc.LexicalHandler;
import org.xml.sax.misc.DeclHandler;
/**
 * Base class for a serializer supporting both DOM and SAX pretty
 * serializing of XML/HTML/XHTML documents. Derived classes perform
 * the method-specific serializing, this class provides the common
 * serializing mechanisms.
 * <p>
 * The serializer must be initialized with the proper writer and
 * output format before it can be used by calling {@link #init}.
 * The serializer can be reused any number of times, but cannot
 * be used concurrently by two threads.
 * <p>
 * If an output stream is used, the encoding is taken from the
 * output format (defaults to <tt>UTF8</tt>). If a writer is
 * used, make sure the writer uses the same encoding (if applicable)
 * as specified in the output format.
 * <p>
 * The serializer supports both DOM and SAX. DOM serializing is done
 * by calling {@link #serialize} and SAX serializing is done by firing
 * SAX events and using the serializer as a document handler.
 * This also applies to derived classes.
 * <p>
 * If an I/O exception occurs while serializing, the serializer
 * will not throw an exception directly, but only throw it
 * at the end of serializing (either DOM or SAX's {@link
 * org.xml.sax.DocumentHandler#endDocument}).
 * <p>
 * For elements that are not specified as whitespace preserving,
 * the serializer will potentially break long text lines at space
 * boundaries, indent lines, and serialize elements on separate
 * lines. Line terminators will be regarded as spaces, and
 * spaces at beginning of line will be stripped.
 * <p>
 * When indenting, the serializer is capable of detecting what appears
 * to be element content, and serializing these elements indented on
 * separate lines. An element is serialized indented when it is the
 * first or last child of an element, or immediately following or
 * preceding another element.
 *
 *
 * @version
 * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
 * @see Serializer
 * @see XMLSerializer
 */
abstract class BaseSerializer
extends Serializer
implements DocumentHandler, LexicalHandler, DTDHandler, DeclHandler
{
/**
* Identifies the last printable character in the Unicode range
* that is supported by the encoding used with this serializer.
* For 8-bit encodings this will be either 0x7E or 0xFF.
* For 16-bit encodings this will be 0xFFFF. Characters that are
* not printable will be escaped using character references.
* Starts out as 0x7E and is replaced in {@link #init} with the value
* reported by the output format.
*/
private int _lastPrintable = 0x7E;
/**
* The output format associated with this serializer. It is assigned
* by {@link #init}, which rejects a null format, so it is non-null
* once the serializer has been initialized. The serializer mostly
* only reads the format, but {@link #startDTD} calls
* <tt>setDoctype</tt> on it.
*/
protected OutputFormat _format;
/**
* The writer to which the document is written. While in DTD mode this
* refers to {@link #_dtdWriter} instead of the document writer.
*/
private Writer _writer;
/**
* Holds the currently accumulating text line. A fresh buffer is
* allocated each time the line is flushed.
*/
private StringBuffer _line;
/**
* Holds the currently accumulating text that follows {@link #_line}.
* When the end of the part is identified by a call to
* {@link #printSpace} or {@link #breakLine}, this part is added to
* the accumulated line.
*/
private StringBuffer _text;
/**
* Counts how many white spaces come between the accumulated line and the
* current accumulated text. Multiple spaces at the end of a line
* will not be printed.
*/
private int _spaces;
/**
* Holds the indentation for the current line that is now accumulating in
* memory and will be sent for printing shortly.
*/
private int _thisIndent;
/**
* Holds the indentation for the next line to be printed. After this line
* is printed, {@link #_nextIndent} is assigned to {@link #_thisIndent}.
*/
private int _nextIndent;
/**
* Holds the first I/O exception raised while writing. Exceptions do not
* cause the serializer to quit, but are held and one is thrown at the end.
*/
protected IOException _exception;
/**
* Holds array of all element states that have been entered.
* The array is automatically resized. When leaving an element,
* its state is not removed but reused when later returning
* to the same nesting level.
*/
private ElementState[] _elementStates = new ElementState[ 5 ];
/**
* The index of the next state to place in the array,
* or one plus the index of the current state. When zero,
* we are in no state.
*/
private int _elementStateCount;
/**
* Vector holding the textual form of comments and PIs that were seen
* while no element was open (before the root element, or after it),
* see {@link #serializePreRoot}. Created lazily; may be null.
*/
private Vector _preRoot;
/**
* If the document has been started (header serialized), this
* flag is set to true so it's not started twice.
*/
protected boolean _started;
/**
* The DTD writer. When we switch to DTD mode, all output is
* accumulated in this DTD writer. When we switch out of it,
* the output is obtained as a string. Must not be reset to
* null until we're done with the document, because a non-null
* value is what prevents {@link #enterDTD} from entering DTD
* mode a second time.
*/
private StringWriter _dtdWriter;
/**
* Holds a reference to the document writer while we are
* in DTD mode.
*/
private Writer _docWriter;
//--------------------------------//
// Constructor and initialization //
//--------------------------------//
/**
 * Creates a serializer whose element-state stack is pre-populated, so
 * that every slot of the initial array already holds a reusable
 * {@link ElementState}. Only derived classes may call this constructor.
 * The serializer is not usable until {@link #init} has been called.
 */
protected BaseSerializer()
{
    for ( int slot = 0 ; slot < _elementStates.length ; ++slot ) {
        _elementStates[ slot ] = new ElementState();
    }
}
/**
 * Initializes the serializer with the specified writer and output format.
 * Must be called before calling any of the serialize methods, and may be
 * called again to reuse the serializer for another document.
 * <p>
 * Both arguments are validated before any state is modified, so a failed
 * call leaves the serializer exactly as it was.
 *
 * @param writer The writer to use; it is wrapped in a BufferedWriter
 * @param format The output format
 * @throws NullPointerException If either argument is null
 */
public synchronized void init( Writer writer, OutputFormat format )
{
    if ( format == null )
        throw new NullPointerException( "Argument 'format' is null." );
    if ( writer == null )
        throw new NullPointerException( "Argument 'writer' is null." );
    _format = format;
    _writer = new BufferedWriter( writer );
    // Determine the last printable character based on the output format
    _lastPrintable = _format.getLastPrintable();
    // Initialize everything for a first/second run.
    _line = new StringBuffer( 80 );
    _text = new StringBuffer( 20 );
    _spaces = 0;
    _thisIndent = _nextIndent = 0;
    _exception = null;
    _elementStateCount = 0;
    _started = false;
    _dtdWriter = null;
    // Drop leftovers of a previous run that was abandoned half way:
    // the saved document writer of an unfinished DTD and any comments
    // or PIs still queued for printing around the root element.
    _docWriter = null;
    _preRoot = null;
}
/**
 * Initializes the serializer with the specified output stream and output
 * format. Must be called before calling any of the serialize methods.
 * <p>
 * The stream is wrapped in a writer using the encoding reported by the
 * output format; when the format reports no encoding, <tt>"ASCII"</tt>
 * is used.
 *
 * @param output The output stream to use
 * @param format The output format
 * @throws UnsupportedEncodingException The encoding specified
 *  in the output format is not supported
 * @throws NullPointerException If either argument is null
 */
public synchronized void init( OutputStream output, OutputFormat format )
    throws UnsupportedEncodingException
{
    String encoding;

    // Validate up front so the caller gets a descriptive message rather
    // than a bare NullPointerException from dereferencing the format.
    if ( format == null )
        throw new NullPointerException( "Argument 'format' is null." );
    if ( output == null )
        throw new NullPointerException( "Argument 'output' is null." );
    encoding = format.getEncoding();
    if ( encoding == null )
        encoding = "ASCII";
    init( new OutputStreamWriter( output, encoding ), format );
}
//-------------------------------//
// DOM document serializing methods //
//-------------------------------//
/**
 * Serializes a single DOM element (and its subtree) to the writer and
 * output format given to {@link #init}. I/O failures are not reported
 * while serializing; the first one recorded is thrown once the output
 * has been flushed.
 *
 * @param elem The element to serialize
 * @throws IOException An I/O exception occurred while
 *  serializing
 */
public void serialize( Element elem )
    throws IOException
{
    try {
        startDocument();
    } catch ( SAXException ignored ) {
        // A SAX failure while starting the document is deliberately
        // not reported here.
    }
    serializeNode( elem );
    flush();
    if ( _exception != null ) {
        throw _exception;
    }
}
/**
 * Serializes a whole DOM document to the writer and output format given
 * to {@link #init}. After the document nodes have been serialized, any
 * comments and PIs still queued for printing around the root element
 * are printed, and the output is flushed. I/O failures are not reported
 * while serializing; the first one recorded is thrown at the end.
 *
 * @param doc The document to serialize
 * @throws IOException An I/O exception occurred while
 *  serializing
 */
public void serialize( Document doc )
    throws IOException
{
    try {
        startDocument();
    } catch ( SAXException ignored ) {
        // A SAX failure while starting the document is deliberately
        // not reported here.
    }
    serializeNode( doc );
    serializePreRoot();
    flush();
    if ( _exception != null ) {
        throw _exception;
    }
}
//---------------------------------------//
// SAX document handler serializing methods //
//---------------------------------------//
/**
 * SAX event: character data. The range is copied into a string and
 * printed as regular text, neither forced into a CDATA section nor
 * forced unescaped.
 *
 * @param chars Array holding the characters
 * @param start Index of the first character to print
 * @param length Number of characters to print
 */
public void characters( char[] chars, int start, int length )
{
    String text = new String( chars, start, length );

    characters( text, false, false );
}
/**
 * SAX event: ignorable whitespace. The whitespace is only printed when
 * the output format requests indenting, in which case it stands in for
 * the indentation: the indentation of the current line is cancelled so
 * the line is not indented twice. Line terminators force a line break,
 * every other character is added to the pending text.
 *
 * @param chars Array holding the whitespace
 * @param start Index of the first character
 * @param length Number of characters
 */
public void ignorableWhitespace( char[] chars, int start, int length )
{
    int pos;
    int stop;
    char ch;

    content();
    if ( ! _format.getIndenting() ) {
        return;
    }
    _thisIndent = 0;
    stop = start + length;
    for ( pos = start ; pos < stop ; ++pos ) {
        ch = chars[ pos ];
        if ( ch == '\n' || ch == '\r' ) {
            breakLine();
        } else {
            _text.append( ch );
        }
    }
}
/**
 * SAX event: processing instruction. Builds the textual form
 * <tt>&lt;?target code?&gt;</tt>; both the target and the code are cut
 * short at the first <tt>?&gt;</tt> they contain so the instruction
 * cannot terminate early. Inside an element the instruction is printed
 * right away, one indentation level deeper; when no element is open it
 * is queued for {@link #serializePreRoot}.
 *
 * @param target The processing instruction target
 * @param code The processing instruction data, may be null
 */
public void processingInstruction( String target, String code )
{
    ElementState state = content();
    StringBuffer markup = new StringBuffer( 40 );
    int cut;

    markup.append( "<?" );
    cut = target.indexOf( "?>" );
    markup.append( cut >= 0 ? target.substring( 0, cut ) : target );
    if ( code != null ) {
        markup.append( ' ' );
        cut = code.indexOf( "?>" );
        markup.append( cut >= 0 ? code.substring( 0, cut ) : code );
    }
    markup.append( "?>" );

    if ( state != null ) {
        // Inside an element: print in place.
        indent();
        printText( markup, true );
        unindent();
        return;
    }
    // No element is open: hold the PI back in the pre-root vector.
    if ( _preRoot == null ) {
        _preRoot = new Vector();
    }
    _preRoot.addElement( markup.toString() );
}
/**
 * Lexical event: comment given as a character range. The range is copied
 * into a string and handled by {@link #comment(String)}.
 *
 * @param chars Array holding the comment text
 * @param start Index of the first character
 * @param length Number of characters
 */
public void comment( char[] chars, int start, int length )
{
    String text = new String( chars, start, length );

    comment( text );
}
/**
 * Prints a comment. The text is cut short at the first <tt>--&gt;</tt>
 * it contains so the comment cannot terminate early. Inside an element
 * the comment is printed right away, one indentation level deeper and
 * without preserving spaces; when no element is open it is queued for
 * {@link #serializePreRoot}.
 *
 * @param text The comment text
 */
public void comment( String text )
{
    ElementState state = content();
    StringBuffer markup = new StringBuffer( 40 );
    int cut = text.indexOf( "-->" );

    markup.append( "<!--" );
    markup.append( cut >= 0 ? text.substring( 0, cut ) : text );
    markup.append( "-->" );

    if ( state != null ) {
        // Inside an element: print in place.
        indent();
        printText( markup, false );
        unindent();
        return;
    }
    // No element is open: hold the comment back in the pre-root vector.
    if ( _preRoot == null ) {
        _preRoot = new Vector();
    }
    _preRoot.addElement( markup.toString() );
}
/**
 * Lexical event: start of a CDATA section. Marks the current element
 * state, if there is one, so that following text is printed as CDATA.
 */
public void startCDATA()
{
    ElementState current = getElementState();

    if ( current == null ) {
        return;
    }
    current.cdata = true;
}
/**
 * Lexical event: end of a CDATA section. Clears the CDATA mark on the
 * current element state, if there is one.
 */
public void endCDATA()
{
    ElementState current = getElementState();

    if ( current == null ) {
        return;
    }
    current.cdata = false;
}
/**
 * Called at the end of the document to wrap it up. Prints whatever
 * comments and PIs are still queued outside the root element, flushes
 * the output and, if an I/O error was recorded while serializing,
 * reports it wrapped in a SAX exception.
 *
 * @throws SAXException An I/O exception occurred during
 *  serializing
 */
public void endDocument()
    throws SAXException
{
    // Everything accumulated outside of the root element goes first.
    serializePreRoot();
    // Buffered output must be pushed out before we report anything.
    flush();
    if ( _exception == null ) {
        return;
    }
    // This is the one place where a held-back I/O failure surfaces.
    throw new SAXException( _exception );
}
/**
* Lexical event: start of an entity. Currently ignored; nothing is
* printed and no state is changed.
*
* @param name The entity name (unused)
*/
public void startEntity( String name )
{
// Intentionally empty: entity boundaries are not serialized.
}
/**
* Lexical event: end of an entity. Currently ignored; nothing is
* printed and no state is changed.
*
* @param name The entity name (unused)
*/
public void endEntity( String name )
{
// Intentionally empty: entity boundaries are not serialized.
}
/**
* SAX event: supplies a document locator. The locator is not retained.
*
* @param locator The document locator (unused)
*/
public void setDocumentLocator( Locator locator )
{
// Nothing to do
}
//---------------------------------------//
// SAX DTD/Decl handler serializing methods //
//---------------------------------------//
/**
* Lexical event: start of the DTD. Switches the serializer into DTD
* mode and records the public and system identifiers on the output
* format, replacing whatever doctype was set there before.
*
* @param name The document type name (not used by this method)
* @param publicId The public identifier, passed on to the output format
* @param systemId The system identifier, passed on to the output format
*/
public void startDTD( String name, String publicId, String systemId )
{
enterDTD();
// For the moment this simply overrides any settings performed
// on the output format.
_format.setDoctype( publicId, systemId );
}
/**
* Lexical event: end of the DTD. Does nothing; DTD mode is left through
* {@link #leaveDTD}, not through this event.
*/
public void endDTD()
{
// Nothing to do here, all the magic occurs in startDocument(String).
}
/**
 * Declaration event: element type declaration. Prints
 * <tt>&lt;!ELEMENT name model&gt;</tt> into the DTD, followed by a
 * line break when indenting.
 *
 * @param name The element type name
 * @param model The content model
 */
public void elementDecl( String name, String model )
{
    StringBuffer decl;

    enterDTD();
    decl = new StringBuffer( 40 );
    decl.append( "<!ELEMENT " ).append( name );
    decl.append( " " ).append( model ).append( ">" );
    printText( decl.toString() );
    if ( _format.getIndenting() ) {
        breakLine();
    }
}
/**
 * Declaration event: attribute declaration. Prints
 * <tt>&lt;!ATTLIST eName aName type [valueDefault] ["value"]&gt;</tt>
 * into the DTD, followed by a line break when indenting. The default
 * value, when present, is escaped and enclosed in double quotes.
 *
 * @param eName The element the attribute belongs to
 * @param aName The attribute name
 * @param type The attribute type
 * @param valueDefault The default declaration keyword, may be null
 * @param value The default value, may be null
 */
public void attributeDecl( String eName, String aName, String type,
                           String valueDefault, String value )
{
    String decl;

    enterDTD();
    decl = "<!ATTLIST " + eName + ' ' + aName + ' ' + type;
    if ( valueDefault != null ) {
        decl = decl + ' ' + valueDefault;
    }
    if ( value != null ) {
        decl = decl + " \"" + escape( value ) + '"';
    }
    printText( decl + '>' );
    if ( _format.getIndenting() ) {
        breakLine();
    }
}
/**
 * Declaration event: internal entity declaration. Prints
 * <tt>&lt;!ENTITY name "value"&gt;</tt> into the DTD with the value
 * escaped, followed by a line break when indenting.
 *
 * @param name The entity name
 * @param value The replacement text
 */
public void internalEntityDecl( String name, String value )
{
    StringBuffer decl;

    enterDTD();
    decl = new StringBuffer( 40 );
    decl.append( "<!ENTITY " ).append( name );
    decl.append( " \"" ).append( escape( value ) ).append( "\">" );
    printText( decl.toString() );
    if ( _format.getIndenting() ) {
        breakLine();
    }
}
/**
* Declaration event: external parsed entity declaration. Printed exactly
* like an unparsed entity that has no notation.
*
* @param name The entity name
* @param publicId The public identifier, may be null
* @param systemId The system identifier
*/
public void externalEntityDecl( String name, String publicId, String
systemId )
{
// unparsedEntityDecl() enters DTD mode itself; entering it here first
// is harmless because enterDTD() only acts the first time.
enterDTD();
unparsedEntityDecl( name, publicId, systemId, null );
}
/**
 * DTD event: entity declaration with an external identifier. Prints
 * <tt>&lt;!ENTITY name SYSTEM "systemId"&gt;</tt> when there is no
 * public identifier and
 * <tt>&lt;!ENTITY name PUBLIC "publicId" "systemId"&gt;</tt> when there
 * is one, adding <tt>NDATA notationName</tt> when a notation is given.
 * A line break follows when indenting.
 *
 * @param name The entity name
 * @param publicId The public identifier, may be null
 * @param systemId The system identifier
 * @param notationName The notation name, or null for a parsed entity
 */
public void unparsedEntityDecl( String name, String publicId,
                                String systemId, String notationName )
{
    enterDTD();
    // NOTE(review): a null systemId still reaches printDoctypeURL(),
    // which dereferences its argument; callers are expected to supply one.
    if ( publicId == null ) {
        // No public identifier: only the system identifier can be printed.
        printText( "<!ENTITY " + name + " SYSTEM " );
        printDoctypeURL( systemId );
    } else {
        printText( "<!ENTITY " + name + " PUBLIC " );
        printDoctypeURL( publicId );
        printText( " " );
        printDoctypeURL( systemId );
    }
    if ( notationName != null )
        printText( " NDATA " + notationName );
    printText( ">" );
    if ( _format.getIndenting() )
        breakLine();
}
/**
 * DTD event: notation declaration. Prints
 * <tt>&lt;!NOTATION name SYSTEM "systemId"&gt;</tt> when there is no
 * public identifier, otherwise
 * <tt>&lt;!NOTATION name PUBLIC "publicId" ["systemId"]&gt;</tt>.
 * A line break follows when indenting.
 *
 * @param name The notation name
 * @param publicId The public identifier, may be null
 * @param systemId The system identifier, may be null when a public
 *  identifier is given
 */
public void notationDecl( String name, String publicId, String systemId )
{
    enterDTD();
    if ( publicId == null ) {
        printText( "<!NOTATION " + name + " SYSTEM " );
        printDoctypeURL( systemId );
    } else {
        printText( "<!NOTATION " + name + " PUBLIC " );
        printDoctypeURL( publicId );
        if ( systemId != null ) {
            printText( " " );
            printDoctypeURL( systemId );
        }
    }
    printText( ">" );
    if ( _format.getIndenting() ) {
        breakLine();
    }
}
/**
 * Called by any of the DTD handlers to enter DTD mode.
 * Once entered, all output is accumulated in a string writer
 * so it can later be printed as part of the document's DTD.
 * This method may be called any number of times but only takes
 * effect the first time it is called. To exit DTD mode and get
 * the accumulated DTD, call {@link #leaveDTD}.
 */
protected void enterDTD()
{
    // DTD mode can be entered only once per document; the DTD writer
    // stays non-null afterwards and blocks any re-entry.
    if ( _dtdWriter != null ) {
        return;
    }
    // Push whatever is pending out to the document writer first.
    _line.append( _text );
    _text = new StringBuffer( 20 );
    flushLine();
    // From here on everything is written into the DTD writer.
    _dtdWriter = new StringWriter();
    _docWriter = _writer;
    _writer = _dtdWriter;
}
/**
 * Called by the root element to leave DTD mode. Pending output is
 * flushed into the DTD writer, the document writer is restored and
 * the text of everything printed in DTD mode is returned.
 *
 * @return The accumulated DTD text, or null if the serializer is
 *  not currently in DTD mode
 */
protected String leaveDTD()
{
    // Only meaningful while output is being redirected to the DTD writer.
    if ( _writer != _dtdWriter ) {
        return null;
    }
    _line.append( _text );
    _text = new StringBuffer( 20 );
    flushLine();
    _writer = _docWriter;
    return _dtdWriter.toString();
}
//----------------------------------//
// Generic node serializing methods //
//----------------------------------//
/**
 * Serializes a DOM node. This method is shared across the XML, HTML and
 * XHTML serializers; the differences between them are confined to
 * {@link #serializeElement}.
 * <p>
 * Text, CDATA sections, comments and processing instructions are routed
 * to the matching handler methods, entity references are printed in
 * place, and documents and document fragments are serialized by
 * recursing over their children. For a document with a document type,
 * the entities and notations of the document type are declared first.
 * Any other node type is ignored.
 *
 * @param node The node to serialize
 * @see #serializeElement
 */
protected void serializeNode( Node node )
{
    switch ( node.getNodeType() ) {

    case Node.TEXT_NODE :
        characters( node.getNodeValue(), false, false );
        break;

    case Node.CDATA_SECTION_NODE :
        characters( node.getNodeValue(), true, false );
        break;

    case Node.COMMENT_NODE :
        comment( node.getNodeValue() );
        break;

    case Node.ENTITY_REFERENCE_NODE :
        // The reference goes straight into the text; no break, no pause.
        content();
        printText( '&' + node.getNodeName() + ';' );
        break;

    case Node.PROCESSING_INSTRUCTION_NODE :
        processingInstruction( node.getNodeName(), node.getNodeValue() );
        break;

    case Node.ELEMENT_NODE :
        serializeElement( (Element) node );
        break;

    case Node.DOCUMENT_NODE : {
        DocumentType docType = ( (Document) node ).getDoctype();

        // A document type is serialized through the DTD events.
        if ( docType != null ) {
            NamedNodeMap declared;
            int pos;

            startDTD( docType.getName(), null, null );

            declared = docType.getEntities();
            if ( declared != null ) {
                for ( pos = 0 ; pos < declared.getLength() ; ++pos ) {
                    Entity entity = (Entity) declared.item( pos );
                    unparsedEntityDecl( entity.getNodeName(),
                                        entity.getPublicId(),
                                        entity.getSystemId(),
                                        entity.getNotationName() );
                }
            }

            declared = docType.getNotations();
            if ( declared != null ) {
                for ( pos = 0 ; pos < declared.getLength() ; ++pos ) {
                    Notation notation = (Notation) declared.item( pos );
                    notationDecl( notation.getNodeName(),
                                  notation.getPublicId(),
                                  notation.getSystemId() );
                }
            }

            endDTD();
        }
    }
    // !! Fall through: the document's children are serialized below.

    case Node.DOCUMENT_FRAGMENT_NODE : {
        // Containers are serialized simply by serializing their children,
        // in document order.
        for ( Node child = node.getFirstChild() ; child != null ;
              child = child.getNextSibling() ) {
            serializeNode( child );
        }
        break;
    }

    default:
        break;
    }
}
/**
 * Must be called by a method about to print any type of content.
 * If the current element was just opened and is still marked empty,
 * its opening tag is closed with <tt>&gt;</tt> and the element is no
 * longer considered empty. In all cases the <tt>afterElement</tt> mark
 * of the current element is cleared.
 *
 * @return The current element state, or null if no element is open
 */
protected ElementState content()
{
    ElementState current = getElementState();

    if ( current == null ) {
        return null;
    }
    // First content inside the element: finish the opening tag.
    if ( current.empty ) {
        printText( ">" );
        current.empty = false;
    }
    // Whatever is printed next is not "right after an element"; the one
    // content type for which it is takes care of that itself.
    current.afterElement = false;
    return current;
}
/**
 * Called to print the text contents in the prevailing element format.
 * Since this method is capable of printing text as CDATA, it is used
 * for that purpose as well. White space handling is determined by the
 * current element state. In addition, the element state can dictate
 * that the text is printed as CDATA or unescaped even when the caller
 * did not ask for it.
 * <p>
 * The method may be called while no element is open, in which case only
 * the <tt>cdata</tt> and <tt>unescaped</tt> arguments decide how the
 * text is printed.
 *
 * @param text The text to print
 * @param cdata True if the text should print as a CDATA section
 * @param unescaped True if the text should print unescaped
 */
protected void characters( String text, boolean cdata, boolean unescaped )
{
    ElementState state;
    int saveIndent;

    state = content();
    // Check if text should be printed as a CDATA section or unescaped
    // based on elements listed in the output format (the element
    // state) or whether we are inside a CDATA section or entity.
    // The element state can only add to what the caller requested,
    // and there may be no element state at all.
    if ( state != null ) {
        cdata = cdata || state.cdata;
        unescaped = unescaped || state.unescaped;
    }

    if ( cdata ) {
        StringBuffer buffer;
        int index;

        // Print a CDATA section. The text is not escaped, but ']]>'
        // appearing in the text must be split across two sections:
        // the first ends after ']]', the next starts with '>'.
        // The contents of a CDATA section are considered space preserving.
        buffer = new StringBuffer( text.length() );
        index = text.indexOf( "]]>" );
        while ( index >= 0 ) {
            buffer.append( "<![CDATA[" )
                  .append( text.substring( 0, index + 2 ) )
                  .append( "]]>" );
            text = text.substring( index + 2 );
            index = text.indexOf( "]]>" );
        }
        buffer.append( "<![CDATA[" ).append( text ).append( "]]>" );
        saveIndent = _nextIndent;
        _nextIndent = 0;
        printText( buffer, true );
        _nextIndent = saveIndent;
    } else if ( unescaped ) {
        // If the text node of this element should be printed
        // unescaped, then cancel indentation and print it
        // directly without escaping.
        saveIndent = _nextIndent;
        _nextIndent = 0;
        printText( text, true );
        _nextIndent = saveIndent;
    } else if ( state != null && state.preserveSpace ) {
        // If preserving space then hold off indentation so no
        // excessive spaces are printed at line breaks, escape
        // the text content without replacing spaces and print
        // the text breaking only at line breaks.
        saveIndent = _nextIndent;
        _nextIndent = 0;
        printText( escape( text ), true );
        _nextIndent = saveIndent;
    } else {
        // This is the last, but the most common case of
        // printing without preserving spaces. If indentation was
        // requested, line will wrap at space boundaries.
        // All whitespaces will print as space characters.
        printText( escape( text ), false );
    }
}
/**
* Returns the suitable entity reference for this character value,
* or null if no such entity exists. Calling this method with
* <tt>'&amp;'</tt> will return <tt>"&amp;amp;"</tt>.
*
* @param ch Character value
* @return Character entity name, or null
*/
protected abstract String getEntityRef( char ch );
/**
* Called to serialize the DOM element. The element is serialized based
* on the serializer's method (XML, HTML, XHTML).
*
* @param elem The element to serialize
*/
protected abstract void serializeElement( Element elem );
/**
 * Comments and PIs cannot be serialized before the root element,
 * because the root element serializes the document type, which
 * generally comes first. Instead such PIs and comments are
 * accumulated inside a vector and serialized by calling this
 * method: each one is printed with spaces preserved, followed by a
 * line break, and the vector is emptied afterwards. Will be called
 * when the root element is serialized and when the document has
 * finished serializing.
 */
protected void serializePreRoot()
{
    int next;
    String pending;

    if ( _preRoot == null ) {
        return;
    }
    for ( next = 0 ; next < _preRoot.size() ; ++next ) {
        pending = (String) _preRoot.elementAt( next );
        printText( pending, true );
        breakLine();
    }
    _preRoot.removeAllElements();
}
//---------------------------------------------//
// Text pretty printing and formatting methods //
//---------------------------------------------//
/**
* Called to print additional text. Each time this method is called
* it accumulates more text. When a space is printed ({@link
* #printSpace}) all the accumulated text becomes one part and is
* added to the accumulated line. When a line is long enough, it can
* be broken at its text boundary.
*
* @param text The text to print
*/
protected final void printText( String text )
{
// Add this text to the accumulated text which will not be
// printed until the next space break.
_text.append( text );
}
/**
* Called to print additional text taken from a character array. The
* characters are added to the accumulated text exactly like
* {@link #printText(String)} does.
* <p>
* NOTE(review): the third argument is handed to
* <tt>StringBuffer.append(char[], int, int)</tt>, whose third parameter
* is a character count, not an end index. Despite its name,
* <tt>end</tt> is therefore used as a length - confirm that callers
* pass a length.
*
* @param chars Array holding the characters
* @param start Index of the first character to print
* @param end Number of characters to print (see note above)
*/
protected final void printText( char[] chars, int start, int end )
{
_text.append( chars, start, end );
}
/**
 * Called to print additional text with whitespace handling.
 * If spaces are preserved, the text is printed as if by calling
 * {@link #printText(String)} with a call to {@link #breakLine}
 * for each line terminator character. If spaces are not preserved,
 * every whitespace character (space, form feed, tab, line feed,
 * carriage return) becomes a call to {@link #printSpace}, so the text
 * may be broken at space boundaries if longer than the line width.
 *
 * @param text The text to print
 * @param preserveSpace Space preserving flag
 */
protected final void printText( String text, boolean preserveSpace )
{
    int pos;
    char ch;

    for ( pos = 0 ; pos < text.length() ; ++pos ) {
        ch = text.charAt( pos );
        if ( preserveSpace ) {
            // Preserving spaces: the text prints exactly as it is; only
            // a line terminator causes a line break.
            if ( ch == '\n' || ch == '\r' ) {
                breakLine();
            } else {
                _text.append( ch );
            }
        } else {
            // Not preserving spaces: whitespace separates parts, and a
            // line terminator is no different from any other whitespace.
            switch ( ch ) {
            case ' ' :
            case '\f' :
            case '\t' :
            case '\n' :
            case '\r' :
                printSpace();
                break;
            default:
                _text.append( ch );
                break;
            }
        }
    }
}
/**
 * Called to print additional text, held in a string buffer, with
 * whitespace handling. If spaces are preserved, every character is
 * added to the accumulated text except line terminators, which cause
 * a call to {@link #breakLine}. If spaces are not preserved, every
 * whitespace character (space, form feed, tab, line feed, carriage
 * return) becomes a call to {@link #printSpace}.
 *
 * @param text The text to print
 * @param preserveSpace Space preserving flag
 */
protected final void printText( StringBuffer text, boolean preserveSpace )
{
    int pos;
    char ch;

    for ( pos = 0 ; pos < text.length() ; ++pos ) {
        ch = text.charAt( pos );
        if ( preserveSpace ) {
            // Preserving spaces: the text prints exactly as it is; only
            // a line terminator causes a line break.
            if ( ch == '\n' || ch == '\r' ) {
                breakLine();
            } else {
                _text.append( ch );
            }
        } else {
            // Not preserving spaces: whitespace separates parts, and a
            // line terminator is no different from any other whitespace.
            switch ( ch ) {
            case ' ' :
            case '\f' :
            case '\t' :
            case '\n' :
            case '\r' :
                printSpace();
                break;
            default:
                _text.append( ch );
                break;
            }
        }
    }
}
/**
* Called to print a single space between text parts that may be
* broken into separate lines. Must not be called to print a space
* when preserving spaces. The text accumulated so far with
* {@link #printText} will be added to the accumulated line, and a space
* separator will be counted. If adding the text would make the line
* longer than the line width of the output format, the line accumulated
* so far is printed first and the text starts a new line.
*/
protected final void printSpace()
{
// The line consists of the text accumulated in _line,
// followed by one or more spaces as counted by _spaces,
// followed by more text accumulated in _text:
// - Text is printed and accumulated into _text.
// - A space is printed, so _text is added to _line and
// a space is counted.
// - More text is printed and accumulated into _text.
// - A space is printed, the previous spaces are added
// to _line, the _text is added to _line, and a new
// space is counted.
// If text was accumulated with printText(), then the space
// means we have to move that text into the line and
// start accumulating new text with printText().
if ( _text.length() > 0 ) {
// If the text crosses the line boundary, wrap to the next line.
// The printed line size consists of the indentation of the
// current line, the accumulated line so far, some spaces and the
// accumulated text so far. A line width of zero or less disables
// wrapping.
if ( _format.getLineWidth() > 0 &&
_thisIndent + _line.length() + _spaces + _text.length() >
_format.getLineWidth() ) {
// flushLine() also zeroes _spaces when it prints a line, so the
// wrapped text does not start with the spaces counted before it.
flushLine();
try {
// Terminate the line that was just printed.
_writer.write( _format.getLineSeparator() );
} catch ( IOException except ) {
// We don't throw an exception, but hold it
// until the end of the document.
if ( _exception == null )
_exception = except;
}
}
// Add as many spaces as we accumulated before.
// At the end of this loop, _spaces is zero.
while ( _spaces > 0 ) {
_line.append( ' ' );
--_spaces;
}
_line.append( _text );
_text = new StringBuffer( 20 );
}
// Starting a new word: accumulate the text between the line
// and this new word; not a new word: just add another space.
++_spaces;
}
/**
 * Called to print a line consisting of the text accumulated so
 * far. This is equivalent to calling {@link #printSpace} but
 * forcing the line to print and starting a new line ({@link
 * #printSpace} will only start a new line if the current line
 * is long enough). A line separator is always written, even when
 * nothing was accumulated.
 */
protected final void breakLine()
{
    // Move the pending text, preceded by the spaces counted before it,
    // into the line.
    if ( _text.length() > 0 ) {
        for ( ; _spaces > 0 ; --_spaces ) {
            _line.append( ' ' );
        }
        _line.append( _text );
        _text = new StringBuffer( 20 );
    }
    flushLine();
    try {
        // Terminate the line that was just printed.
        _writer.write( _format.getLineSeparator() );
    } catch ( IOException failure ) {
        // Not thrown here; the first failure is kept until the end of
        // the document.
        if ( _exception == null ) {
            _exception = failure;
        }
    }
}
/**
* Flushes the line accumulated so far to the writer and gets ready
* to accumulate the next line. This method is called by
* {@link #printSpace} and {@link #breakLine}, and when entering or
* leaving DTD mode. It writes the indentation (when indenting) and the
* line contents, but no line separator.
* <p>
* When a non-empty line was written successfully, {@link #_line} is
* empty and {@link #_spaces} is zero at the end of this method, and the
* indentation of the next line becomes the current indentation. When
* the line was empty nothing happens at all, so a non-zero space count
* is kept.
*/
private void flushLine()
{
int indent;
if ( _line.length() > 0 ) {
try {
if ( _format.getIndenting() ) {
// Make sure the indentation does not blow us away: when a line
// width is set, never indent by more than half of it.
indent = _thisIndent;
if ( ( 2 * indent ) > _format.getLineWidth() &&
_format.getLineWidth() > 0 )
indent = _format.getLineWidth() / 2;
// Print the indentation as spaces.
while ( indent > 0 ) {
_writer.write( ' ' );
--indent;
}
}
// The next line uses the next expected indentation.
_thisIndent = _nextIndent;
// There is no need to print the spaces at the end of the line,
// they are simply stripped and replaced with a single line
// separator.
_spaces = 0;
_writer.write( _line.toString() );
_line = new StringBuffer( 40 );
} catch ( IOException except ) {
// We don't throw an exception, but hold it
// until the end of the document. Note that _line is not
// cleared when the write fails.
if ( _exception == null )
_exception = except;
}
}
}
/**
 * Flushes the output. Must be called when done printing the document,
 * otherwise some text might remain buffered. The accumulated text is
 * first pushed out through {@link #breakLine}, then the underlying
 * writer is flushed.
 */
public void flush()
{
    breakLine();
    try {
        _writer.flush();
    } catch ( IOException ioError ) {
        // Not thrown here: the first I/O error is remembered and
        // held until the end of the document.
        if ( _exception == null )
            _exception = ioError;
    }
}
/**
 * Increases the indentation of the next line by the indent step of
 * the output format.
 */
protected void indent()
{
    _nextIndent = _nextIndent + _format.getIndent();
}
/**
 * Decreases the indentation of the next line by the indent step of
 * the output format, never going below zero.
 */
protected void unindent()
{
    int reduced;
    int pending;

    reduced = _nextIndent - _format.getIndent();
    _nextIndent = ( reduced < 0 ) ? 0 : reduced;
    // With nothing accumulated for the current line, the reduced
    // level already applies to this line, not just the next one.
    pending = _line.length() + _spaces + _text.length();
    if ( pending == 0 )
        _thisIndent = _nextIndent;
}
/**
 * Prints a document type public or system identifier URL.
 * Encloses the URL in double quotes and appends it to the pending
 * text, equivalent to {@link #printText}. The double quote, control
 * characters (below 0x20 and 0x7F) and all non-ASCII characters are
 * escaped: the character is converted to UTF-8 and each octet is
 * written as <tt>%hh</tt> with exactly two hexadecimal digits.
 * An existing <tt>%</tt> is left alone so already-escaped URLs are
 * not escaped twice.
 *
 * @param url The document type url to print
 */
protected void printDoctypeURL( String url )
{
    int i;
    int ch;
    int low;

    _text.append( '"' );
    for ( i = 0 ; i < url.length() ; ++i ) {
        ch = url.charAt( i );
        // Combine a surrogate pair into one code point so that it is
        // encoded as a single four-octet UTF-8 sequence. An unpaired
        // surrogate falls through and is encoded on its own.
        if ( ch >= 0xD800 && ch <= 0xDBFF && i + 1 < url.length() ) {
            low = url.charAt( i + 1 );
            if ( low >= 0xDC00 && low <= 0xDFFF ) {
                ch = 0x10000 + ( ( ch - 0xD800 ) << 10 ) + ( low - 0xDC00 );
                ++i;
            }
        }
        if ( ch == '"' || ch < 0x20 || ch == 0x7F )
            appendEscapedOctet( ch );
        else if ( ch < 0x80 )
            _text.append( (char) ch );
        else if ( ch < 0x800 ) {
            appendEscapedOctet( 0xC0 | ( ch >> 6 ) );
            appendEscapedOctet( 0x80 | ( ch & 0x3F ) );
        } else if ( ch < 0x10000 ) {
            appendEscapedOctet( 0xE0 | ( ch >> 12 ) );
            appendEscapedOctet( 0x80 | ( ( ch >> 6 ) & 0x3F ) );
            appendEscapedOctet( 0x80 | ( ch & 0x3F ) );
        } else {
            appendEscapedOctet( 0xF0 | ( ch >> 18 ) );
            appendEscapedOctet( 0x80 | ( ( ch >> 12 ) & 0x3F ) );
            appendEscapedOctet( 0x80 | ( ( ch >> 6 ) & 0x3F ) );
            appendEscapedOctet( 0x80 | ( ch & 0x3F ) );
        }
    }
    _text.append( '"' );
}

/**
 * Appends one octet to the pending text as <tt>%hh</tt>, always
 * using two lower-case hexadecimal digits.
 *
 * @param octet The octet value (only the low eight bits are used)
 */
private void appendEscapedOctet( int octet )
{
    String hexDigits = "0123456789abcdef";

    _text.append( '%' );
    _text.append( hexDigits.charAt( ( octet >> 4 ) & 0x0F ) );
    _text.append( hexDigits.charAt( octet & 0x0F ) );
}
/**
 * Escapes a string so it may be printed as text content or attribute
 * value. Non printable characters are escaped using decimal character
 * references (<tt>&amp;#nnn;</tt>). Where {@link #getEntityRef}
 * supplies an entity reference for a character, that reference is
 * used (e.g. <tt>&amp;lt;</tt>).
 *
 * @param source The string to escape
 * @return The escaped string
 */
protected String escape( String source )
{
    StringBuffer result;
    int i;
    char ch;
    String charRef;

    result = new StringBuffer( source.length() );
    for ( i = 0 ; i < source.length() ; ++i ) {
        ch = source.charAt( i );
        // If the character is not printable, print as character reference.
        // Non printables are below ASCII space but not tab or line
        // terminator, ASCII delete (0x7F), or above a certain Unicode
        // threshold. The delete test used to read 0xF7 (the division
        // sign), a transposition that let DEL through unescaped.
        if ( ( ch < ' ' && ch != '\t' && ch != '\n' && ch != '\r' ) ||
             ch > _lastPrintable || ch == 0x7F )
            result.append( "&#" ).append( Integer.toString( ch ) ).append( ';' );
        else {
            // If there is a suitable entity reference for this
            // character, print it. The list of available entity
            // references is almost but not identical between
            // XML and HTML.
            charRef = getEntityRef( ch );
            if ( charRef == null )
                result.append( ch );
            else
                result.append( '&' ).append( charRef ).append( ';' );
        }
    }
    return result.toString();
}
//--------------------------------//
// Element state handling methods //
//--------------------------------//
/**
 * Returns the state of the innermost element currently entered, or
 * null when not within any element (e.g. before entering the root
 * element).
 *
 * @return Current element state, or null
 */
protected ElementState getElementState()
{
    return ( _elementStateCount == 0 )
        ? null
        : _elementStates[ _elementStateCount - 1 ];
}
/**
 * Enters a new element state for the specified element. The tag name
 * and space preserving are taken from the arguments; every other
 * flag starts from its initial value (empty, not after an element,
 * not CDATA, not unescaped).
 *
 * @param tagName The element's tag name
 * @param preserveSpace True if the element is space preserving
 * @return The newly entered element state, never null
 */
protected ElementState enterElementState( String tagName, boolean preserveSpace )
{
    ElementState state;

    if ( _elementStateCount == _elementStates.length ) {
        ElementState[] newStates;
        int i;

        // Need to create a larger array of states.
        // This does not happen often, unless the document
        // is really deep.
        newStates = new ElementState[ _elementStates.length + 5 ];
        System.arraycopy( _elementStates, 0, newStates, 0,
                          _elementStates.length );
        _elementStates = newStates;
        for ( i = _elementStateCount ; i < _elementStates.length ; ++i )
            _elementStates[ i ] = new ElementState();
    }
    state = _elementStates[ _elementStateCount ];
    state.tagName = tagName;
    state.preserveSpace = preserveSpace;
    state.empty = true;
    state.afterElement = false;
    // State objects are recycled for every element at the same depth,
    // so these flags must be cleared too or they would leak from a
    // previously serialized element.
    state.cdata = false;
    state.unescaped = false;
    ++_elementStateCount;
    return state;
}
/**
 * Leaves the current element state and returns to the state of the
 * parent element. Returns null when the element being left is the
 * root element, or when no element state was entered at all.
 *
 * @return Parent element state, or null
 */
protected ElementState leaveElementState()
{
    // Nothing entered: nothing to leave.
    if ( _elementStateCount <= 0 )
        return null;
    -- _elementStateCount;
    // Leaving the root element leaves no current state.
    if ( _elementStateCount == 0 )
        return null;
    return _elementStates[ _elementStateCount - 1 ];
}
}
1.1
xml-xerces/java/src/org/apache/xml/serialize/ElementState.java
Index: ElementState.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xml.serialize;
/**
 * Per-element bookkeeping record kept while an element is being
 * serialized. A plain holder of package-visible fields with no
 * behavior of its own.
 *
 *
 * @version
 * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
 * @see BaseSerializer
 */
class ElementState
{

    /**
     * Tag name of the element.
     */
    String tagName;

    /**
     * Whether the element is space preserving.
     */
    boolean preserveSpace;

    /**
     * Whether the element is still empty. Becomes false as soon as
     * the first contents of the element are serialized.
     */
    boolean empty;

    /**
     * Whether the node serialized last was an element node.
     */
    boolean afterElement;

    /**
     * Whether the textual content of the element should be
     * serialized as a CDATA section.
     */
    boolean cdata;

    /**
     * Whether the textual content of the element should be
     * serialized as raw (unescaped) characters.
     */
    boolean unescaped;

}
1.1
xml-xerces/java/src/org/apache/xml/serialize/HTMLEntities.res
Index: HTMLEntities.res
===================================================================
# $Id: HTMLEntities.res,v 1.1 1999/11/23 22:29:52 arkin Exp $
#
# @version $Revision: 1.1 $ $Date: 1999/11/23 22:29:52 $
# @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
#
# Character entity references for markup-significant characters
#
quot 34
amp 38
lt 60
gt 62
nbsp 160
#
# Character entity references for ISO 8859-1 characters
#
iexcl 161
cent 162
pound 163
curren 164
yen 165
brvbar 166
sect 167
uml 168
copy 169
ordf 170
laquo 171
not 172
shy 173
reg 174
macr 175
deg 176
plusmn 177
sup2 178
sup3 179
acute 180
micro 181
para 182
middot 183
cedil 184
sup1 185
ordm 186
raquo 187
frac14 188
frac12 189
frac34 190
iquest 191
Agrave 192
Aacute 193
Acirc 194
Atilde 195
Auml 196
Aring 197
AElig 198
Ccedil 199
Egrave 200
Eacute 201
Ecirc 202
Euml 203
Igrave 204
Iacute 205
Icirc 206
Iuml 207
ETH 208
Ntilde 209
Ograve 210
Oacute 211
Ocirc 212
Otilde 213
Ouml 214
times 215
Oslash 216
Ugrave 217
Uacute 218
Ucirc 219
Uuml 220
Yacute 221
THORN 222
szlig 223
agrave 224
aacute 225
acirc 226
atilde 227
auml 228
aring 229
aelig 230
ccedil 231
egrave 232
eacute 233
ecirc 234
euml 235
igrave 236
iacute 237
icirc 238
iuml 239
eth 240
ntilde 241
ograve 242
oacute 243
ocirc 244
otilde 245
ouml 246
divide 247
oslash 248
ugrave 249
uacute 250
ucirc 251
uuml 252
yacute 253
thorn 254
yuml 255
#
# Character entity references for symbols, mathematical symbols, and Greek letters
#
# Latin Extended
fnof 402
#
# Greek
Alpha 913
Beta 914
Gamma 915
Delta 916
Epsilon 917
Zeta 918
Eta 919
Theta 920
Iota 921
Kappa 922
Lambda 923
Mu 924
Nu 925
Xi 926
Omicron 927
Pi 928
Rho 929
Sigma 931
Tau 932
Upsilon 933
Phi 934
Chi 935
Psi 936
Omega 937
alpha 945
beta 946
gamma 947
delta 948
epsilon 949
zeta 950
eta 951
theta 952
iota 953
kappa 954
lambda 955
mu 956
nu 957
xi 958
omicron 959
pi 960
rho 961
sigmaf 962
sigma 963
tau 964
upsilon 965
phi 966
chi 967
psi 968
omega 969
thetasym 977
upsih 978
piv 982
#
# General Punctuation
bull 8226
hellip 8230
prime 8242
Prime 8243
oline 8254
frasl 8260
#
# Letterlike Symbols
weierp 8472
image 8465
real 8476
trade 8482
alefsym 8501
#
# Arrows
larr 8592
uarr 8593
rarr 8594
darr 8595
harr 8596
crarr 8629
lArr 8656
uArr 8657
rArr 8658
dArr 8659
hArr 8660
#
# Mathematical Operators
forall 8704
part 8706
exist 8707
empty 8709
nabla 8711
isin 8712
notin 8713
ni 8715
prod 8719
sum 8721
minus 8722
lowast 8727
radic 8730
prop 8733
infin 8734
ang 8736
and 8743
or 8744
cap 8745
cup 8746
int 8747
there4 8756
sim 8764
cong 8773
asymp 8776
ne 8800
equiv 8801
le 8804
ge 8805
sub 8834
sup 8835
nsub 8836
sube 8838
supe 8839
oplus 8853
otimes 8855
perp 8869
sdot 8901
#
# Miscellaneous Technical
lceil 8968
rceil 8969
lfloor 8970
rfloor 8971
lang 9001
rang 9002
#
# Geometric Shapes
loz 9674
#
# Miscellaneous Symbols
spades 9824
clubs 9827
hearts 9829
diams 9830
#
# Character entity references for internationalization characters
#
# Latin Extended-A
OElig 338
oelig 339
Scaron 352
scaron 353
Yuml 376
#
# Spacing Modifier Letters
circ 710
tilde 732
#
# General Punctuation
ensp 8194
emsp 8195
thinsp 8201
zwnj 8204
zwj 8205
lrm 8206
rlm 8207
ndash 8211
mdash 8212
lsquo 8216
rsquo 8217
sbquo 8218
ldquo 8220
rdquo 8221
bdquo 8222
dagger 8224
Dagger 8225
permil 8240
lsaquo 8249
rsaquo 8250
euro 8364
1.1
xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java
Index: HTMLSerializer.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xml.serialize;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.OutputStream;
import java.io.Writer;
import org.w3c.dom.Element;
import org.w3c.dom.Attr;
import org.w3c.dom.Node;
import org.w3c.dom.NamedNodeMap;
import org.xml.sax.DocumentHandler;
import org.xml.sax.AttributeList;
/**
* Implements an HTML/XHTML serializer supporting both DOM and SAX
* pretty serializing. HTML/XHTML mode is determined in the
* constructor. For usage instructions see {@link Serializer}.
* <p>
* If an output stream is used, the encoding is taken from the
* output format (defaults to <tt>UTF8</tt>). If a writer is
* used, make sure the writer uses the same encoding (if applies)
* as specified in the output format.
* <p>
* The serializer supports both DOM and SAX. DOM serializing is done
* by calling {@link #serialize} and SAX serializing is done by firing
* SAX events and using the serializer as a document handler.
* <p>
* If an I/O exception occurs while serializing, the serializer
* will not throw an exception directly, but only throw it
* at the end of serializing (either DOM or SAX's
* {@link org.xml.sax.DocumentHandler#endDocument}).
* <p>
* For elements that are not specified as whitespace preserving,
* the serializer will potentially break long text lines at space
* boundaries, indent lines, and serialize elements on separate
* lines. Line terminators will be regarded as spaces, and
* spaces at beginning of line will be stripped.
* <p>
* XHTML is slightly different than HTML:
* <ul>
* <li>Element/attribute names are lower case and case matters
* <li>Attributes must specify value, even if empty string
* <li>Empty elements must have '/' in empty tag
* <li>Contents of SCRIPT and STYLE elements serialized as CDATA
* </ul>
*
*
* @version
* @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
* @see Serializer
*/
public class HTMLSerializer
extends BaseSerializer
{
/**
 * True if serializing in XHTML format. Kept per instance: the value
 * is assigned by the constructor, so a static field would make every
 * serializer follow the mode of whichever one was constructed last.
 */
private boolean _xhtml;
/**
 * Constructs a new HTML or XHTML serializer, as selected by
 * <tt>xhtml</tt>. The serializer cannot be used without calling
 * {@link #init} first.
 *
 * @param xhtml True for XHTML serializing, false for HTML
 */
protected HTMLSerializer( boolean xhtml )
{
    // The superclass no-argument constructor runs implicitly.
    _xhtml = xhtml;
}
/**
 * Constructs a new serializer in HTML (not XHTML) mode. The
 * serializer cannot be used without calling {@link #init} first.
 */
public HTMLSerializer()
{
this( false );
}
/**
 * Constructs a new HTML serializer that writes to the given writer
 * using the given output format. When <tt>format</tt> is null, a
 * default HTML output format is used instead.
 *
 * @param writer The writer to use
 * @param format The output format to use, null for the default
 */
public HTMLSerializer( Writer writer, OutputFormat format )
{
    this( false );
    init( writer,
          ( format != null )
              ? format
              : new OutputFormat( OutputFormat.METHOD_HTML, null, false ) );
}
/**
 * Constructs a new serializer that writes to the specified output
 * stream using the specified output format. If <tt>format</tt>
 * is null, will use a default output format.
 *
 * @param output The output stream to use
 * @param format The output format to use, null for the default
 * @throws IllegalArgumentException If the encoding named by
 *  <tt>format</tt> is not supported
 */
public HTMLSerializer( OutputStream output, OutputFormat format )
{
    this( false );
    if ( format == null )
        format = new OutputFormat( OutputFormat.METHOD_HTML, null, false );
    try {
        init( output, format );
    } catch ( UnsupportedEncodingException except ) {
        // Cannot happen with the default format, but a caller-supplied
        // format may name an unsupported encoding. Swallowing the error
        // would hand back a serializer that was never initialized, so
        // report it to the caller instead.
        throw new IllegalArgumentException( "Unsupported output encoding: " +
                                            except.getMessage() );
    }
}
//------------------------------------------//
// SAX document handler serializing methods //
//------------------------------------------//
/**
 * SAX start-of-document event. Intentionally writes nothing for
 * HTML/XHTML.
 */
public void startDocument()
{
// Do nothing for HTML/XHTML, browser might not respond
// well to <?xml ...?>
}
/**
 * SAX start-of-element event: prints the opening tag name and the
 * attributes, then enters a new element state. The closing
 * <tt>&gt;</tt> of the tag is not printed here; it is printed once
 * the first content or the end of the element is seen.
 *
 * @param tagName The element tag name
 * @param attrs The element attributes, may be null
 */
public void startElement( String tagName, AttributeList attrs )
{
    int i;
    boolean preserveSpace;
    ElementState state;
    String name;
    String value;

    state = getElementState();
    if ( state == null ) {
        // If this is the root element handle it differently.
        // If the first root element in the document, serialize
        // the document's DOCTYPE. Space preserving defaults
        // to that of the output format.
        if ( ! _started )
            startDocument( tagName );
        preserveSpace = _format.getPreserveSpace();
    } else {
        // For any other element, if first in parent, then
        // close parent's opening tag and use the parent's
        // space preserving.
        if ( state.empty )
            printText( ">" );
        preserveSpace = state.preserveSpace;
        // Indent this element on a new line if the first
        // content of the parent element or immediately
        // following an element.
        if ( _format.getIndenting() && ! state.preserveSpace &&
             ( state.empty || state.afterElement ) )
            breakLine();
    }
    // The parent's state is not updated here; that happens when this
    // element ends (see endElement).

    // XHTML: element names are lower case, DOM will be different.
    // Lower-casing uses a fixed locale: with the default locale a
    // Turkish environment would map 'I' to a dotless 'i' and corrupt
    // names such as TITLE.
    if ( _xhtml )
        printText( '<' + tagName.toLowerCase( java.util.Locale.ENGLISH ) );
    else
        printText( '<' + tagName );
    indent();

    // For each attribute serialize its name and value as one part,
    // separated with a space so the element can be broken on
    // multiple lines.
    if ( attrs != null ) {
        for ( i = 0 ; i < attrs.getLength() ; ++i ) {
            printSpace();
            name = attrs.getName( i ).toLowerCase( java.util.Locale.ENGLISH );
            value = attrs.getValue( i );
            if ( value != null )
                printText( name + "=\"" + escape( value ) + '"' );
            else if ( _xhtml )
                // XHTML: print empty string for null values.
                printText( name + "=\"\"" );
            else
                // HTML: Non values print as attribute name, no value.
                printText( name );
        }
    }
    if ( HTMLdtd.isPreserveSpace( tagName ) )
        preserveSpace = true;

    // Now it's time to enter a new element state
    // with the tag name and space preserving.
    enterElementState( tagName, preserveSpace );
}
/**
 * SAX end-of-element event: closes the current element, leaves its
 * element state and marks the parent as non-empty. When the root
 * element is closed, the output is flushed.
 *
 * @param tagName The element tag name
 */
public void endElement( String tagName )
{
    ElementState state;

    // Works much like content() with additions for closing
    // an element. Note the different checks for the closed
    // element's state and the parent element's state.
    unindent();
    state = getElementState();
    if ( state.empty ) {
        // Nothing was printed inside the element, so its opening tag
        // is still unterminated.
        if ( _xhtml )
            printText( " />" );
        else
            printText( ">" );
    } else {
        // XHTML: element names are lower case, DOM will be different.
        // A fixed locale is used so the result does not depend on the
        // default locale (e.g. Turkish dotless 'i').
        // HTML: some elements do not print closing tag (e.g. LI)
        if ( _xhtml )
            printText( "</" + tagName.toLowerCase( java.util.Locale.ENGLISH ) + ">" );
        else if ( ! HTMLdtd.isOnlyOpening( tagName ) ) {
            // This element is not empty and the last content was
            // another element, so print a line break between that
            // last element and this element's closing tag.
            // [keith] Provided this is not an anchor.
            if ( ! tagName.equalsIgnoreCase( "A" ) &&
                 _format.getIndenting() &&
                 ! state.preserveSpace && state.afterElement )
                breakLine();
            printText( "</" + tagName + ">" );
        }
    }
    // Leave the element state and update that of the parent
    // (if we're not root) to not empty and after element.
    state = leaveElementState();
    if ( state != null ) {
        state.afterElement = true;
        state.empty = false;
    } else {
        // [keith] If we're done printing the document but don't
        // get to call endDocument(), the buffer should be flushed.
        flush();
    }
}
//------------------------------------------//
// Generic node serializing methods methods //
//------------------------------------------//
/**
 * Called to serialize the document's DOCTYPE by the root element.
 * The document type declaration must name the root element,
 * but the root element is only known when that element is serialized,
 * and not at the start of the document.
 * <p>
 * This method will check if it has not been called before
 * ({@link #_started}), will serialize the document type declaration,
 * and will serialize all pre-root comments and PIs that were
 * accumulated in the document (see {@link #serializePreRoot}).
 * Pre-root will be serialized even if this is not the first root
 * element of the document.
 *
 * @param rootTagName The tag name of the root element (not used:
 *  the declaration always names <tt>HTML</tt>, or <tt>html</tt>
 *  for XHTML)
 */
protected void startDocument( String rootTagName )
{
    String publicId;
    String systemId;
    String doctypeName;

    // Not supported in HTML/XHTML, but we still have to switch
    // out of DTD mode.
    leaveDTD();
    if ( ! _started ) {
        // If the public and system identifiers were not specified
        // in the output format, use the appropriate ones for HTML
        // or XHTML.
        publicId = _format.getDoctypePublic();
        systemId = _format.getDoctypeSystem();
        if ( publicId == null && systemId == null ) {
            if ( _xhtml ) {
                publicId = OutputFormat.DOCTYPE_XHTML_PUBLIC;
                systemId = OutputFormat.DOCTYPE_XHTML_SYSTEM;
            } else {
                publicId = OutputFormat.DOCTYPE_HTML_PUBLIC;
                systemId = OutputFormat.DOCTYPE_HTML_SYSTEM;
            }
        }
        // XHTML is XML, so names are case sensitive and the name in
        // the DOCTYPE must match the lower-case root element.
        doctypeName = _xhtml ? "html" : "HTML";

        // XHTML: If public identifier and system identifier
        // specified, print them, else print just system identifier
        // HTML: If public identifier specified, print it with
        // system identifier, if specified.
        if ( publicId != null && ( ! _xhtml || systemId != null ) ) {
            printText( "<!DOCTYPE " + doctypeName + " PUBLIC " );
            printDoctypeURL( publicId );
            if ( systemId != null ) {
                if ( _format.getIndenting() ) {
                    breakLine();
                    printText( " " );
                } else {
                    // The two quoted literals must be separated by
                    // whitespace even when not indenting.
                    printText( " " );
                }
                printDoctypeURL( systemId );
            }
            printText( ">" );
            breakLine();
        } else if ( systemId != null ) {
            printText( "<!DOCTYPE " + doctypeName + " SYSTEM " );
            printDoctypeURL( systemId );
            printText( ">" );
            breakLine();
        }
    }
    _started = true;
    // Always serialize these, even if not the first root element.
    serializePreRoot();
}
/**
* Called to serialize a DOM element. Equivalent to calling [EMAIL
PROTECTED]
* #startElement}, [EMAIL PROTECTED] #endElement} and serializing
everything
* inbetween, but better optimized.
*/
protected void serializeElement( Element elem )
{
Attr attr;
NamedNodeMap attrMap;
int i;
Node child;
ElementState state;
boolean preserveSpace;
String name;
String value;
state = getElementState();
if ( state == null ) {
// If this is the root element handle it differently.
// If the first root element in the document, serialize
// the document's DOCTYPE. Space preserving defaults
// to that of the output format.
if ( ! _started )
startDocument( elem.getTagName() );
preserveSpace = _format.getPreserveSpace();
} else {
// For any other element, if first in parent, then
// close parent's opening tag and use the parnet's
// space preserving.
if ( state.empty )
printText( ">" );
preserveSpace = state.preserveSpace;
// Indent this element on a new line if the first
// content of the parent element or immediately
// following an element.
if ( _format.getIndenting() && ! state.preserveSpace &&
( state.empty || state.afterElement ) )
breakLine();
}
// Do not change the current element state yet.
// This only happens in endElement().
// XHTML: element names are lower case, DOM will be different
if ( _xhtml )
printText( '<' + elem.getTagName().toLowerCase() );
else
printText( '<' + elem.getTagName() );
indent();
// Lookup the element's attribute, but only print specified
// attributes. (Unspecified attributes are derived from the DTD.
// For each attribute print it's name and value as one part,
// separated with a space so the element can be broken on
// multiple lines.
attrMap = elem.getAttributes();
if ( attrMap != null ) {
for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
attr = (Attr) attrMap.item( i );
name = attr.getName().toLowerCase();
value = attr.getValue();
if ( attr.getSpecified() ) {
printSpace();
if ( _xhtml ) {
// XHTML: print empty string for null values.
if ( value == null )
printText( name + "=\"\"" );
else
printText( name + "=\"" + escape( value ) + '"' );
} else {
// HTML: Non values print as attribute name, no value.
if ( value == null )
printText( name );
else
printText( name + "=\"" + escape( value ) + '"' );
}
}
}
}
if ( HTMLdtd.isPreserveSpace( elem.getTagName() ) )
preserveSpace = true;
// If element has children, or if element is not an empty tag,
// serialize an opening tag.
if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( elem.getTagName() )
) {
// Enter an element state, and serialize the children
// one by one. Finally, end the element.
enterElementState( elem.getTagName(), preserveSpace );
child = elem.getFirstChild();
while ( child != null ) {
serializeNode( child );
child = child.getNextSibling();
}
endElement( elem.getTagName() );
} else {
unindent();
// XHTML: Close empty tag with ' />' so it's XML and HTML
compatible.
// HTML: Empty tags are defined as such in DTD no in document.
if ( _xhtml )
printText( " />" );
else
printText( ">" );
if ( state != null ) {
// After element but parent element is no longer empty.
state.afterElement = true;
state.empty = false;
}
}
}
/**
 * Serializes character data, overriding the requested mode for the
 * text content of SCRIPT and STYLE elements (matched ignoring case):
 * XHTML prints it as CDATA, HTML prints it unescaped. All other text
 * is passed on with the flags given.
 *
 * @param text The text to serialize
 * @param cdata Passed to the superclass for ordinary elements
 * @param unescaped Passed to the superclass for ordinary elements
 */
protected void characters( String text, boolean cdata, boolean unescaped )
{
    ElementState current;
    boolean scriptOrStyle;

    current = content();
    scriptOrStyle = current != null &&
        ( current.tagName.equalsIgnoreCase( "SCRIPT" ) ||
          current.tagName.equalsIgnoreCase( "STYLE" ) );
    if ( ! scriptOrStyle )
        super.characters( text, cdata, unescaped );
    else if ( _xhtml )
        super.characters( text, true, false );
    else
        super.characters( text, false, true );
}
/**
 * Looks up the entity reference name for a character by delegating
 * to <tt>HTMLdtd.fromChar</tt>.
 *
 * @param ch The character to look up
 * @return The value returned by <tt>HTMLdtd.fromChar</tt>
 */
protected String getEntityRef( char ch )
{
return HTMLdtd.fromChar( ch );
}
}
1.1 xml-xerces/java/src/org/apache/xml/serialize/HTMLdtd.java
Index: HTMLdtd.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xml.serialize;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.util.Hashtable;
/**
* Utility class for accessing information specific to HTML documents.
* The HTML DTD is expressed as three utility function groups. Two methods
* allow for checking whether an element requires an open tag on printing
* ({@link #isEmptyTag}) or on parsing ({@link #isOptionalClosing}).
* <P>
* Two other methods translate character references from name to value and
* from value to name. A small entities resource is loaded into memory the
* first time any of these methods is called for fast and efficient access.
*
*
* @version
* @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
*/
final class HTMLdtd
{
/**
 * Table of reverse character reference mapping. Character codes are held
 * as single-character strings, mapped to their reference name.
 */
private static Hashtable _byChar;
/**
 * Table of entity name to value mapping. Entities are held as strings,
 * character references as <TT>Character</TT> objects.
 */
private static Hashtable _byName;
/**
 * Locates the HTML entities file that is loaded upon initialization.
 * This file is a resource loaded with the default class loader.
 */
private static final String ENTITIES_RESOURCE = "HTMLEntities.res";
/**
 * Holds element definitions.
 */
private static Hashtable _elemDefs;
// The constants below are single-bit flags describing an element;
// isEmptyTag and isElementContent pass one of them to isElement.
/**
 * Element is empty.
 */
private static final int EMPTY = 0x0001;
/**
 * Element contains element content only.
 */
private static final int ELEM_CONTENT = 0x0002;
/**
 * Element preserves spaces.
 */
private static final int PRESERVE = 0x0004;
/**
 * Optional closing tag.
 */
private static final int OPT_CLOSING = 0x0008;
/**
 * Only opening tag should be printed.
 */
private static final int ONLY_OPENING = 0x0010;
/**
 * Allowed to appear in head.
 */
private static final int ALLOWED_HEAD = 0x0020;
/**
 * When opened, closes P.
 */
private static final int CLOSE_P = 0x0040;
/**
 * When opened, closes DD or DT.
 */
private static final int CLOSE_DD_DT = 0x0080;
/**
 * When opened, closes itself.
 */
private static final int CLOSE_SELF = 0x0100;
/**
 * When opened, closes another table section.
 */
private static final int CLOSE_TABLE = 0x0200;
/**
 * When opened, closes TH or TD.
 */
// NOTE(review): 0x04000 is 0x4000, which skips 0x0400 through 0x2000 in
// the sequence above; 0x0400 may have been intended. It is still
// distinct from every other flag declared here.
private static final int CLOSE_TH_TD = 0x04000;
/**
* Returns true if element is declared to be empty. HTML elements are
* defines as empty in the DTD, not by the document syntax.
*
* @param tagName The element tag name (upper case)
* @return True if element is empty
*/
public static boolean isEmptyTag( String tagName )
{
// BR AREA LINK IMG PARAM HR INPUT COL BASE META BASEFONT ISINDEX
/*
return ( tagName.equals( "BR" ) || tagName.equals( "AREA" ) ||
tagName.equals( "LINK" ) || tagName.equals( "IMG" ) ||
tagName.equals( "PARAM" ) || tagName.equals( "HR" ) ||
tagName.equals( "INPUT" ) || tagName.equals( "COL" ) ||
tagName.equals( "BASE" ) || tagName.equals( "META" ) ||
tagName.equals( "BASEFONT" ) || tagName.equals( "ISINDEX" )
);
*/
return isElement( tagName, EMPTY );
}
/**
* Returns true if element is declared to have element content.
* Whitespaces appearing inside element content will be ignored,
* other text will simply report an error.
*
* @param tagName The element tag name (upper case)
* @return True if element content
*/
public static boolean isElementContent( String tagName )
{
// DL OL UL SELECT OPTGROUP TABLE THEAD TFOOT TBODY COLGROUP TR HEAD
HTML
/*
return ( tagName.equals( "DL" ) || tagName.equals( "OL" ) ||
tagName.equals( "UL" ) || tagName.equals( "SELECT" ) ||
tagName.equals( "OPTGROUP" ) || tagName.equals( "TABLE" ) ||
tagName.equals( "THEAD" ) || tagName.equals( "TFOOT" ) ||
tagName.equals( "TBODY" ) || tagName.equals( "COLGROUP" ) ||
tagName.equals( "TR" ) || tagName.equals( "HEAD" ) ||
tagName.equals( "HTML" ) );
*/
return isElement( tagName, ELEM_CONTENT );
}
/**
* Returns true if element's textual contents preserves spaces.
* This only applies to PRE and TEXTAREA, all other HTML elements
* do not preserve space.
*
* @param tagName The element tag name (upper case)
* @return True if element's text content preserves spaces
*/
public static boolean isPreserveSpace( String tagName )
{
// PRE TEXTAREA
/*
return ( tagName.equals( "PRE" ) || tagName.equals( "TEXTAREA" ) );
*/
return isElement( tagName, PRESERVE );
}
/**
* Returns true if element's closing tag is optional and need not
* exist. An error will not be reported for such elements if they
* are not closed. For example, <tt>LI</tt> is most often not closed.
*
* @param tagName The element tag name (upper case)
* @return True if closing tag implied
*/
public static boolean isOptionalClosing( String tagName )
{
// BODY HEAD HTML P DT DD LI OPTION THEAD TFOOT TBODY TR COLGROUP TH
TD FRAME
/*
return ( tagName.equals( "BODY" ) || tagName.equals( "HEAD" ) ||
tagName.equals( "HTML" ) || tagName.equals( "P" ) ||
tagName.equals( "DT" ) || tagName.equals( "DD" ) ||
tagName.equals( "LI" ) || tagName.equals( "OPTION" ) ||
tagName.equals( "THEAD" ) || tagName.equals( "TFOOT" ) ||
tagName.equals( "TBODY" ) || tagName.equals( "TR" ) ||
tagName.equals( "COLGROUP" ) || tagName.equals( "TH" ) ||
tagName.equals( "TD" ) || tagName.equals( "FRAME" ) );
*/
return isElement( tagName, OPT_CLOSING );
}
/**
* Returns true if element's closing tag is generally not printed.
* For example, <tt>LI</tt> should not print the closing tag.
*
* @param tagName The element tag name (upper case)
* @return True if only opening tag should be printed
*/
public static boolean isOnlyOpening( String tagName )
{
//DT DD LI OPTION
/*
return ( tagName.equals( "DT" ) || tagName.equals( "DD" ) ||
tagName.equals( "LI" ) || tagName.equals( "OPTION" ) );
*/
return isElement( tagName, ONLY_OPENING );
}
/**
* Returns true if the opening of one element (<tt>tagName</tt>) implies
* the closing of another open element (<tt>openTag</tt>). For example,
* every opening <tt>LI</tt> will close the previously open <tt>LI</tt>,
* and every opening <tt>BODY</tt> will close the previously open
<tt>HEAD</tt>.
*
* @param tagName The newly opened element
* @param openTag The already opened element
* @return True if closing tag closes opening tag
*/
public static boolean isClosing( String tagName, String openTag )
{
// BODY (closing HTML, end of document)
// HEAD (BODY, closing HTML, end of document)
if ( openTag.equalsIgnoreCase( "HEAD" ) )
/*
return ! ( tagName.equals( "ISINDEX" ) || tagName.equals( "TITLE"
) ||
tagName.equals( "META" ) || tagName.equals( "SCRIPT" ) ||
tagName.equals( "STYLE" ) || tagName.equals( "LINK" ) );
*/
return ! isElement( tagName, ALLOWED_HEAD );
// P (P, H1-H6, UL, OL, DL, PRE, DIV, BLOCKQUOTE, FORM, HR, TABLE,
ADDRESS, FIELDSET, closing BODY, closing HTML, end of document)
if ( openTag.equalsIgnoreCase( "P" ) )
/*
return ( tagName.endsWith( "P" ) || tagName.endsWith( "H1" ) ||
tagName.endsWith( "H2" ) || tagName.endsWith( "H3" ) ||
tagName.endsWith( "H4" ) || tagName.endsWith( "H5" ) ||
tagName.endsWith( "H6" ) || tagName.endsWith( "UL" ) ||
tagName.endsWith( "OL" ) || tagName.endsWith( "DL" ) ||
tagName.endsWith( "PRE" ) || tagName.endsWith( "DIV" ) ||
tagName.endsWith( "BLOCKQUOTE" ) || tagName.endsWith(
"FORM" ) ||
tagName.endsWith( "HR" ) || tagName.endsWith( "TABLE" )
||
tagName.endsWith( "ADDRESS" ) || tagName.endsWith(
"FIELDSET" ) );
*/
return isElement( tagName, CLOSE_P );
if ( openTag.equalsIgnoreCase( "DT" ) || openTag.equalsIgnoreCase(
"DD" ) )
return isElement( tagName, CLOSE_DD_DT );
// DT (DD)
/*
if ( openTag.equals( "DT" ) )
return tagName.endsWith( "DD" );
*/
// DD (DT, closing DL)
/*
if ( openTag.equals( "DD" ) )
return tagName.endsWith( "DT" );
*/
if ( openTag.equalsIgnoreCase( "LI" ) || openTag.equalsIgnoreCase(
"OPTION" ) )
return isElement( tagName, CLOSE_SELF );
// LI (LI, closing UL/OL)
/*
if ( openTag.equals( "LI" ) )
return tagName.endsWith( "LI" );
*/
// OPTION (OPTION, OPTGROUP closing or opening, closing SELECT)
/*
if ( openTag.equals( "OPTION" ) )
return tagName.endsWith( "OPTION" );
*/
// THEAD (TFOOT, TBODY, TR, closing TABLE
// TFOOT (TBODY, TR, closing TABLE)
// TBODY (TBODY, closing TABLE)
// COLGROUP (THEAD, TBODY, TR, closing TABLE)
// TR (TR, closing THEAD, TFOOT, TBODY, TABLE)
if ( openTag.equalsIgnoreCase( "THEAD" ) || openTag.equalsIgnoreCase(
"TFOOT" ) ||
openTag.equalsIgnoreCase( "TBODY" ) || openTag.equalsIgnoreCase(
"TR" ) ||
openTag.equalsIgnoreCase( "COLGROUP" ) )
/*
return ( tagName.endsWith( "THEAD" ) || tagName.endsWith( "TFOOT"
) ||
tagName.endsWith( "TBODY" ) || tagName.endsWith( "TR" )
||
tagName.endsWith( "COLGROUP" ) );
*/
return isElement( tagName, CLOSE_TABLE );
// TH (TD, TH, closing TR)
// TD (TD, TH, closing TR)
if ( openTag.equalsIgnoreCase( "TH" ) || openTag.equalsIgnoreCase(
"TD" ) )
/*
return ( tagName.endsWith( "TD" ) || tagName.endsWith( "TH" ) );
*/
return isElement( tagName, CLOSE_TH_TD );
return false;
}
/**
* Returns the value of an HTML character reference by its name. If the
* reference is not found or was not defined as a character reference,
* returns EOF (-1).
*
* @param name Name of character reference
* @return Character code or EOF (-1)
*/
public static int charFromName( String name )
{
Object value;
initialize();
value = _byName.get( name );
if ( value != null && value instanceof Character )
return ( (Character) value ).charValue();
else
return -1;
}
/**
* Returns the name of an HTML character reference based on its character
* value. Only valid for entities defined from character references. If no
* such character value was defined, return null.
*
* @param value Character value of entity
* @return Entity's name or null
*/
public static String fromChar( char value )
{
String name;
initialize();
name = (String) _byChar.get( String.valueOf( value ) );
if ( name == null )
return null;
else
return name;
}
/**
* Initialize upon first access. Will load all the HTML character
references
* into a list that is accessible by name or character value and is
optimized
* for character substitution. This method may be called any number of
times
* but will execute only once.
*/
private static void initialize()
{
InputStream is = null;
BufferedReader reader = null;
int index;
String name;
String value;
int code;
String line;
// Make sure not to initialize twice.
if ( _byName != null )
return;
try
{
_byName = new Hashtable();
_byChar = new Hashtable();
is = HTMLdtd.class.getResourceAsStream( ENTITIES_RESOURCE );
if ( is == null )
throw new RuntimeException( "The resource [" +
ENTITIES_RESOURCE + "] could not be found." );
reader = new BufferedReader( new InputStreamReader( is ) );
line = reader.readLine();
while ( line != null )
{
if ( line.length() == 0 || line.charAt( 0 ) == '#' )
{
line = reader.readLine();
continue;
}
index = line.indexOf( ' ' );
if ( index > 1 )
{
name = line.substring( 0, index );
++index;
if ( index < line.length() )
{
value = line.substring( index );
index = value.indexOf( ' ' );
if ( index > 0 )
value = value.substring( 0, index );
code = Integer.parseInt( value );
defineEntity( name, (char) code );
}
}
line = reader.readLine();
}
is.close();
}
catch ( Exception except )
{
throw new RuntimeException( "The resource [" + ENTITIES_RESOURCE
+ "] could not load: " +
except.toString() );
}
finally
{
if ( is != null )
{
try
{
is.close();
}
catch ( Exception except )
{
}
}
}
}
/**
* Defines a new character reference. The reference's name and value are
* supplied. Nothing happens if the character reference is already
defined.
* <P>
* Unlike internal entities, character references are a string to single
* character mapping. They are used to map non-ASCII characters both on
* parsing and printing, primarily for HTML documents. '<amp;' is an
* example of a character reference.
*
* @param name The entity's name
* @param value The entity's value
*/
private static void defineEntity( String name, char value )
{
if ( _byName.get( name ) == null )
{
_byName.put( name, new Character( value ) );
_byChar.put( String.valueOf( value ), name );
}
}
private static void defineElement( String name, int flags )
{
_elemDefs.put( name, new Integer( flags ) );
}
private static boolean isElement( String name, int flag )
{
Integer flags;
flags = (Integer) _elemDefs.get( name.toUpperCase() );
if ( flags == null )
return false;
else
return ( ( flags.intValue() & flag ) != 0 );
}
static
{
_elemDefs = new Hashtable();
defineElement( "ADDRESS", CLOSE_P );
defineElement( "AREA", EMPTY );
defineElement( "BASE", EMPTY );
defineElement( "BASEFONT", EMPTY );
defineElement( "BLOCKQUOTE", CLOSE_P );
defineElement( "BODY", OPT_CLOSING );
defineElement( "BR", EMPTY );
defineElement( "COL", EMPTY );
defineElement( "COLGROUP", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
defineElement( "DD", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT );
defineElement( "DIV", CLOSE_P );
defineElement( "DL", ELEM_CONTENT | CLOSE_P );
defineElement( "DT", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT );
defineElement( "FIELDSET", CLOSE_P );
defineElement( "FORM", CLOSE_P );
defineElement( "FRAME", OPT_CLOSING );
defineElement( "H1", CLOSE_P );
defineElement( "H2", CLOSE_P );
defineElement( "H3", CLOSE_P );
defineElement( "H4", CLOSE_P );
defineElement( "H5", CLOSE_P );
defineElement( "H6", CLOSE_P );
defineElement( "HEAD", ELEM_CONTENT | OPT_CLOSING );
defineElement( "HR", EMPTY | CLOSE_P );
defineElement( "HTML", ELEM_CONTENT | OPT_CLOSING );
defineElement( "IMG", EMPTY );
defineElement( "INPUT", EMPTY );
defineElement( "ISINDEX", EMPTY | ALLOWED_HEAD );
defineElement( "LI", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF );
defineElement( "LINK", EMPTY | ALLOWED_HEAD );
defineElement( "META", EMPTY | ALLOWED_HEAD );
defineElement( "OL", ELEM_CONTENT | CLOSE_P );
defineElement( "OPTGROUP", ELEM_CONTENT );
defineElement( "OPTION", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF );
defineElement( "P", OPT_CLOSING | CLOSE_P | CLOSE_SELF );
defineElement( "PARAM", EMPTY );
defineElement( "PRE", PRESERVE | CLOSE_P );
defineElement( "SCRIPT", ALLOWED_HEAD | PRESERVE );
defineElement( "SELECT", ELEM_CONTENT );
defineElement( "STYLE", ALLOWED_HEAD | PRESERVE );
defineElement( "TABLE", ELEM_CONTENT | CLOSE_P );
defineElement( "TBODY", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
defineElement( "TD", OPT_CLOSING | CLOSE_TH_TD );
defineElement( "TEXTAREA", PRESERVE );
defineElement( "TFOOT", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
defineElement( "TH", OPT_CLOSING | CLOSE_TH_TD );
defineElement( "THEAD", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
defineElement( "TITLE", ALLOWED_HEAD );
defineElement( "TR", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
defineElement( "UL", ELEM_CONTENT | CLOSE_P );
}
}
1.1 xml-xerces/java/src/org/apache/xml/serialize/Makefile
Index: Makefile
===================================================================
include ../../../../../src/Makefile.incl

# Makefile for directory ./org/apache/xml/serialize
#
# This makefile depends on the following environment variables
# already being defined:
#
# JAVAC Java compiler (with options)
# RM Host delete file command (with options)
#
# In addition, the CLASSPATH environment variable must
# include the absolute path of the base source directory.

TARGETS=\
	OutputFormat.class\
	Serializer.class\
	BaseSerializer.class\
	XMLSerializer.class\
	HTMLSerializer.class\
	XHTMLSerializer.class\
	ElementState.class\
	HTMLdtd.class

# None of these targets produce a file of the same name.
.PHONY: all dirs compile clean

all: dirs compile

# No sub-directories to descend into.
dirs:

compile: ${TARGETS}

# Replace the built-in suffix list so only the .java -> .class rule applies.
.SUFFIXES:
.SUFFIXES: .class .java

# Compile one source file and record that class files changed.
# Recipe lines must start with a tab character.
.java.class:
	${JAVAC} $<
	touch ../../../../../src/classfiles_updated

clean:
	${RM} *.class
1.1
xml-xerces/java/src/org/apache/xml/serialize/OutputFormat.java
Index: OutputFormat.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xml.serialize;
import java.util.Hashtable;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Node;
// XXX Delayed until the HTML DOM is introduced into the code base
// import org.w3c.dom.html.HTMLDocument;
/**
* Specifies an output format to control the serializer. Based on the
* XSLT specification for output format, plus additional parameters.
* Used to select the suitable serializer and determine how the
* document should be formatted on output.
* <p>
* The two interesting constructors are:
* <ul>
* <li>{@link #OutputFormat(String,String,boolean)} creates a format
* for the specified method (XML, HTML, etc), encoding and indentation
* <li>{@link #OutputFormat(Document,String,boolean)} creates a format
* compatible with the document type (XML, HTML), encoding and indentation
* </ul>
*
*
* @version
* @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
* <a href="mailto:[EMAIL PROTECTED]">Keith Visco</a>
* @see Serializer
*/
public class OutputFormat
{
/**
 * Holds the output method specified for this document,
 * or null if no method was specified.
 */
private String _method;
/**
 * Specifies the version of the output method, or null if no
 * version was specified.
 */
private String _version;
/**
 * The indentation level, or zero if no indentation
 * was requested.
 */
private int _indent = 0;
/**
 * The encoding to use. Starts out as {@link #DEFAULT_ENCODING}.
 */
private String _encoding = DEFAULT_ENCODING;
/**
 * The specified media type or null.
 */
private String _mediaType;
/**
 * The specified document type system identifier, or null.
 */
private String _doctypeSystem;
/**
 * The specified document type public identifier, or null.
 */
private String _doctypePublic;
/**
 * True if the XML declaration should be omitted.
 */
private boolean _omitXmlDeclaration = false;
/**
 * True if the document type should be marked as standalone.
 */
private boolean _standalone = false;
/**
 * List of element tag names whose text node children must
 * be output as CDATA, or null if none were specified.
 */
private String[] _cdataElements;
/**
 * List of element tag names whose text node children must
 * be output unescaped, or null if none were specified.
 */
private String[] _nonEscapingElements;
/**
 * The selected line separator. Starts out as
 * {@link #LINE_SEPARATOR_WEB}.
 */
private String _lineSeparator = LINE_SEPARATOR_WEB;
/**
 * The line width at which to wrap long lines when indenting.
 * Starts out as {@link #DEFAULT_LINE_WIDTH}.
 */
private int _lineWidth = DEFAULT_LINE_WIDTH;
/**
 * True if spaces should be preserved in elements that do not
 * specify otherwise, or specify the default behavior.
 */
private boolean _preserve = false;
/**
 * If indentation is turned on, the default indentation
 * level is 4.
 *
 * @see #setIndenting(boolean)
 */
public static final int DEFAULT_INDENT = 4;
/**
 * The default encoding for Web documents is UTF8.
 *
 * @see #getEncoding()
 */
public static final String DEFAULT_ENCODING = "UTF8";
/**
 * The default line width at which to break long lines
 * when indenting. This is set to 72.
 */
public static final int DEFAULT_LINE_WIDTH = 72;
/**
 * The output method for XML documents.
 */
public static final String METHOD_XML = "xml";
/**
 * The output method for HTML documents.
 */
public static final String METHOD_HTML = "html";
/**
 * The output method for HTML documents as XHTML.
 */
public static final String METHOD_XHTML = "xhtml";
/**
 * The output method for text documents.
 */
public static final String METHOD_TEXT = "text";
/**
 * Line separator for Unix systems (<tt>\n</tt>).
 */
public static final String LINE_SEPARATOR_UNIX = "\n";
/**
 * Line separator for Windows systems (<tt>\r\n</tt>).
 */
public static final String LINE_SEPARATOR_WIN = "\r\n";
/**
 * Line separator for Macintosh systems (<tt>\r</tt>).
 */
public static final String LINE_SEPARATOR_MAC = "\r";
/**
 * Line separator for the Web (<tt>\n</tt>).
 */
public static final String LINE_SEPARATOR_WEB = "\n";
/**
* Public identifier for HTML document type.
*/
public static final String DOCTYPE_HTML_PUBLIC = "-//W3C//DTD HTML
4.0//EN";
/**
* System identifier for HTML document type.
*/
public static final String DOCTYPE_HTML_SYSTEM =
"http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";
/**
* Public identifier for XHTML document type.
*/
public static final String DOCTYPE_XHTML_PUBLIC = "-//W3C//DTD XHTML 1.0
Strict//EN";
/**
* System identifier for XHTML document type.
*/
public static final String DOCTYPE_XHTML_SYSTEM =
"http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";
/**
 * Creates an output format in which every setting keeps its
 * field initializer value.
 */
public OutputFormat()
{
    // Nothing to do: all defaults come from the field initializers.
}
/**
 * Creates an output format for the given method and encoding,
 * leaving all other settings at their defaults. When
 * <tt>indenting</tt> is true the default indentation level and
 * default line width are selected; when false both are set to zero.
 *
 * @param method The specified output method
 * @param encoding The specified encoding
 * @param indenting True for pretty printing
 * @see #setEncoding
 * @see #setIndenting
 * @see #setMethod
 */
public OutputFormat( String method, String encoding, boolean indenting )
{
    // Delegate to the setters so each value is stored the same way
    // as when it is set individually.
    this.setMethod( method );
    this.setEncoding( encoding );
    this.setIndenting( indenting );
}
/**
* Constructs a new output format with the proper method,
* document type identifiers and media type for the specified
* document.
*
* @param doc The document to output
* @see #whichMethod
*/
public OutputFormat( Document doc )
{
setMethod( whichMethod( doc ) );
setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) );
setMediaType( whichMediaType( getMethod() ) );
}
/**
 * Creates an output format derived from the given document (see
 * {@link #OutputFormat(Document)}) and then applies the given
 * encoding and indentation choice. When <tt>indenting</tt> is true
 * the default indentation level and default line width are selected.
 *
 * @param doc The document to output
 * @param encoding The specified encoding
 * @param indenting True for pretty printing
 * @see #setEncoding
 * @see #setIndenting
 * @see #whichMethod
 */
public OutputFormat( Document doc, String encoding, boolean indenting )
{
    this( doc );
    this.setEncoding( encoding );
    this.setIndenting( indenting );
}
/**
 * Returns the output method held by this format. Typical values
 * are <tt>xml</tt>, <tt>html</tt> or <tt>text</tt>, but any value
 * that was set is returned as is. Null means no method was
 * specified; {@link #whichMethod} can be used to determine a
 * suitable method for a document.
 *
 * @return The specified output method, or null
 */
public String getMethod()
{
    return this._method;
}
/**
 * Stores the output method of this format.
 *
 * @see #getMethod
 * @param method The output method, or null
 */
public void setMethod( String method )
{
    this._method = method;
}
/**
 * Returns the version of the output method held by this format,
 * or null if no version was specified.
 *
 * @return The specified method version, or null
 */
public String getVersion()
{
    return this._version;
}
/**
 * Stores the version of the output method. For XML the value
 * would be "1.0", for HTML it would be "4.0".
 *
 * @see #getVersion
 * @param version The output method version, or null
 */
public void setVersion( String version )
{
    this._version = version;
}
/**
 * Returns the indentation held by this format. Zero means no
 * indentation was requested.
 *
 * @return The indentation or zero
 * @see #setIndenting
 */
public int getIndent()
{
    return this._indent;
}
/**
 * Tells whether indentation is on, that is whether the
 * indentation held by this format is greater than zero.
 *
 * @return True if the indentation is greater than zero
 */
public boolean getIndenting()
{
    boolean indenting = this._indent > 0;
    return indenting;
}
/**
 * Stores the indentation. Negative values are stored as zero,
 * and zero means the document is not indented. A later call to
 * {@link #setIndenting} overwrites this value with zero (off) or
 * the default (on).
 *
 * @param indent The indentation, or zero
 */
public void setIndent( int indent )
{
    // Clamp negative requests to "no indentation".
    _indent = Math.max( indent, 0 );
}
/**
 * Switches indentation on or off. Switching on selects
 * {@link #DEFAULT_INDENT} and {@link #DEFAULT_LINE_WIDTH};
 * switching off sets both the indentation and the line width to
 * zero. Use {@link #setIndent} and {@link #setLineWidth} afterwards
 * to choose other values.
 *
 * @param on True if indentation should be on
 */
public void setIndenting( boolean on )
{
    _indent = on ? DEFAULT_INDENT : 0;
    _lineWidth = on ? DEFAULT_LINE_WIDTH : 0;
}
/**
 * Returns the encoding held by this format. Unless another value
 * was set, this is {@link #DEFAULT_ENCODING}.
 *
 * @return The encoding
 */
public String getEncoding()
{
    return this._encoding;
}
/**
 * Stores the encoding for this output format; the value is kept
 * exactly as given, including null. Make sure the encoding is
 * compatible with the one used by the {@link java.io.Writer}.
 *
 * @see #getEncoding
 * @param encoding The encoding, or null
 */
public void setEncoding( String encoding )
{
    this._encoding = encoding;
}
/**
 * Returns the media type held by this format, or null if none
 * was specified. {@link #whichMediaType} determines a media type
 * from an output method.
 *
 * @return The specified media type, or null
 */
public String getMediaType()
{
    return this._mediaType;
}
/**
 * Stores the media type.
 *
 * @see #getMediaType
 * @param mediaType The specified media type
 */
public void setMediaType( String mediaType )
{
    this._mediaType = mediaType;
}
/**
 * Stores the document type public and system identifiers.
 * No <tt>DOCTYPE</tt> will be serialized if both identifiers
 * are null. A system identifier is required if a public
 * identifier is specified; this method does not check that.
 *
 * @param publicId The public identifier, or null
 * @param systemId The system identifier, or null
 */
public void setDoctype( String publicId, String systemId )
{
    this._doctypeSystem = systemId;
    this._doctypePublic = publicId;
}
/**
 * Returns the document type public identifier held by this
 * format, or null if none was specified.
 *
 * @return The public identifier, or null
 */
public String getDoctypePublic()
{
    return this._doctypePublic;
}
/**
 * Returns the document type system identifier held by this
 * format, or null if none was specified.
 *
 * @return The system identifier, or null
 */
public String getDoctypeSystem()
{
    return this._doctypeSystem;
}
/**
 * Tells whether the XML document declaration should be omitted.
 * The default is false.
 *
 * @return True if the XML declaration should be omitted
 */
public boolean getOmitXMLDeclaration()
{
    return this._omitXmlDeclaration;
}
/**
 * Switches omitting of the XML declaration on or off.
 *
 * @param omit True if XML declaration should be omitted
 */
public void setOmitXMLDeclaration( boolean omit )
{
    this._omitXmlDeclaration = omit;
}
/**
 * Tells whether the document type is standalone.
 * The default is false.
 *
 * @return True if the document type is marked standalone
 */
public boolean getStandalone()
{
    return this._standalone;
}
/**
 * Marks the document DTD as standalone or not. The public and
 * system identifiers must be null for the document to be
 * serialized as standalone; this method only stores the flag.
 *
 * @param standalone True if document DTD is standalone
 */
public void setStandalone( boolean standalone )
{
    this._standalone = standalone;
}
/**
 * Returns the tag names of all elements whose text node children
 * should be output as CDATA, or null if no such elements were
 * specified. The stored array itself is returned, not a copy.
 *
 * @return List of CDATA element tag names, or null
 */
public String[] getCDataElements()
{
    return this._cdataElements;
}
/**
 * Tells whether the text node children of the given element
 * should be output as CDATA, by looking for an exact
 * (case sensitive) match in the list of CDATA elements.
 *
 * @param tagName The element's tag name
 * @return True if should serialize as CDATA
 */
public boolean isCDataElement( String tagName )
{
    String[] names = _cdataElements;

    if ( names != null ) {
        for ( int index = 0 ; index < names.length ; ++index ) {
            if ( names[ index ].equals( tagName ) )
                return true;
        }
    }
    return false;
}
/**
 * Stores the list of elements for which text node children
 * should be output as CDATA. The array is kept by reference,
 * not copied.
 *
 * @param cdataElements List of CDATA element tag names
 */
public void setCDataElements( String[] cdataElements )
{
    this._cdataElements = cdataElements;
}
/**
 * Returns the tag names of all elements whose text node children
 * should be output unescaped (no character references), or null
 * if no such elements were specified. The stored array itself is
 * returned, not a copy.
 *
 * @return List of unescaped element tag names, or null
 */
public String[] getNonEscapingElements()
{
    return this._nonEscapingElements;
}
/**
 * Tells whether the text node children of the given element
 * should be output unescaped, by looking for an exact
 * (case sensitive) match in the list of non-escaping elements.
 *
 * @param tagName The element's tag name
 * @return True if should serialize unescaped
 */
public boolean isNonEscapingElement( String tagName )
{
    String[] names = _nonEscapingElements;

    if ( names != null ) {
        for ( int index = 0 ; index < names.length ; ++index ) {
            if ( names[ index ].equals( tagName ) )
                return true;
        }
    }
    return false;
}
/**
 * Stores the list of elements for which text node children
 * should be output unescaped (no character references). The
 * array is kept by reference, not copied.
 *
 * @param nonEscapingElements List of unescaped element tag names
 */
public void setNonEscapingElements( String[] nonEscapingElements )
{
    this._nonEscapingElements = nonEscapingElements;
}
/**
 * Returns the line separator held by this format. The default is
 * the Web line separator (<tt>\n</tt>). A string is returned to
 * support double codes (CR + LF).
 *
 * @return The specified line separator
 */
public String getLineSeparator()
{
    return this._lineSeparator;
}
/**
 * Stores the line separator; null selects the Web line separator
 * (<tt>\n</tt>), which is also the default. The machine's line
 * separator can be obtained from the system property
 * <tt>line.separator</tt>, but is only useful if the document is
 * edited on machines of the same type. For general documents, use
 * the Web line separator.
 *
 * @param lineSeparator The specified line separator, or null
 */
public void setLineSeparator( String lineSeparator )
{
    _lineSeparator = ( lineSeparator != null ) ? lineSeparator : LINE_SEPARATOR_WEB;
}
/**
 * Tells whether preserving spaces is the default behavior of
 * this format. Elements that do not specify otherwise, or that
 * specify the default behavior, are formatted based on this
 * rule. Elements that specify space preserving always preserve
 * space.
 *
 * @return True if spaces are preserved by default
 */
public boolean getPreserveSpace()
{
    return this._preserve;
}
/**
 * Selects whether spaces are preserved by default. The initial
 * setting is space stripping, in which elements that do not
 * specify otherwise or use the default value do not preserve
 * spaces.
 *
 * @param preserve True if spaces should be preserved
 */
public void setPreserveSpace( boolean preserve )
{
    this._preserve = preserve;
}
/**
 * Returns the line width used for breaking up long lines.
 * When indenting, and only when indenting, long lines will be
 * broken at space boundaries based on this line width.
 * No line wrapping occurs if this value is zero.
 *
 * @return The line width, or zero for no wrapping
 */
public int getLineWidth()
{
    return this._lineWidth;
}
/**
 * Stores the line width. Zero or a negative value is stored as
 * zero, which means no line wrapping will occur. A later call to
 * {@link #setIndenting} overwrites this value with zero (off) or
 * the default (on).
 *
 * @param lineWidth The line width to use, zero for no wrapping
 * @see #getLineWidth
 * @see #setIndenting
 */
public void setLineWidth( int lineWidth )
{
    // Clamp non-positive requests to "no wrapping".
    _lineWidth = Math.max( lineWidth, 0 );
}
/**
 * Returns the last printable character based on the selected
 * encoding. Characters above the returned value cannot be written
 * in that encoding directly. Control characters and non-printable
 * characters are always printed as character references.
 * <p>
 * ASCII is a seven-bit encoding, so for <tt>ASCII</tt> (or its
 * canonical name <tt>US-ASCII</tt>, compared case insensitively)
 * the last printable character is <tt>0x7F</tt>. For any other
 * encoding, or when no encoding is set, <tt>0xFFFF</tt> is returned.
 *
 * @return The highest character that may be output as is
 */
public char getLastPrintable()
{
    String encoding = getEncoding();

    if ( encoding != null &&
         ( encoding.equalsIgnoreCase( "ASCII" ) ||
           encoding.equalsIgnoreCase( "US-ASCII" ) ) )
        return 0x7F;
    return 0xFFFF;
}
/**
 * Determine the output method for the specified document by
 * walking the document's children in order. The first element
 * found decides: if its name is 'html' (case insensitive) the
 * method is <tt>html</tt>, otherwise <tt>xml</tt>. A text node
 * found before that element which contains anything other than
 * space, tab, line feed or carriage return makes the method
 * <tt>xml</tt>. If no element is found the method is <tt>xml</tt>.
 * <p>
 * Recognizing {@link org.w3c.dom.html.HTMLDocument} instances as
 * <tt>html</tt> is delayed until the HTML DOM is introduced into
 * the code base.
 *
 * @param doc The document to check
 * @return The suitable method
 */
public static String whichMethod( Document doc )
{
    for ( Node child = doc.getFirstChild() ; child != null ; child = child.getNextSibling() ) {
        short type = child.getNodeType();

        // The root element settles the question.
        if ( type == Node.ELEMENT_NODE )
            return child.getNodeName().equalsIgnoreCase( "html" ) ? METHOD_HTML : METHOD_XML;

        // Only text nodes preceding the root element are of interest.
        if ( type != Node.TEXT_NODE )
            continue;

        // Whitespace-only text may still precede an html root;
        // any other character means this is definitely xml.
        String text = child.getNodeValue();
        for ( int pos = 0 ; pos < text.length() ; ++pos ) {
            char ch = text.charAt( pos );
            if ( ! ( ch == 0x20 || ch == 0x0A || ch == 0x09 || ch == 0x0D ) )
                return METHOD_XML;
        }
    }
    // Anything else, the method is xml.
    return METHOD_XML;
}
/**
 * Returns the document type public identifier specified for
 * this document, or null.
 * <p>
 * The lookup is not enabled yet, so this method currently
 * returns null for every document.
 *
 * @param doc The document to check
 * @return Always null at present
 */
public static String whichDoctypePublic( Document doc )
{
    // XXX Delayed until DOM Level 2 is introduced into the code base:
    //     obtain doc.getDoctype() and, if it is not null, return its
    //     public identifier. DOM Level 1 does not specify that accessor,
    //     so the call would have to guard against NoSuchMethodError.
    //
    // XXX Delayed until the HTML DOM is introduced into the code base:
    //     if doc is an HTMLDocument, return DOCTYPE_XHTML_PUBLIC.
    return null;
}
/**
 * Returns the document type system identifier specified for
 * this document, or null.
 * <p>
 * The lookup is not enabled yet, so this method currently
 * returns null for every document.
 *
 * @param doc The document to check
 * @return Always null at present
 */
public static String whichDoctypeSystem( Document doc )
{
    // XXX Delayed until DOM Level 2 is introduced into the code base:
    //     obtain doc.getDoctype() and, if it is not null, return its
    //     system identifier. DOM Level 1 does not specify that accessor,
    //     so the call would have to guard against NoSuchMethodError.
    //
    // XXX Delayed until the HTML DOM is introduced into the code base:
    //     if doc is an HTMLDocument, return DOCTYPE_XHTML_SYSTEM.
    return null;
}
/**
 * Returns the suitable media format for a document
 * output with the specified method.
 * <p>
 * The method name is compared case insensitively against the
 * xml, html and text methods, yielding <tt>text/xml</tt>,
 * <tt>text/html</tt> and <tt>text/plain</tt> respectively.
 *
 * @param method The output method, may be null
 * @return The media type, or null if the method is null or
 *   not one of the recognized methods
 */
public static String whichMediaType( String method )
{
    // No method means no suitable media type; report that with null
    // (as for an unrecognized method) rather than failing with a
    // NullPointerException.
    if ( method == null )
        return null;
    if ( method.equalsIgnoreCase( METHOD_XML ) )
        return "text/xml";
    if ( method.equalsIgnoreCase( METHOD_HTML ) )
        return "text/html";
    if ( method.equalsIgnoreCase( METHOD_TEXT ) )
        return "text/plain";
    return null;
}
}
1.1
xml-xerces/java/src/org/apache/xml/serialize/Serializer.java
Index: Serializer.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xml.serialize;
import java.io.Writer;
import java.io.OutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import org.w3c.dom.Element;
import org.w3c.dom.Document;
import org.xml.sax.DocumentHandler;
/**
 * Interface for a DOM serializer implementation, factory for DOM and SAX
 * serializers, and static methods for serializing DOM documents.
 * <p>
 * To serialize a document using SAX events, create a compatible serializer
 * using {@link #makeSAXSerializer} and pass it around as a {@link
 * DocumentHandler}. If an I/O error occurs while serializing, it will
 * be thrown by {@link DocumentHandler#endDocument}. The SAX serializer
 * may also be used as {@link DTDHandler}, {@link DeclHandler} and
 * {@link LexicalHandler}.
 * <p>
 * To serialize a DOM document or DOM element, create a compatible
 * serializer using {@link #makeSerializer} and call its {@link
 * #serialize(Document)} or {@link #serialize(Element)} methods.
 * Both methods would produce a full XML document; to serialize only
 * a portion of the document use {@link OutputFormat#setOmitXMLDeclaration}
 * and specify no document type.
 * <p>
 * The convenience method {@link #serialize(Document,Writer,OutputFormat)}
 * creates a serializer and calls {@link #serialize(Document)} on that
 * serializer.
 * <p>
 * The {@link OutputFormat} dictates what underlying serializer is used
 * to serialize the document based on the specified method. If the output
 * format or method are missing, the default is an XML serializer with
 * UTF8 encoding and no indentation.
 *
 *
 * @version
 * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
 * @see DocumentHandler
 * @see OutputFormat
 */
public abstract class Serializer
{


    /**
     * Serializes the DOM element. Throws an exception only if
     * an I/O exception occurred while serializing.
     *
     * @param elem The element to serialize
     * @throws IOException An I/O exception occurred while
     *   serializing
     */
    public abstract void serialize( Element elem )
        throws IOException;


    /**
     * Serializes the DOM document. Throws an exception only if
     * an I/O exception occurred while serializing.
     *
     * @param doc The document to serialize
     * @throws IOException An I/O exception occurred while
     *   serializing
     */
    public abstract void serialize( Document doc )
        throws IOException;


    /**
     * Creates a compatible serializer for the specified writer
     * and output format. If the output format is missing,
     * the default is an XML format with UTF8 encoding.
     *
     * @param writer The writer
     * @param format The output format, null for the default
     * @return A compatible serializer
     */
    public static Serializer makeSerializer( Writer writer, OutputFormat format )
    {
        BaseSerializer serializer;

        format = defaultFormat( format );
        serializer = makeBaseSerializer( format );
        serializer.init( writer, format );
        return serializer;
    }


    /**
     * Creates a compatible serializer for the specified output stream
     * and output format. If the output format is missing, the default
     * is an XML format with UTF8 encoding.
     *
     * @param output The output stream
     * @param format The output format, null for the default
     * @return A compatible serializer
     * @throws UnsupportedEncodingException Encoding specified
     *   in the output format is not supported
     */
    public static Serializer makeSerializer( OutputStream output, OutputFormat format )
        throws UnsupportedEncodingException
    {
        BaseSerializer serializer;

        format = defaultFormat( format );
        serializer = makeBaseSerializer( format );
        serializer.init( output, format );
        return serializer;
    }


    /**
     * Creates a compatible SAX serializer for the specified writer
     * and output format. If the output format is missing, the default
     * is an XML format with UTF8 encoding.
     *
     * @param writer The writer
     * @param format The output format, null for the default
     * @return A compatible SAX serializer
     */
    public static DocumentHandler makeSAXSerializer( Writer writer, OutputFormat format )
    {
        BaseSerializer serializer;

        format = defaultFormat( format );
        serializer = makeBaseSerializer( format );
        serializer.init( writer, format );
        return serializer;
    }


    /**
     * Creates a compatible SAX serializer for the specified output stream
     * and output format. If the output format is missing, the default
     * is an XML format with UTF8 encoding.
     *
     * @param output The output stream
     * @param format The output format, null for the default
     * @return A compatible SAX serializer
     * @throws UnsupportedEncodingException Encoding specified
     *   in the output format is not supported
     */
    public static DocumentHandler makeSAXSerializer( OutputStream output, OutputFormat format )
        throws UnsupportedEncodingException
    {
        BaseSerializer serializer;

        format = defaultFormat( format );
        serializer = makeBaseSerializer( format );
        serializer.init( output, format );
        return serializer;
    }


    /**
     * Convenience method serializes the specified document to
     * the writer using the specified output format. If the output
     * format is missing, one is constructed from the document.
     * <p>
     * Equivalent to calling {@link #serialize(Document)} on
     * a compatible DOM serializer.
     *
     * @param doc The document to serialize
     * @param writer The writer
     * @param format The output format, null to derive one from
     *   the document
     * @throws IOException An I/O exception occurred while serializing
     */
    public static void serialize( Document doc, Writer writer, OutputFormat format )
        throws IOException
    {
        BaseSerializer serializer;

        if ( format == null )
            format = new OutputFormat( doc );
        serializer = makeBaseSerializer( format );
        serializer.init( writer, format );
        serializer.serialize( doc );
    }


    /**
     * Convenience method serializes the specified document to
     * the output stream using the specified output format. If the
     * output format is missing, one is constructed from the document.
     * <p>
     * Equivalent to calling {@link #serialize(Document)} on
     * a compatible DOM serializer.
     *
     * @param doc The document to serialize
     * @param output The output stream
     * @param format The output format, null to derive one from
     *   the document
     * @throws UnsupportedEncodingException Encoding specified
     *   in the output format is not supported
     * @throws IOException An I/O exception occurred while serializing
     */
    public static void serialize( Document doc, OutputStream output, OutputFormat format )
        throws UnsupportedEncodingException, IOException
    {
        BaseSerializer serializer;

        if ( format == null )
            format = new OutputFormat( doc );
        serializer = makeBaseSerializer( format );
        serializer.init( output, format );
        serializer.serialize( doc );
    }


    /**
     * Returns the given output format, or the documented default
     * (XML method, UTF8 encoding, no indentation) if it is null.
     * The factory methods call this so that the very same format
     * object is used both to pick the serializer and to initialize it.
     */
    private static OutputFormat defaultFormat( OutputFormat format )
    {
        if ( format == null )
            return new OutputFormat( "xml", "UTF8", false );
        return format;
    }


    /**
     * Selects the serializer implementation matching the method of
     * the output format (case insensitive): <tt>html</tt> yields an
     * {@link HTMLSerializer}, <tt>xhtml</tt> an {@link XHTMLSerializer},
     * and anything else (including a missing format or method) an
     * {@link XMLSerializer}. The returned serializer is not initialized.
     */
    private static BaseSerializer makeBaseSerializer( OutputFormat format )
    {
        String method;

        method = ( format == null ) ? null : format.getMethod();
        if ( method != null ) {
            if ( method.equalsIgnoreCase( "html" ) )
                return new HTMLSerializer();
            if ( method.equalsIgnoreCase( "xhtml" ) )
                return new XHTMLSerializer();
        }
        return new XMLSerializer();
    }


}
1.1
xml-xerces/java/src/org/apache/xml/serialize/XHTMLSerializer.java
Index: XHTMLSerializer.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xml.serialize;
import java.io.OutputStream;
import java.io.Writer;
import java.io.UnsupportedEncodingException;
/**
 * Implements an XHTML serializer supporting both DOM and SAX
 * pretty serializing. For usage instructions see either {@link
 * Serializer} or {@link BaseSerializer}.
 *
 *
 * @version
 * @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
 * @see Serializer
 */
public final class XHTMLSerializer
    extends HTMLSerializer
{


    /**
     * Constructs a new serializer. The serializer cannot be used without
     * calling {@link #init} first.
     */
    public XHTMLSerializer()
    {
        super( true );
    }


    /**
     * Constructs a new serializer that writes to the specified writer
     * using the specified output format. If <tt>format</tt> is null,
     * will use a default output format.
     *
     * @param writer The writer to use
     * @param format The output format to use, null for the default
     */
    public XHTMLSerializer( Writer writer, OutputFormat format )
    {
        super( true );
        if ( format == null )
            format = new OutputFormat( OutputFormat.METHOD_XHTML, null, false );
        init( writer, format );
    }


    /**
     * Constructs a new serializer that writes to the specified output
     * stream using the specified output format. If <tt>format</tt>
     * is null, will use a default output format.
     *
     * @param output The output stream to use
     * @param format The output format to use, null for the default
     * @throws IllegalArgumentException The encoding specified in the
     *   output format is not supported
     */
    public XHTMLSerializer( OutputStream output, OutputFormat format )
    {
        super( true );
        if ( format == null )
            format = new OutputFormat( OutputFormat.METHOD_XHTML, null, false );
        try {
            init( output, format );
        } catch ( UnsupportedEncodingException except ) {
            // Cannot happen with the default format, but a caller supplied
            // format may name an unsupported encoding. Swallowing the error
            // would hand back a serializer that was never initialized, so
            // report the problem right away instead.
            throw new IllegalArgumentException( "Unsupported output encoding: " +
                                                except.getMessage() );
        }
    }


}
1.1
xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java
Index: XMLSerializer.java
===================================================================
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xml.serialize;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.OutputStream;
import java.io.Writer;
import org.w3c.dom.*;
import org.xml.sax.DocumentHandler;
import org.xml.sax.AttributeList;
/**
* Implements an XML serializer supporting both DOM and SAX pretty
* serializing. For usage instructions see [EMAIL PROTECTED] Serializer}.
* <p>
* If an output stream is used, the encoding is taken from the
* output format (defaults to <tt>UTF8</tt>). If a writer is
* used, make sure the writer uses the same encoding (if applies)
* as specified in the output format.
* <p>
* The serializer supports both DOM and SAX. DOM serializing is done
* by calling [EMAIL PROTECTED] #serialize} and SAX serializing is done by
firing
* SAX events and using the serializer as a document handler.
* <p>
* If an I/O exception occurs while serializing, the serializer
* will not throw an exception directly, but only throw it
* at the end of serializing (either DOM or SAX's [EMAIL PROTECTED]
* org.xml.sax.DocumentHandler#endDocument}.
* <p>
* For elements that are not specified as whitespace preserving,
* the serializer will potentially break long text lines at space
* boundaries, indent lines, and serialize elements on separate
* lines. Line terminators will be regarded as spaces, and
* spaces at beginning of line will be stripped.
*
*
* @version
* @author <a href="mailto:[EMAIL PROTECTED]">Assaf Arkin</a>
* @see Serializer
*/
public final class XMLSerializer
extends BaseSerializer
{
/**
* Constructs a new serializer. The serializer cannot be used without
* calling [EMAIL PROTECTED] #init} first.
*/
public XMLSerializer()
{
super();
}
/**
* Constructs a new serializer that writes to the specified writer
* using the specified output format. If <tt>format</tt> is null,
* will use a default output format.
*
* @param writer The writer to use
* @param format The output format to use, null for the default
*/
public XMLSerializer( Writer writer, OutputFormat format )
{
super();
if ( format == null )
format = new OutputFormat( OutputFormat.METHOD_XML, null, false );
init( writer, format );
}
/**
* Constructs a new serializer that writes to the specified output
* stream using the specified output format. If <tt>format</tt>
* is null, will use a default output format.
*
* @param output The output stream to use
* @param format The output format to use, null for the default
*/
public XMLSerializer( OutputStream output, OutputFormat format )
{
super();
if ( format == null )
format = new OutputFormat( OutputFormat.METHOD_XML, null, false );
try {
init( output, format );
} catch ( UnsupportedEncodingException except ) {
// Should never happend, we use UTF8 by default
}
}
//------------------------------------------//
// SAX document handler serializing methods //
//------------------------------000---------//
public void startDocument()
{
// Nothing to do here. All the magic happens in startDocument(String)
}
public void startElement( String tagName, AttributeList attrs )
{
int i;
boolean preserveSpace;
ElementState state;
String name;
String value;
state = getElementState();
if ( state == null ) {
// If this is the root element handle it differently.
// If the first root element in the document, serialize
// the document's DOCTYPE. Space preserving defaults
// to that of the output format.
if ( ! _started )
startDocument( tagName );
preserveSpace = _format.getPreserveSpace();
} else {
// For any other element, if first in parent, then
// close parent's opening tag and use the parnet's
// space preserving.
if ( state.empty )
printText( ">" );
preserveSpace = state.preserveSpace;
// Indent this element on a new line if the first
// content of the parent element or immediately
// following an element.
if ( _format.getIndenting() && ! state.preserveSpace &&
( state.empty || state.afterElement ) )
breakLine();
}
// Do not change the current element state yet.
// This only happens in endElement().
printText( '<' + tagName );
indent();
// For each attribute print it's name and value as one part,
// separated with a space so the element can be broken on
// multiple lines.
if ( attrs != null ) {
for ( i = 0 ; i < attrs.getLength() ; ++i ) {
printSpace();
name = attrs.getName( i );
value = attrs.getValue( i );
if ( value == null )
value = "";
printText( name + "=\"" + escape( value ) + '"' );
// If the attribute xml:space exists, determine whether
// to preserve spaces in this and child nodes based on
// its value.
if ( name.equals( "xml:space" ) ) {
if ( value.equals( "preserve" ) )
preserveSpace = true;
else
preserveSpace = _format.getPreserveSpace();
}
}
}
// Now it's time to enter a new element state
// with the tag name and space preserving.
// We still do not change the curent element state.
state = enterElementState( tagName, preserveSpace );
state.cdata = _format.isCDataElement( tagName );
state.unescaped = _format.isNonEscapingElement( tagName );
}
public void endElement( String tagName )
{
ElementState state;
// Works much like content() with additions for closing
// an element. Note the different checks for the closed
// element's state and the parent element's state.
unindent();
state = getElementState();
if ( state.empty ) {
printText( "/>" );
} else {
// This element is not empty and that last content was
// another element, so print a line break before that
// last element and this element's closing tag.
if ( _format.getIndenting() && ! state.preserveSpace &&
state.afterElement )
breakLine();
printText( "</" + tagName + ">" );
}
// Leave the element state and update that of the parent
// (if we're not root) to not empty and after element.
state = leaveElementState();
if ( state != null ) {
state.afterElement = true;
state.empty = false;
} else {
// [keith] If we're done printing the document but don't
// get to call endDocument(), the buffer should be flushed.
flush();
}
}
//------------------------------------------//
// Generic node serializing methods methods //
//------------------------------------------//
/**
* Called to serialize the document's DOCTYPE by the root element.
* The document type declaration must name the root element,
* but the root element is only known when that element is serialized,
* and not at the start of the document.
* <p>
* This method will check if it has not been called before ([EMAIL
PROTECTED] #_started}),
* will serialize the document type declaration, and will serialize all
* pre-root comments and PIs that were accumulated in the document
* (see [EMAIL PROTECTED] #serializePreRoot}). Pre-root will be
serialized even if
* this is not the first root element of the document.
*/
protected void startDocument( String rootTagName )
{
int i;
String dtd;
dtd = leaveDTD();
if ( ! _started ) {
if ( ! _format.getOmitXMLDeclaration() ) {
StringBuffer buffer;
// Serialize the document declaration appreaing at the head
// of very XML document (unless asked not to).
buffer = new StringBuffer( "<?xml version=\"" );
if ( _format.getVersion() != null )
buffer.append( _format.getVersion() );
else
buffer.append( "1.0" );
buffer.append( '"' );
if ( _format.getEncoding() != null ) {
buffer.append( " encoding=\"" );
buffer.append( _format.getEncoding() );
buffer.append( '"' );
}
if ( _format.getStandalone() && _format.getDoctypeSystem() ==
null &&
_format.getDoctypePublic() == null )
buffer.append( " standalone=\"yes\"" );
buffer.append( "?>" );
printText( buffer.toString() );
breakLine();
}
if ( _format.getDoctypeSystem() != null ) {
// System identifier must be specified to print DOCTYPE.
// If public identifier is specified print 'PUBLIC
// <public> <system>', if not, print 'SYSTEM <system>'.
printText( "<!DOCTYPE " );
printText( rootTagName );
if ( _format.getDoctypePublic() != null ) {
printText( " PUBLIC " );
printDoctypeURL( _format.getDoctypePublic() );
if ( _format.getIndenting() ) {
breakLine();
for ( i = 0 ; i < 18 + rootTagName.length() ; ++i )
printText( " " );
}
printDoctypeURL( _format.getDoctypeSystem() );
}
else {
printText( " SYSTEM " );
printDoctypeURL( _format.getDoctypeSystem() );
}
// If we accumulated any DTD contents while printing.
// this would be the place to print it.
if ( dtd != null && dtd.length() > 0 ) {
printText( " [" );
indent();
if ( _format.getIndenting() )
breakLine();
printText( dtd, true );
unindent();
printText( "]" );
}
printText( ">" );
breakLine();
}
}
_started = true;
// Always serialize these, even if not te first root element.
serializePreRoot();
}
/**
* Called to serialize a DOM element. Equivalent to calling [EMAIL
PROTECTED]
* #startElement}, [EMAIL PROTECTED] #endElement} and serializing
everything
* inbetween, but better optimized.
*/
protected void serializeElement( Element elem )
{
Attr attr;
NamedNodeMap attrMap;
int i;
Node child;
ElementState state;
boolean preserveSpace;
String name;
String value;
state = getElementState();
if ( state == null ) {
// If this is the root element handle it differently.
// If the first root element in the document, serialize
// the document's DOCTYPE. Space preserving defaults
// to that of the output format.
if ( ! _started )
startDocument( elem.getTagName() );
preserveSpace = _format.getPreserveSpace();
} else {
// For any other element, if first in parent, then
// close parent's opening tag and use the parnet's
// space preserving.
if ( state.empty )
printText( ">" );
preserveSpace = state.preserveSpace;
// Indent this element on a new line if the first
// content of the parent element or immediately
// following an element.
if ( _format.getIndenting() && ! state.preserveSpace &&
( state.empty || state.afterElement ) )
breakLine();
}
// Do not change the current element state yet.
// This only happens in endElement().
printText( '<' + elem.getTagName() );
indent();
// Lookup the element's attribute, but only print specified
// attributes. (Unspecified attributes are derived from the DTD.
// For each attribute print it's name and value as one part,
// separated with a space so the element can be broken on
// multiple lines.
attrMap = elem.getAttributes();
if ( attrMap != null ) {
for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
attr = (Attr) attrMap.item( i );
name = attr.getName();
value = attr.getValue();
if ( value == null )
value = "";
if ( attr.getSpecified() ) {
printSpace();
printText( name + "=\"" + escape( value ) + '"' );
}
// If the attribute xml:space exists, determine whether
// to preserve spaces in this and child nodes based on
// its value.
if ( name.equals( "xml:space" ) ) {
if ( value.equals( "preserve" ) )
preserveSpace = true;
else
preserveSpace = _format.getPreserveSpace();
}
}
}
// If element has children, then serialize them, otherwise
// serialize en empty tag.
if ( elem.hasChildNodes() ) {
// Enter an element state, and serialize the children
// one by one. Finally, end the element.
state = enterElementState( elem.getTagName(), preserveSpace );
state.cdata = _format.isCDataElement( elem.getTagName() );
state.unescaped = _format.isNonEscapingElement( elem.getTagName() );
child = elem.getFirstChild();
while ( child != null ) {
serializeNode( child );
child = child.getNextSibling();
}
endElement( elem.getTagName() );
} else {
unindent();
printText( "/>" );
if ( state != null ) {
// After element but parent element is no longer empty.
state.afterElement = true;
state.empty = false;
}
}
}
protected String getEntityRef( char ch )
{
// Encode special XML characters into the equivalent character
references.
// These five are defined by default for all XML documents.
switch ( ch ) {
case '<':
return "lt";
case '>':
return "gt";
case '"':
return "quot";
case '\'':
return "apos";
case '&':
return "amp";
}
return null;
}
}