venu 2003/12/09 03:08:23 Modified: java/src/org/apache/xml/serialize Tag: jaxp13-2_6-branch BaseMarkupSerializer.java HTMLSerializer.java IndentPrinter.java XML11Serializer.java XMLSerializer.java Log: Fix for bug 22472. Thanks to [EMAIL PROTECTED] Revision Changes Path No revision No revision 1.48.2.1 +39 -16 xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java Index: BaseMarkupSerializer.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java,v retrieving revision 1.48 retrieving revision 1.48.2.1 diff -u -r1.48 -r1.48.2.1 --- BaseMarkupSerializer.java 18 Nov 2003 22:59:50 -0000 1.48 +++ BaseMarkupSerializer.java 9 Dec 2003 11:08:23 -0000 1.48.2.1 @@ -511,14 +511,27 @@ // Nothing to do here. All the magic happens in startDocument(String) } + /** + * Checks if the specified string entirely consists of whitespace. + */ + protected final boolean isIgnorable( char[] chars, int start, int length ) { + for( int i=start; i<start+length; i++ ) { + char ch = chars[i]; + if(ch==' ' || ch=='\t' || ch=='\r' || ch=='\n') + return false; + } + return true; + } public void characters( char[] chars, int start, int length ) throws SAXException { ElementState state; + + boolean ignorable = isIgnorable(chars,start,length); try { - state = content(); + state = content(ignorable); // Check if text should be print as CDATA section or unescaped // based on elements listed in the output format (the element @@ -582,7 +595,11 @@ printText( chars, start, length, true, state.unescaped ); _printer.setNextIndent( saveIndent ); } else { - printText( chars, start, length, false, state.unescaped ); + // if the string is whitespace only and we are indenting + // this characters are probably just for indentation. + // we will handle indentation by ourselves, so don't print it. 
+ if(!ignorable || !_indenting) + printText( chars, start, length, false, state.unescaped ); } } } catch ( IOException except ) { @@ -597,7 +614,7 @@ int i; try { - content(); + content(true); // Print ignorable whitespaces only when indenting, after // all they are indentation. Cancel the indentation to @@ -629,7 +646,7 @@ int index; ElementState state; - state = content(); + state = content(false); // Create the processing instruction textual representation. // Make sure we don't have '?>' inside either target or code. @@ -686,7 +703,7 @@ if ( _format.getOmitComments() ) return; - state = content(); + state = content(false); // Create the processing comment textual representation. // Make sure we don't have '-->' inside the comment. index = text.indexOf( "-->" ); @@ -825,7 +842,7 @@ { try { endCDATA(); - content(); + content(false); _printer.printText( '&' ); _printer.printText( name ); _printer.printText( ';' ); @@ -1121,7 +1138,7 @@ Node child; endCDATA(); - content(); + content(false); if (fDOMFilter !=null && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE)!= 0) { @@ -1283,25 +1300,27 @@ break; } } - - + /** * Must be called by a method about to print any type of content. * If the element was just opened, the opening tag is closed and * will be matched to a closing tag. Returns the current element * state with <tt>empty</tt> and <tt>afterElement</tt> set to false. * + * @param ignorable + * If the content entirely consists of ignorable whitespaces. + * * @return The current element state * @throws IOException An I/O exception occured while * serializing */ - protected ElementState content() + protected ElementState content( boolean ignorable ) throws IOException { ElementState state; state = getElementState(); - if ( ! isDocumentState() ) { + if ( ! isDocumentState() && (!ignorable || state.preserveSpace)) { // Need to close CData section first if ( state.inCData && ! 
state.doCData ) { _printer.printText( "]]>" ); @@ -1343,8 +1362,8 @@ throws IOException { ElementState state; - - state = content(); + boolean ignorable = text.trim().length()==0; + state = content(ignorable); // Check if text should be print as CDATA section or unescaped // based on elements listed in the output format (the element // state) or whether we are inside a CDATA section or entity. @@ -1379,7 +1398,11 @@ printText( text, true, state.unescaped ); _printer.setNextIndent( saveIndent ); } else { - printText( text, false, state.unescaped ); + // if the string is whitespace only and we are indenting + // this characters are probably just for indentation. + // we will handle indentation by ourselves, so don't print it. + if( !ignorable || !_indenting ) + printText( text, false, state.unescaped ); } } } @@ -1514,7 +1537,7 @@ else { // REVISIT: For XML 1.1 should we perform extra checks here? // Should it be serialized as entity reference? - if (content().inCData ) { + if (content(false).inCData ) { _printer.printText("]]>&#x"); _printer.printText(Integer.toHexString(supplemental)); _printer.printText(";<![CDATA["); 1.23.2.1 +3 -3 xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java Index: HTMLSerializer.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java,v retrieving revision 1.23 retrieving revision 1.23.2.1 diff -u -r1.23 -r1.23.2.1 --- HTMLSerializer.java 23 Sep 2003 21:42:31 -0000 1.23 +++ HTMLSerializer.java 9 Dec 2003 11:08:23 -0000 1.23.2.1 @@ -488,7 +488,7 @@ try { // HTML: no CDATA section - state = content(); + state = content(isIgnorable(chars,start,length)); state.doCData = false; super.characters( chars, start, length ); } catch ( IOException except ) { @@ -860,7 +860,7 @@ ElementState state; // HTML: no CDATA section - state = content(); + state = content(text.trim().length()==0); super.characters( text ); } 1.8.6.1 +17 -16 
xml-xerces/java/src/org/apache/xml/serialize/IndentPrinter.java Index: IndentPrinter.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/IndentPrinter.java,v retrieving revision 1.8 retrieving revision 1.8.6.1 diff -u -r1.8 -r1.8.6.1 --- IndentPrinter.java 13 Jan 2003 15:59:09 -0000 1.8 +++ IndentPrinter.java 9 Dec 2003 11:08:23 -0000 1.8.6.1 @@ -207,8 +207,15 @@ * separator will be counted. If the line accumulated so far is * long enough, it will be printed. */ - public void printSpace() - { + public void printSpace() { + printSpace(1); + } + + /** + * Prints a space <tt>width</tt> times, which may be broken into + * separate lines. + */ + private void printSpace(int width) { // The line consists of the text accumulated in _line, // followed by one or more spaces as counted by _spaces, // followed by more space accumulated in _text: @@ -253,7 +260,7 @@ } // Starting a new word: accumulate the text between the line // and this new word; not a new word: just add another space. - ++_spaces; + _spaces += width; } @@ -272,16 +279,12 @@ public void breakLine( boolean preserveSpace ) { - // Equivalent to calling printSpace and forcing a flushLine. - if ( _text.length() > 0 ) { - while ( _spaces > 0 ) { - _line.append( ' ' ); - --_spaces; - } - _line.append( _text ); - _text = new StringBuffer( 20 ); - } + // let the proper word wrapping happen between + // _line and _text + printSpace(0); flushLine( preserveSpace ); + _thisIndent = _nextIndent; + try { // Print line and new line, then zero the line contents. _writer.write( _format.getLineSeparator() ); @@ -298,7 +301,7 @@ * Flushes the line accumulated so far to the writer and get ready * to accumulate the next line. This method is called by {@link * #printText} and {@link #printSpace} when the accumulated line plus - * accumulated text are two long to fit on a given line.
At the end of + * accumulated text are too long to fit on a given line. At the end of * this method _line is empty and _spaces is zero. */ public void flushLine( boolean preserveSpace ) @@ -320,8 +323,6 @@ --indent; } } - _thisIndent = _nextIndent; - // There is no need to print the spaces at the end of the line, // they are simply stripped and replaced with a single line // separator. 1.7.2.1 +3 -3 xml-xerces/java/src/org/apache/xml/serialize/XML11Serializer.java Index: XML11Serializer.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XML11Serializer.java,v retrieving revision 1.7 retrieving revision 1.7.2.1 diff -u -r1.7 -r1.7.2.1 --- XML11Serializer.java 18 Nov 2003 22:59:50 -0000 1.7 +++ XML11Serializer.java 9 Dec 2003 11:08:23 -0000 1.7.2.1 @@ -224,7 +224,7 @@ ElementState state; try { - state = content(); + state = content(isIgnorable(chars,start,length)); // Check if text should be print as CDATA section or unescaped // based on elements listed in the output format (the element @@ -422,7 +422,7 @@ fatalError("The character '"+(char)supplemental+"' is an invalid XML character"); } else { - if (content().inCData ) { + if (content(false).inCData ) { _printer.printText("]]>&#x"); _printer.printText(Integer.toHexString(supplemental)); _printer.printText(";<![CDATA["); 1.55.2.1 +4 -2 xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java Index: XMLSerializer.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java,v retrieving revision 1.55 retrieving revision 1.55.2.1 diff -u -r1.55 -r1.55.2.1 --- XMLSerializer.java 18 Nov 2003 22:59:50 -0000 1.55 +++ XMLSerializer.java 9 Dec 2003 11:08:23 -0000 1.55.2.1 @@ -1386,7 +1386,9 @@ } continue; } - if ( unescaped ) + if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) + _printer.printSpace(); + else if ( 
unescaped ) _printer.printText( ch ); else printXMLChar( ch );
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]