serialize BaseMarkupSerializer.java HTMLSerializer.java IndentPrinter.java XML11Serializer.java XMLSerializer.java

venu Tue, 09 Dec 2003 04:11:12 -0800

venu        2003/12/09 03:08:23

  Modified:    java/src/org/apache/xml/serialize Tag: jaxp13-2_6-branch
                        BaseMarkupSerializer.java HTMLSerializer.java
                        IndentPrinter.java XML11Serializer.java
                        XMLSerializer.java
  Log:
  Fix for bug 22472. Thanks to [EMAIL PROTECTED]
  
  Revision  Changes    Path
  No                   revision
  No                   revision
  1.48.2.1  +39 -16    
xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java
  
  Index: BaseMarkupSerializer.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java,v
  retrieving revision 1.48
  retrieving revision 1.48.2.1
  diff -u -r1.48 -r1.48.2.1
  --- BaseMarkupSerializer.java 18 Nov 2003 22:59:50 -0000      1.48
  +++ BaseMarkupSerializer.java 9 Dec 2003 11:08:23 -0000       1.48.2.1
  @@ -511,14 +511,27 @@
           // Nothing to do here. All the magic happens in startDocument(String)
       }
       
  +    /**
  +     * Checks if chars[start..start+length) consists entirely of whitespace
  +     * (space, tab, CR or LF). An empty range is reported as ignorable.
  +     */
  +    protected final boolean isIgnorable( char[] chars, int start, int length ) {
  +        for( int i=start; i<start+length; i++ ) {
  +            char ch = chars[i];
  +            if(ch!=' ' && ch!='\t' && ch!='\r' && ch!='\n') return false; // significant text
  +        }
  +        return true;
  +    }
       
       public void characters( char[] chars, int start, int length )
           throws SAXException
       {
           ElementState state;
  +        
  +        boolean ignorable = isIgnorable(chars,start,length);
   
           try {
  -        state = content();
  +        state = content(ignorable);
   
           // Check if text should be print as CDATA section or unescaped
           // based on elements listed in the output format (the element
  @@ -582,7 +595,11 @@
                   printText( chars, start, length, true, state.unescaped );
                   _printer.setNextIndent( saveIndent );
               } else {
  -                printText( chars, start, length, false, state.unescaped );
  +                // if the string is whitespace only and we are indenting
  +                // these characters are probably just for indentation.
  +                // we will handle indentation by ourselves, so don't print it.
  +                if(!ignorable || !_indenting)
  +                    printText( chars, start, length, false, state.unescaped );
               }
           }
           } catch ( IOException except ) {
  @@ -597,7 +614,7 @@
           int i;
   
           try {
  -        content();
  +        content(true);
   
           // Print ignorable whitespaces only when indenting, after
           // all they are indentation. Cancel the indentation to
  @@ -629,7 +646,7 @@
           int          index;
           ElementState state;
   
  -        state = content();
  +        state = content(false);
   
           // Create the processing instruction textual representation.
           // Make sure we don't have '?>' inside either target or code.
  @@ -686,7 +703,7 @@
           if ( _format.getOmitComments() )
               return;
   
  -        state  = content();
  +        state  = content(false);
           // Create the processing comment textual representation.
           // Make sure we don't have '-->' inside the comment.
           index = text.indexOf( "-->" );
  @@ -825,7 +842,7 @@
       {
           try {
           endCDATA();
  -        content();
  +        content(false);
           _printer.printText( '&' );
           _printer.printText( name );
           _printer.printText( ';' );
  @@ -1121,7 +1138,7 @@
               Node         child;
   
               endCDATA();
  -            content();
  +            content(false);
   
               if (fDOMFilter !=null && 
                     (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE)!= 
0) {
  @@ -1283,25 +1300,27 @@
               break;
           }
       }
  -
  -
  +    
       /**
        * Must be called by a method about to print any type of content.
        * If the element was just opened, the opening tag is closed and
        * will be matched to a closing tag. Returns the current element
        * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
        *
  +     * @param ignorable
  +     *      If the content entirely consists of ignorable whitespaces.  
  +     * 
        * @return The current element state
        * @throws IOException An I/O exception occured while
        *   serializing
        */
  -    protected ElementState content()
  +    protected ElementState content( boolean ignorable )
           throws IOException
       {
           ElementState state;
   
           state = getElementState();
  -        if ( ! isDocumentState() ) {
  +        if ( ! isDocumentState() && (!ignorable || state.preserveSpace)) {
               // Need to close CData section first
               if ( state.inCData && ! state.doCData ) {
                   _printer.printText( "]]>" );
  @@ -1343,8 +1362,8 @@
           throws IOException
       {
           ElementState state;
  -
  -        state = content();
  +        boolean ignorable = text.trim().length()==0;
  +        state = content(ignorable);
           // Check if text should be print as CDATA section or unescaped
           // based on elements listed in the output format (the element
           // state) or whether we are inside a CDATA section or entity.
  @@ -1379,7 +1398,11 @@
                   printText( text, true, state.unescaped );
                   _printer.setNextIndent( saveIndent );
               } else {
  -                printText( text, false, state.unescaped );
  +                // if the string is whitespace only and we are indenting
  +                // these characters are probably just for indentation.
  +                // we will handle indentation by ourselves, so don't print it.
  +                if( !ignorable || !_indenting )
  +                    printText( text, false, state.unescaped );
               }
           }
       }
  @@ -1514,7 +1537,7 @@
                   else {
                       // REVISIT: For XML 1.1 should we perform extra checks here?
                       //          Should it be serialized as entity reference?
  -                    if (content().inCData ) {
  +                    if (content(false).inCData ) {
                           _printer.printText("]]>&#x");                        
                           _printer.printText(Integer.toHexString(supplemental));      
                  
                           _printer.printText(";<![CDATA[");
  
  
  
  1.23.2.1  +3 -3      xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java
  
  Index: HTMLSerializer.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xml/serialize/HTMLSerializer.java,v
  retrieving revision 1.23
  retrieving revision 1.23.2.1
  diff -u -r1.23 -r1.23.2.1
  --- HTMLSerializer.java       23 Sep 2003 21:42:31 -0000      1.23
  +++ HTMLSerializer.java       9 Dec 2003 11:08:23 -0000       1.23.2.1
  @@ -488,7 +488,7 @@
   
           try {
               // HTML: no CDATA section
  -            state = content();
  +            state = content(isIgnorable(chars,start,length));
               state.doCData = false;
               super.characters( chars, start, length );
           } catch ( IOException except ) {
  @@ -860,7 +860,7 @@
           ElementState state;
   
           // HTML: no CDATA section
  -        state = content();
  +        state = content(text.trim().length()==0);
           super.characters( text );
       }
   
  
  
  
  1.8.6.1   +17 -16    xml-xerces/java/src/org/apache/xml/serialize/IndentPrinter.java
  
  Index: IndentPrinter.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/IndentPrinter.java,v
  retrieving revision 1.8
  retrieving revision 1.8.6.1
  diff -u -r1.8 -r1.8.6.1
  --- IndentPrinter.java        13 Jan 2003 15:59:09 -0000      1.8
  +++ IndentPrinter.java        9 Dec 2003 11:08:23 -0000       1.8.6.1
  @@ -207,8 +207,15 @@
        * separator will be counted. If the line accumulated so far is
        * long enough, it will be printed.
        */
  -    public void printSpace()
  -    {
  +    public void printSpace() {
  +        printSpace(1);
  +    }
  +    
  +    /**
  +     * Prints a space <tt>width</tt> times, which may be broken into
  +     * separate lines. 
  +     */
  +    private void printSpace(int width) {
           // The line consists of the text accumulated in _line,
           // followed by one or more spaces as counted by _spaces,
           // followed by more space accumulated in _text:
  @@ -253,7 +260,7 @@
           }
           // Starting a new word: accumulate the text between the line
           // and this new word; not a new word: just add another space.
  -        ++_spaces;
  +        _spaces += width;
       }
   
   
  @@ -272,16 +279,12 @@
   
       public void breakLine( boolean preserveSpace )
       {
  -        // Equivalent to calling printSpace and forcing a flushLine.
  -        if ( _text.length() > 0 ) {
  -            while ( _spaces > 0 ) {
  -                _line.append( ' ' );
  -                --_spaces;
  -            }
  -            _line.append( _text );
  -            _text = new StringBuffer( 20 );
  -        }
  +        // let the proper word wrapping happen between
  +        // _line and _text
  +        printSpace(0);
           flushLine( preserveSpace );
  +        _thisIndent = _nextIndent;
  +        
           try {
               // Print line and new line, then zero the line contents.
               _writer.write( _format.getLineSeparator() );
  @@ -298,7 +301,7 @@
        * Flushes the line accumulated so far to the writer and get ready
        * to accumulate the next line. This method is called by {@link
        * #printText} and {@link #printSpace} when the accumulated line plus
  -     * accumulated text are two long to fit on a given line. At the end of
  +     * accumulated text are too long to fit on a given line. At the end of
        * this method _line is empty and _spaces is zero.
        */
       public void flushLine( boolean preserveSpace )
  @@ -320,8 +323,6 @@
                           --indent;
                       }
                   }
  -                _thisIndent = _nextIndent;
  -                
                   // There is no need to print the spaces at the end of the line,
                   // they are simply stripped and replaced with a single line
                   // separator.
  
  
  
  1.7.2.1   +3 -3      
xml-xerces/java/src/org/apache/xml/serialize/XML11Serializer.java
  
  Index: XML11Serializer.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XML11Serializer.java,v
  retrieving revision 1.7
  retrieving revision 1.7.2.1
  diff -u -r1.7 -r1.7.2.1
  --- XML11Serializer.java      18 Nov 2003 22:59:50 -0000      1.7
  +++ XML11Serializer.java      9 Dec 2003 11:08:23 -0000       1.7.2.1
  @@ -224,7 +224,7 @@
           ElementState state;
   
           try {
  -            state = content();
  +            state = content(isIgnorable(chars,start,length));
   
               // Check if text should be print as CDATA section or unescaped
               // based on elements listed in the output format (the element
  @@ -422,7 +422,7 @@
                       fatalError("The character '"+(char)supplemental+"' is an 
invalid XML character"); 
                   }
                   else {
  -                    if (content().inCData ) {
  +                    if (content(false).inCData ) {
                           _printer.printText("]]>&#x");                        
                           _printer.printText(Integer.toHexString(supplemental));      
                  
                           _printer.printText(";<![CDATA[");
  
  
  
  1.55.2.1  +4 -2      xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java
  
  Index: XMLSerializer.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java,v
  retrieving revision 1.55
  retrieving revision 1.55.2.1
  diff -u -r1.55 -r1.55.2.1
  --- XMLSerializer.java        18 Nov 2003 22:59:50 -0000      1.55
  +++ XMLSerializer.java        9 Dec 2003 11:08:23 -0000       1.55.2.1
  @@ -1386,7 +1386,9 @@
                       }
                       continue;
                   }
  -                if ( unescaped )
  +                if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == 
'\r' )
  +                    _printer.printSpace();
  +                else if ( unescaped )
                       _printer.printText( ch );
                   else
                       printXMLChar( ch );


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: xml-xerces/java/src/org/apache/xml/serialize BaseMarkupSerializer.java HTMLSerializer.java IndentPrinter.java XML11Serializer.java XMLSerializer.java

Reply via email to