minchau     2003/07/11 00:09:40

  Modified:    java/src/org/apache/xml/serializer WriterToUTF8Buffered.java
  Log:
  Fix to WriterToUTF8Buffered to ALWAYS buffer regardless of the
  size of the input char array or String.  This is achieved by logically cutting
  the input into chunks, each of which will not overflow the internal byte buffer,
  and having the write method call itself recursively on each chunk.
  
  PR: bugzilla 21491
  Submitted by: Brian Minchau
  
  Revision  Changes    Path
  1.4       +108 -105  xml-xalan/java/src/org/apache/xml/serializer/WriterToUTF8Buffered.java
  
  Index: WriterToUTF8Buffered.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/WriterToUTF8Buffered.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- WriterToUTF8Buffered.java 9 Jul 2003 22:46:47 -0000       1.3
  +++ WriterToUTF8Buffered.java 11 Jul 2003 07:09:40 -0000      1.4
  @@ -63,18 +63,26 @@
   
   
   /**
  - * This class writes ASCII to a byte stream as quickly as possible.  For the
  - * moment it does not do buffering, though I reserve the right to do some
  - * buffering down the line if I can prove that it will be faster even if the
  - * output stream is buffered.
  + * This class writes unicode characters to a byte stream (java.io.OutputStream)
  + * as quickly as possible. It buffers the output in an internal
  + * buffer which must be flushed to the OutputStream when done. This flushing
  + * is done via the close() flush() or flushBuffer() method. 
    */
   public final class WriterToUTF8Buffered extends Writer
   {
       
  -  /** number of characters that the buffer can hold.
  +  /** number of bytes that the byte buffer can hold.
      * This is a fixed constant is used rather than m_outputBytes.lenght for performance.
      */
  -  private static final int buf_length=16*1024;
  +  private static final int BYTES_MAX=16*1024;
  +  
  +  /** number of characters that the character buffer can hold.
  +   * This is 1/3 of the number of bytes because UTF-8 encoding
  +   * can expand one unicode character by up to 3 bytes.
  +   */
  +  private static final int CHARS_MAX=(BYTES_MAX/3);
  +  
  + // private static final int 
     
     /** The byte stream to write to. (sc & sb remove final to compile in JDK 1.1.8) */
     private final OutputStream m_os;
  @@ -109,11 +117,11 @@
         m_os = out;
         // get 3 extra bytes to make buffer overflow checking simpler and faster
         // we won't have to keep checking for a few extra characters
  -      m_outputBytes = new byte[buf_length + 3];
  +      m_outputBytes = new byte[BYTES_MAX + 3];
         
         // Big enough to hold the input chars that will be transformed
         // into output bytes in m_ouputBytes.
  -      m_inputChars = new char[(buf_length/3) + 1];
  +      m_inputChars = new char[CHARS_MAX + 1];
         count = 0;
         
   //      the old body of this constructor, before the buffersize was changed to a constant      
  @@ -159,9 +167,9 @@
     {
       
       /* If we are close to the end of the buffer then flush it.
  -     * Remember the buffer can hold a few more characters than buf_length
  +     * Remember the buffer can hold a few more bytes than BYTES_MAX
        */ 
  -    if (count >= buf_length)
  +    if (count >= BYTES_MAX)
           flushBuffer();
   
       if (c < 0x80)
  @@ -181,77 +189,6 @@
       }
     }
   
  -  /**
  -   * Write a portion of an array of characters.
  -   *
  -   * @param  chars  Array of characters
  -   * @param  start   Offset from which to start writing characters
  -   * @param  length   Number of characters to write
  -   *
  -   * @exception  IOException  If an I/O error occurs
  -   *
  -   * @throws java.io.IOException
  -   */
  -  private final void writeWithoutBuffering(
  -          final char chars[], final int start, final int length)
  -            throws java.io.IOException
  -  {
  -
  -    final OutputStream os = m_os;
  -
  -    final int n = length+start;
  -    for (int i = start; i < n; i++)
  -    {
  -      final char c = chars[i];
  -
  -      if (c < 0x80)
  -        os.write(c);
  -      else if (c < 0x800)
  -      {
  -        os.write(0xc0 + (c >> 6));
  -        os.write(0x80 + (c & 0x3f));
  -      }
  -      else
  -      {
  -        os.write(0xe0 + (c >> 12));
  -        os.write(0x80 + ((c >> 6) & 0x3f));
  -        os.write(0x80 + (c & 0x3f));
  -      }
  -    }
  -  }
  -
  -  /**
  -   * Write a string.
  -   *
  -   * @param  s  String to be written
  -   *
  -   * @exception  IOException  If an I/O error occurs
  -   */
  -  private final void writeWithoutBuffering(final String s) throws IOException
  -  {
  -
  -    final int n = s.length();
  -    final OutputStream os = m_os;
  -
  -    for (int i = 0; i < n; i++)
  -    {
  -      final char c = s.charAt(i);
  -
  -      if (c < 0x80)
  -        os.write(c);
  -      else if (c < 0x800)
  -      {
  -        os.write(0xc0 + (c >> 6));
  -        os.write(0x80 + (c & 0x3f));
  -      }
  -      else
  -      {
  -        os.write(0xe0 + (c >> 12));
  -        os.write(0x80 + ((c >> 6) & 0x3f));
  -        os.write(0x80 + (c & 0x3f));
  -      }
  -    }
  -  }
   
     /**
      * Write a portion of an array of characters.
  @@ -272,22 +209,29 @@
       // of the characters that we can put into the buffer.  It is possible
       // for each Unicode character to expand to three bytes.
   
  -    int lengthx3 = (length << 1) + length;
  +    int lengthx3 = 3*length;
   
  -    if (lengthx3 >= buf_length - count)
  +    if (lengthx3 >= BYTES_MAX - count)
       {
         // The requested length is greater than the unused part of the buffer
         flushBuffer();
   
  -      if (lengthx3 >= buf_length)
  +      if (lengthx3 >= BYTES_MAX)
         {
           /*
  -         * The requested length exceeds the size of the buffer,
  -         * so don't bother to buffer this one, just write it out
  -         * directly. The buffer is already flushed so this is a 
  -         * safe thing to do.
  +         * The requested length exceeds the size of the buffer.
  +         * Cut the buffer up into chunks, each of which will
  +         * not cause an overflow to the output buffer m_outputBytes,
  +         * and make multiple recursive calls.
            */
  -        writeWithoutBuffering(chars, start, length);
  +        final int chunks = 1 + length/CHARS_MAX;
  +        for (int chunk =0 ; chunk < chunks; chunk++)
  +        {
  +            int start_chunk = start + ((length*chunk)/chunks);
  +            int end_chunk   = start + ((length*(chunk+1))/chunks);
  +            int len_chunk = (end_chunk - start_chunk);
  +            this.write(chars,start_chunk, len_chunk);
  +        }
           return;
         }
       }
  @@ -331,6 +275,53 @@
       count = count_loc;
   
     }
  +  
  +  /**
  +   * Writes out the character array 
  +   * @param chars a character array with only ASCII characters, so
  +   * the UTF-8 encoding is optimized.
  +   * @param start the first character in the input array
  +   * @param length the number of characters in the input array
  +   */
  +  private void directWrite(final char chars[], final int start, final int length)
  +          throws java.io.IOException
  +  {
  +
  +
  +
  +    if (length >= BYTES_MAX - count)
  +    {
  +      // The requested length is greater than the unused part of the buffer
  +      flushBuffer();
  +
  +      if (length >= BYTES_MAX)
  +      {
  +        /*
  +         * The requested length exceeds the size of the buffer.
  +         * Cut the buffer up into chunks, each of which will
  +         * not cause an overflow to the output buffer m_outputBytes,
  +         * and make multiple recursive calls.
  +         */          
  +        int chunks = 1 + length/CHARS_MAX;
  +        for (int chunk =0 ; chunk < chunks; chunk++)
  +        {
  +            int start_chunk = start + ((length*chunk)/chunks);
  +            int end_chunk   = start + ((length*(chunk+1))/chunks);
  +            int len_chunk = (end_chunk - start_chunk);
  +            this.directWrite(chars,start_chunk, len_chunk);
  +        }
  +        return;
  +      }
  +    }
  +
  +    final int n = length+start;
  +    final byte[] buf_loc = m_outputBytes; // local reference for faster access
  +    int count_loc = count;      // local integer for faster access
  +    for(int i=start; i < n ; i++ )
  +        buf_loc[count_loc++] = (byte) buf_loc[i];
  +    // Store the local integer back into the instance variable
  +    count = count_loc;
  +  }
   
     /**
      * Write a string.
  @@ -346,23 +337,30 @@
       // of the characters that we can put into the buffer.  It is possible
       // for each Unicode character to expand to three bytes.
       final int length = s.length();
  -    int lengthx3 = (length << 1) + length;
  +    int lengthx3 = 3*length;
   
  -    if (lengthx3 >= buf_length - count)
  +    if (lengthx3 >= BYTES_MAX - count)
       {
         // The requested length is greater than the unused part of the buffer
         flushBuffer();
   
  -      if (lengthx3 >= buf_length)
  +      if (lengthx3 >= BYTES_MAX)
         {
           /*
            * The requested length exceeds the size of the buffer,
  -         * so don't bother to buffer this one, just write it out
  -         * directly. The buffer is already flushed so this is a 
  -         * safe thing to do.
  +         * so break it up in chunks that don't exceed the buffer size.
            */
  -        writeWithoutBuffering(s);
  -        return;
  +         final int start = 0;
  +         int chunks = 1 + length/CHARS_MAX;
  +         for (int chunk =0 ; chunk < chunks; chunk++)
  +         {
  +             int start_chunk = start + ((length*chunk)/chunks);
  +             int end_chunk   = start + ((length*(chunk+1))/chunks);
  +             int len_chunk = (end_chunk - start_chunk);
  +             s.getChars(start_chunk,end_chunk, m_inputChars,0);
  +             this.write(m_inputChars,0, len_chunk);
  +         }
  +         return;
         }
       }
   
  @@ -474,18 +472,14 @@
     public void directWrite(final String s) throws IOException
     {
   
  -    // We multiply the length by three since this is the maximum length
  -    // of the characters that we can put into the buffer.  It is possible
  -    // for each Unicode character to expand to three bytes.
       final int length = s.length();
  -    int lengthx3 = (length << 1) + length;
  -
  -    if (lengthx3 >= buf_length - count)
  +    
  +    if (length >= BYTES_MAX - count)
       {
         // The requested length is greater than the unused part of the buffer
         flushBuffer();
   
  -      if (lengthx3 >= buf_length)
  +      if (length >= BYTES_MAX)
         {
           /*
            * The requested length exceeds the size of the buffer,
  @@ -493,7 +487,16 @@
            * directly. The buffer is already flushed so this is a 
            * safe thing to do.
            */
  -        writeWithoutBuffering(s);
  +         final int start = 0;
  +         int chunks = 1 + length/CHARS_MAX;
  +         for (int chunk =0 ; chunk < chunks; chunk++)
  +         {
  +             int start_chunk = start + ((length*chunk)/chunks);
  +             int end_chunk   = start + ((length*(chunk+1))/chunks);
  +             int len_chunk = (end_chunk - start_chunk);
  +             s.getChars(start_chunk,end_chunk, m_inputChars,0);
  +             this.directWrite(m_inputChars,0, len_chunk);
  +         }
           return;
         }
       }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to