Re: [cp-patches] RFC: InputStreamReader and OutputStreamWriter revisited

Roman Kennke Sun, 07 Jan 2007 13:01:56 -0800

Hello again,

> - The classes now make more effective use of the NIO encoders/decoders.
> The passed in char[] arrays are wrapped in CharBuffers and the
> underlying byte streams are buffered in a ByteBuffer (as documented
> btw). These are passed to the NIO codecs to encode and decode the data
> in bigger chunks (in contrast to char-by-char encoding as done before).
> - The CharBuffers are cached. We take advantage here of the fact that
> the write(char[],int,int) and read(char[],int,int) are usually called
> repeatedly with the same array and don't rewrap this array over and over
> again.
> - For the single-step write(char) and read() methods we use a cached
> single element char array (you think that means nothing? Look at the
> benchmarks below).
> - I noticed that when the default encoding in file.encoding is invalid,
> the JDK falls back to UTF-8 rather than Latin1. I fixed it so we do too.
> (Should we adjust the default property setting in SystemProperties too?)


I found that this patch hung when running Mauve. There was one small
flaw in the InputStreamReader.read() method: it waited until it could
fill the target array completely. However, it should only block until
_some_ data is available, and return once it has some and receives no
more. Find the modified patch attached.

/Roman

Index: java/io/InputStreamReader.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/io/InputStreamReader.java,v
retrieving revision 1.31
diff -u -1 -5 -r1.31 InputStreamReader.java
--- java/io/InputStreamReader.java	14 Sep 2006 08:03:56 -0000	1.31
+++ java/io/InputStreamReader.java	7 Jan 2007 20:59:55 -0000
@@ -86,226 +86,203 @@
  * <p>
  * Due to a deficiency the Java class library design, there is no standard
  * way for an application to install its own byte-character encoding.
  *
  * @see BufferedReader
  * @see InputStream
  *
  * @author Robert Schuster
  * @author Aaron M. Renn ([EMAIL PROTECTED])
  * @author Per Bothner ([EMAIL PROTECTED])
  * @date April 22, 1998.  
  */
 public class InputStreamReader extends Reader
 {
   /**
+   * The default buffer size.
+   */
+  private final static int BUFFER_SIZE = 1024;
+
+  /**
    * The input stream.
    */
   private InputStream in;
 
   /**
    * The charset decoder.
    */
   private CharsetDecoder decoder;
 
   /**
    * End of stream reached.
    */
   private boolean isDone = false;
 
   /**
-   * Need this.
-   */
-  private float maxBytesPerChar;
-
-  /**
    * Buffer holding surplus loaded bytes (if any)
    */
   private ByteBuffer byteBuffer;
 
   /**
    * java.io canonical name of the encoding.
    */
   private String encoding;
 
   /**
-   * We might decode to a 2-char UTF-16 surrogate, which won't fit in the
-   * output buffer. In this case we need to save the surrogate char.
+   * One char as array to be used in {@link #read()}.
    */
-  private char savedSurrogate;
-  private boolean hasSavedSurrogate = false;
+  private char[] oneChar = new char[1];
 
   /**
-   * A byte array to be reused in read(byte[], int, int).
+   * The last char array that has been passed to read(char[],int,int). This
+   * is used to cache the associated CharBuffer because read(char[],int,int)
+   * is usually called with the same array repeatedly and we don't want to
+   * allocate a new CharBuffer object on each call.
    */
-  private byte[] bytesCache;
+  private char[] lastArray;
 
   /**
-   * Locks the bytesCache above in read(byte[], int, int).
+   * The cached CharBuffer associated with the above array.
    */
-  private Object cacheLock = new Object();
+  private CharBuffer lastBuffer;
 
   /**
    * This method initializes a new instance of <code>InputStreamReader</code>
    * to read from the specified stream using the default encoding.
    *
    * @param in The <code>InputStream</code> to read from 
    */
   public InputStreamReader(InputStream in)
   {
     if (in == null)
       throw new NullPointerException();
+
     this.in = in;
-    try 
-	{ 
-	  encoding = SystemProperties.getProperty("file.encoding");
-	  // Don't use NIO if avoidable
-	  if(EncodingHelper.isISOLatin1(encoding))
-	    {
-	      encoding = "ISO8859_1";
-	      maxBytesPerChar = 1f;
-	      decoder = null;
-	      return;
-	    }
-	  Charset cs = EncodingHelper.getCharset(encoding);
-	  decoder = cs.newDecoder();
-	  encoding = EncodingHelper.getOldCanonical(cs.name());
-	  try {
-	      maxBytesPerChar = cs.newEncoder().maxBytesPerChar();
-	  } catch(UnsupportedOperationException _){
-	      maxBytesPerChar = 1f;
-	  } 
-	  decoder.onMalformedInput(CodingErrorAction.REPLACE);
-	  decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
-	  decoder.reset();
-	} catch(RuntimeException e) {
-	  encoding = "ISO8859_1";
-	  maxBytesPerChar = 1f;
-	  decoder = null;
-	} catch(UnsupportedEncodingException e) {
-	  encoding = "ISO8859_1";
-	  maxBytesPerChar = 1f;
-	  decoder = null;
-	}
+
+    String encodingName = SystemProperties.getProperty("file.encoding");
+    try
+      {
+        Charset cs = EncodingHelper.getCharset(encodingName);
+        decoder = cs.newDecoder();
+        // The encoding should be the old name, if such exists.
+        encoding = EncodingHelper.getOldCanonical(cs.name());
+      }
+    catch(RuntimeException e)
+      {
+        // For bootstrapping problems only.
+        decoder = null;
+        encoding = "ISO8859_1";
+      }
+    catch (UnsupportedEncodingException ex)
+      {
+        Charset cs = EncodingHelper.getDefaultCharset();
+        decoder = cs.newDecoder();
+        // The encoding should be the old name, if such exists.
+        encoding = EncodingHelper.getOldCanonical(cs.name());
+      }    
+    initDecoderAndBuffer();
   }
 
   /**
    * This method initializes a new instance of <code>InputStreamReader</code>
    * to read from the specified stream using a caller supplied character
    * encoding scheme.  Note that due to a deficiency in the Java language
    * design, there is no way to determine which encodings are supported.
    * 
    * @param in The <code>InputStream</code> to read from
    * @param encoding_name The name of the encoding scheme to use
    *
    * @exception UnsupportedEncodingException If the encoding scheme 
    * requested is not available.
    */
   public InputStreamReader(InputStream in, String encoding_name)
     throws UnsupportedEncodingException
   {
-    if (in == null
-        || encoding_name == null)
+    if (in == null || encoding_name == null)
       throw new NullPointerException();
-    
+
     this.in = in;
-    // Don't use NIO if avoidable
-    if(EncodingHelper.isISOLatin1(encoding_name))
+
+    try
       {
-	encoding = "ISO8859_1";
-	maxBytesPerChar = 1f;
-	decoder = null;
-	return;
+        Charset cs = EncodingHelper.getCharset(encoding_name);
+        decoder = cs.newDecoder();
+        // The encoding should be the old name, if such exists.
+        encoding = EncodingHelper.getOldCanonical(cs.name());
       }
-    try {
-      Charset cs = EncodingHelper.getCharset(encoding_name);
-      try {
-        maxBytesPerChar = cs.newEncoder().maxBytesPerChar();
-      } catch(UnsupportedOperationException _){
-	maxBytesPerChar = 1f;
-      } 
-
-      decoder = cs.newDecoder();
-      decoder.onMalformedInput(CodingErrorAction.REPLACE);
-      decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
-      decoder.reset();
-
-      // The encoding should be the old name, if such exists.
-      encoding = EncodingHelper.getOldCanonical(cs.name());
-    } catch(RuntimeException e) {
-      encoding = "ISO8859_1";
-      maxBytesPerChar = 1f;
-      decoder = null;
-    }
+    catch(RuntimeException e)
+      {
+        // For bootstrapping problems only.
+        decoder = null;
+        encoding = "ISO8859_1";
+      }
+
+    initDecoderAndBuffer();
   }
 
   /**
    * Creates an InputStreamReader that uses a decoder of the given
    * charset to decode the bytes in the InputStream into
    * characters.
    * 
    * @since 1.4
    */
-  public InputStreamReader(InputStream in, Charset charset) {
-    if (in == null)
+  public InputStreamReader(InputStream in, Charset charset)
+  {
+    if (in == null || charset == null)
       throw new NullPointerException();
+
     this.in = in;
     decoder = charset.newDecoder();
-
-    try {
-      maxBytesPerChar = charset.newEncoder().maxBytesPerChar();
-    } catch(UnsupportedOperationException _){
-      maxBytesPerChar = 1f;
-    }
-
-    decoder.onMalformedInput(CodingErrorAction.REPLACE);
-    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
-    decoder.reset();
     encoding = EncodingHelper.getOldCanonical(charset.name());
+    initDecoderAndBuffer();
   }
 
   /**
    * Creates an InputStreamReader that uses the given charset decoder
    * to decode the bytes in the InputStream into characters.
    * 
    * @since 1.4
    */
-  public InputStreamReader(InputStream in, CharsetDecoder decoder) {
-    if (in == null)
+  public InputStreamReader(InputStream in, CharsetDecoder decoder)
+  {
+    if (in == null || decoder == null)
       throw new NullPointerException();
+
     this.in = in;
     this.decoder = decoder;
+    encoding = EncodingHelper.getOldCanonical(decoder.charset().name());
+    initDecoderAndBuffer();
+  }
 
-    Charset charset = decoder.charset();
-    try {
-      if (charset == null)
-        maxBytesPerChar = 1f;
-      else
-        maxBytesPerChar = charset.newEncoder().maxBytesPerChar();
-    } catch(UnsupportedOperationException _){
-	maxBytesPerChar = 1f;
-    } 
-
-    decoder.onMalformedInput(CodingErrorAction.REPLACE);
-    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
-    decoder.reset();
-    if (charset == null)
-      encoding = "US-ASCII";
-    else
-      encoding = EncodingHelper.getOldCanonical(decoder.charset().name());      
+  /**
+   * Initializes the decoder and the input buffer.
+   */
+  private void initDecoderAndBuffer()
+  {
+    if (decoder != null)
+      {
+        decoder.onMalformedInput(CodingErrorAction.REPLACE);
+        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+        decoder.reset();
+      }
+
+    byteBuffer = ByteBuffer.allocate(BUFFER_SIZE);
+    // No bytes available initially.
+    byteBuffer.position(byteBuffer.limit());
   }
-  
+
   /**
    * This method closes this stream, as well as the underlying 
    * <code>InputStream</code>.
    *
    * @exception IOException If an error occurs
    */
   public void close() throws IOException
   {
     synchronized (lock)
       {
 	// Makes sure all intermediate data is released by the decoder.
 	if (decoder != null)
 	   decoder.reset();
 	if (in != null)
 	   in.close();
@@ -330,180 +307,192 @@
   /**
    * This method checks to see if the stream is ready to be read.  It
    * will return <code>true</code> if is, or <code>false</code> if it is not.
    * If the stream is not ready to be read, it could (although is not required
    * to) block on the next read attempt.
    *
    * @return <code>true</code> if the stream is ready to be read, 
    * <code>false</code> otherwise
    *
    * @exception IOException If an error occurs
    */
   public boolean ready() throws IOException
   {
     if (in == null)
       throw new IOException("Reader has been closed");
-    
-    return in.available() != 0;
+    return byteBuffer.hasRemaining() || in.available() != 0;
   }
 
   /**
    * This method reads up to <code>length</code> characters from the stream into
    * the specified array starting at index <code>offset</code> into the
    * array.
    *
    * @param buf The character array to recieve the data read
    * @param offset The offset into the array to start storing characters
    * @param length The requested number of characters to read.
    *
    * @return The actual number of characters read, or -1 if end of stream.
    *
    * @exception IOException If an error occurs
    */
   public int read(char[] buf, int offset, int length) throws IOException
   {
     if (in == null)
       throw new IOException("Reader has been closed");
     if (isDone)
       return -1;
-    if(decoder != null)
-      {
-	int totalBytes = (int)((double) length * maxBytesPerChar);
-        if (byteBuffer != null)
-          totalBytes = Math.max(totalBytes, byteBuffer.remaining());
-	byte[] bytes;
-        // Fetch cached bytes array if available and big enough.
-        synchronized(cacheLock)
-          {
-            bytes = bytesCache;
-            if (bytes == null || bytes.length < totalBytes)
-              bytes = new byte[totalBytes];
-            else
-              bytesCache = null;
-          }
 
-	int remaining = 0;
-	if(byteBuffer != null)
-	{
-	    remaining = byteBuffer.remaining();
-	    byteBuffer.get(bytes, 0, remaining);
-	}
-	int read;
-	if(totalBytes - remaining > 0)
-	  {
-	    read = in.read(bytes, remaining, totalBytes - remaining);
-	    if(read == -1){
-	      read = remaining;
-	      isDone = true;
-	    } else
-	      read += remaining;
-	  } else 
-            read = remaining;
-	byteBuffer = ByteBuffer.wrap(bytes, 0, read);	
-	CharBuffer cb = CharBuffer.wrap(buf, offset, length);
-	int startPos = cb.position();
-
- 	if(hasSavedSurrogate){
- 	    hasSavedSurrogate = false;
- 	    cb.put(savedSurrogate);
-	    read++;
- 	}
-
-	CoderResult cr = decoder.decode(byteBuffer, cb, isDone);
-	decoder.reset();
-	// 1 char remains which is the first half of a surrogate pair.
-	if(cr.isOverflow() && cb.hasRemaining()){
-	    CharBuffer overflowbuf = CharBuffer.allocate(2);
-	    cr = decoder.decode(byteBuffer, overflowbuf, isDone);
-	    overflowbuf.flip();
-	    if(overflowbuf.hasRemaining())
-	    {
-	      cb.put(overflowbuf.get());
-	      savedSurrogate = overflowbuf.get();
-	      hasSavedSurrogate = true;	    
-	      isDone = false;
-	    }
-	}
-
-	if(byteBuffer.hasRemaining()) {
-	    byteBuffer.compact();
-	    byteBuffer.flip();	  
-	    isDone = false;
-	} else
-	    byteBuffer = null;
-
-	read = cb.position() - startPos;
-
-        // Put cached bytes array back if we are finished and the cache
-        // is null or smaller than the used bytes array.
-        synchronized (cacheLock)
-          {
-            if (byteBuffer == null
-                && (bytesCache == null || bytesCache.length < bytes.length))
-              bytesCache = bytes;
-          }
-        return (read <= 0) ? -1 : read;
-      }
-    else
+    CharBuffer outBuffer = getCharBuffer(buf, offset, length);
+    int startPos = outBuffer.position();
+    int remaining = outBuffer.remaining();
+    int start = remaining;
+    CoderResult cr = null;
+    while (remaining == start && ! isDone)
       {
-	byte[] bytes;
-        // Fetch cached bytes array if available and big enough.
-        synchronized (cacheLock)
-          {
-            bytes = bytesCache;
-            if (bytes == null || length < bytes.length)
-              bytes = new byte[length];
-            else
-              bytesCache = null;
-          }
-
-	int read = in.read(bytes);
-	for(int i=0;i<read;i++)
-          buf[offset+i] = (char)(bytes[i]&0xFF);
-
-        // Put back byte array into cache if appropriate.
-        synchronized (cacheLock)
+        if (byteBuffer.remaining() == 0
+            || (cr != null && (cr.isUnderflow())))
           {
-            if (bytesCache == null || bytesCache.length < bytes.length)
-              bytesCache = bytes;
+            refillInputBuffer();
           }
-	return read;
-    }
+        cr = decode(outBuffer);
+        remaining = outBuffer.remaining();
+      }
+    return outBuffer.position() - startPos;
   }
 
   /**
    * Reads an char from the input stream and returns it
    * as an int in the range of 0-65535.  This method also will return -1 if
    * the end of the stream has been reached.
    * <p>
    * This method will block until the char can be read.
    *
    * @return The char read or -1 if end of stream
    *
    * @exception IOException If an error occurs
    */
   public int read() throws IOException
   {
-    char[] buf = new char[1];
-    int count = read(buf, 0, 1);
-    return count > 0 ? buf[0] : -1;
+    int count = read(oneChar, 0, 1);
+    return count > 0 ? oneChar[0] : -1;
   }
 
   /**
    * Skips the specified number of chars in the stream.  It
    * returns the actual number of chars skipped, which may be less than the
    * requested amount.
    *
    * @param count The requested number of chars to skip
    *
    * @return The actual number of chars skipped.
    *
    * @exception IOException If an error occurs
    */
    public long skip(long count) throws IOException
    {
      if (in == null)
        throw new IOException("Reader has been closed");
-     
+
      return super.skip(count);
    }
+
+  /**
+   * Returns a CharBuffer that wraps the specified char array. This tries
+   * to return a cached instance because usually the read() method is called
+   * repeatedly with the same char array instance, or the no-arg read
+   * method is called repeatedly which uses the oneChar field of this class
+   * over and over again.
+   *
+   * @param buf the array to wrap
+   * @param offset the offset
+   * @param length the length
+   *
+   * @return a prepared CharBuffer to write to
+   */
+  private final CharBuffer getCharBuffer(char[] buf, int offset, int length)
+  {
+    CharBuffer outBuffer;
+    if (lastArray == buf)
+      {
+        outBuffer = lastBuffer;
+        outBuffer.position(offset);
+        outBuffer.limit(offset + length);
+      }
+    else
+      {
+        lastArray = buf;
+        lastBuffer = CharBuffer.wrap(buf, offset, length);
+        outBuffer = lastBuffer;
+      }
+    return outBuffer;
+  }
+
+  /**
+   * Refills the input buffer by reading a chunk of bytes from the underlying
+   * input stream
+   *
+   * @throws IOException from the underlying stream
+   */
+  private final void refillInputBuffer()
+    throws IOException
+  {
+    // Refill input buffer.
+    byteBuffer.compact();
+    if (byteBuffer.hasArray())
+      {
+        byte[] buffer = byteBuffer.array();
+        int offs = byteBuffer.arrayOffset();
+        int pos = byteBuffer.position();
+        int rem = byteBuffer.remaining();
+        int readBytes = in.read(buffer, offs + pos, rem);
+        if (readBytes > 0)
+          {
+            byteBuffer.position(pos + readBytes);
+            byteBuffer.limit(pos + readBytes);
+          }
+        isDone = readBytes == -1;
+      }
+    else
+      {
+        // Shouldn't happen, but anyway...
+        byte[] buffer = new byte[byteBuffer.limit()
+                                 - byteBuffer.position()];
+        int readBytes = in.read(buffer);
+        isDone = readBytes == -1;
+        byteBuffer.put(buffer);
+      }
+    byteBuffer.flip();
+  }
+
+  /**
+   * Decodes the current byteBuffer into the specified outBuffer. This takes
+   * care of the corner case when we have no decoder (i.e. bootstrap problems)
+   * and performs a primitive Latin1 decoding in this case.
+   *
+   * @param outBuffer the buffer to decode to
+   *
+   * @return the coder result
+   */
+  private CoderResult decode(CharBuffer outBuffer)
+  {
+    CoderResult cr;
+    if (decoder != null)
+      {
+        cr = decoder.decode(byteBuffer, outBuffer, false);
+      }
+    else
+      {
+        // Perform primitive Latin1 decoding.
+        while (outBuffer.hasRemaining() && byteBuffer.hasRemaining())
+          {
+            outBuffer.put((char) (0xff & byteBuffer.get()));
+          }
+        // One of the buffers must be drained.
+        if (! outBuffer.hasRemaining())
+          cr = CoderResult.OVERFLOW;
+        else
+          cr = CoderResult.UNDERFLOW;
+      }
+    return cr;
+  }
 }
Index: java/io/OutputStreamWriter.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/io/OutputStreamWriter.java,v
retrieving revision 1.22
diff -u -1 -5 -r1.22 OutputStreamWriter.java
--- java/io/OutputStreamWriter.java	8 Nov 2006 16:54:22 -0000	1.22
+++ java/io/OutputStreamWriter.java	7 Jan 2007 20:59:55 -0000
@@ -30,35 +30,34 @@
 terms of your choice, provided that you also meet, for each linked
 independent module, the terms and conditions of the license of that
 module.  An independent module is a module which is not derived from
 or based on this library.  If you modify this library, you may extend
 this exception to your version of the library, but you are not
 obligated to do so.  If you do not wish to do so, delete this
 exception statement from your version. */
 
 
 package java.io;
 
 import gnu.java.nio.charset.EncodingHelper;
 
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
-import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
-import java.nio.charset.MalformedInputException;
 
 /**
  * This class writes characters to an output stream that is byte oriented
  * It converts the chars that are written to bytes using an encoding layer,
  * which is specific to a particular encoding standard.  The desired
  * encoding can either be specified by name, or if no encoding is specified,
  * the system default encoding will be used.  The system default encoding
  * name is determined from the system property <code>file.encoding</code>.
  * The only encodings that are guaranteed to be available are "8859_1"
  * (the Latin-1 character set) and "UTF8".  Unfortunately, Java does not
  * provide a mechanism for listing the encodings that are supported in
  * a given implementation.
  * <p>
  * Here is a list of standard encoding names that may be available:
  * <p>
@@ -72,192 +71,210 @@
  * <li>8859_7 (ISO-8859-7/Latin-7)
  * <li>8859_8 (ISO-8859-8/Latin-8)
  * <li>8859_9 (ISO-8859-9/Latin-9)
  * <li>ASCII (7-bit ASCII)
  * <li>UTF8 (UCS Transformation Format-8)
  * <li>More Later
  * </ul>
  *
  * @author Aaron M. Renn ([EMAIL PROTECTED])
  * @author Per Bothner ([EMAIL PROTECTED])
  * @date April 17, 1998.  
  */
 public class OutputStreamWriter extends Writer
 {
   /**
+   * The default buffer size.
+   */
+  private final static int BUFFER_SIZE = 1024;
+
+  /**
    * The output stream.
    */
   private OutputStream out;
 
   /**
    * The charset encoder.
    */
   private CharsetEncoder encoder;
 
   /**
    * java.io canonical name of the encoding.
    */
   private String encodingName;
 
   /**
-   * Buffer output before character conversion as it has costly overhead.
+   * This buffer receives the encoded data and is flushed to the underlying
+   * stream when it gets too full.
    */
-  private CharBuffer outputBuffer;
-  private final static int BUFFER_SIZE = 1024;
+  private ByteBuffer outputBuffer;
+
+  /**
+   * A one-char array to be reused in write(int).
+   */
+  private char[] oneChar = new char[1];
+
+  /**
+   * The last char array that has been passed to write(char[],int,int). This
+   * is used to cache the associated CharBuffer because write(char[],int,int)
+   * is usually called with the same array repeatedly and we don't want to
+   * allocate a new CharBuffer object on each call.
+   */
+  private Object lastArray;
+
+  /**
+   * The cached char buffer.
+   */
+  private CharBuffer lastBuffer;
 
   /**
    * This method initializes a new instance of <code>OutputStreamWriter</code>
    * to write to the specified stream using a caller supplied character
    * encoding scheme.  Note that due to a deficiency in the Java language
    * design, there is no way to determine which encodings are supported.
    *
    * @param out The <code>OutputStream</code> to write to
    * @param encoding_scheme The name of the encoding scheme to use for 
    * character to byte translation
    *
    * @exception UnsupportedEncodingException If the named encoding is 
    * not available.
    */
   public OutputStreamWriter (OutputStream out, String encoding_scheme) 
     throws UnsupportedEncodingException
   {
+    if (out == null || encoding_scheme == null)
+      throw new NullPointerException();
+
     this.out = out;
     try 
       {
-	// Don't use NIO if avoidable
-	if(EncodingHelper.isISOLatin1(encoding_scheme))
-	  {
-	    encodingName = "ISO8859_1";
-	    encoder = null;
-	    return;
-	  }
-
-	/*
-	 * Workraround for encodings with a byte-order-mark.
-	 * We only want to write it once per stream.
-	 */
-	try 
-	  {
-	    if(encoding_scheme.equalsIgnoreCase("UnicodeBig") || 
-	       encoding_scheme.equalsIgnoreCase("UTF-16") ||
-	       encoding_scheme.equalsIgnoreCase("UTF16"))
-	      {
-		encoding_scheme = "UTF-16BE";	  
-		out.write((byte)0xFE);
-		out.write((byte)0xFF);
-	      } 
-	    else if(encoding_scheme.equalsIgnoreCase("UnicodeLittle")){
-	      encoding_scheme = "UTF-16LE";
-	      out.write((byte)0xFF);
-	      out.write((byte)0xFE);
-	    }
-	  }
-	catch(IOException ioe)
-	  {
-	  }
-      
-	outputBuffer = CharBuffer.allocate(BUFFER_SIZE);
-
-	Charset cs = EncodingHelper.getCharset(encoding_scheme);
-	if(cs == null)
-	  throw new UnsupportedEncodingException("Encoding "+encoding_scheme+
-						 " unknown");
-	encoder = cs.newEncoder();
-	encodingName = EncodingHelper.getOldCanonical(cs.name());
+        /*
+         * Workaround for encodings with a byte-order-mark.
+         * We only want to write it once per stream.
+         */
+        try 
+          {
+            if(encoding_scheme.equalsIgnoreCase("UnicodeBig")
+                ||encoding_scheme.equalsIgnoreCase("UTF-16") ||
+                encoding_scheme.equalsIgnoreCase("UTF16"))
+              {
+                encoding_scheme = "UTF-16BE";	  
+                out.write((byte)0xFE);
+                out.write((byte)0xFF);
+              } 
+            else if(encoding_scheme.equalsIgnoreCase("UnicodeLittle")){
+              encoding_scheme = "UTF-16LE";
+              out.write((byte)0xFF);
+              out.write((byte)0xFE);
+            }
+          }
+        catch(IOException ioe)
+          {
+          }
+
+        Charset cs = EncodingHelper.getCharset(encoding_scheme);
+        encoder = cs.newEncoder();
+        encodingName = EncodingHelper.getOldCanonical(cs.name());
 
-	encoder.onMalformedInput(CodingErrorAction.REPLACE);
-	encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
       } 
     catch(RuntimeException e) 
       {
-	// Default to ISO Latin-1, will happen if this is called, for instance,
-	//  before the NIO provider is loadable.
-	encoder = null; 
-	encodingName = "ISO8859_1";
+        // Default to ISO Latin-1, will happen if this is called, for instance,
+        // before the NIO provider is loadable.
+        encoder = null; 
+        encodingName = "ISO8859_1";
       }
+    initEncoderAndBuffer();
   }
 
   /**
    * This method initializes a new instance of <code>OutputStreamWriter</code>
    * to write to the specified stream using the default encoding.
    *
    * @param out The <code>OutputStream</code> to write to
    */
   public OutputStreamWriter (OutputStream out)
   {
     this.out = out;
-    outputBuffer = null;
     try 
       {
-	String encoding = System.getProperty("file.encoding");
-	Charset cs = Charset.forName(encoding);
-	encoder = cs.newEncoder();
-	encodingName =  EncodingHelper.getOldCanonical(cs.name());
+        String encoding = System.getProperty("file.encoding");
+        Charset cs = Charset.forName(encoding);
+        encoder = cs.newEncoder();
+        encodingName =  EncodingHelper.getOldCanonical(cs.name());
       } 
     catch(RuntimeException e) 
       {
-	encoder = null; 
-	encodingName = "ISO8859_1";
+        // For bootstrap problems.
+        encoder = null;
+        encodingName = "ISO8859_1";
       }
 
-    if(encoder != null)
-      {
-	encoder.onMalformedInput(CodingErrorAction.REPLACE);
-	encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
-	outputBuffer = CharBuffer.allocate(BUFFER_SIZE);
-      }
+    initEncoderAndBuffer();
   }
 
   /**
    * This method initializes a new instance of <code>OutputStreamWriter</code>
    * to write to the specified stream using a given <code>Charset</code>.
    *
    * @param out The <code>OutputStream</code> to write to
    * @param cs The <code>Charset</code> of the encoding to use
    * 
    * @since 1.5
    */
   public OutputStreamWriter(OutputStream out, Charset cs)
   {
     this.out = out;
     encoder = cs.newEncoder();
-    encoder.onMalformedInput(CodingErrorAction.REPLACE);
-    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
-    outputBuffer = CharBuffer.allocate(BUFFER_SIZE);
     encodingName = EncodingHelper.getOldCanonical(cs.name());
+    initEncoderAndBuffer();
   }
   
   /**
    * This method initializes a new instance of <code>OutputStreamWriter</code>
    * to write to the specified stream using a given
    * <code>CharsetEncoder</code>.
    *
    * @param out The <code>OutputStream</code> to write to
    * @param enc The <code>CharsetEncoder</code> to encode the output with
    * 
    * @since 1.5
    */
   public OutputStreamWriter(OutputStream out, CharsetEncoder enc)
   {
     this.out = out;
     encoder = enc;
-    outputBuffer = CharBuffer.allocate(BUFFER_SIZE);
     Charset cs = enc.charset();
     if (cs == null)
       encodingName = "US-ASCII";
     else
       encodingName = EncodingHelper.getOldCanonical(cs.name());
+    initEncoderAndBuffer();
+  }
+
+  /**
+   * Initializes the encoder and the output buffer.
+   */
+  private void initEncoderAndBuffer()
+  {
+    if (encoder != null)
+      {
+        encoder.onMalformedInput(CodingErrorAction.REPLACE);
+        encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+      }
+    outputBuffer = ByteBuffer.allocate(BUFFER_SIZE);
   }
 
   /**
    * This method closes this stream, and the underlying 
    * <code>OutputStream</code>
    *
    * @exception IOException If an error occurs
    */
   public void close () throws IOException
   {
     if(out == null)
       return;
     flush();
     out.close ();
     out = null;
@@ -270,135 +287,191 @@
    *
    * @return The encoding scheme name
    */
   public String getEncoding ()
   {
     return out != null ? encodingName : null;
   }
 
   /**
    * This method flushes any buffered bytes to the underlying output sink.
    *
    * @exception IOException If an error occurs
    */
   public void flush () throws IOException
   {
-      if(out != null){	  
-	  if(outputBuffer != null){
-	      char[] buf = new char[outputBuffer.position()];
-	      if(buf.length > 0){
-		  outputBuffer.flip();
-		  outputBuffer.get(buf);
-		  writeConvert(buf, 0, buf.length);
-		  outputBuffer.clear();
-	      }
-	  }
-	  out.flush ();
+    int len = outputBuffer.position();
+    if (len > 0)
+      {
+        outputBuffer.flip();
+        if (outputBuffer.hasArray())
+          {
+            byte[] bytes = outputBuffer.array();
+            int p = outputBuffer.arrayOffset();
+            out.write(bytes, p, len);
+          }
+        else
+          {
+            // Shouldn't happen for normal (non-direct) ByteBuffers.
+            byte[] bytes = new byte[len];
+            outputBuffer.get(bytes);
+            out.write(bytes, 0, len);
+          }
+        outputBuffer.clear();
       }
+    out.flush ();
   }
 
   /**
    * This method writes <code>count</code> characters from the specified
    * array to the output stream starting at position <code>offset</code>
    * into the array.
    *
    * @param buf The array of character to write from
    * @param offset The offset into the array to start writing chars from
    * @param count The number of chars to write.
    *
    * @exception IOException If an error occurs
    */
   public void write (char[] buf, int offset, int count) throws IOException
   {
     if(out == null)
       throw new IOException("Stream is closed.");
     if(buf == null)
       throw new IOException("Buffer is null.");
 
-    if(outputBuffer != null)
-	{
-	    if(count >= outputBuffer.remaining())
-		{
-		    int r = outputBuffer.remaining();
-		    outputBuffer.put(buf, offset, r);
-		    writeConvert(outputBuffer.array(), 0, BUFFER_SIZE);
-		    outputBuffer.clear();
-		    offset += r;
-		    count -= r;
-		    // if the remaining bytes is larger than the whole buffer, 
-		    // just don't buffer.
-		    if(count >= outputBuffer.remaining()){
-                      writeConvert(buf, offset, count);
-		      return;
-		    }
-		}
-	    outputBuffer.put(buf, offset, count);
-	} else writeConvert(buf, offset, count);
-  }
-
- /**
-  * Converts and writes characters.
-  */
-  private void writeConvert (char[] buf, int offset, int count) 
-      throws IOException
-  {
-    if(encoder == null)
-    {
-      byte[] b = new byte[count];
-      for(int i=0;i<count;i++)
-	b[i] = (byte)((buf[offset+i] <= 0xFF)?buf[offset+i]:'?');
-      out.write(b);
-    } else {
-      try  {
-	ByteBuffer output = encoder.encode(CharBuffer.wrap(buf,offset,count));
-	encoder.reset();
-	if(output.hasArray())
-	  out.write(output.array());
-	else
-	  {
-	    byte[] outbytes = new byte[output.remaining()];
-	    output.get(outbytes);
-	    out.write(outbytes);
-	  }
-      } catch(IllegalStateException e) {
-	throw new IOException("Internal error.");
-      } catch(MalformedInputException e) {
-	throw new IOException("Invalid character sequence.");
-      } catch(CharacterCodingException e) {
-	throw new IOException("Unmappable character.");
-      }
-    }
+    CharBuffer charBuffer = getCharBuffer(buf, offset, count);
+    encodeChars(charBuffer);
+    flush();
   }
 
   /**
    * This method writes <code>count</code> bytes from the specified 
    * <code>String</code> starting at position <code>offset</code> into the
    * <code>String</code>.
    *
    * @param str The <code>String</code> to write chars from
    * @param offset The position in the <code>String</code> to start 
    * writing chars from
    * @param count The number of chars to write
    *
    * @exception IOException If an error occurs
    */
   public void write (String str, int offset, int count) throws IOException
   {
-    if(str == null)
-      throw new IOException("String is null.");
+    if (out == null)
+      throw new IOException("Stream is closed.");
+    if (str == null)
+      throw new IOException("Buffer is null.");
 
-    write(str.toCharArray(), offset, count);
+    // Don't call str.toCharArray() here to avoid allocation.
+    // TODO: CharBuffer.wrap(String) should not allocate a char array either.
+    CharBuffer charBuffer = getCharBuffer(str, offset, count);
+    encodeChars(charBuffer);
+    flush();
   }
 
   /**
    * This method writes a single character to the output stream.
    *
    * @param ch The char to write, passed as an int.
    *
    * @exception IOException If an error occurs
    */
   public void write (int ch) throws IOException
   {
-    write(new char[]{ (char)ch }, 0, 1);
+    oneChar[0] = (char) ch;
+    write(oneChar, 0, 1);
+  }
+
+  /**
+   * Encodes the specified buffer of characters. The encoded data is stored
+   * in an intermediate buffer and only flushed when this buffer gets full.
+   *
+   * @param chars the characters to encode
+   *
+   * @throws IOException if something goes wrong on the underlying stream
+   */
+  private void encodeChars(CharBuffer chars)
+    throws IOException
+  {
+    assert out != null;
+    assert encoder != null;
+    int remaining = chars.remaining();
+    while (remaining > 0)
+      {
+        CoderResult cr = encode(chars);
+        remaining = chars.remaining();
+        // Flush when the output buffer has no more space or when the
+        // space is not enough to hold more encoded data (that is, when
+        // the input buffer does not change).
+        if (cr.isOverflow())
+          flush();
+      }
+  }
+
+  /**
+   * Encodes the specified CharBuffer into the output buffer. This takes
+   * care of the rare case when we have no encoder, i.e. bootstrapping
+   * problems.
+   *
+   * @param chars the char buffer to encode
+   */
+  private CoderResult encode(CharBuffer chars)
+  {
+    CoderResult cr;
+    if (encoder != null)
+      {
+        cr = encoder.encode(chars, outputBuffer, false);
+      }
+    else
+      {
+        // For bootstrapping weirdness.
+        // Perform primitive Latin1 encoding.
+        while (chars.hasRemaining() && outputBuffer.hasRemaining())
+          {
+            outputBuffer.put((byte) (chars.get()));
+          }
+        // One of the buffers must be drained.
+        if (! outputBuffer.hasRemaining())
+          cr = CoderResult.OVERFLOW;
+        else
+          cr = CoderResult.UNDERFLOW;
+      }
+    return cr;
+  }
+
+  /**
+   * Returns a CharBuffer that wraps the specified char array or String.
+   * A cached instance is reused when possible, because write() is usually
+   * called repeatedly with the same array (the single-char write always
+   * uses the oneChar field). Note that CharBuffer.wrap(CharSequence, int,
+   * int) takes an end index, while the char[] variant takes a length.
+   *
+   * @param buf the char array or String to wrap
+   * @param offset the offset of the first char to encode
+   * @param length the number of chars to encode
+   *
+   * @return a prepared CharBuffer to read the chars from
+   */
+  private final CharBuffer getCharBuffer(Object buf, int offset, int length)
+  {
+    CharBuffer outBuffer;
+    if (lastArray == buf)
+      {
+        outBuffer = lastBuffer;
+        outBuffer.limit(offset + length); // first: offset may be > old limit
+        outBuffer.position(offset);
+      }
+    else
+      {
+        lastArray = buf;
+        if (buf instanceof String)
+          lastBuffer = CharBuffer.wrap((String) buf, offset, offset + length);
+        else
+          lastBuffer = CharBuffer.wrap((char[]) buf, offset, length);
+        outBuffer = lastBuffer;
+      }
+    return outBuffer;
   }
 } // class OutputStreamWriter

Re: [cp-patches] RFC: InputStreamReader and OutputStreamWriter revisited

Reply via email to