This is an automated email from the ASF dual-hosted git repository.

bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ant.git


The following commit(s) were added to refs/heads/master by this push:
     new 569e428  replace our ReaderInputStream with the one of Commons IO
569e428 is described below

commit 569e4288501c69398605707d3a76eaf5b9354361
Author: Stefan Bodewig <bode...@apache.org>
AuthorDate: Sat Aug 22 21:07:32 2020 +0200

    replace our ReaderInputStream with the one of Commons IO
    
    https://bz.apache.org/bugzilla/show_bug.cgi?id=40455
    
    Suggested by Vladimir Sitnikov
---
 WHATSNEW                                           |   5 +
 .../apache/tools/ant/util/ReaderInputStream.java   | 253 +++++++++++----------
 .../tools/ant/util/ReaderInputStreamTest.java      | 122 ++++++++++
 3 files changed, 255 insertions(+), 125 deletions(-)

diff --git a/WHATSNEW b/WHATSNEW
index 96920b7..0dfcdaf 100644
--- a/WHATSNEW
+++ b/WHATSNEW
@@ -10,6 +10,11 @@ Fixed bugs:
  * propertyset now also sees in-scope local properties
    Bugzilla Report 50179
 
+ * replaced our version of ReaderInputStream with the battle-tested
+   version of Apache Commons IO as our version had problems with
+   surrogate pairs (and likely other edge cases as well).
+   Bugzilla Report 40455
+
 Changes from Ant 1.10.7 TO Ant 1.10.8
 =====================================
 
diff --git a/src/main/org/apache/tools/ant/util/ReaderInputStream.java 
b/src/main/org/apache/tools/ant/util/ReaderInputStream.java
index 610351a..ff2eb63 100644
--- a/src/main/org/apache/tools/ant/util/ReaderInputStream.java
+++ b/src/main/org/apache/tools/ant/util/ReaderInputStream.java
@@ -20,24 +20,68 @@ package org.apache.tools.ant.util;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
 import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+import java.util.Objects;
 
 /**
  * Adapts a <code>Reader</code> as an <code>InputStream</code>.
- * Adapted from <code>StringInputStream</code>.
- *
+ * <p>This is a stripped down version of {@code 
org.apache.commons.io.input.ReaderInputStream} of Apache Commons IO 2.7.</p>
  */
 public class ReaderInputStream extends InputStream {
-    private static final int BYTE_MASK = 0xFF;
+    private static final int EOF = -1;
+    private static final int DEFAULT_BUFFER_SIZE = 1024;
+
+    private final Reader reader;
+    private final CharsetEncoder encoder;
+
+    /**
+     * CharBuffer used as input for the decoder. It should be reasonably
+     * large as we read data from the underlying Reader into this buffer.
+     */
+    private final CharBuffer encoderIn;
 
-    /** Source Reader */
-    private Reader in;
+    /**
+     * ByteBuffer used as output for the decoder. This buffer can be small
+     * as it is only used to transfer data from the decoder to the
+     * buffer provided by the caller.
+     */
+    private final ByteBuffer encoderOut;
 
-    private String encoding = System.getProperty("file.encoding");
+    private CoderResult lastCoderResult;
+    private boolean endOfInput;
 
-    private byte[] slack;
+    /**
+     * Construct a new {@link ReaderInputStream}.
+     *
+     * @param reader the target {@link Reader}
+     * @param encoder the charset encoder
+     * @since 1.10.9
+     */
+    public ReaderInputStream(final Reader reader, final CharsetEncoder 
encoder) {
+        this(reader, encoder, DEFAULT_BUFFER_SIZE);
+    }
 
-    private int begin;
+    /**
+     * Construct a new {@link ReaderInputStream}.
+     *
+     * @param reader the target {@link Reader}
+     * @param encoder the charset encoder
+     * @param bufferSize the size of the input buffer in number of characters
+     * @since 1.10.9
+     */
+    public ReaderInputStream(final Reader reader, final CharsetEncoder 
encoder, final int bufferSize) {
+        this.reader = reader;
+        this.encoder = encoder;
+        this.encoderIn = CharBuffer.allocate(bufferSize);
+        this.encoderIn.flip();
+        this.encoderOut = ByteBuffer.allocate(128);
+        this.encoderOut.flip();
+    }
 
     /**
      * Construct a <code>ReaderInputStream</code>
@@ -46,7 +90,7 @@ public class ReaderInputStream extends InputStream {
      * @param reader   <code>Reader</code>.  Must not be <code>null</code>.
      */
     public ReaderInputStream(Reader reader) {
-        in = reader;
+        this(reader, Charset.defaultCharset());
     }
 
     /**
@@ -58,11 +102,7 @@ public class ReaderInputStream extends InputStream {
      * @param encoding   non-null <code>String</code> encoding.
      */
     public ReaderInputStream(Reader reader, String encoding) {
-        this(reader);
-        if (encoding == null) {
-            throw new IllegalArgumentException("encoding must not be null");
-        }
-        this.encoding = encoding;
+        this(reader, Charset.forName(encoding));
     }
 
     /**
@@ -75,153 +115,116 @@ public class ReaderInputStream extends InputStream {
      * @since Ant 1.10.6
      */
     public ReaderInputStream(Reader reader, Charset charset) {
-        this(reader);
-        if (charset == null) {
-            throw new IllegalArgumentException("encoding must not be null");
-        }
-        this.encoding = charset.name();
+        this(reader,
+             charset.newEncoder()
+                 .onMalformedInput(CodingErrorAction.REPLACE)
+                 .onUnmappableCharacter(CodingErrorAction.REPLACE));
     }
 
     /**
-     * Reads from the <code>Reader</code>, returning the same value.
-     *
-     * @return the value of the next character in the <code>Reader</code>.
+     * Fills the internal char buffer from the reader.
      *
-     * @exception IOException if the original <code>Reader</code> fails to be 
read
+     * @throws IOException
+     *             If an I/O error occurs
      */
-    @Override
-    public synchronized int read() throws IOException {
-        if (in == null) {
-            throw new IOException("Stream Closed");
-        }
-
-        byte result;
-        if (slack != null && begin < slack.length) {
-            result = slack[begin];
-            if (++begin == slack.length) {
-                slack = null;
-            }
-        } else {
-            byte[] buf = new byte[1];
-            if (read(buf, 0, 1) <= 0) {
-                return -1;
+    private void fillBuffer() throws IOException {
+        if (!endOfInput && (lastCoderResult == null || 
lastCoderResult.isUnderflow())) {
+            encoderIn.compact();
+            final int position = encoderIn.position();
+            // We don't use Reader#read(CharBuffer) here because it is more 
efficient
+            // to write directly to the underlying char array (the default 
implementation
+            // copies data to a temporary char array).
+            final int c = reader.read(encoderIn.array(), position, 
encoderIn.remaining());
+            if (c == EOF) {
+                endOfInput = true;
             } else {
-                result = buf[0];
+                encoderIn.position(position+c);
             }
+            encoderIn.flip();
         }
-        return result & BYTE_MASK;
+        encoderOut.compact();
+        lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
+        encoderOut.flip();
     }
 
     /**
-     * Reads from the <code>Reader</code> into a byte array
+     * Read the specified number of bytes into an array.
      *
-     * @param b  the byte array to read into
-     * @param off the offset in the byte array
-     * @param len the length in the byte array to fill
-     * @return the actual number read into the byte array, -1 at
-     *         the end of the stream
-     * @exception IOException if an error occurs
+     * @param array the byte array to read into
+     * @param off the offset to start reading bytes into
+     * @param len the number of bytes to read
+     * @return the number of bytes read or <code>-1</code>
+     *         if the end of the stream has been reached
+     * @throws IOException if an I/O error occurs
      */
     @Override
-    public synchronized int read(byte[] b, int off, int len) throws 
IOException {
-        if (in == null) {
-            throw new IOException("Stream Closed");
+    public int read(final byte[] array, int off, int len) throws IOException {
+        Objects.requireNonNull(array, "array");
+        if (len < 0 || off < 0 || (off + len) > array.length) {
+            throw new IndexOutOfBoundsException("Array Size=" + array.length +
+                    ", offset=" + off + ", length=" + len);
         }
+        int read = 0;
         if (len == 0) {
-            return 0;
+            return 0; // Always return 0 if len == 0
         }
-        while (slack == null) {
-            char[] buf = new char[len]; // might read too much
-            int n = in.read(buf);
-            if (n == -1) {
-                return -1;
-            }
-            if (n > 0) {
-                slack = new String(buf, 0, n).getBytes(encoding);
-                begin = 0;
+        while (len > 0) {
+            if (encoderOut.hasRemaining()) {
+                final int c = Math.min(encoderOut.remaining(), len);
+                encoderOut.get(array, off, c);
+                off += c;
+                len -= c;
+                read += c;
+            } else {
+                fillBuffer();
+                if (endOfInput && !encoderOut.hasRemaining()) {
+                    break;
+                }
             }
         }
-
-        if (len > slack.length - begin) {
-            len = slack.length - begin;
-        }
-
-        System.arraycopy(slack, begin, b, off, len);
-
-        begin += len;
-        if (begin >= slack.length) {
-            slack = null;
-        }
-
-        return len;
+        return read == 0 && endOfInput ? EOF : read;
     }
 
     /**
-     * Marks the read limit of the Reader.
+     * Read the specified number of bytes into an array.
      *
-     * @param limit the maximum limit of bytes that can be read before the
-     *              mark position becomes invalid
-     */
-    @Override
-    public synchronized void mark(final int limit) {
-        try {
-            in.mark(limit);
-        } catch (IOException ioe) {
-            throw new RuntimeException(ioe.getMessage()); //NOSONAR
-        }
-    }
-
-    /**
-     * @return   the current number of bytes ready for reading
-     * @exception IOException if an error occurs
+     * @param b the byte array to read into
+     * @return the number of bytes read or <code>-1</code>
+     *         if the end of the stream has been reached
+     * @throws IOException if an I/O error occurs
      */
     @Override
-    public synchronized int available() throws IOException {
-        if (in == null) {
-            throw new IOException("Stream Closed");
-        }
-        if (slack != null) {
-            return slack.length - begin;
-        }
-        if (in.ready()) {
-            return 1;
-        }
-        return 0;
-    }
-
-    /**
-     * @return false - mark is not supported
-     */
-    @Override
-    public boolean markSupported() {
-        return false;   // would be imprecise
+    public int read(final byte[] b) throws IOException {
+        return read(b, 0, b.length);
     }
 
     /**
-     * Resets the Reader.
+     * Read a single byte.
      *
-     * @exception IOException if the Reader fails to be reset
+     * @return either the byte read or <code>-1</code> if the end of the stream
+     *         has been reached
+     * @throws IOException if an I/O error occurs
      */
     @Override
-    public synchronized void reset() throws IOException {
-        if (in == null) {
-            throw new IOException("Stream Closed");
+    public int read() throws IOException {
+        for (;;) {
+            if (encoderOut.hasRemaining()) {
+                return encoderOut.get() & 0xFF;
+            }
+            fillBuffer();
+            if (endOfInput && !encoderOut.hasRemaining()) {
+                return EOF;
+            }
         }
-        slack = null;
-        in.reset();
     }
 
     /**
-     * Closes the Reader.
-     *
-     * @exception IOException if the original Reader fails to be closed
+     * Close the stream. This method will cause the underlying {@link Reader}
+     * to be closed.
+     * @throws IOException if an I/O error occurs
      */
     @Override
-    public synchronized void close() throws IOException {
-        if (in != null) {
-            in.close();
-            slack = null;
-            in = null;
-        }
+    public void close() throws IOException {
+        reader.close();
     }
 }
diff --git 
a/src/tests/junit/org/apache/tools/ant/util/ReaderInputStreamTest.java 
b/src/tests/junit/org/apache/tools/ant/util/ReaderInputStreamTest.java
index f6fd84d..6844325 100644
--- a/src/tests/junit/org/apache/tools/ant/util/ReaderInputStreamTest.java
+++ b/src/tests/junit/org/apache/tools/ant/util/ReaderInputStreamTest.java
@@ -21,13 +21,18 @@ import org.apache.tools.ant.MagicTestNames;
 import org.junit.Test;
 
 import java.io.ByteArrayOutputStream;
+import java.io.CharArrayReader;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.IOException;;
 import java.io.InputStreamReader;
 import java.io.StringReader;
+import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
+import java.util.Random;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 /**
  * Test for ReaderInputStream
@@ -132,4 +137,121 @@ public class ReaderInputStreamTest {
         assertEquals("Mismatch in ReaderInputStream - EOF not seen for string "
                 + s + " with encoding " + encoding, -1, r.read());
     }
+
+    private static final String TEST_STRING = "\u00e0 peine arriv\u00e9s nous 
entr\u00e2mes dans sa chambre";
+    private static final String LARGE_TEST_STRING;
+
+    static {
+        final StringBuilder buffer = new StringBuilder();
+        for (int i=0; i<100; i++) {
+            buffer.append(TEST_STRING);
+        }
+        LARGE_TEST_STRING = buffer.toString();
+    }
+
+    private final Random random = new Random();
+
+    private void testWithSingleByteRead(final String testString, final String 
charsetName) throws IOException {
+        final byte[] bytes = testString.getBytes(charsetName);
+        final ReaderInputStream in = new ReaderInputStream(new 
StringReader(testString), charsetName);
+        for (final byte b : bytes) {
+            final int read = in.read();
+            assertTrue(read >= 0);
+            assertTrue(read <= 255);
+            assertEquals(b, (byte)read);
+        }
+        assertEquals(-1, in.read());
+        in.close();
+    }
+
+    private void testWithBufferedRead(final String testString, final String 
charsetName) throws IOException {
+        final byte[] expected = testString.getBytes(charsetName);
+        final ReaderInputStream in = new ReaderInputStream(new 
StringReader(testString), charsetName);
+        final byte[] buffer = new byte[128];
+        int offset = 0;
+        while (true) {
+            int bufferOffset = random.nextInt(64);
+            final int bufferLength = random.nextInt(64);
+            int read = in.read(buffer, bufferOffset, bufferLength);
+            if (read == -1) {
+                assertEquals(offset, expected.length);
+                break;
+            }
+            assertTrue(read <= bufferLength);
+            while (read > 0) {
+                assertTrue(offset < expected.length);
+                assertEquals(expected[offset], buffer[bufferOffset]);
+                offset++;
+                bufferOffset++;
+                read--;
+            }
+        }
+        in.close();
+    }
+
+    @Test
+    public void testUTF8WithSingleByteRead() throws IOException {
+        testWithSingleByteRead(TEST_STRING, "UTF-8");
+    }
+
+    @Test
+    public void testLargeUTF8WithSingleByteRead() throws IOException {
+        testWithSingleByteRead(LARGE_TEST_STRING, "UTF-8");
+    }
+
+    @Test
+    public void testUTF8WithBufferedRead() throws IOException {
+        testWithBufferedRead(TEST_STRING, "UTF-8");
+    }
+
+    @Test
+    public void testLargeUTF8WithBufferedRead() throws IOException {
+        testWithBufferedRead(LARGE_TEST_STRING, "UTF-8");
+    }
+
+    @Test
+    public void testUTF16WithSingleByteRead() throws IOException {
+        testWithSingleByteRead(TEST_STRING, "UTF-16");
+    }
+
+    @SuppressWarnings("deprecation")
+    @Test
+    public void testReadZeroCommonsIo() throws Exception {
+        final String inStr = "test";
+        final ReaderInputStream r = new ReaderInputStream(new 
StringReader(inStr));
+        final byte[] bytes = new byte[30];
+        assertEquals(0, r.read(bytes, 0, 0));
+        assertEquals(inStr.length(), r.read(bytes, 0, inStr.length()+1));
+        // Should always return 0 for length == 0
+        assertEquals(0, r.read(bytes, 0, 0));
+        r.close();
+    }
+
+    @SuppressWarnings("deprecation")
+    @Test
+    public void testReadZeroEmptyString() throws Exception {
+        final ReaderInputStream r = new ReaderInputStream(new 
StringReader(""));
+        final byte[] bytes = new byte[30];
+        // Should always return 0 for length == 0
+        assertEquals(0, r.read(bytes, 0, 0));
+        assertEquals(-1, r.read(bytes, 0, 1));
+        assertEquals(0, r.read(bytes, 0, 0));
+        assertEquals(-1, r.read(bytes, 0, 1));
+        r.close();
+    }
+
+    /*
+     * Tests https://issues.apache.org/jira/browse/IO-277
+     */
+    @Test
+    public void testCharsetMismatchInfiniteLoop() throws IOException {
+        // Input is UTF-8 bytes: 0xE0 0xB2 0xA0
+        final char[] inputChars = new char[] { (char) 0xE0, (char) 0xB2, 
(char) 0xA0 };
+        // Charset charset = Charset.forName("UTF-8"); // works
+        final Charset charset = Charset.forName("ASCII"); // infinite loop
+        try (ReaderInputStream stream = new ReaderInputStream(new 
CharArrayReader(inputChars), charset)) {
+            while (stream.read() != -1) {
+            }
+        }
+    }
 }

Reply via email to