This is an automated email from the ASF dual-hosted git repository. bodewig pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/ant.git
The following commit(s) were added to refs/heads/master by this push: new 569e428 replace our ReaderInputStream with the one of Commons IO 569e428 is described below commit 569e4288501c69398605707d3a76eaf5b9354361 Author: Stefan Bodewig <bode...@apache.org> AuthorDate: Sat Aug 22 21:07:32 2020 +0200 replace our ReaderInputStream with the one of Commons IO https://bz.apache.org/bugzilla/show_bug.cgi?id=40455 Suggested by Vladimir Sitnikov --- WHATSNEW | 5 + .../apache/tools/ant/util/ReaderInputStream.java | 253 +++++++++++---------- .../tools/ant/util/ReaderInputStreamTest.java | 122 ++++++++++ 3 files changed, 255 insertions(+), 125 deletions(-) diff --git a/WHATSNEW b/WHATSNEW index 96920b7..0dfcdaf 100644 --- a/WHATSNEW +++ b/WHATSNEW @@ -10,6 +10,11 @@ Fixed bugs: * propertyset now also sees in-scope local properties Bugzilla Report 50179 + * replaced our version of ReaderInputStream with the battle-tested + version of Apache Commons IO as our version had problems with + surrogate pairs (and likely other edge cases as well). + Bugzilla Report 40455 + Changes from Ant 1.10.7 TO Ant 1.10.8 ===================================== diff --git a/src/main/org/apache/tools/ant/util/ReaderInputStream.java b/src/main/org/apache/tools/ant/util/ReaderInputStream.java index 610351a..ff2eb63 100644 --- a/src/main/org/apache/tools/ant/util/ReaderInputStream.java +++ b/src/main/org/apache/tools/ant/util/ReaderInputStream.java @@ -20,24 +20,68 @@ package org.apache.tools.ant.util; import java.io.IOException; import java.io.InputStream; import java.io.Reader; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; +import java.util.Objects; /** * Adapts a <code>Reader</code> as an <code>InputStream</code>. - * Adapted from <code>StringInputStream</code>. - * + * <p>This is a stripped down version of {@code org.apache.commons.io.input.ReaderInputStream} of Apache Commons IO 2.7.</p> */ public class ReaderInputStream extends InputStream { - private static final int BYTE_MASK = 0xFF; + private static final int EOF = -1; + private static final int DEFAULT_BUFFER_SIZE = 1024; + + private final Reader reader; + private final CharsetEncoder encoder; + + /** + * CharBuffer used as input for the decoder. It should be reasonably + * large as we read data from the underlying Reader into this buffer. + */ + private final CharBuffer encoderIn; - /** Source Reader */ - private Reader in; + /** + * ByteBuffer used as output for the decoder. This buffer can be small + * as it is only used to transfer data from the decoder to the + * buffer provided by the caller. + */ + private final ByteBuffer encoderOut; - private String encoding = System.getProperty("file.encoding"); + private CoderResult lastCoderResult; + private boolean endOfInput; - private byte[] slack; + /** + * Construct a new {@link ReaderInputStream}. + * + * @param reader the target {@link Reader} + * @param encoder the charset encoder + * @since 1.10.9 + */ + public ReaderInputStream(final Reader reader, final CharsetEncoder encoder) { + this(reader, encoder, DEFAULT_BUFFER_SIZE); + } - private int begin; + /** + * Construct a new {@link ReaderInputStream}. + * + * @param reader the target {@link Reader} + * @param encoder the charset encoder + * @param bufferSize the size of the input buffer in number of characters + * @since 1.10.9 + */ + public ReaderInputStream(final Reader reader, final CharsetEncoder encoder, final int bufferSize) { + this.reader = reader; + this.encoder = encoder; + this.encoderIn = CharBuffer.allocate(bufferSize); + this.encoderIn.flip(); + this.encoderOut = ByteBuffer.allocate(128); + this.encoderOut.flip(); + } /** * Construct a <code>ReaderInputStream</code> @@ -46,7 +90,7 @@ public class ReaderInputStream extends InputStream { * @param reader <code>Reader</code>. Must not be <code>null</code>. */ public ReaderInputStream(Reader reader) { - in = reader; + this(reader, Charset.defaultCharset()); } /** @@ -58,11 +102,7 @@ public class ReaderInputStream extends InputStream { * @param encoding non-null <code>String</code> encoding. */ public ReaderInputStream(Reader reader, String encoding) { - this(reader); - if (encoding == null) { - throw new IllegalArgumentException("encoding must not be null"); - } - this.encoding = encoding; + this(reader, Charset.forName(encoding)); } /** @@ -75,153 +115,116 @@ public class ReaderInputStream extends InputStream { * @since Ant 1.10.6 */ public ReaderInputStream(Reader reader, Charset charset) { - this(reader); - if (charset == null) { - throw new IllegalArgumentException("encoding must not be null"); - } - this.encoding = charset.name(); + this(reader, + charset.newEncoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE)); } /** - * Reads from the <code>Reader</code>, returning the same value. - * - * @return the value of the next character in the <code>Reader</code>. + * Fills the internal char buffer from the reader. * - * @exception IOException if the original <code>Reader</code> fails to be read + * @throws IOException + * If an I/O error occurs */ - @Override - public synchronized int read() throws IOException { - if (in == null) { - throw new IOException("Stream Closed"); - } - - byte result; - if (slack != null && begin < slack.length) { - result = slack[begin]; - if (++begin == slack.length) { - slack = null; - } - } else { - byte[] buf = new byte[1]; - if (read(buf, 0, 1) <= 0) { - return -1; + private void fillBuffer() throws IOException { + if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) { + encoderIn.compact(); + final int position = encoderIn.position(); + // We don't use Reader#read(CharBuffer) here because it is more efficient + // to write directly to the underlying char array (the default implementation + // copies data to a temporary char array). + final int c = reader.read(encoderIn.array(), position, encoderIn.remaining()); + if (c == EOF) { + endOfInput = true; } else { - result = buf[0]; + encoderIn.position(position+c); } + encoderIn.flip(); } - return result & BYTE_MASK; + encoderOut.compact(); + lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput); + encoderOut.flip(); } /** - * Reads from the <code>Reader</code> into a byte array + * Read the specified number of bytes into an array. * - * @param b the byte array to read into - * @param off the offset in the byte array - * @param len the length in the byte array to fill - * @return the actual number read into the byte array, -1 at - * the end of the stream - * @exception IOException if an error occurs + * @param array the byte array to read into + * @param off the offset to start reading bytes into + * @param len the number of bytes to read + * @return the number of bytes read or <code>-1</code> + * if the end of the stream has been reached + * @throws IOException if an I/O error occurs */ @Override - public synchronized int read(byte[] b, int off, int len) throws IOException { - if (in == null) { - throw new IOException("Stream Closed"); + public int read(final byte[] array, int off, int len) throws IOException { + Objects.requireNonNull(array, "array"); + if (len < 0 || off < 0 || (off + len) > array.length) { + throw new IndexOutOfBoundsException("Array Size=" + array.length + + ", offset=" + off + ", length=" + len); } + int read = 0; if (len == 0) { - return 0; + return 0; // Always return 0 if len == 0 } - while (slack == null) { - char[] buf = new char[len]; // might read too much - int n = in.read(buf); - if (n == -1) { - return -1; - } - if (n > 0) { - slack = new String(buf, 0, n).getBytes(encoding); - begin = 0; + while (len > 0) { + if (encoderOut.hasRemaining()) { + final int c = Math.min(encoderOut.remaining(), len); + encoderOut.get(array, off, c); + off += c; + len -= c; + read += c; + } else { + fillBuffer(); + if (endOfInput && !encoderOut.hasRemaining()) { + break; + } } } - - if (len > slack.length - begin) { - len = slack.length - begin; - } - - System.arraycopy(slack, begin, b, off, len); - - begin += len; - if (begin >= slack.length) { - slack = null; - } - - return len; + return read == 0 && endOfInput ? EOF : read; } /** - * Marks the read limit of the Reader. + * Read the specified number of bytes into an array. * - * @param limit the maximum limit of bytes that can be read before the - * mark position becomes invalid - */ - @Override - public synchronized void mark(final int limit) { - try { - in.mark(limit); - } catch (IOException ioe) { - throw new RuntimeException(ioe.getMessage()); //NOSONAR - } - } - - /** - * @return the current number of bytes ready for reading - * @exception IOException if an error occurs + * @param b the byte array to read into + * @return the number of bytes read or <code>-1</code> + * if the end of the stream has been reached + * @throws IOException if an I/O error occurs */ @Override - public synchronized int available() throws IOException { - if (in == null) { - throw new IOException("Stream Closed"); - } - if (slack != null) { - return slack.length - begin; - } - if (in.ready()) { - return 1; - } - return 0; - } - - /** - * @return false - mark is not supported - */ - @Override - public boolean markSupported() { - return false; // would be imprecise + public int read(final byte[] b) throws IOException { + return read(b, 0, b.length); } /** - * Resets the Reader. + * Read a single byte. * - * @exception IOException if the Reader fails to be reset + * @return either the byte read or <code>-1</code> if the end of the stream + * has been reached + * @throws IOException if an I/O error occurs */ @Override - public synchronized void reset() throws IOException { - if (in == null) { - throw new IOException("Stream Closed"); + public int read() throws IOException { + for (;;) { + if (encoderOut.hasRemaining()) { + return encoderOut.get() & 0xFF; + } + fillBuffer(); + if (endOfInput && !encoderOut.hasRemaining()) { + return EOF; + } } - slack = null; - in.reset(); } /** - * Closes the Reader. - * - * @exception IOException if the original Reader fails to be closed + * Close the stream. This method will cause the underlying {@link Reader} + * to be closed. + * @throws IOException if an I/O error occurs */ @Override - public synchronized void close() throws IOException { - if (in != null) { - in.close(); - slack = null; - in = null; - } + public void close() throws IOException { + reader.close(); } } diff --git a/src/tests/junit/org/apache/tools/ant/util/ReaderInputStreamTest.java b/src/tests/junit/org/apache/tools/ant/util/ReaderInputStreamTest.java index f6fd84d..6844325 100644 --- a/src/tests/junit/org/apache/tools/ant/util/ReaderInputStreamTest.java +++ b/src/tests/junit/org/apache/tools/ant/util/ReaderInputStreamTest.java @@ -21,13 +21,18 @@ import org.apache.tools.ant.MagicTestNames; import org.junit.Test; import java.io.ByteArrayOutputStream; +import java.io.CharArrayReader; import java.io.File; import java.io.FileInputStream; +import java.io.IOException;; import java.io.InputStreamReader; import java.io.StringReader; +import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.util.Random; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * Test for ReaderInputStream @@ -132,4 +137,121 @@ public class ReaderInputStreamTest { assertEquals("Mismatch in ReaderInputStream - EOF not seen for string " + s + " with encoding " + encoding, -1, r.read()); } + + private static final String TEST_STRING = "\u00e0 peine arriv\u00e9s nous entr\u00e2mes dans sa chambre"; + private static final String LARGE_TEST_STRING; + + static { + final StringBuilder buffer = new StringBuilder(); + for (int i=0; i<100; i++) { + buffer.append(TEST_STRING); + } + LARGE_TEST_STRING = buffer.toString(); + } + + private final Random random = new Random(); + + private void testWithSingleByteRead(final String testString, final String charsetName) throws IOException { + final byte[] bytes = testString.getBytes(charsetName); + final ReaderInputStream in = new ReaderInputStream(new StringReader(testString), charsetName); + for (final byte b : bytes) { + final int read = in.read(); + assertTrue(read >= 0); + assertTrue(read <= 255); + assertEquals(b, (byte)read); + } + assertEquals(-1, in.read()); + in.close(); + } + + private void testWithBufferedRead(final String testString, final String charsetName) throws IOException { + final byte[] expected = testString.getBytes(charsetName); + final ReaderInputStream in = new ReaderInputStream(new StringReader(testString), charsetName); + final byte[] buffer = new byte[128]; + int offset = 0; + while (true) { + int bufferOffset = random.nextInt(64); + final int bufferLength = random.nextInt(64); + int read = in.read(buffer, bufferOffset, bufferLength); + if (read == -1) { + assertEquals(offset, expected.length); + break; + } + assertTrue(read <= bufferLength); + while (read > 0) { + assertTrue(offset < expected.length); + assertEquals(expected[offset], buffer[bufferOffset]); + offset++; + bufferOffset++; + read--; + } + } + in.close(); + } + + @Test + public void testUTF8WithSingleByteRead() throws IOException { + testWithSingleByteRead(TEST_STRING, "UTF-8"); + } + + @Test + public void testLargeUTF8WithSingleByteRead() throws IOException { + testWithSingleByteRead(LARGE_TEST_STRING, "UTF-8"); + } + + @Test + public void testUTF8WithBufferedRead() throws IOException { + testWithBufferedRead(TEST_STRING, "UTF-8"); + } + + @Test + public void testLargeUTF8WithBufferedRead() throws IOException { + testWithBufferedRead(LARGE_TEST_STRING, "UTF-8"); + } + + @Test + public void testUTF16WithSingleByteRead() throws IOException { + testWithSingleByteRead(TEST_STRING, "UTF-16"); + } + + @SuppressWarnings("deprecation") + @Test + public void testReadZeroCommonsIo() throws Exception { + final String inStr = "test"; + final ReaderInputStream r = new ReaderInputStream(new StringReader(inStr)); + final byte[] bytes = new byte[30]; + assertEquals(0, r.read(bytes, 0, 0)); + assertEquals(inStr.length(), r.read(bytes, 0, inStr.length()+1)); + // Should always return 0 for length == 0 + assertEquals(0, r.read(bytes, 0, 0)); + r.close(); + } + + @SuppressWarnings("deprecation") + @Test + public void testReadZeroEmptyString() throws Exception { + final ReaderInputStream r = new ReaderInputStream(new StringReader("")); + final byte[] bytes = new byte[30]; + // Should always return 0 for length == 0 + assertEquals(0, r.read(bytes, 0, 0)); + assertEquals(-1, r.read(bytes, 0, 1)); + assertEquals(0, r.read(bytes, 0, 0)); + assertEquals(-1, r.read(bytes, 0, 1)); + r.close(); + } + + /* + * Tests https://issues.apache.org/jira/browse/IO-277 + */ + @Test + public void testCharsetMismatchInfiniteLoop() throws IOException { + // Input is UTF-8 bytes: 0xE0 0xB2 0xA0 + final char[] inputChars = new char[] { (char) 0xE0, (char) 0xB2, (char) 0xA0 }; + // Charset charset = Charset.forName("UTF-8"); // works + final Charset charset = Charset.forName("ASCII"); // infinite loop + try (ReaderInputStream stream = new ReaderInputStream(new CharArrayReader(inputChars), charset)) { + while (stream.read() != -1) { + } + } + } }