Author: nextgens
Date: 2008-08-15 18:31:36 +0000 (Fri, 15 Aug 2008)
New Revision: 21912
Added:
trunk/freenet/test/freenet/support/io/
trunk/freenet/test/freenet/support/io/LineReadingInputStreamTest.java
Modified:
trunk/freenet/src/freenet/support/io/LineReadingInputStream.java
Log:
Commit an improved LineReadingInputStream (profiling has shown that it was a
bottleneck) and its unit test
Modified: trunk/freenet/src/freenet/support/io/LineReadingInputStream.java
===================================================================
--- trunk/freenet/src/freenet/support/io/LineReadingInputStream.java
2008-08-15 16:38:33 UTC (rev 21911)
+++ trunk/freenet/src/freenet/support/io/LineReadingInputStream.java
2008-08-15 18:31:36 UTC (rev 21912)
@@ -16,8 +16,6 @@
super(in);
}
- private byte[] buf;
-
/**
* Read a \n or \r\n terminated line of UTF-8 or ISO-8859-1.
* @param maxLength The maximum length of a line. If a line is longer
than this, we throw IOException rather
@@ -28,29 +26,73 @@
public String readLine(int maxLength, int bufferSize, boolean utf)
throws IOException {
if(maxLength < bufferSize)
bufferSize = maxLength + 1; // Buffer too big, shrink
it (add 1 for the optional \r)
- if(buf == null)
- buf = new byte[Math.max(Math.min(128,maxLength),
Math.min(1024, bufferSize))];
+
+ if(!markSupported())
+ return readLineWithoutMarking(maxLength, bufferSize,
utf);
+
+ byte[] buf = new byte[Math.max(Math.min(128, maxLength),
Math.min(1024, bufferSize))];
int ctr = 0;
while(true) {
+ mark(maxLength);
+ int x = read(buf, ctr, bufferSize);
+ if(x == -1) {
+ if(ctr == 0)
+ return null;
+ return new String(buf, 0, ctr, utf ? "UTF-8" :
"ISO-8859-1");
+ }
+ // REDFLAG this is definitely safe with the above
charsets, it may not be safe with some wierd ones.
+ for(; ctr < buf.length; ctr++) {
+ if(ctr >= maxLength)
+ throw new TooLongException();
+ if(buf[ctr] == '\n') {
+ boolean removeCR = false;
+ String toReturn = "";
+ if(ctr != 0) {
+ if(buf[ctr - 1] == '\r') {
+ ctr--;
+ removeCR = true;
+ }
+ toReturn = new String(buf, 0,
ctr, utf ? "UTF-8" : "ISO-8859-1");
+ }
+ reset();
+ skip(ctr + (removeCR ? 2 : 1));
+ return toReturn;
+ }
+ }
+ byte[] newBuf = new byte[Math.min(buf.length * 2,
maxLength)];
+ System.arraycopy(buf, 0, newBuf, 0, buf.length);
+ buf = newBuf;
+ }
+ }
+
+ public String readLineWithoutMarking(int maxLength, int bufferSize,
boolean utf) throws IOException {
+ if(maxLength < bufferSize)
+ bufferSize = maxLength + 1; // Buffer too big, shrink
it (add 1 for the optional \r)
+ byte[] buf = new byte[Math.max(Math.min(128, maxLength),
Math.min(1024, bufferSize))];
+ int ctr = 0;
+ while(true) {
int x = read();
if(x == -1) {
- if(ctr == 0) return null;
+ if(ctr == 0)
+ return null;
return new String(buf, 0, ctr, utf ? "UTF-8" :
"ISO-8859-1");
}
// REDFLAG this is definitely safe with the above
charsets, it may not be safe with some wierd ones.
if(x == '\n') {
- if(ctr == 0) return "";
- if(buf[ctr-1] == '\r') ctr--;
+ if(ctr == 0)
+ return "";
+ if(buf[ctr - 1] == '\r')
+ ctr--;
return new String(buf, 0, ctr, utf ? "UTF-8" :
"ISO-8859-1");
}
- if(ctr >= maxLength) throw new TooLongException();
+ if(ctr >= maxLength)
+ throw new TooLongException();
if(ctr >= buf.length) {
byte[] newBuf = new byte[Math.min(buf.length *
2, maxLength)];
System.arraycopy(buf, 0, newBuf, 0, buf.length);
buf = newBuf;
}
- buf[ctr++] = (byte)x;
+ buf[ctr++] = (byte) x;
}
}
-
}
Added: trunk/freenet/test/freenet/support/io/LineReadingInputStreamTest.java
===================================================================
--- trunk/freenet/test/freenet/support/io/LineReadingInputStreamTest.java
(rev 0)
+++ trunk/freenet/test/freenet/support/io/LineReadingInputStreamTest.java
2008-08-15 18:31:36 UTC (rev 21912)
@@ -0,0 +1,85 @@
+/* This code is part of Freenet. It is distributed under the GNU General
+ * Public License, version 2 (or at your option any later version). See
+ * http://www.gnu.org/ for further details of the GPL. */
+package freenet.support.io;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import junit.framework.TestCase;
+
+public class LineReadingInputStreamTest extends TestCase {
+ public static final String BLOCK =
"\ntesting1\ntesting2\r\ntesting3\n\n";
+ public static final String[] LINES = new String[] {
+ "",
+ "testing1",
+ "testing2",
+ "testing3",
+ ""
+ };
+
+ public static final String STRESSED_LINE = "\n\u0114\n";
+
+ public static final int MAX_LENGTH = 128;
+ public static final int BUFFER_SIZE = 128;
+
+ public void testReadLineWithoutMarking() throws Exception {
+ // try utf8
+ InputStream is = new
ByteArrayInputStream(STRESSED_LINE.getBytes("utf-8"));
+ LineReadingInputStream instance = new
LineReadingInputStream(is);
+ assertEquals("", instance.readLineWithoutMarking(MAX_LENGTH,
BUFFER_SIZE, true));
+ assertEquals("\u0114",
instance.readLineWithoutMarking(MAX_LENGTH, BUFFER_SIZE, true));
+ assertNull(instance.readLineWithoutMarking(MAX_LENGTH,
BUFFER_SIZE, true));
+
+ // try ISO-8859-1
+ is = new ByteArrayInputStream(BLOCK.getBytes("ISO-8859-1"));
+ instance = new LineReadingInputStream(is);
+ for(String expectedLine : LINES) {
+ assertEquals(expectedLine,
instance.readLineWithoutMarking(MAX_LENGTH, BUFFER_SIZE, false));
+ }
+ assertNull(instance.readLineWithoutMarking(MAX_LENGTH,
BUFFER_SIZE, false));
+
+ // is it returning null?
+ is = new NullInputStream();
+ instance = new LineReadingInputStream(is);
+ assertNull(instance.readLineWithoutMarking(0, BUFFER_SIZE,
false));
+
+ // is it throwing?
+ is = new ByteArrayInputStream("aaa\na\n".getBytes());
+ instance = new LineReadingInputStream(is);
+ try {
+ instance.readLineWithoutMarking(2, BUFFER_SIZE, true);
+ fail();
+ } catch (TooLongException e) {}
+ }
+
+ public void testReadLine() throws Exception {
+ // try utf8
+ InputStream is = new
ByteArrayInputStream(STRESSED_LINE.getBytes("utf-8"));
+ LineReadingInputStream instance = new
LineReadingInputStream(is);
+ assertEquals("", instance.readLine(MAX_LENGTH, BUFFER_SIZE,
true));
+ assertEquals("\u0114", instance.readLine(MAX_LENGTH,
BUFFER_SIZE, true));
+ assertNull(instance.readLine(MAX_LENGTH, BUFFER_SIZE, true));
+
+ // try ISO-8859-1
+ is = new ByteArrayInputStream(BLOCK.getBytes("ISO-8859-1"));
+ instance = new LineReadingInputStream(is);
+ for(String expectedLine : LINES) {
+ assertEquals(expectedLine,
instance.readLine(MAX_LENGTH, BUFFER_SIZE, false));
+ }
+ assertNull(instance.readLine(MAX_LENGTH, BUFFER_SIZE, false));
+
+ // is it returning null?
+ is = new NullInputStream();
+ instance = new LineReadingInputStream(is);
+ assertNull(instance.readLine(0, BUFFER_SIZE, false));
+
+ // is it throwing?
+ is = new ByteArrayInputStream("aaa\na\n".getBytes());
+ instance = new LineReadingInputStream(is);
+ try {
+ instance.readLine(2, BUFFER_SIZE, true);
+ fail();
+ } catch (TooLongException e) {}
+ }
+
+}