Author: nextgens
Date: 2008-08-15 18:31:36 +0000 (Fri, 15 Aug 2008)
New Revision: 21912

Added:
   trunk/freenet/test/freenet/support/io/
   trunk/freenet/test/freenet/support/io/LineReadingInputStreamTest.java
Modified:
   trunk/freenet/src/freenet/support/io/LineReadingInputStream.java
Log:
Commit an improved LineReadingInputStream (profiling showed that it was a
bottleneck), together with its unit test

Modified: trunk/freenet/src/freenet/support/io/LineReadingInputStream.java
===================================================================
--- trunk/freenet/src/freenet/support/io/LineReadingInputStream.java    
2008-08-15 16:38:33 UTC (rev 21911)
+++ trunk/freenet/src/freenet/support/io/LineReadingInputStream.java    
2008-08-15 18:31:36 UTC (rev 21912)
@@ -16,8 +16,6 @@
                super(in);
        }

-       private byte[] buf;
-
        /**
         * Read a \n or \r\n terminated line of UTF-8 or ISO-8859-1.
         * @param maxLength The maximum length of a line. If a line is longer 
than this, we throw IOException rather
@@ -28,29 +26,73 @@
        public String readLine(int maxLength, int bufferSize, boolean utf) 
throws IOException {
                if(maxLength < bufferSize)
                        bufferSize = maxLength + 1; // Buffer too big, shrink 
it (add 1 for the optional \r)
-               if(buf == null)
-                       buf = new byte[Math.max(Math.min(128,maxLength), 
Math.min(1024, bufferSize))];
+
+               if(!markSupported())
+                       return readLineWithoutMarking(maxLength, bufferSize, 
utf);
+
+               byte[] buf = new byte[Math.max(Math.min(128, maxLength), 
Math.min(1024, bufferSize))];
                int ctr = 0;
                while(true) {
+                       mark(maxLength);
+                       int x = read(buf, ctr, bufferSize);
+                       if(x == -1) {
+                               if(ctr == 0)
+                                       return null;
+                               return new String(buf, 0, ctr, utf ? "UTF-8" : 
"ISO-8859-1");
+                       }
+                       // REDFLAG this is definitely safe with the above 
charsets, it may not be safe with some wierd ones. 
+                       for(; ctr < buf.length; ctr++) {
+                               if(ctr >= maxLength)
+                                       throw new TooLongException();
+                               if(buf[ctr] == '\n') {
+                                       boolean removeCR = false;
+                                       String toReturn = "";
+                                       if(ctr != 0) {
+                                               if(buf[ctr - 1] == '\r') {
+                                                       ctr--;
+                                                       removeCR = true;
+                                               }
+                                               toReturn = new String(buf, 0, 
ctr, utf ? "UTF-8" : "ISO-8859-1");
+                                       }
+                                       reset();
+                                       skip(ctr + (removeCR ? 2 : 1));
+                                       return toReturn;
+                               }
+                       }
+                       byte[] newBuf = new byte[Math.min(buf.length * 2, 
maxLength)];
+                       System.arraycopy(buf, 0, newBuf, 0, buf.length);
+                       buf = newBuf;
+               }
+       }
+
+       public String readLineWithoutMarking(int maxLength, int bufferSize, 
boolean utf) throws IOException {
+               if(maxLength < bufferSize)
+                       bufferSize = maxLength + 1; // Buffer too big, shrink 
it (add 1 for the optional \r)
+               byte[] buf = new byte[Math.max(Math.min(128, maxLength), 
Math.min(1024, bufferSize))];
+               int ctr = 0;
+               while(true) {
                        int x = read();
                        if(x == -1) {
-                               if(ctr == 0) return null;
+                               if(ctr == 0)
+                                       return null;
                                return new String(buf, 0, ctr, utf ? "UTF-8" : 
"ISO-8859-1");
                        }
                        // REDFLAG this is definitely safe with the above 
charsets, it may not be safe with some wierd ones. 
                        if(x == '\n') {
-                               if(ctr == 0) return "";
-                               if(buf[ctr-1] == '\r') ctr--;
+                               if(ctr == 0)
+                                       return "";
+                               if(buf[ctr - 1] == '\r')
+                                       ctr--;
                                return new String(buf, 0, ctr, utf ? "UTF-8" : 
"ISO-8859-1");
                        }
-                       if(ctr >= maxLength) throw new TooLongException();
+                       if(ctr >= maxLength)
+                               throw new TooLongException();
                        if(ctr >= buf.length) {
                                byte[] newBuf = new byte[Math.min(buf.length * 
2, maxLength)];
                                System.arraycopy(buf, 0, newBuf, 0, buf.length);
                                buf = newBuf;
                        }
-                       buf[ctr++] = (byte)x;
+                       buf[ctr++] = (byte) x;
                }
        }
-       
 }

Added: trunk/freenet/test/freenet/support/io/LineReadingInputStreamTest.java
===================================================================
--- trunk/freenet/test/freenet/support/io/LineReadingInputStreamTest.java       
                        (rev 0)
+++ trunk/freenet/test/freenet/support/io/LineReadingInputStreamTest.java       
2008-08-15 18:31:36 UTC (rev 21912)
@@ -0,0 +1,85 @@
+/* This code is part of Freenet. It is distributed under the GNU General
+ * Public License, version 2 (or at your option any later version). See
+ * http://www.gnu.org/ for further details of the GPL. */
+package freenet.support.io;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import junit.framework.TestCase;
+
+public class LineReadingInputStreamTest extends TestCase {
+       public static final String BLOCK = 
"\ntesting1\ntesting2\r\ntesting3\n\n";
+       public static final String[] LINES = new String[] {
+               "",
+               "testing1",
+               "testing2",
+               "testing3",
+               ""
+       };
+       
+       public static final String STRESSED_LINE = "\n\u0114\n";
+       
+       public static final int MAX_LENGTH = 128;
+       public static final int BUFFER_SIZE = 128;
+       
+       public void testReadLineWithoutMarking() throws Exception {
+               // try utf8
+               InputStream is = new 
ByteArrayInputStream(STRESSED_LINE.getBytes("utf-8"));
+               LineReadingInputStream instance = new 
LineReadingInputStream(is);
+               assertEquals("", instance.readLineWithoutMarking(MAX_LENGTH, 
BUFFER_SIZE, true));
+               assertEquals("\u0114", 
instance.readLineWithoutMarking(MAX_LENGTH, BUFFER_SIZE, true));
+               assertNull(instance.readLineWithoutMarking(MAX_LENGTH, 
BUFFER_SIZE, true));
+               
+               // try ISO-8859-1
+               is = new ByteArrayInputStream(BLOCK.getBytes("ISO-8859-1"));
+               instance = new LineReadingInputStream(is);
+               for(String expectedLine : LINES) {
+                       assertEquals(expectedLine, 
instance.readLineWithoutMarking(MAX_LENGTH, BUFFER_SIZE, false));
+               }
+               assertNull(instance.readLineWithoutMarking(MAX_LENGTH, 
BUFFER_SIZE, false));
+               
+               // is it returning null?
+               is = new NullInputStream();
+               instance = new LineReadingInputStream(is);
+               assertNull(instance.readLineWithoutMarking(0, BUFFER_SIZE, 
false));
+               
+               // is it throwing?
+               is = new ByteArrayInputStream("aaa\na\n".getBytes());
+               instance = new LineReadingInputStream(is);
+               try {
+                       instance.readLineWithoutMarking(2, BUFFER_SIZE, true);
+                       fail();
+               } catch (TooLongException e) {}
+       }
+       
+       public void testReadLine() throws Exception {
+               // try utf8
+               InputStream is = new 
ByteArrayInputStream(STRESSED_LINE.getBytes("utf-8"));
+               LineReadingInputStream instance = new 
LineReadingInputStream(is);
+               assertEquals("", instance.readLine(MAX_LENGTH, BUFFER_SIZE, 
true));
+               assertEquals("\u0114", instance.readLine(MAX_LENGTH, 
BUFFER_SIZE, true));
+               assertNull(instance.readLine(MAX_LENGTH, BUFFER_SIZE, true));
+               
+               // try ISO-8859-1
+               is = new ByteArrayInputStream(BLOCK.getBytes("ISO-8859-1"));
+               instance = new LineReadingInputStream(is);
+               for(String expectedLine : LINES) {
+                       assertEquals(expectedLine, 
instance.readLine(MAX_LENGTH, BUFFER_SIZE, false));
+               }
+               assertNull(instance.readLine(MAX_LENGTH, BUFFER_SIZE, false));
+               
+               // is it returning null?
+               is = new NullInputStream();
+               instance = new LineReadingInputStream(is);
+               assertNull(instance.readLine(0, BUFFER_SIZE, false));
+               
+               // is it throwing?
+               is = new ByteArrayInputStream("aaa\na\n".getBytes());
+               instance = new LineReadingInputStream(is);
+               try {
+                       instance.readLine(2, BUFFER_SIZE, true);
+                       fail();
+               } catch (TooLongException e) {}
+       }
+
+}


Reply via email to