Author: markt
Date: Mon Mar 4 00:50:02 2013
New Revision: 1452159
URL: http://svn.apache.org/r1452159
Log:
Add a reference to the relevant specification.
Add some more UTF-8 tests along with a few associated fixes to my tweaks to the Harmony UTF-8 decoder on replace.
Move a test from the old to the new test framework.
Still lots more tests required.
Modified: tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java Modified: tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java?rev=1452159&r1=1452158&r2=1452159&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java (original) +++ tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java Mon Mar 4 00:50:02 2013 @@ -174,18 +174,20 @@ public class Utf8Decoder extends Charset if ((bArr[inIndex + 1] & 0xFF) > 0x8F) { // 11110100 1yyyxxxx xxxxxxxx xxxxxxxx // Any non-zero y is > max code point - return CoderResult.unmappableForLength(4); + return CoderResult.unmappableForLength(1); } } - if (jchar == 0x60 && inIndexLimit > inIndex +1) { - if ((bArr[inIndex + 1] & 0x7F) == 0) { - // 11100000 10000000 10xxxxxx + if (jchar == 0x60 && inIndexLimit > inIndex + 1) { + if ((bArr[inIndex + 1] & 0x60) == 0) { + // 11100000 100yyyyy 10xxxxxx // should have been + // 11oyyyyy 1oxxxxxx + // or possibly // 00xxxxxx - return CoderResult.malformedForLength(3); + return CoderResult.malformedForLength(1); } } - if (jchar == 0x70 && inIndexLimit > inIndex +1) { + if (jchar == 0x70 && inIndexLimit > inIndex + 1) { if ((bArr[inIndex + 1] & 0x7F) < 0x10) { // 11110000 1000zzzz 1oyyyyyy 1oxxxxxx // should have been Modified: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java URL: http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java?rev=1452159&r1=1452158&r2=1452159&view=diff ============================================================================== --- tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java (original) +++ tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java Mon Mar 4 00:50:02 2013 @@ 
-32,10 +32,6 @@ public class TestUtf8 { new byte[] {-50, -70, -31, -67, -71, -49, -125, -50, -68, -50, -75, -19, -96, -128, 101, 100, 105, 116, 101, 100}; - // Invalid code point (out of range) - private static final byte[] SRC_BYTES_2 = - new byte[] {-12, -112, -128, -128}; - // Various invalid UTF-8 sequences private static final byte[][] MALFORMED = { // One-byte sequences: @@ -94,13 +90,6 @@ public class TestUtf8 { } - @Test - public void testJvmDecoder2() { - // Ideally should fail after 2 bytes (i==1) - doJvmDecoder(SRC_BYTES_2, false, true, 3); - } - - private void doJvmDecoder(byte[] src, boolean endOfinput, boolean errorExpected, int failPosExpected) { CharsetDecoder decoder = B2CConverter.UTF_8.newDecoder() @@ -144,12 +133,6 @@ public class TestUtf8 { } - @Test - public void testHarmonyDecoder2() { - doHarmonyDecoder(SRC_BYTES_2, false, true, 1); - } - - private void doHarmonyDecoder(byte[] src, boolean endOfinput, boolean errorExpected, int failPosExpected) { CharsetDecoder decoder = new Utf8Decoder(); Modified: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java URL: http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java?rev=1452159&r1=1452158&r2=1452159&view=diff ============================================================================== --- tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java (original) +++ tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java Mon Mar 4 00:50:02 2013 @@ -29,6 +29,11 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +/** + * These tests have been written with reference to + * <a href="http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf">unicode 6.2, + * chapter 3, section 3.9</a>. 
+ */ public class TestUtf8Extended { private List<Utf8TestCase> testCases = new ArrayList<>(); @@ -60,22 +65,53 @@ public class TestUtf8Extended { new int[] {0xF0, 0x90, 0x90, 0x80}, -1, "\uD801\uDC00")); + // JVM decoder does not report error until all 4 bytes are available + testCases.add(new Utf8TestCase( + "Invalid code point - out of range", + new int[] {0xF4, 0x90, 0x80, 0x80}, + 1, + "\uFFFD\uFFFD\uFFFD\uFFFD").setSkipErrorForJvm(true)); + // JVM decoder does not report error until all 2 bytes are available + testCases.add(new Utf8TestCase( + "Valid sequence padded from one byte to two", + new int[] {0xC0, 0xC1}, + 0, + "\uFFFD\uFFFD").setSkipErrorForJvm(true)); + // JVM decoder does not report error until all 3 bytes are available + testCases.add(new Utf8TestCase( + "Valid sequence padded from one byte to three", + new int[] {0xE0, 0x80, 0xC1}, + 1, + "\uFFFD\uFFFD\uFFFD").setSkipErrorForJvm(true)); } @Test public void testHarmonyDecoder() { - doTest(new Utf8Decoder()); + CharsetDecoder decoder = new Utf8Decoder(); + for (Utf8TestCase testCase : testCases) { + doTest(decoder, testCase, false, false); + } } @Test public void testJvmDecoder() { - doTest(Charset.forName("UTF-8").newDecoder()); + CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder(); + for (Utf8TestCase testCase : testCases) { + doTest(decoder, testCase, testCase.skipErrorForJvm, + testCase.skipReplaceForJvm); + } } - private void doTest(CharsetDecoder decoder) { - for (Utf8TestCase testCase : testCases) { + private void doTest(CharsetDecoder decoder, Utf8TestCase testCase, + boolean skipError, boolean skipReplace) { + + int len = testCase.input.length; + ByteBuffer bb = ByteBuffer.allocate(len); + CharBuffer cb = CharBuffer.allocate(len); + + if (!skipError) { // Configure decoder to fail on an error decoder.reset(); decoder.onMalformedInput(CodingErrorAction.REPORT); @@ -83,9 +119,6 @@ public class TestUtf8Extended { // Add each byte one at a time. 
The decoder should fail as soon as // an invalid sequence has been provided - int len = testCase.input.length; - ByteBuffer bb = ByteBuffer.allocate(len); - CharBuffer cb = CharBuffer.allocate(len); for (int i = 0; i < len; i++) { bb.put((byte) testCase.input[i]); bb.flip(); @@ -97,7 +130,9 @@ public class TestUtf8Extended { } bb.compact(); } + } + if (!skipReplace) { // Configure decoder to replace on an error decoder.reset(); decoder.onMalformedInput(CodingErrorAction.REPLACE); @@ -137,6 +172,8 @@ public class TestUtf8Extended { private final int[] input; private final int invalidIndex; private final String outputReplaced; + private boolean skipErrorForJvm = false; + private boolean skipReplaceForJvm = false; public Utf8TestCase(String description, int[] input, int invalidIndex, String outputReplaced) { @@ -144,6 +181,17 @@ public class TestUtf8Extended { this.input = input; this.invalidIndex = invalidIndex; this.outputReplaced = outputReplaced; + + } + + public Utf8TestCase setSkipErrorForJvm(boolean skipErrorForJvm) { + this.skipErrorForJvm = skipErrorForJvm; + return this; + } + + public Utf8TestCase setSkipReplaceForJvm(boolean skipReplaceForJvm) { + this.skipReplaceForJvm = skipReplaceForJvm; + return this; } } } --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org