Author: markt
Date: Mon Mar 4 00:50:02 2013
New Revision: 1452159
URL: http://svn.apache.org/r1452159
Log:
Add a reference to the relevant specification.
Add some more UTF-8 tests along with a few associated fixes to my tweaks to the
Harmony UTF-8 decoder on replace.
Move a test from the old to the new test framework.
Still lots more tests required.
Modified:
tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java
tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java
tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java
Modified: tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java?rev=1452159&r1=1452158&r2=1452159&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java (original)
+++ tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java Mon Mar 4 00:50:02 2013
@@ -174,18 +174,20 @@ public class Utf8Decoder extends Charset
if ((bArr[inIndex + 1] & 0xFF) > 0x8F) {
// 11110100 1yyyxxxx xxxxxxxx xxxxxxxx
// Any non-zero y is > max code point
- return CoderResult.unmappableForLength(4);
+ return CoderResult.unmappableForLength(1);
}
}
- if (jchar == 0x60 && inIndexLimit > inIndex +1) {
- if ((bArr[inIndex + 1] & 0x7F) == 0) {
- // 11100000 10000000 10xxxxxx
+ if (jchar == 0x60 && inIndexLimit > inIndex + 1) {
+ if ((bArr[inIndex + 1] & 0x60) == 0) {
+ // 11100000 100yyyyy 10xxxxxx
// should have been
+ // 110yyyyy 10xxxxxx
+ // or possibly
// 00xxxxxx
- return CoderResult.malformedForLength(3);
+ return CoderResult.malformedForLength(1);
}
}
- if (jchar == 0x70 && inIndexLimit > inIndex +1) {
+ if (jchar == 0x70 && inIndexLimit > inIndex + 1) {
if ((bArr[inIndex + 1] & 0x7F) < 0x10) {
// 11110000 1000zzzz 10yyyyyy 10xxxxxx
// should have been
Modified: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java?rev=1452159&r1=1452158&r2=1452159&view=diff
==============================================================================
--- tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java (original)
+++ tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java Mon Mar 4 00:50:02 2013
@@ -32,10 +32,6 @@ public class TestUtf8 {
new byte[] {-50, -70, -31, -67, -71, -49, -125, -50, -68, -50,
-75, -19, -96, -128, 101, 100, 105, 116, 101, 100};
- // Invalid code point (out of range)
- private static final byte[] SRC_BYTES_2 =
- new byte[] {-12, -112, -128, -128};
-
// Various invalid UTF-8 sequences
private static final byte[][] MALFORMED = {
// One-byte sequences:
@@ -94,13 +90,6 @@ public class TestUtf8 {
}
- @Test
- public void testJvmDecoder2() {
- // Ideally should fail after 2 bytes (i==1)
- doJvmDecoder(SRC_BYTES_2, false, true, 3);
- }
-
-
private void doJvmDecoder(byte[] src, boolean endOfinput,
boolean errorExpected, int failPosExpected) {
CharsetDecoder decoder = B2CConverter.UTF_8.newDecoder()
@@ -144,12 +133,6 @@ public class TestUtf8 {
}
- @Test
- public void testHarmonyDecoder2() {
- doHarmonyDecoder(SRC_BYTES_2, false, true, 1);
- }
-
-
private void doHarmonyDecoder(byte[] src, boolean endOfinput,
boolean errorExpected, int failPosExpected) {
CharsetDecoder decoder = new Utf8Decoder();
Modified: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java?rev=1452159&r1=1452158&r2=1452159&view=diff
==============================================================================
--- tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java (original)
+++ tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java Mon Mar 4 00:50:02 2013
@@ -29,6 +29,11 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
+/**
+ * These tests have been written with reference to
+ * <a href="http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf">Unicode 6.2,
+ * chapter 3, section 3.9</a>.
+ */
public class TestUtf8Extended {
private List<Utf8TestCase> testCases = new ArrayList<>();
@@ -60,22 +65,53 @@ public class TestUtf8Extended {
new int[] {0xF0, 0x90, 0x90, 0x80},
-1,
"\uD801\uDC00"));
+ // JVM decoder does not report error until all 4 bytes are available
+ testCases.add(new Utf8TestCase(
+ "Invalid code point - out of range",
+ new int[] {0xF4, 0x90, 0x80, 0x80},
+ 1,
+ "\uFFFD\uFFFD\uFFFD\uFFFD").setSkipErrorForJvm(true));
+ // JVM decoder does not report error until both bytes are available
+ testCases.add(new Utf8TestCase(
+ "Valid sequence padded from one byte to two",
+ new int[] {0xC0, 0xC1},
+ 0,
+ "\uFFFD\uFFFD").setSkipErrorForJvm(true));
+ // JVM decoder does not report error until all 3 bytes are available
+ testCases.add(new Utf8TestCase(
+ "Valid sequence padded from one byte to three",
+ new int[] {0xE0, 0x80, 0xC1},
+ 1,
+ "\uFFFD\uFFFD\uFFFD").setSkipErrorForJvm(true));
}
@Test
public void testHarmonyDecoder() {
- doTest(new Utf8Decoder());
+ CharsetDecoder decoder = new Utf8Decoder();
+ for (Utf8TestCase testCase : testCases) {
+ doTest(decoder, testCase, false, false);
+ }
}
@Test
public void testJvmDecoder() {
- doTest(Charset.forName("UTF-8").newDecoder());
+ CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
+ for (Utf8TestCase testCase : testCases) {
+ doTest(decoder, testCase, testCase.skipErrorForJvm,
+ testCase.skipReplaceForJvm);
+ }
}
- private void doTest(CharsetDecoder decoder) {
- for (Utf8TestCase testCase : testCases) {
+ private void doTest(CharsetDecoder decoder, Utf8TestCase testCase,
+ boolean skipError, boolean skipReplace) {
+
+ int len = testCase.input.length;
+ ByteBuffer bb = ByteBuffer.allocate(len);
+ CharBuffer cb = CharBuffer.allocate(len);
+
+ if (!skipError) {
// Configure decoder to fail on an error
decoder.reset();
decoder.onMalformedInput(CodingErrorAction.REPORT);
@@ -83,9 +119,6 @@ public class TestUtf8Extended {
// Add each byte one at a time. The decoder should fail as soon as
// an invalid sequence has been provided
- int len = testCase.input.length;
- ByteBuffer bb = ByteBuffer.allocate(len);
- CharBuffer cb = CharBuffer.allocate(len);
for (int i = 0; i < len; i++) {
bb.put((byte) testCase.input[i]);
bb.flip();
@@ -97,7 +130,9 @@ public class TestUtf8Extended {
}
bb.compact();
}
+ }
+ if (!skipReplace) {
// Configure decoder to replace on an error
decoder.reset();
decoder.onMalformedInput(CodingErrorAction.REPLACE);
@@ -137,6 +172,8 @@ public class TestUtf8Extended {
private final int[] input;
private final int invalidIndex;
private final String outputReplaced;
+ private boolean skipErrorForJvm = false;
+ private boolean skipReplaceForJvm = false;
public Utf8TestCase(String description, int[] input, int invalidIndex,
String outputReplaced) {
@@ -144,6 +181,17 @@ public class TestUtf8Extended {
this.input = input;
this.invalidIndex = invalidIndex;
this.outputReplaced = outputReplaced;
+
+ }
+
+ public Utf8TestCase setSkipErrorForJvm(boolean skipErrorForJvm) {
+ this.skipErrorForJvm = skipErrorForJvm;
+ return this;
+ }
+
+ public Utf8TestCase setSkipReplaceForJvm(boolean skipReplaceForJvm) {
+ this.skipReplaceForJvm = skipReplaceForJvm;
+ return this;
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]