TestUtf8Extended.java

markt Sun, 03 Mar 2013 16:50:27 -0800

Author: markt
Date: Mon Mar  4 00:50:02 2013
New Revision: 1452159

URL: http://svn.apache.org/r1452159
Log:
Add a reference to the relevant specification.
Add some more UTF-8 tests along with a few associated fixes to my tweaks to the 
Harmony UTF-8 decoder on replace.
Move a test from the old to the new test framework.
Still lots more tests required.


Modified:
    tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java
    tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java
    tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java

Modified: tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java?rev=1452159&r1=1452158&r2=1452159&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java (original)
+++ tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Decoder.java Mon Mar  4 00:50:02 2013
@@ -174,18 +174,20 @@ public class Utf8Decoder extends Charset
                         if ((bArr[inIndex + 1] & 0xFF) > 0x8F) {
                             // 11110100 1yyyxxxx xxxxxxxx xxxxxxxx
                             // Any non-zero y is > max code point
-                            return CoderResult.unmappableForLength(4);
+                            return CoderResult.unmappableForLength(1);
                         }
                     }
-                    if (jchar == 0x60 && inIndexLimit > inIndex +1) {
-                        if ((bArr[inIndex + 1] & 0x7F) == 0) {
-                            // 11100000 10000000 10xxxxxx
+                    if (jchar == 0x60 && inIndexLimit > inIndex + 1) {
+                        if ((bArr[inIndex + 1] & 0x60) == 0) {
+                            // 11100000 100yyyyy 10xxxxxx
                             // should have been
+                            // 11oyyyyy 1oxxxxxx
+                            // or possibly
                             // 00xxxxxx
-                            return CoderResult.malformedForLength(3);
+                            return CoderResult.malformedForLength(1);
                         }
                     }
-                    if (jchar == 0x70 && inIndexLimit > inIndex +1) {
+                    if (jchar == 0x70 && inIndexLimit > inIndex + 1) {
                         if ((bArr[inIndex + 1] & 0x7F) < 0x10) {
                             // 11110000 1000zzzz 1oyyyyyy 1oxxxxxx
                             // should have been

Modified: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java?rev=1452159&r1=1452158&r2=1452159&view=diff
==============================================================================
--- tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java (original)
+++ tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8.java Mon Mar  4 00:50:02 2013
@@ -32,10 +32,6 @@ public class TestUtf8 {
             new byte[] {-50, -70, -31,  -67, -71, -49, -125, -50, -68, -50,
                         -75, -19, -96, -128, 101, 100,  105, 116, 101, 100};
 
-    // Invalid code point (out of range)
-    private static final byte[] SRC_BYTES_2 =
-            new byte[] {-12, -112, -128, -128};
-
     // Various invalid UTF-8 sequences
     private static final byte[][] MALFORMED = {
             // One-byte sequences:
@@ -94,13 +90,6 @@ public class TestUtf8 {
     }
 
 
-    @Test
-    public void testJvmDecoder2() {
-        // Ideally should fail after 2 bytes (i==1)
-        doJvmDecoder(SRC_BYTES_2, false, true, 3);
-    }
-
-
     private void doJvmDecoder(byte[] src, boolean endOfinput,
             boolean errorExpected, int failPosExpected) {
         CharsetDecoder decoder = B2CConverter.UTF_8.newDecoder()
@@ -144,12 +133,6 @@ public class TestUtf8 {
     }
 
 
-    @Test
-    public void testHarmonyDecoder2() {
-        doHarmonyDecoder(SRC_BYTES_2, false, true, 1);
-    }
-
-
     private void doHarmonyDecoder(byte[] src, boolean endOfinput,
             boolean errorExpected, int failPosExpected) {
         CharsetDecoder decoder = new Utf8Decoder();

Modified: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java?rev=1452159&r1=1452158&r2=1452159&view=diff
==============================================================================
--- tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java (original)
+++ tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java Mon Mar  4 00:50:02 2013
@@ -29,6 +29,11 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+/**
+ * These tests have been written with reference to
+ * <a href="http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf">unicode 6.2,
+ * chapter 3, section 3.9</a>.
+ */
 public class TestUtf8Extended {
 
     private List<Utf8TestCase> testCases = new ArrayList<>();
@@ -60,22 +65,53 @@ public class TestUtf8Extended {
                 new int[] {0xF0, 0x90, 0x90, 0x80},
                 -1,
                 "\uD801\uDC00"));
+        // JVM decoder does not report error until all 4 bytes are available
+        testCases.add(new Utf8TestCase(
+                "Invalid code point - out of range",
+                new int[] {0xF4, 0x90, 0x80, 0x80},
+                1,
+                "\uFFFD\uFFFD\uFFFD\uFFFD").setSkipErrorForJvm(true));
+        // JVM decoder does not report error until all 2 bytes are available
+        testCases.add(new Utf8TestCase(
+                "Valid sequence padded from one byte to two",
+                new int[] {0xC0, 0xC1},
+                0,
+                "\uFFFD\uFFFD").setSkipErrorForJvm(true));
+        // JVM decoder does not report error until all 3 bytes are available
+        testCases.add(new Utf8TestCase(
+                "Valid sequence padded from one byte to three",
+                new int[] {0xE0, 0x80, 0xC1},
+                1,
+                "\uFFFD\uFFFD\uFFFD").setSkipErrorForJvm(true));
     }
 
     @Test
     public void testHarmonyDecoder() {
-        doTest(new Utf8Decoder());
+        CharsetDecoder decoder = new Utf8Decoder();
+        for (Utf8TestCase testCase : testCases) {
+            doTest(decoder, testCase, false, false);
+        }
     }
 
 
     @Test
     public void testJvmDecoder() {
-        doTest(Charset.forName("UTF-8").newDecoder());
+        CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
+        for (Utf8TestCase testCase : testCases) {
+            doTest(decoder, testCase, testCase.skipErrorForJvm,
+                    testCase.skipReplaceForJvm);
+        }
     }
 
 
-    private void doTest(CharsetDecoder decoder) {
-        for (Utf8TestCase testCase : testCases) {
+    private void doTest(CharsetDecoder decoder, Utf8TestCase testCase,
+            boolean skipError, boolean skipReplace) {
+
+        int len = testCase.input.length;
+        ByteBuffer bb = ByteBuffer.allocate(len);
+        CharBuffer cb = CharBuffer.allocate(len);
+
+        if (!skipError) {
             // Configure decoder to fail on an error
             decoder.reset();
             decoder.onMalformedInput(CodingErrorAction.REPORT);
@@ -83,9 +119,6 @@ public class TestUtf8Extended {
 
             // Add each byte one at a time. The decoder should fail as soon as
             // an invalid sequence has been provided
-            int len = testCase.input.length;
-            ByteBuffer bb = ByteBuffer.allocate(len);
-            CharBuffer cb = CharBuffer.allocate(len);
             for (int i = 0; i < len; i++) {
                 bb.put((byte) testCase.input[i]);
                 bb.flip();
@@ -97,7 +130,9 @@ public class TestUtf8Extended {
                 }
                 bb.compact();
             }
+        }
 
+        if (!skipReplace) {
             // Configure decoder to replace on an error
             decoder.reset();
             decoder.onMalformedInput(CodingErrorAction.REPLACE);
@@ -137,6 +172,8 @@ public class TestUtf8Extended {
         private final int[] input;
         private final int invalidIndex;
         private final String outputReplaced;
+        private boolean skipErrorForJvm = false;
+        private boolean skipReplaceForJvm = false;
 
         public Utf8TestCase(String description, int[] input, int invalidIndex,
                 String outputReplaced) {
@@ -144,6 +181,17 @@ public class TestUtf8Extended {
             this.input = input;
             this.invalidIndex = invalidIndex;
             this.outputReplaced = outputReplaced;
+
+        }
+
+        public Utf8TestCase setSkipErrorForJvm(boolean skipErrorForJvm) {
+            this.skipErrorForJvm = skipErrorForJvm;
+            return this;
+        }
+
+        public Utf8TestCase setSkipReplaceForJvm(boolean skipReplaceForJvm) {
+            this.skipReplaceForJvm = skipReplaceForJvm;
+            return this;
         }
     }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

svn commit: r1452159 - in /tomcat/trunk: java/org/apache/tomcat/util/buf/Utf8Decoder.java test/org/apache/tomcat/util/buf/TestUtf8.java test/org/apache/tomcat/util/buf/TestUtf8Extended.java

Reply via email to