Reviewers: shindig.remailer_gmail.com,

Description:

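Resolve a rare ArrayIndexOutOfBoundsException that occurs when the input
ends with a byte that starts a multi-byte sequence (e.g. a trailing 0xC0),
so the expected continuation bytes would lie past the end of the stream.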


Please review this at http://codereview.appspot.com/1257042/show

Affected files:
java/gadgets/src/main/java/org/apache/shindig/gadgets/encoding/EncodingDetector.java
java/gadgets/src/test/java/org/apache/shindig/gadgets/encoding/EncodingDetectorTest.java


### Eclipse Workspace Patch 1.0
#P shindig-project
Index: java/gadgets/src/main/java/org/apache/shindig/gadgets/encoding/EncodingDetector.java
===================================================================
--- java/gadgets/src/main/java/org/apache/shindig/gadgets/encoding/EncodingDetector.java (revision 946706)
+++ java/gadgets/src/main/java/org/apache/shindig/gadgets/encoding/EncodingDetector.java (working copy)
@@ -32,7 +32,7 @@
   private static final Charset UTF_8 = Charset.forName("UTF-8");
   private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

-
+
   public static class FallbackEncodingDetector {
     public Charset detectEncoding(byte[] input) {
// Fall back to the incredibly slow ICU. It might be better to just skip this entirely.
@@ -51,7 +51,7 @@
* encoding for HTTP) if the bytes are not valid UTF-8. Only recommended if you can reasonably * expect that other encodings are going to be specified. Full encoding detection is very
    *     expensive!
-   * @param alternateDecoder specify a fallback encoding detection.
+   * @param alternateDecoder specify a fallback encoding detection.
    *     Only used if assume88591IfNotUtf8 is false.
    * @return The detected encoding.
    */
@@ -103,11 +103,16 @@
         return false;
       }

+      if (endOfSequence >= j) {
+        // End of sequence reached, not a valid sequence
+        return false;
+      }
+
       while (i < endOfSequence) {
         i++;
         bite = input[i];
         if ((bite & 0xC0) != 0x80) {
-          // High bit not set, not a vlaid sequence
+          // High bit not set, not a valid sequence
           return false;
         }
       }
Index: java/gadgets/src/test/java/org/apache/shindig/gadgets/encoding/EncodingDetectorTest.java
===================================================================
--- java/gadgets/src/test/java/org/apache/shindig/gadgets/encoding/EncodingDetectorTest.java (revision 946706)
+++ java/gadgets/src/test/java/org/apache/shindig/gadgets/encoding/EncodingDetectorTest.java (working copy)
@@ -44,7 +44,7 @@
     byte[] data = "Hello, world".getBytes("US-ASCII");
assertEquals("UTF-8", EncodingDetector.detectEncoding(data, true, null).name());
   }
-
+
   @Test
   public void detectedUtf8WithByteOrderMark() {
     byte[] data = {
@@ -57,7 +57,14 @@
   @Test
   public void assumeLatin1OnInvalidUtf8() throws Exception {
     byte[] data = "\u4F60\u597D".getBytes("BIG5");
-
+
+ assertEquals("ISO-8859-1", EncodingDetector.detectEncoding(data, true, null).name());
+  }
+
+  @Test
+  public void badStreamEnd() throws Exception {
+    byte[] data = { 'd', 'u',  (byte)0xC0 };
+
assertEquals("ISO-8859-1", EncodingDetector.detectEncoding(data, true, null).name());
   }

@@ -68,13 +75,13 @@
                    
"\u8FBE\u4E0D\u51FA\uFF0C\u6709\u611F\u60C5\u65E0\u6CD5\u503E\u5410")
                    .getBytes("GB18030");

-    EncodingDetector.FallbackEncodingDetector detector =
+    EncodingDetector.FallbackEncodingDetector detector =
       newMockFallbackEncoding(data, "GB18030");

assertEquals("GB18030", EncodingDetector.detectEncoding(data, false, detector).name());
     verify(detector);
   }
-
+
   // Test the fallback detector:
   @Test
   public void doNotAssumeLatin1OnInvalidUtf8() throws Exception {
@@ -111,7 +118,7 @@

     assertEquals("UTF-8", detector.detectEncoding(data).name());
   }
-
+
   @Test(expected=NullPointerException.class)
   public void nullCustomDetector() throws Exception {
     byte[] data = "\u4F60\u597D".getBytes("BIG5");


Reply via email to