Reviewers: shindig.remailer_gmail.com,
Description:
Resolve a rare ArrayIndexOutOfBoundsException that occurs when the input
ends with a UTF-8 multi-byte lead byte whose continuation bytes are missing.
Please review this at http://codereview.appspot.com/1257042/show
Affected files:
java/gadgets/src/main/java/org/apache/shindig/gadgets/encoding/EncodingDetector.java
java/gadgets/src/test/java/org/apache/shindig/gadgets/encoding/EncodingDetectorTest.java
### Eclipse Workspace Patch 1.0
#P shindig-project
Index:
java/gadgets/src/main/java/org/apache/shindig/gadgets/encoding/EncodingDetector.java
===================================================================
---
java/gadgets/src/main/java/org/apache/shindig/gadgets/encoding/EncodingDetector.java
(revision 946706)
+++
java/gadgets/src/main/java/org/apache/shindig/gadgets/encoding/EncodingDetector.java
(working copy)
@@ -32,7 +32,7 @@
private static final Charset UTF_8 = Charset.forName("UTF-8");
private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
-
+
public static class FallbackEncodingDetector {
public Charset detectEncoding(byte[] input) {
// Fall back to the incredibly slow ICU. It might be better to just
skip this entirely.
@@ -51,7 +51,7 @@
* encoding for HTTP) if the bytes are not valid UTF-8. Only
recommended if you can reasonably
* expect that other encodings are going to be specified. Full
encoding detection is very
* expensive!
- * @param alternateDecoder specify a fallback encoding detection.
+ * @param alternateDecoder specify a fallback encoding detection.
* Only used if assume88591IfNotUtf8 is false.
* @return The detected encoding.
*/
@@ -103,11 +103,16 @@
return false;
}
+ if (endOfSequence >= j) {
+ // End of sequence reached, not a valid sequence
+ return false;
+ }
+
while (i < endOfSequence) {
i++;
bite = input[i];
if ((bite & 0xC0) != 0x80) {
- // High bit not set, not a vlaid sequence
+ // High bit not set, not a valid sequence
return false;
}
}
Index:
java/gadgets/src/test/java/org/apache/shindig/gadgets/encoding/EncodingDetectorTest.java
===================================================================
---
java/gadgets/src/test/java/org/apache/shindig/gadgets/encoding/EncodingDetectorTest.java
(revision 946706)
+++
java/gadgets/src/test/java/org/apache/shindig/gadgets/encoding/EncodingDetectorTest.java
(working copy)
@@ -44,7 +44,7 @@
byte[] data = "Hello, world".getBytes("US-ASCII");
assertEquals("UTF-8", EncodingDetector.detectEncoding(data, true,
null).name());
}
-
+
@Test
public void detectedUtf8WithByteOrderMark() {
byte[] data = {
@@ -57,7 +57,14 @@
@Test
public void assumeLatin1OnInvalidUtf8() throws Exception {
byte[] data = "\u4F60\u597D".getBytes("BIG5");
-
+
+ assertEquals("ISO-8859-1", EncodingDetector.detectEncoding(data, true,
null).name());
+ }
+
+ @Test
+ public void badStreamEnd() throws Exception {
+ byte[] data = { 'd', 'u', (byte)0xC0 };
+
assertEquals("ISO-8859-1", EncodingDetector.detectEncoding(data, true,
null).name());
}
@@ -68,13 +75,13 @@
"\u8FBE\u4E0D\u51FA\uFF0C\u6709\u611F\u60C5\u65E0\u6CD5\u503E\u5410")
.getBytes("GB18030");
- EncodingDetector.FallbackEncodingDetector detector =
+ EncodingDetector.FallbackEncodingDetector detector =
newMockFallbackEncoding(data, "GB18030");
assertEquals("GB18030", EncodingDetector.detectEncoding(data, false,
detector).name());
verify(detector);
}
-
+
// Test the fallback detector:
@Test
public void doNotAssumeLatin1OnInvalidUtf8() throws Exception {
@@ -111,7 +118,7 @@
assertEquals("UTF-8", detector.detectEncoding(data).name());
}
-
+
@Test(expected=NullPointerException.class)
public void nullCustomDetector() throws Exception {
byte[] data = "\u4F60\u597D".getBytes("BIG5");