Author: markt
Date: Thu Apr 13 19:37:39 2017
New Revision: 1791298

URL: http://svn.apache.org/viewvc?rev=1791298&view=rev
Log:
When no BOM is present and an encoding is detected, do not skip the bytes used to detect the encoding since they are not part of a BOM.
Modified: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java tomcat/trunk/webapps/docs/changelog.xml Modified: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java?rev=1791298&r1=1791297&r2=1791298&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java (original) +++ tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java Thu Apr 13 19:37:39 2017 @@ -162,41 +162,41 @@ class EncodingDetector { return new BomResult("UTF-8", 0); } - // other encodings + // Other encodings. No BOM. Try and ID encoding. int b3 = b4[3] & 0xFF; if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { // UCS-4, big endian (1234) - return new BomResult("ISO-10646-UCS-4", 4); + return new BomResult("ISO-10646-UCS-4", 0); } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { // UCS-4, little endian (4321) - return new BomResult("ISO-10646-UCS-4", 4); + return new BomResult("ISO-10646-UCS-4", 0); } if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { // UCS-4, unusual octet order (2143) // REVISIT: What should this be? - return new BomResult("ISO-10646-UCS-4", 4); + return new BomResult("ISO-10646-UCS-4", 0); } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { // UCS-4, unusual octect order (3412) // REVISIT: What should this be? - return new BomResult("ISO-10646-UCS-4", 4); + return new BomResult("ISO-10646-UCS-4", 0); } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM // (or could turn out to be UCS-2... // REVISIT: What should this be? - return new BomResult("UTF-16BE", 4); + return new BomResult("UTF-16BE", 0); } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM // (or could turn out to be UCS-2... 
- return new BomResult("UTF-16LE", 4); + return new BomResult("UTF-16LE", 0); } if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { // EBCDIC // a la xerces1, return CP037 instead of EBCDIC here - return new BomResult("CP037", 4); + return new BomResult("CP037", 0); } // default encoding Modified: tomcat/trunk/webapps/docs/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1791298&r1=1791297&r2=1791298&view=diff ============================================================================== --- tomcat/trunk/webapps/docs/changelog.xml (original) +++ tomcat/trunk/webapps/docs/changelog.xml Thu Apr 13 19:37:39 2017 @@ -45,6 +45,15 @@ issues do not "pop up" wrt. others). --> <section name="Tomcat 9.0.0.M21 (markt)" rtext="in development"> + <subsection name="Jasper"> + <changelog> + <fix> + When no BOM is present and an encoding is detected, do not skip the + bytes used to detect the encoding since they are not part of a BOM. + (markt) + </fix> + </changelog> + </subsection> </section> <section name="Tomcat 9.0.0.M20 (markt)" rtext="release in progress"> <subsection name="Catalina"> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org