Author: markt Date: Mon Jul 24 10:00:23 2017 New Revision: 1802780 URL: http://svn.apache.org/viewvc?rev=1802780&view=rev Log: Correct a further regression in the fix for bug 49464 that could cause a byte order mark character to appear at the start of content included by the DefaultServlet.
Added: tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt (with props) Modified: tomcat/trunk/conf/web.xml tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java tomcat/trunk/webapps/docs/changelog.xml tomcat/trunk/webapps/docs/default-servlet.xml Modified: tomcat/trunk/conf/web.xml URL: http://svn.apache.org/viewvc/tomcat/trunk/conf/web.xml?rev=1802780&r1=1802779&r2=1802780&view=diff ============================================================================== --- tomcat/trunk/conf/web.xml (original) +++ tomcat/trunk/conf/web.xml Mon Jul 24 10:00:23 2017 @@ -48,6 +48,11 @@ <!-- fileEncoding Encoding to be used to read static resources --> <!-- [platform default] --> <!-- --> + <!-- useBomIfPresent If a static file contains a byte order mark --> + <!-- (BOM), should this be used to determine the --> + <!-- file encoding in preference to fileEncoding. --> + <!-- [true] --> + <!-- --> <!-- input Input buffer size (in bytes) when reading --> <!-- resources to be served. [2048] --> <!-- --> Modified: tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java?rev=1802780&r1=1802779&r2=1802780&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java (original) +++ tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java Mon Jul 24 10:00:23 2017 @@ -236,6 +236,11 @@ public class DefaultServlet extends Http private Charset fileEncodingCharset = null; /** + * If a file has a BOM, should that be used in preference to fileEncoding? + */ + private boolean useBomIfPresent = true; + + /** * Minimum size for sendfile usage in bytes. 
*/ protected int sendfileSize = 48 * 1024; @@ -302,6 +307,10 @@ public class DefaultServlet extends Http } } + if (getServletConfig().getInitParameter("useBomIfPresent") != null) + useBomIfPresent = Boolean.parseBoolean( + getServletConfig().getInitParameter("useBomIfPresent")); + globalXsltFile = getServletConfig().getInitParameter("globalXsltFile"); contextXsltFile = getServletConfig().getInitParameter("contextXsltFile"); localXsltFile = getServletConfig().getInitParameter("localXsltFile"); @@ -726,11 +735,11 @@ public class DefaultServlet extends Http /** * Serve the specified resource, optionally including the data content. * - * @param request The servlet request we are processing - * @param response The servlet response we are creating - * @param content Should the content be included? - * @param encoding The encoding to use if it is necessary to access the - * source as characters rather than as bytes + * @param request The servlet request we are processing + * @param response The servlet response we are creating + * @param content Should the content be included? + * @param inputEncoding The encoding to use if it is necessary to access the + * source as characters rather than as bytes * * @exception IOException if an input/output error occurs * @exception ServletException if a servlet-specified error occurs @@ -738,7 +747,7 @@ public class DefaultServlet extends Http protected void serveResource(HttpServletRequest request, HttpServletResponse response, boolean content, - String encoding) + String inputEncoding) throws IOException, ServletException { boolean serveContent = content; @@ -1011,35 +1020,46 @@ public class DefaultServlet extends Http // Output via a writer so can't use sendfile or write // content directly. 
if (resource.isDirectory()) { - renderResult = render(getPathPrefix(request), resource, encoding); + renderResult = render(getPathPrefix(request), resource, inputEncoding); } else { renderResult = resource.getInputStream(); + if (included) { + // Need to make sure any BOM is removed + if (!renderResult.markSupported()) { + renderResult = new BufferedInputStream(renderResult); + } + Charset bomCharset = processBom(renderResult); + if (bomCharset != null && useBomIfPresent) { + inputEncoding = bomCharset.name(); + } + } } - copy(renderResult, writer, encoding); + copy(renderResult, writer, inputEncoding); } else { // Output is via an OutputStream if (resource.isDirectory()) { - renderResult = render(getPathPrefix(request), resource, encoding); + renderResult = render(getPathPrefix(request), resource, inputEncoding); } else { // Output is content of resource // Check to see if conversion is required - if (conversionRequired) { - // A conversion is required from fileEncoding to - // response encoding - byte[] resourceBody = resource.getContent(); - InputStream source; - if (resourceBody == null) { - source = resource.getInputStream(); - } else { - source = new ByteArrayInputStream(resourceBody); + if (conversionRequired || included) { + // When including a file, we need to check for a BOM + // to determine if a conversion is required, so we + // might as well always convert + InputStream source = resource.getInputStream(); + if (!source.markSupported()) { + source = new BufferedInputStream(source); + } + Charset bomCharset = processBom(source); + if (bomCharset != null && useBomIfPresent) { + inputEncoding = bomCharset.name(); } OutputStreamWriter osw = new OutputStreamWriter(ostream, charset); PrintWriter pw = new PrintWriter(osw); - copy(source, pw, fileEncoding); + copy(source, pw, inputEncoding); pw.flush(); } else { - if (!checkSendfile(request, response, resource, - contentLength, null)) { + if (!checkSendfile(request, response, resource, contentLength, null)) { 
// sendfile not possible so check if resource // content is available directly byte[] resourceBody = resource.getContent(); @@ -1124,7 +1144,75 @@ public class DefaultServlet extends Http } - private boolean isText(String contentType) { + /* + * Code borrowed heavily from Jasper's EncodingDetector + */ + private static Charset processBom(InputStream is) throws IOException { + // Java supported character sets do not use BOMs longer than 4 bytes + byte[] bom = new byte[4]; + is.mark(bom.length); + + int count = is.read(bom); + + // BOMs are at least 2 bytes + if (count < 2) { + skip(is, 0); + return null; + } + + // Look for two byte BOMs + int b0 = bom[0] & 0xFF; + int b1 = bom[1] & 0xFF; + if (b0 == 0xFE && b1 == 0xFF) { + skip(is, 2); + return StandardCharsets.UTF_16BE; + } + if (b0 == 0xFF && b1 == 0xFE) { + skip(is, 2); + return StandardCharsets.UTF_16LE; + } + + // Remaining BOMs are at least 3 bytes + if (count < 3) { + skip(is, 0); + return null; + } + + // UTF-8 is only 3-byte BOM + int b2 = bom[2] & 0xFF; + if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { + skip(is, 3); + return StandardCharsets.UTF_8; + } + + if (count < 4) { + skip(is, 0); + return null; + } + + // Look for 4-bute BOMs + int b3 = bom[3] & 0xFF; + if (b0 == 0x00 && b1 == 0x00 && b2 == 0xFE && b3 == 0xFF) { + return Charset.forName("UTF32-BE"); + } + if (b0 == 0xFF && b1 == 0xFE && b2 == 0x00 && b3 == 0x00) { + return Charset.forName("UTF32-LE"); + } + + skip(is, 0); + return null; + } + + + private static void skip(InputStream is, int skip) throws IOException { + is.reset(); + while (skip-- > 0) { + is.read(); + } + } + + + private static boolean isText(String contentType) { return contentType == null || contentType.startsWith("text") || contentType.endsWith("xml") || contentType.contains("/javascript"); } Modified: tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java URL: 
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java?rev=1802780&r1=1802779&r2=1802780&view=diff ============================================================================== --- tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java (original) +++ tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java Mon Jul 24 10:00:23 2017 @@ -624,36 +624,112 @@ public class TestDefaultServlet extends } @Test - public void testEncodingIncludeStreamOutIso88591() throws Exception { - doTestEncoding(false, "ISO-8859-1"); + public void testEncodingIncludeIbm850StreamOutIso88591() throws Exception { + doTestEncodingFileIbm850(false, "ISO-8859-1"); } @Test - public void testEncodingIncludeWriterOutIso88591() throws Exception { - doTestEncoding(true, "ISO-8859-1"); + public void testEncodingIncludeIbm850WriterOutIso88591() throws Exception { + doTestEncodingFileIbm850(true, "ISO-8859-1"); } @Test - public void testEncodingIncludeStreamOutUtf8() throws Exception { - doTestEncoding(false, "UTF-8"); + public void testEncodingIncludeIbm850StreamOutUtf8() throws Exception { + doTestEncodingFileIbm850(false, "UTF-8"); } @Test - public void testEncodingIncludeWriterOutUtf8() throws Exception { - doTestEncoding(true, "UTF-8"); + public void testEncodingIncludeIbm850WriterOutUtf8() throws Exception { + doTestEncodingFileIbm850(true, "UTF-8"); } @Test - public void testEncodingIncludeStreamOutIbm850() throws Exception { - doTestEncoding(false, "IBM850"); + public void testEncodingIncludeIbm850StreamOutIbm850() throws Exception { + doTestEncodingFileIbm850(false, "IBM850"); } @Test - public void testEncodingIncludeWriterOutIbm850() throws Exception { - doTestEncoding(false, "IBM850"); + public void testEncodingIncludeIbm850WriterOutIbm850() throws Exception { + doTestEncodingFileIbm850(false, "IBM850"); } - public void doTestEncoding(boolean useWriter, String outputEncoding) throws Exception { + @Test + public void 
testEncodingIncludeUtf8BomStreamOutIso88591() throws Exception { + doTestEncodingFileUtf8Bom(false, "ISO-8859-1"); + } + + @Test + public void testEncodingIncludeUtf8BomWriterOutIso88591() throws Exception { + doTestEncodingFileUtf8Bom(true, "ISO-8859-1"); + } + + @Test + public void testEncodingIncludeUtf8BomStreamOutUtf8() throws Exception { + doTestEncodingFileUtf8Bom(false, "UTF-8"); + } + + @Test + public void testEncodingIncludeUtf8BomWriterOutUtf8() throws Exception { + doTestEncodingFileUtf8Bom(true, "UTF-8"); + } + + @Test + public void testEncodingIncludeUtf8BomStreamOutIbm850() throws Exception { + doTestEncodingFileUtf8Bom(false, "IBM850"); + } + + @Test + public void testEncodingIncludeUtf8BomWriterOutIbm850() throws Exception { + doTestEncodingFileUtf8Bom(false, "IBM850"); + } + + @Test + public void testEncodingIncludeUtf8BomOverrideStreamOutIso88591() throws Exception { + doTestEncodingFileUtf8BomOverride(false, "ISO-8859-1"); + } + + @Test + public void testEncodingIncludeUtf8BomOverrideWriterOutIso88591() throws Exception { + doTestEncodingFileUtf8BomOverride(true, "ISO-8859-1"); + } + + @Test + public void testEncodingIncludeUtf8BomOverrideStreamOutUtf8() throws Exception { + doTestEncodingFileUtf8BomOverride(false, "UTF-8"); + } + + @Test + public void testEncodingIncludeUtf8BomOverrideWriterOutUtf8() throws Exception { + doTestEncodingFileUtf8BomOverride(true, "UTF-8"); + } + + @Test + public void testEncodingIncludeUtf8BomOverrideStreamOutIbm850() throws Exception { + doTestEncodingFileUtf8BomOverride(false, "IBM850"); + } + + @Test + public void testEncodingIncludeUtf8BomOverrideWriterOutIbm850() throws Exception { + doTestEncodingFileUtf8BomOverride(false, "IBM850"); + } + + private void doTestEncodingFileIbm850(boolean useWriter, String outputEncoding) + throws Exception { + doTestEncoding("/bug49nnn/bug49464-ibm850.txt", "IBM850", useWriter, outputEncoding); + } + + private void doTestEncodingFileUtf8Bom(boolean useWriter, String 
outputEncoding) + throws Exception { + doTestEncoding("/bug49nnn/bug49464-utf8-bom.txt", "UTF-8", useWriter, outputEncoding); + } + + private void doTestEncodingFileUtf8BomOverride(boolean useWriter, String outputEncoding) + throws Exception { + doTestEncoding("/bug49nnn/bug49464-utf8-bom.txt", "IBM850", useWriter, outputEncoding); + } + + private void doTestEncoding(String includePath, String inputEncoding, boolean useWriter, + String outputEncoding) throws Exception { Tomcat tomcat = getTomcatInstance(); File appDir = new File("test/webapp"); @@ -661,11 +737,11 @@ public class TestDefaultServlet extends Context ctxt = tomcat.addContext("", appDir.getAbsolutePath()); Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default", DefaultServlet.class.getName()); - defaultServlet.addInitParameter("fileEncoding", "IBM850"); + defaultServlet.addInitParameter("fileEncoding", inputEncoding); ctxt.addServletMappingDecoded("/", "default"); Tomcat.addServlet(ctxt, "encoding", - new EncodingServlet(outputEncoding, "/bug49nnn/bug49464-ibm850.txt", useWriter)); + new EncodingServlet(outputEncoding, includePath, useWriter)); ctxt.addServletMappingDecoded("/test", "encoding"); tomcat.start(); Added: tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt URL: http://svn.apache.org/viewvc/tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt?rev=1802780&view=auto ============================================================================== Binary file - no diff available. 
Propchange: tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: tomcat/trunk/webapps/docs/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1802780&r1=1802779&r2=1802780&view=diff ============================================================================== --- tomcat/trunk/webapps/docs/changelog.xml (original) +++ tomcat/trunk/webapps/docs/changelog.xml Mon Jul 24 10:00:23 2017 @@ -62,6 +62,11 @@ <bug>61253</bug>: Add warn message when Digester.updateAttributes throws an exception instead of ignoring it. (csutherl) </fix> + <fix> + Correct a further regression in the fix for <bug>49464</bug> that could + cause a byte order mark character to appear at the start of content + included by the <code>DefaultServlet</code>. (markt) + </fix> </changelog> </subsection> <subsection name="Web applications"> Modified: tomcat/trunk/webapps/docs/default-servlet.xml URL: http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/default-servlet.xml?rev=1802780&r1=1802779&r2=1802780&view=diff ============================================================================== --- tomcat/trunk/webapps/docs/default-servlet.xml (original) +++ tomcat/trunk/webapps/docs/default-servlet.xml Mon Jul 24 10:00:23 2017 @@ -178,6 +178,10 @@ Tomcat.</p> File encoding to be used when reading static resources. [platform default] </property> + <property name="useBomIfPresent"> + If a static file contains a byte order mark (BOM), should this be used + to determine the file encoding in preference to fileEncoding. [true] + </property> <property name="sendfileSize"> If the connector used supports sendfile, this represents the minimal file size in KB for which sendfile will be used. 
Use a negative value --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org