Author: markt
Date: Mon Jul 24 10:00:23 2017
New Revision: 1802780
URL: http://svn.apache.org/viewvc?rev=1802780&view=rev
Log:
Correct a further regression in the fix for bug 49464 that could cause a byte
order mark character to appear at the start of content included by the
DefaultServlet.
Added:
tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt (with props)
Modified:
tomcat/trunk/conf/web.xml
tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java
tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java
tomcat/trunk/webapps/docs/changelog.xml
tomcat/trunk/webapps/docs/default-servlet.xml
Modified: tomcat/trunk/conf/web.xml
URL:
http://svn.apache.org/viewvc/tomcat/trunk/conf/web.xml?rev=1802780&r1=1802779&r2=1802780&view=diff
==============================================================================
--- tomcat/trunk/conf/web.xml (original)
+++ tomcat/trunk/conf/web.xml Mon Jul 24 10:00:23 2017
@@ -48,6 +48,11 @@
<!-- fileEncoding Encoding to be used to read static resources -->
<!-- [platform default] -->
<!-- -->
+ <!-- useBomIfPresent If a static file contains a byte order mark -->
+ <!-- (BOM), should this be used to determine the -->
+ <!-- file encoding in preference to fileEncoding. -->
+ <!-- [true] -->
+ <!-- -->
<!-- input Input buffer size (in bytes) when reading -->
<!-- resources to be served. [2048] -->
<!-- -->
Modified: tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java?rev=1802780&r1=1802779&r2=1802780&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java
(original)
+++ tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java Mon Jul
24 10:00:23 2017
@@ -236,6 +236,11 @@ public class DefaultServlet extends Http
private Charset fileEncodingCharset = null;
/**
+ * If a file has a BOM, should that be used in preference to fileEncoding?
+ */
+ private boolean useBomIfPresent = true;
+
+ /**
* Minimum size for sendfile usage in bytes.
*/
protected int sendfileSize = 48 * 1024;
@@ -302,6 +307,10 @@ public class DefaultServlet extends Http
}
}
+ if (getServletConfig().getInitParameter("useBomIfPresent") != null)
+ useBomIfPresent = Boolean.parseBoolean(
+ getServletConfig().getInitParameter("useBomIfPresent"));
+
globalXsltFile = getServletConfig().getInitParameter("globalXsltFile");
contextXsltFile =
getServletConfig().getInitParameter("contextXsltFile");
localXsltFile = getServletConfig().getInitParameter("localXsltFile");
@@ -726,11 +735,11 @@ public class DefaultServlet extends Http
/**
* Serve the specified resource, optionally including the data content.
*
- * @param request The servlet request we are processing
- * @param response The servlet response we are creating
- * @param content Should the content be included?
- * @param encoding The encoding to use if it is necessary to access the
- * source as characters rather than as bytes
+ * @param request The servlet request we are processing
+ * @param response The servlet response we are creating
+ * @param content Should the content be included?
+ * @param inputEncoding The encoding to use if it is necessary to access
the
+ * source as characters rather than as bytes
*
* @exception IOException if an input/output error occurs
* @exception ServletException if a servlet-specified error occurs
@@ -738,7 +747,7 @@ public class DefaultServlet extends Http
protected void serveResource(HttpServletRequest request,
HttpServletResponse response,
boolean content,
- String encoding)
+ String inputEncoding)
throws IOException, ServletException {
boolean serveContent = content;
@@ -1011,35 +1020,46 @@ public class DefaultServlet extends Http
// Output via a writer so can't use sendfile or write
// content directly.
if (resource.isDirectory()) {
- renderResult = render(getPathPrefix(request),
resource, encoding);
+ renderResult = render(getPathPrefix(request),
resource, inputEncoding);
} else {
renderResult = resource.getInputStream();
+ if (included) {
+ // Need to make sure any BOM is removed
+ if (!renderResult.markSupported()) {
+ renderResult = new
BufferedInputStream(renderResult);
+ }
+ Charset bomCharset = processBom(renderResult);
+ if (bomCharset != null && useBomIfPresent) {
+ inputEncoding = bomCharset.name();
+ }
+ }
}
- copy(renderResult, writer, encoding);
+ copy(renderResult, writer, inputEncoding);
} else {
// Output is via an OutputStream
if (resource.isDirectory()) {
- renderResult = render(getPathPrefix(request),
resource, encoding);
+ renderResult = render(getPathPrefix(request),
resource, inputEncoding);
} else {
// Output is content of resource
// Check to see if conversion is required
- if (conversionRequired) {
- // A conversion is required from fileEncoding to
- // response encoding
- byte[] resourceBody = resource.getContent();
- InputStream source;
- if (resourceBody == null) {
- source = resource.getInputStream();
- } else {
- source = new
ByteArrayInputStream(resourceBody);
+ if (conversionRequired || included) {
+ // When including a file, we need to check for a
BOM
+ // to determine if a conversion is required, so we
+ // might as well always convert
+ InputStream source = resource.getInputStream();
+ if (!source.markSupported()) {
+ source = new BufferedInputStream(source);
+ }
+ Charset bomCharset = processBom(source);
+ if (bomCharset != null && useBomIfPresent) {
+ inputEncoding = bomCharset.name();
}
OutputStreamWriter osw = new
OutputStreamWriter(ostream, charset);
PrintWriter pw = new PrintWriter(osw);
- copy(source, pw, fileEncoding);
+ copy(source, pw, inputEncoding);
pw.flush();
} else {
- if (!checkSendfile(request, response, resource,
- contentLength, null)) {
+ if (!checkSendfile(request, response, resource,
contentLength, null)) {
// sendfile not possible so check if resource
// content is available directly
byte[] resourceBody = resource.getContent();
@@ -1124,7 +1144,75 @@ public class DefaultServlet extends Http
}
- private boolean isText(String contentType) {
+ /**
+  * Detects a byte order mark (BOM) at the start of the given stream and
+  * positions the stream immediately after it. Code borrowed heavily from
+  * Jasper's EncodingDetector.
+  * <p>
+  * The stream must support mark/reset. If no BOM is found the stream is
+  * reset so that no bytes are consumed.
+  *
+  * @param is The stream to examine
+  *
+  * @return The character set indicated by the BOM or <code>null</code> if
+  *         the stream does not start with a recognised BOM
+  *
+  * @exception IOException if an input/output error occurs
+  */
+ private static Charset processBom(InputStream is) throws IOException {
+     // Java supported character sets do not use BOMs longer than 4 bytes
+     byte[] bom = new byte[4];
+     is.mark(bom.length);
+
+     // A single read is allowed to return fewer bytes than requested, so
+     // keep reading until the buffer is full or the stream ends
+     int count = 0;
+     while (count < bom.length) {
+         int read = is.read(bom, count, bom.length - count);
+         if (read < 0) {
+             break;
+         }
+         count += read;
+     }
+
+     // Unread positions remain zero; every test below is guarded by count
+     int b0 = bom[0] & 0xFF;
+     int b1 = bom[1] & 0xFF;
+     int b2 = bom[2] & 0xFF;
+     int b3 = bom[3] & 0xFF;
+
+     // Look for 4-byte BOMs first since the UTF-32LE BOM (FF FE 00 00)
+     // starts with the same two bytes as the UTF-16LE BOM (FF FE)
+     if (count == 4) {
+         if (b0 == 0x00 && b1 == 0x00 && b2 == 0xFE && b3 == 0xFF) {
+             skip(is, 4);
+             return Charset.forName("UTF-32BE");
+         }
+         if (b0 == 0xFF && b1 == 0xFE && b2 == 0x00 && b3 == 0x00) {
+             skip(is, 4);
+             return Charset.forName("UTF-32LE");
+         }
+     }
+
+     // UTF-8 is the only 3-byte BOM
+     if (count >= 3 && b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
+         skip(is, 3);
+         return StandardCharsets.UTF_8;
+     }
+
+     // Look for 2-byte BOMs
+     if (count >= 2) {
+         if (b0 == 0xFE && b1 == 0xFF) {
+             skip(is, 2);
+             return StandardCharsets.UTF_16BE;
+         }
+         if (b0 == 0xFF && b1 == 0xFE) {
+             skip(is, 2);
+             return StandardCharsets.UTF_16LE;
+         }
+     }
+
+     // No BOM found: rewind so the caller sees the stream from the start
+     skip(is, 0);
+     return null;
+ }
+
+
+ /**
+  * Rewinds the stream to its most recent mark and then reads and discards
+  * the requested number of bytes.
+  *
+  * @param is   The stream to reposition; mark must already have been called
+  * @param skip The number of bytes to discard after the reset
+  *
+  * @exception IOException if the reset or a read fails
+  */
+ private static void skip(InputStream is, int skip) throws IOException {
+     is.reset();
+     for (int remaining = skip; remaining > 0; remaining--) {
+         is.read();
+     }
+ }
+
+
+ private static boolean isText(String contentType) {
return contentType == null || contentType.startsWith("text") ||
contentType.endsWith("xml") ||
contentType.contains("/javascript");
}
Modified: tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java?rev=1802780&r1=1802779&r2=1802780&view=diff
==============================================================================
--- tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java
(original)
+++ tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java Mon
Jul 24 10:00:23 2017
@@ -624,36 +624,112 @@ public class TestDefaultServlet extends
}
@Test
- public void testEncodingIncludeStreamOutIso88591() throws Exception {
- doTestEncoding(false, "ISO-8859-1");
+ public void testEncodingIncludeIbm850StreamOutIso88591() throws Exception {
+ doTestEncodingFileIbm850(false, "ISO-8859-1");
}
@Test
- public void testEncodingIncludeWriterOutIso88591() throws Exception {
- doTestEncoding(true, "ISO-8859-1");
+ public void testEncodingIncludeIbm850WriterOutIso88591() throws Exception {
+ doTestEncodingFileIbm850(true, "ISO-8859-1");
}
@Test
- public void testEncodingIncludeStreamOutUtf8() throws Exception {
- doTestEncoding(false, "UTF-8");
+ public void testEncodingIncludeIbm850StreamOutUtf8() throws Exception {
+ doTestEncodingFileIbm850(false, "UTF-8");
}
@Test
- public void testEncodingIncludeWriterOutUtf8() throws Exception {
- doTestEncoding(true, "UTF-8");
+ public void testEncodingIncludeIbm850WriterOutUtf8() throws Exception {
+ doTestEncodingFileIbm850(true, "UTF-8");
}
@Test
- public void testEncodingIncludeStreamOutIbm850() throws Exception {
- doTestEncoding(false, "IBM850");
+ public void testEncodingIncludeIbm850StreamOutIbm850() throws Exception {
+ doTestEncodingFileIbm850(false, "IBM850");
}
@Test
- public void testEncodingIncludeWriterOutIbm850() throws Exception {
- doTestEncoding(false, "IBM850");
+ public void testEncodingIncludeIbm850WriterOutIbm850() throws Exception {
+ doTestEncodingFileIbm850(false, "IBM850");
}
- public void doTestEncoding(boolean useWriter, String outputEncoding)
throws Exception {
+ /** UTF-8 BOM file, fileEncoding UTF-8, stream output, ISO-8859-1 output encoding. */
+ @Test
+ public void testEncodingIncludeUtf8BomStreamOutIso88591() throws Exception {
+     final boolean useWriter = false;
+     final String outputEncoding = "ISO-8859-1";
+     doTestEncodingFileUtf8Bom(useWriter, outputEncoding);
+ }
+
+ /** UTF-8 BOM file, fileEncoding UTF-8, writer output, ISO-8859-1 output encoding. */
+ @Test
+ public void testEncodingIncludeUtf8BomWriterOutIso88591() throws Exception {
+     final boolean useWriter = true;
+     final String outputEncoding = "ISO-8859-1";
+     doTestEncodingFileUtf8Bom(useWriter, outputEncoding);
+ }
+
+ /** UTF-8 BOM file, fileEncoding UTF-8, stream output, UTF-8 output encoding. */
+ @Test
+ public void testEncodingIncludeUtf8BomStreamOutUtf8() throws Exception {
+     final boolean useWriter = false;
+     final String outputEncoding = "UTF-8";
+     doTestEncodingFileUtf8Bom(useWriter, outputEncoding);
+ }
+
+ /** UTF-8 BOM file, fileEncoding UTF-8, writer output, UTF-8 output encoding. */
+ @Test
+ public void testEncodingIncludeUtf8BomWriterOutUtf8() throws Exception {
+     final boolean useWriter = true;
+     final String outputEncoding = "UTF-8";
+     doTestEncodingFileUtf8Bom(useWriter, outputEncoding);
+ }
+
+ /** UTF-8 BOM file, fileEncoding UTF-8, stream output, IBM850 output encoding. */
+ @Test
+ public void testEncodingIncludeUtf8BomStreamOutIbm850() throws Exception {
+     final boolean useWriter = false;
+     final String outputEncoding = "IBM850";
+     doTestEncodingFileUtf8Bom(useWriter, outputEncoding);
+ }
+
+ /**
+  * UTF-8 BOM file, fileEncoding UTF-8, writer output, IBM850 output encoding.
+  * Passes useWriter=true so this covers the writer path rather than
+  * duplicating the stream variant of the same test.
+  */
+ @Test
+ public void testEncodingIncludeUtf8BomWriterOutIbm850() throws Exception {
+     doTestEncodingFileUtf8Bom(true, "IBM850");
+ }
+
+ /** UTF-8 BOM file, fileEncoding IBM850, stream output, ISO-8859-1 output encoding. */
+ @Test
+ public void testEncodingIncludeUtf8BomOverrideStreamOutIso88591() throws Exception {
+     final boolean useWriter = false;
+     final String outputEncoding = "ISO-8859-1";
+     doTestEncodingFileUtf8BomOverride(useWriter, outputEncoding);
+ }
+
+ /** UTF-8 BOM file, fileEncoding IBM850, writer output, ISO-8859-1 output encoding. */
+ @Test
+ public void testEncodingIncludeUtf8BomOverrideWriterOutIso88591() throws Exception {
+     final boolean useWriter = true;
+     final String outputEncoding = "ISO-8859-1";
+     doTestEncodingFileUtf8BomOverride(useWriter, outputEncoding);
+ }
+
+ /** UTF-8 BOM file, fileEncoding IBM850, stream output, UTF-8 output encoding. */
+ @Test
+ public void testEncodingIncludeUtf8BomOverrideStreamOutUtf8() throws Exception {
+     final boolean useWriter = false;
+     final String outputEncoding = "UTF-8";
+     doTestEncodingFileUtf8BomOverride(useWriter, outputEncoding);
+ }
+
+ /** UTF-8 BOM file, fileEncoding IBM850, writer output, UTF-8 output encoding. */
+ @Test
+ public void testEncodingIncludeUtf8BomOverrideWriterOutUtf8() throws Exception {
+     final boolean useWriter = true;
+     final String outputEncoding = "UTF-8";
+     doTestEncodingFileUtf8BomOverride(useWriter, outputEncoding);
+ }
+
+ /** UTF-8 BOM file, fileEncoding IBM850, stream output, IBM850 output encoding. */
+ @Test
+ public void testEncodingIncludeUtf8BomOverrideStreamOutIbm850() throws Exception {
+     final boolean useWriter = false;
+     final String outputEncoding = "IBM850";
+     doTestEncodingFileUtf8BomOverride(useWriter, outputEncoding);
+ }
+
+ /**
+  * UTF-8 BOM file, fileEncoding IBM850, writer output, IBM850 output encoding.
+  * Passes useWriter=true so this covers the writer path rather than
+  * duplicating the stream variant of the same test.
+  */
+ @Test
+ public void testEncodingIncludeUtf8BomOverrideWriterOutIbm850() throws Exception {
+     doTestEncodingFileUtf8BomOverride(true, "IBM850");
+ }
+
+ private void doTestEncodingFileIbm850(boolean useWriter, String
outputEncoding)
+ throws Exception {
+ doTestEncoding("/bug49nnn/bug49464-ibm850.txt", "IBM850", useWriter,
outputEncoding);
+ }
+
+ private void doTestEncodingFileUtf8Bom(boolean useWriter, String
outputEncoding)
+ throws Exception {
+ doTestEncoding("/bug49nnn/bug49464-utf8-bom.txt", "UTF-8", useWriter,
outputEncoding);
+ }
+
+ private void doTestEncodingFileUtf8BomOverride(boolean useWriter, String
outputEncoding)
+ throws Exception {
+ doTestEncoding("/bug49nnn/bug49464-utf8-bom.txt", "IBM850", useWriter,
outputEncoding);
+ }
+
+ private void doTestEncoding(String includePath, String inputEncoding,
boolean useWriter,
+ String outputEncoding) throws Exception {
Tomcat tomcat = getTomcatInstance();
File appDir = new File("test/webapp");
@@ -661,11 +737,11 @@ public class TestDefaultServlet extends
Context ctxt = tomcat.addContext("", appDir.getAbsolutePath());
Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default",
DefaultServlet.class.getName());
- defaultServlet.addInitParameter("fileEncoding", "IBM850");
+ defaultServlet.addInitParameter("fileEncoding", inputEncoding);
ctxt.addServletMappingDecoded("/", "default");
Tomcat.addServlet(ctxt, "encoding",
- new EncodingServlet(outputEncoding,
"/bug49nnn/bug49464-ibm850.txt", useWriter));
+ new EncodingServlet(outputEncoding, includePath, useWriter));
ctxt.addServletMappingDecoded("/test", "encoding");
tomcat.start();
Added: tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt
URL:
http://svn.apache.org/viewvc/tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt?rev=1802780&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: tomcat/trunk/webapps/docs/changelog.xml
URL:
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1802780&r1=1802779&r2=1802780&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/trunk/webapps/docs/changelog.xml Mon Jul 24 10:00:23 2017
@@ -62,6 +62,11 @@
<bug>61253</bug>: Add warn message when Digester.updateAttributes
throws an exception instead of ignoring it. (csutherl)
</fix>
+ <fix>
+ Correct a further regression in the fix for <bug>49464</bug> that could
+ cause a byte order mark character to appear at the start of content
+ included by the <code>DefaultServlet</code>. (markt)
+ </fix>
</changelog>
</subsection>
<subsection name="Web applications">
Modified: tomcat/trunk/webapps/docs/default-servlet.xml
URL:
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/default-servlet.xml?rev=1802780&r1=1802779&r2=1802780&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/default-servlet.xml (original)
+++ tomcat/trunk/webapps/docs/default-servlet.xml Mon Jul 24 10:00:23 2017
@@ -178,6 +178,10 @@ Tomcat.</p>
File encoding to be used when reading static resources.
[platform default]
</property>
+ <property name="useBomIfPresent">
+ If a static file contains a byte order mark (BOM), should this be used
+ to determine the file encoding in preference to fileEncoding. [true]
+ </property>
<property name="sendfileSize">
If the connector used supports sendfile, this represents the minimal
file size in KB for which sendfile will be used. Use a negative value
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]