Author: markt
Date: Mon Jul 24 10:00:23 2017
New Revision: 1802780

URL: http://svn.apache.org/viewvc?rev=1802780&view=rev
Log:
Correct a further regression in the fix for bug 49464 that could cause a byte 
order mark character to appear at the start of content included by the 
DefaultServlet.

Added:
    tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt   (with props)
Modified:
    tomcat/trunk/conf/web.xml
    tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java
    tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java
    tomcat/trunk/webapps/docs/changelog.xml
    tomcat/trunk/webapps/docs/default-servlet.xml

Modified: tomcat/trunk/conf/web.xml
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/conf/web.xml?rev=1802780&r1=1802779&r2=1802780&view=diff
==============================================================================
--- tomcat/trunk/conf/web.xml (original)
+++ tomcat/trunk/conf/web.xml Mon Jul 24 10:00:23 2017
@@ -48,6 +48,11 @@
   <!--   fileEncoding        Encoding to be used to read static resources   -->
   <!--                       [platform default]                             -->
   <!--                                                                      -->
+  <!--   useBomIfPresent     If a static file contains a byte order mark    -->
+  <!--                       (BOM), should this be used to determine the    -->
+  <!--                       file encoding in preference to fileEncoding.   -->
+  <!--                       [true]                                         -->
+  <!--                                                                      -->
   <!--   input               Input buffer size (in bytes) when reading      -->
   <!--                       resources to be served.  [2048]                -->
   <!--                                                                      -->

Modified: tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java?rev=1802780&r1=1802779&r2=1802780&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java 
(original)
+++ tomcat/trunk/java/org/apache/catalina/servlets/DefaultServlet.java Mon Jul 
24 10:00:23 2017
@@ -236,6 +236,11 @@ public class DefaultServlet extends Http
     private Charset fileEncodingCharset = null;
 
     /**
+     * If a file has a BOM, should that be used in preference to fileEncoding?
+     */
+    private boolean useBomIfPresent = true;
+
+    /**
      * Minimum size for sendfile usage in bytes.
      */
     protected int sendfileSize = 48 * 1024;
@@ -302,6 +307,10 @@ public class DefaultServlet extends Http
             }
         }
 
+        if (getServletConfig().getInitParameter("useBomIfPresent") != null)
+            useBomIfPresent = Boolean.parseBoolean(
+                    getServletConfig().getInitParameter("useBomIfPresent"));
+
         globalXsltFile = getServletConfig().getInitParameter("globalXsltFile");
         contextXsltFile = 
getServletConfig().getInitParameter("contextXsltFile");
         localXsltFile = getServletConfig().getInitParameter("localXsltFile");
@@ -726,11 +735,11 @@ public class DefaultServlet extends Http
     /**
      * Serve the specified resource, optionally including the data content.
      *
-     * @param request  The servlet request we are processing
-     * @param response The servlet response we are creating
-     * @param content  Should the content be included?
-     * @param encoding The encoding to use if it is necessary to access the
-     *                 source as characters rather than as bytes
+     * @param request       The servlet request we are processing
+     * @param response      The servlet response we are creating
+     * @param content       Should the content be included?
+     * @param inputEncoding The encoding to use if it is necessary to access 
the
+     *                      source as characters rather than as bytes
      *
      * @exception IOException if an input/output error occurs
      * @exception ServletException if a servlet-specified error occurs
@@ -738,7 +747,7 @@ public class DefaultServlet extends Http
     protected void serveResource(HttpServletRequest request,
                                  HttpServletResponse response,
                                  boolean content,
-                                 String encoding)
+                                 String inputEncoding)
         throws IOException, ServletException {
 
         boolean serveContent = content;
@@ -1011,35 +1020,46 @@ public class DefaultServlet extends Http
                     // Output via a writer so can't use sendfile or write
                     // content directly.
                     if (resource.isDirectory()) {
-                        renderResult = render(getPathPrefix(request), 
resource, encoding);
+                        renderResult = render(getPathPrefix(request), 
resource, inputEncoding);
                     } else {
                         renderResult = resource.getInputStream();
+                        if (included) {
+                            // Need to make sure any BOM is removed
+                            if (!renderResult.markSupported()) {
+                                renderResult = new 
BufferedInputStream(renderResult);
+                            }
+                            Charset bomCharset = processBom(renderResult);
+                            if (bomCharset != null && useBomIfPresent) {
+                                inputEncoding = bomCharset.name();
+                            }
+                        }
                     }
-                    copy(renderResult, writer, encoding);
+                    copy(renderResult, writer, inputEncoding);
                 } else {
                     // Output is via an OutputStream
                     if (resource.isDirectory()) {
-                        renderResult = render(getPathPrefix(request), 
resource, encoding);
+                        renderResult = render(getPathPrefix(request), 
resource, inputEncoding);
                     } else {
                         // Output is content of resource
                         // Check to see if conversion is required
-                        if (conversionRequired) {
-                            // A conversion is required from fileEncoding to
-                            // response encoding
-                            byte[] resourceBody = resource.getContent();
-                            InputStream source;
-                            if (resourceBody == null) {
-                                source = resource.getInputStream();
-                            } else {
-                                source = new 
ByteArrayInputStream(resourceBody);
+                        if (conversionRequired || included) {
+                            // When including a file, we need to check for a 
BOM
+                            // to determine if a conversion is required, so we
+                            // might as well always convert
+                            InputStream source = resource.getInputStream();
+                            if (!source.markSupported()) {
+                                source = new BufferedInputStream(source);
+                            }
+                            Charset bomCharset = processBom(source);
+                            if (bomCharset != null && useBomIfPresent) {
+                                inputEncoding = bomCharset.name();
                             }
                             OutputStreamWriter osw = new 
OutputStreamWriter(ostream, charset);
                             PrintWriter pw = new PrintWriter(osw);
-                            copy(source, pw, fileEncoding);
+                            copy(source, pw, inputEncoding);
                             pw.flush();
                         } else {
-                            if (!checkSendfile(request, response, resource,
-                                    contentLength, null)) {
+                            if (!checkSendfile(request, response, resource, 
contentLength, null)) {
                                 // sendfile not possible so check if resource
                                 // content is available directly
                                 byte[] resourceBody = resource.getContent();
@@ -1124,7 +1144,75 @@ public class DefaultServlet extends Http
     }
 
 
-    private boolean isText(String contentType) {
+    /**
+     * Detects and consumes a byte order mark (BOM) at the start of a stream.
+     * Code borrowed heavily from Jasper's EncodingDetector.
+     * <p>
+     * On return the stream is positioned immediately after the BOM if one was
+     * recognised, otherwise at the position it had when this method was
+     * called.
+     *
+     * @param is The stream to examine. It must support mark/reset since up to
+     *           four bytes are read ahead and then pushed back via reset
+     *
+     * @return The character set indicated by the BOM or {@code null} if the
+     *         stream does not start with a recognised BOM
+     *
+     * @throws IOException if an I/O error occurs reading from the stream
+     */
+    private static Charset processBom(InputStream is) throws IOException {
+        // Java supported character sets do not use BOMs longer than 4 bytes
+        byte[] bom = new byte[4];
+        is.mark(bom.length);
+
+        int count = is.read(bom);
+
+        // BOMs are at least 2 bytes
+        if (count < 2) {
+            skip(is, 0);
+            return null;
+        }
+
+        // Look for two byte BOMs
+        int b0 = bom[0] & 0xFF;
+        int b1 = bom[1] & 0xFF;
+        if (b0 == 0xFE && b1 == 0xFF) {
+            skip(is, 2);
+            return StandardCharsets.UTF_16BE;
+        }
+        if (b0 == 0xFF && b1 == 0xFE) {
+            // The UTF-32LE BOM (FF FE 00 00) starts with the UTF-16LE BOM so
+            // the longer BOM has to be tested first. This assumes a UTF-16LE
+            // file with a BOM never has U+0000 as its first character.
+            if (count == 4 && bom[2] == 0x00 && bom[3] == 0x00) {
+                // All four bytes read were the BOM. No reset required.
+                return Charset.forName("UTF-32LE");
+            }
+            skip(is, 2);
+            return StandardCharsets.UTF_16LE;
+        }
+
+        // Remaining BOMs are at least 3 bytes
+        if (count < 3) {
+            skip(is, 0);
+            return null;
+        }
+
+        // UTF-8 is only 3-byte BOM
+        int b2 = bom[2] & 0xFF;
+        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
+            skip(is, 3);
+            return StandardCharsets.UTF_8;
+        }
+
+        if (count < 4) {
+            skip(is, 0);
+            return null;
+        }
+
+        // Look for the remaining 4-byte BOM (UTF-32LE was handled above)
+        int b3 = bom[3] & 0xFF;
+        if (b0 == 0x00 && b1 == 0x00 && b2 == 0xFE && b3 == 0xFF) {
+            // All four bytes read were the BOM. No reset required.
+            return Charset.forName("UTF-32BE");
+        }
+
+        // No BOM found. Restore the stream to its original position.
+        skip(is, 0);
+        return null;
+    }
+
+
+    private static void skip(InputStream is, int skip) throws IOException {
+        is.reset();
+        while (skip-- > 0) {
+            is.read();
+        }
+    }
+
+
+    private static boolean isText(String contentType) {
         return  contentType == null || contentType.startsWith("text") ||
                 contentType.endsWith("xml") || 
contentType.contains("/javascript");
     }

Modified: tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java?rev=1802780&r1=1802779&r2=1802780&view=diff
==============================================================================
--- tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java 
(original)
+++ tomcat/trunk/test/org/apache/catalina/servlets/TestDefaultServlet.java Mon 
Jul 24 10:00:23 2017
@@ -624,36 +624,112 @@ public class TestDefaultServlet extends
     }
 
     @Test
-    public void testEncodingIncludeStreamOutIso88591() throws Exception {
-        doTestEncoding(false, "ISO-8859-1");
+    public void testEncodingIncludeIbm850StreamOutIso88591() throws Exception {
+        doTestEncodingFileIbm850(false, "ISO-8859-1");
     }
 
     @Test
-    public void testEncodingIncludeWriterOutIso88591() throws Exception {
-        doTestEncoding(true, "ISO-8859-1");
+    public void testEncodingIncludeIbm850WriterOutIso88591() throws Exception {
+        doTestEncodingFileIbm850(true, "ISO-8859-1");
     }
 
     @Test
-    public void testEncodingIncludeStreamOutUtf8() throws Exception {
-        doTestEncoding(false, "UTF-8");
+    public void testEncodingIncludeIbm850StreamOutUtf8() throws Exception {
+        doTestEncodingFileIbm850(false, "UTF-8");
     }
 
     @Test
-    public void testEncodingIncludeWriterOutUtf8() throws Exception {
-        doTestEncoding(true, "UTF-8");
+    public void testEncodingIncludeIbm850WriterOutUtf8() throws Exception {
+        doTestEncodingFileIbm850(true, "UTF-8");
     }
 
     @Test
-    public void testEncodingIncludeStreamOutIbm850() throws Exception {
-        doTestEncoding(false, "IBM850");
+    public void testEncodingIncludeIbm850StreamOutIbm850() throws Exception {
+        doTestEncodingFileIbm850(false, "IBM850");
     }
 
     @Test
-    public void testEncodingIncludeWriterOutIbm850() throws Exception {
-        doTestEncoding(false, "IBM850");
+    public void testEncodingIncludeIbm850WriterOutIbm850() throws Exception {
+        // Writer variant: useWriter must be true, otherwise this simply
+        // repeats the stream variant and the writer path is never exercised
+        doTestEncodingFileIbm850(true, "IBM850");
     }
 
-    public void doTestEncoding(boolean useWriter, String outputEncoding) 
throws Exception {
+    @Test
+    public void testEncodingIncludeUtf8BomStreamOutIso88591() throws Exception 
{
+        doTestEncodingFileUtf8Bom(false, "ISO-8859-1");
+    }
+
+    @Test
+    public void testEncodingIncludeUtf8BomWriterOutIso88591() throws Exception 
{
+        doTestEncodingFileUtf8Bom(true, "ISO-8859-1");
+    }
+
+    @Test
+    public void testEncodingIncludeUtf8BomStreamOutUtf8() throws Exception {
+        doTestEncodingFileUtf8Bom(false, "UTF-8");
+    }
+
+    @Test
+    public void testEncodingIncludeUtf8BomWriterOutUtf8() throws Exception {
+        doTestEncodingFileUtf8Bom(true, "UTF-8");
+    }
+
+    @Test
+    public void testEncodingIncludeUtf8BomStreamOutIbm850() throws Exception {
+        doTestEncodingFileUtf8Bom(false, "IBM850");
+    }
+
+    @Test
+    public void testEncodingIncludeUtf8BomWriterOutIbm850() throws Exception {
+        // Writer variant: useWriter must be true, otherwise this simply
+        // repeats the stream variant and the writer path is never exercised
+        doTestEncodingFileUtf8Bom(true, "IBM850");
+    }
+
+    @Test
+    public void testEncodingIncludeUtf8BomOverrideStreamOutIso88591() throws 
Exception {
+        doTestEncodingFileUtf8BomOverride(false, "ISO-8859-1");
+    }
+
+    @Test
+    public void testEncodingIncludeUtf8BomOverrideWriterOutIso88591() throws 
Exception {
+        doTestEncodingFileUtf8BomOverride(true, "ISO-8859-1");
+    }
+
+    @Test
+    public void testEncodingIncludeUtf8BomOverrideStreamOutUtf8() throws 
Exception {
+        doTestEncodingFileUtf8BomOverride(false, "UTF-8");
+    }
+
+    @Test
+    public void testEncodingIncludeUtf8BomOverrideWriterOutUtf8() throws 
Exception {
+        doTestEncodingFileUtf8BomOverride(true, "UTF-8");
+    }
+
+    @Test
+    public void testEncodingIncludeUtf8BomOverrideStreamOutIbm850() throws 
Exception {
+        doTestEncodingFileUtf8BomOverride(false, "IBM850");
+    }
+
+    @Test
+    public void testEncodingIncludeUtf8BomOverrideWriterOutIbm850() throws Exception {
+        // Writer variant: useWriter must be true, otherwise this simply
+        // repeats the stream variant and the writer path is never exercised
+        doTestEncodingFileUtf8BomOverride(true, "IBM850");
+    }
+
+    private void doTestEncodingFileIbm850(boolean useWriter, String 
outputEncoding)
+            throws Exception {
+        doTestEncoding("/bug49nnn/bug49464-ibm850.txt", "IBM850", useWriter, 
outputEncoding);
+    }
+
+    private void doTestEncodingFileUtf8Bom(boolean useWriter, String 
outputEncoding)
+            throws Exception {
+        doTestEncoding("/bug49nnn/bug49464-utf8-bom.txt", "UTF-8", useWriter, 
outputEncoding);
+    }
+
+    private void doTestEncodingFileUtf8BomOverride(boolean useWriter, String 
outputEncoding)
+            throws Exception {
+        doTestEncoding("/bug49nnn/bug49464-utf8-bom.txt", "IBM850", useWriter, 
outputEncoding);
+    }
+
+    private void doTestEncoding(String includePath, String inputEncoding, 
boolean useWriter,
+            String outputEncoding) throws Exception {
         Tomcat tomcat = getTomcatInstance();
 
         File appDir = new File("test/webapp");
@@ -661,11 +737,11 @@ public class TestDefaultServlet extends
         Context ctxt = tomcat.addContext("", appDir.getAbsolutePath());
 
         Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default", 
DefaultServlet.class.getName());
-        defaultServlet.addInitParameter("fileEncoding", "IBM850");
+        defaultServlet.addInitParameter("fileEncoding", inputEncoding);
         ctxt.addServletMappingDecoded("/", "default");
 
         Tomcat.addServlet(ctxt, "encoding",
-                new EncodingServlet(outputEncoding, 
"/bug49nnn/bug49464-ibm850.txt", useWriter));
+                new EncodingServlet(outputEncoding, includePath, useWriter));
         ctxt.addServletMappingDecoded("/test", "encoding");
 
         tomcat.start();

Added: tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt?rev=1802780&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tomcat/trunk/test/webapp/bug49nnn/bug49464-utf8-bom.txt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: tomcat/trunk/webapps/docs/changelog.xml
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1802780&r1=1802779&r2=1802780&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/trunk/webapps/docs/changelog.xml Mon Jul 24 10:00:23 2017
@@ -62,6 +62,11 @@
         <bug>61253</bug>: Add warn message when Digester.updateAttributes
         throws an exception instead of ignoring it. (csutherl)
       </fix>
+      <fix>
+        Correct a further regression in the fix for <bug>49464</bug> that could
+        cause a byte order mark character to appear at the start of content
+        included by the <code>DefaultServlet</code>. (markt)
+      </fix>
     </changelog>
   </subsection>
   <subsection name="Web applications">

Modified: tomcat/trunk/webapps/docs/default-servlet.xml
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/default-servlet.xml?rev=1802780&r1=1802779&r2=1802780&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/default-servlet.xml (original)
+++ tomcat/trunk/webapps/docs/default-servlet.xml Mon Jul 24 10:00:23 2017
@@ -178,6 +178,10 @@ Tomcat.</p>
         File encoding to be used when reading static resources.
         [platform default]
   </property>
+  <property name="useBomIfPresent">
+        If a static file contains a byte order mark (BOM), should this be used
+        to determine the file encoding in preference to fileEncoding. [true]
+  </property>
   <property name="sendfileSize">
         If the connector used supports sendfile, this represents the minimal
         file size in KB for which sendfile will be used. Use a negative value



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to