pierred 00/12/22 10:37:40 Modified: jasper/src/share/org/apache/jasper/compiler JspParseEventListener.java ParserController.java Log: Fix for Bug #55. ----- Synopsis: Default for included files is 8859_1, with no option to set otherwise. Report Description: The default for reading an included file is ISO_8859_1. We can, of course, set pageContent to read UTF-8 (which is what we need it to be to support international code). Unfortunately, when there are two or more levels of encoding (or the pageContent type isn't set), the encoding that the JspReader gets is set to a hard-coded "ISO_8859_1", and doesn't allow this to be set to anything else via the runtime system properties. In: org.apache.jasper.compiler.JspReader JspReader.java line 158, encoding ALWAYS defaults to 8859_1, and the file.encoding, when set from the System properties. This is an easy fix, to set encoding to: encoding = System.getProperty("file.encoding","8859_1") ; The result, typically, is that the file will flake out and convert all of the non-UTF-8 characters to US-ASCII, @%, etc. ----- I'm not sure I fully understand what's described there, so here is what I believe should be done. The "encoding" for a JSP file is currently handled as follows: 1. In Compiler.java, we create a JspReader for the top-level ("including") jsp file using the 8859_1 encoding. 2. Using that JspReader, we check if there is a page directive with 'contentType' specified. If there is, then a new JspReader for the page is created with the encoding set to the "charset" specified in the contentType value of the page directive; otherwise we stick with the default 8859_1 encoding. 3. When a page is included, JspReader.pushFile() is called, and the encoding passed as argument appears to always be null (since no encoding attribute can be specified in the "include" directive, reading 'encoding' off of the attributes appears to be a bug in JspParseEventListener). Because it is null, it always defaults to 8859_1. 
If I understand well the intent of the bug report, we'd need the following modifications: - In step 2, if contentType is not specified in the "including" page, set the encoding to be: encoding = System.getProperty("file.encoding", "8859_1"); This means that the default encoding of all JSP files at a site could be defined globally using system property "file.encoding". I don't think this is spec-compliant, and would be reluctant to make that change. --> Change not done Comments from Hans Bergsten "I agree that using "file.encoding" as the ultimate default is not spec compliant. I suggest you stick to the current behavior, with "8859_1" if contentType doesn't specify a charset." - In step 3, use the encoding of the "including" page. This would fix what I believe is a bug in the current implementation. --> Change done Submitted by: [EMAIL PROTECTED] Revision Changes Path 1.20 +4 -6 jakarta-tomcat-4.0/jasper/src/share/org/apache/jasper/compiler/JspParseEventListener.java Index: JspParseEventListener.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-4.0/jasper/src/share/org/apache/jasper/compiler/JspParseEventListener.java,v retrieving revision 1.19 retrieving revision 1.20 diff -u -r1.19 -r1.20 --- JspParseEventListener.java 2000/12/21 23:16:41 1.19 +++ JspParseEventListener.java 2000/12/22 18:37:39 1.20 @@ -1,7 +1,7 @@ /* - * $Header: /home/cvs/jakarta-tomcat-4.0/jasper/src/share/org/apache/jasper/compiler/JspParseEventListener.java,v 1.19 2000/12/21 23:16:41 pierred Exp $ - * $Revision: 1.19 $ - * $Date: 2000/12/21 23:16:41 $ + * $Header: /home/cvs/jakarta-tomcat-4.0/jasper/src/share/org/apache/jasper/compiler/JspParseEventListener.java,v 1.20 2000/12/22 18:37:39 pierred Exp $ + * $Revision: 1.20 $ + * $Date: 2000/12/22 18:37:39 $ * * ==================================================================== * @@ -750,8 +750,6 @@ if (directive.equals("include")) { String file = attrs.getValue("file"); - String 
encoding = attrs.getValue("encoding"); - if (file == null) throw new CompileException(start, Constants.getString("jsp.error.include.missing.file")); @@ -766,7 +764,7 @@ } */ try { - parserCtl.parse(file, encoding); + parserCtl.parse(file); } catch (FileNotFoundException ex) { throw new CompileException( start, 1.9 +44 -38 jakarta-tomcat-4.0/jasper/src/share/org/apache/jasper/compiler/ParserController.java Index: ParserController.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-4.0/jasper/src/share/org/apache/jasper/compiler/ParserController.java,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- ParserController.java 2000/11/30 23:33:02 1.8 +++ ParserController.java 2000/12/22 18:37:39 1.9 @@ -114,6 +114,19 @@ private static final String JSP_ROOT_TAG = "<jsp:root"; /* + * Tells if the file being processed is the "top" file + * in the translation unit. + */ + private boolean isTopFile = true; + + /* + * The encoding of the "top" file. This encoding is used + * for included files by default. + * Defaults to "8859_1" per JSP spec. + */ + private String topFileEncoding = "8859_1"; + + /* * The 'new' encoding required to read a page. */ private String newEncoding; @@ -159,12 +172,6 @@ //********************************************************************* // Parse - public void parse(String inFileName) - throws FileNotFoundException, JasperException - { - parse(inFileName, null); - } - /** * Parse the jsp page provided as an argument. * First invoked by the compiler, then invoked recursively by the @@ -172,28 +179,18 @@ * * @param The name of the jsp file to be parsed. 
*/ - public void parse(String inFileName, String encoding) + public void parse(String inFileName) throws FileNotFoundException, JasperException { - //p("parse(" + inFileName + ", " + encoding + ")"); + //p("parse(" + inFileName + ")"); String absFileName = resolveFileName(inFileName); - - if (encoding == null) { - encoding = "8859_1"; // default per JSP spec - // XXX - longer term, this should really be: - // System.getProperty("file.encoding", "8859_1"); - // but this doesn't work right now, so we stick with ASCII - } - - // @@@ need to do a pass at JSP doc to find encoding as specified - // @@@ in page directive (see JspParseEventListener) - File file = new File(absFileName); String filePath = (ctxt == null) ? file.getAbsolutePath() : ctxt.getRealPath(file.toString()); //p("filePath: " + filePath); + String encoding = topFileEncoding; InputStreamReader reader = null; try { // Figure out what type of JSP document we are dealing with @@ -201,6 +198,12 @@ figureOutJspDocument(file, encoding, reader); //p("isXml = " + isXml + " hasTaglib = " + hasTaglib); encoding = (newEncoding!=null) ? newEncoding : encoding; + if (isTopFile) { + // Set the "top level" file encoding that will be used + // for all included files where encoding is not defined. + topFileEncoding = encoding; + isTopFile = false; + } try { reader.close(); } catch (IOException ex) {} @@ -251,34 +254,37 @@ isXml = false; } + newEncoding = null; + // Figure out the encoding of the page // FIXME: We assume xml parser will take care of // encoding for page in XML syntax. Correct? 
- newEncoding = null; - jspReader.reset(startMark); - while (jspReader.skipUntil("<%@") != null) { - jspReader.skipSpaces(); - if (jspReader.matches("page")) { - jspReader.advance(4); + if (!isXml) { + jspReader.reset(startMark); + while (jspReader.skipUntil("<%@") != null) { jspReader.skipSpaces(); - Attributes attrs = jspReader.parseTagAttributes(); - String attribute = "pageEncoding"; - newEncoding = attrs.getValue("pageEncoding"); - if (newEncoding == null) { - String contentType = attrs.getValue("contentType"); - if (contentType != null) { - int loc = contentType.indexOf("charset="); - if (loc != -1) { - newEncoding = contentType.substring(loc+8); - return; + if (jspReader.matches("page")) { + jspReader.advance(4); + jspReader.skipSpaces(); + Attributes attrs = jspReader.parseTagAttributes(); + String attribute = "pageEncoding"; + newEncoding = attrs.getValue("pageEncoding"); + if (newEncoding == null) { + String contentType = attrs.getValue("contentType"); + if (contentType != null) { + int loc = contentType.indexOf("charset="); + if (loc != -1) { + newEncoding = contentType.substring(loc+8); + return; + } } + } else { + return; } - } else { - return; } } } - + /* NOT COMPILED // This is an XML document. Let's see if it uses tag libraries. jspReader.reset(startMark);