Author: markt Date: Sun Jan 10 11:10:36 2010 New Revision: 897614 URL: http://svn.apache.org/viewvc?rev=897614&view=rev Log: Re-work EL attribute parsing. The underlying issue was the complete independence of attribute and EL parsing. The attribute parser would generate the same result - ${1+1} - after parsing ${1+1} and \${1+1} and the EL parser had no way to differentiate between the first (that should be treated as an expression) and the second (that should be treated as a literal). The attribute parser has been modified to output, as EL literals, any literals that would otherwise be misinterpreted by the EL parser. i.e. \ is output as ${'\\'} or #{'\\'}, $ as ${'$'} or #{'$'} and # as ${'#'} or #{'#'}.
Added:
    tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java (with props)
Modified:
    tomcat/trunk/java/org/apache/jasper/compiler/Parser.java

Added: tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java
URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java?rev=897614&view=auto
==============================================================================
--- tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java (added)
+++ tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java Sun Jan 10 11:10:36 2010
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jasper.compiler;
+
+/**
+ * Converts a JSP attribute value into the unquoted equivalent. The attribute
+ * may contain EL expressions, in which case care needs to be taken to avoid any
+ * ambiguities. For example, consider the attribute values "${1+1}" and
+ * "\${1+1}". After unquoting, both appear as "${1+1}" but the first should
+ * evaluate to "2" and the second to "${1+1}". Literal \, $ and # need special
+ * treatment to ensure there is no ambiguity. The JSP attribute unquoting
+ * covers \\, \", \', \$, \#, %\>, <\%, &apos; and &quot;
+ */
+public class AttributeParser {
+
+    /* System property that controls if the strict quoting rules are applied. */
+    private static final boolean STRICT_QUOTE_ESCAPING = Boolean.valueOf(
+            System.getProperty(
+                    "org.apache.jasper.compiler.Parser.STRICT_QUOTE_ESCAPING",
+                    "true")).booleanValue();
+
+    /**
+     * Parses the provided input String as a JSP attribute and returns an
+     * unquoted value.
+     *
+     * @param input         The input.
+     * @param quote         The quote character for the attribute or 0 for
+     *                      scripting expressions.
+     * @param isELIgnored   Is expression language being ignored on the page
+     *                      where the JSP attribute is defined.
+     * @return              An unquoted JSP attribute that, if it contains
+     *                      expression language can be safely passed to the EL
+     *                      processor without fear of ambiguity.
+     */
+    public static String getUnquoted(String input, char quote,
+            boolean isELIgnored) {
+        return (new AttributeParser(input, quote, isELIgnored,
+                STRICT_QUOTE_ESCAPING)).getUnquoted();
+    }
+
+    /**
+     * Provided solely for unit test purposes and allows per call overriding of
+     * the STRICT_QUOTE_ESCAPING system property.
+     *
+     * @param input         The input.
+     * @param quote         The quote character for the attribute or 0 for
+     *                      scripting expressions.
+     * @param isELIgnored   Is expression language being ignored on the page
+     *                      where the JSP attribute is defined.
+     * @param strict        The value to use for STRICT_QUOTE_ESCAPING.
+     * @return              An unquoted JSP attribute that, if it contains
+     *                      expression language can be safely passed to the EL
+     *                      processor without fear of ambiguity.
+     */
+    protected static String getUnquoted(String input, char quote,
+            boolean isELIgnored, boolean strict) {
+        return (new AttributeParser(input, quote, isELIgnored,
+                strict)).getUnquoted();
+    }
+
+    /* The quoted input string. */
+    private final String input;
+
+    /* The quote used for the attribute - 0 for scripting expressions. */
+    private final char quote;
+
+    /* Is expression language being ignored - affects unquoting. \$ and \# are
+     * treated as literals rather than quoted values. */
+    private final boolean isELIgnored;
+
+    /* Overrides the STRICT_QUOTE_ESCAPING. Used for Unit tests only. */
+    private final boolean strict;
+
+    /* The type ($ or #) of expression. Literals have a type of 0. */
+    private char type;
+
+    /* The length of the quoted input string. */
+    private final int size;
+
+    /* Tracks the current position of the parser in the input String. */
+    private int i = 0;
+
+    /* Indicates if the last character returned by nextChar() was escaped. */
+    private boolean lastChEscaped = false;
+
+    /* The unquoted result. */
+    private StringBuilder result;
+
+    /**
+     * Creates a parser for a single attribute value.
+     * @param input         The quoted attribute value.
+     * @param quote         The attribute's quote character, 0 for scripting.
+     * @param isELIgnored   Is EL being ignored where the attribute is defined.
+     * @param strict        Is an unescaped quote character an error.
+     */
+    private AttributeParser(String input, char quote,
+            boolean isELIgnored, boolean strict) {
+        this.input = input;
+        this.quote = quote;
+        // If quote is 0 this is a scripting expression and any EL syntax
+        // should be ignored
+        this.isELIgnored = isELIgnored || (quote == 0);
+        this.strict = strict;
+        this.type = getType(input);
+        this.size = input.length();
+        result = new StringBuilder(size);
+    }
+
+    /*
+     * Work through input looking for literals and expressions until the input
+     * has all been read.
+     */
+    private String getUnquoted() {
+        while (i < size) {
+            parseLiteral();
+            parseEL();
+        }
+        return result.toString();
+    }
+
+    /*
+     * This method gets the next unquoted character and looks for
+     * - literals that need to be converted for EL processing
+     *   \ -> type{'\\'}
+     *   $ -> type{'$'}
+     *   # -> type{'#'}
+     * - start of EL
+     *   ${
+     *   #{
+     * Note all the examples above *do not* include the escaping required to use
+     * the values in Java code.
+     */
+    private void parseLiteral() {
+        boolean foundEL = false;
+        while (i < size && !foundEL) {
+            char ch = nextChar();
+            if (!isELIgnored && ch == '\\') {
+                if (type == 0) {
+                    type = '$';
+                }
+                result.append(type);
+                result.append("{'\\\\'}");
+            } else if (!isELIgnored && ch == '$' && lastChEscaped){
+                if (type == 0) {
+                    type = '$';
+                }
+                result.append(type);
+                result.append("{'$'}");
+            } else if (!isELIgnored && ch == '#' && lastChEscaped){
+                if (type == 0) {
+                    type = '$';
+                }
+                result.append(type);
+                result.append("{'#'}");
+            } else if (ch == type){
+                if (i < size) {
+                    char next = input.charAt(i);
+                    if (next == '{') {
+                        foundEL = true;
+                        // Move back to start of EL
+                        i--;
+                    } else {
+                        result.append(ch);
+                    }
+                } else {
+                    result.append(ch);
+                }
+            } else {
+                result.append(ch);
+            }
+        }
+    }
+
+    /*
+     * For EL need to unquote everything but no need to convert anything. The
+     * EL is terminated by '}'. The only other valid location for '}' is inside
+     * a StringLiteral. The literals are delimited by '\'' or '\"'. The only
+     * other valid location for '\'' or '\"' is also inside a StringLiteral. A
+     * quote character inside a StringLiteral must be escaped if the same quote
+     * character is used to delimit the StringLiteral.
+     */
+    private void parseEL() {
+        boolean endEL = false;
+        boolean insideLiteral = false;
+        char literalQuote = 0; // Kept across iterations to match the opener
+        while (i < size && !endEL) {
+            char ch = nextChar();
+            if (ch == '\'' || ch == '\"') {
+                if (insideLiteral) {
+                    if (literalQuote == ch) {
+                        insideLiteral = false;
+                    }
+                } else {
+                    insideLiteral = true;
+                    literalQuote = ch;
+                }
+                result.append(ch);
+            } else if (ch == '\\') {
+                result.append(ch);
+                if (insideLiteral && i < size) { // Copy escaped char verbatim
+                    ch = nextChar();
+                    result.append(ch);
+                }
+            } else if (ch == '}') {
+                if (!insideLiteral) {
+                    endEL = true;
+                }
+                result.append(ch);
+            } else {
+                result.append(ch);
+            }
+        }
+    }
+
+    /*
+     * Returns the next unquoted character and sets the lastChEscaped flag to
+     * indicate if it was quoted/escaped or not.
+     * &apos; is always unquoted to '
+     * &quot; is always unquoted to "
+     * \" is always unquoted to "
+     * \' is always unquoted to '
+     * \\ is always unquoted to \
+     * \$ is unquoted to $ if EL is not being ignored
+     * \# is unquoted to # if EL is not being ignored
+     * <\% is always unquoted to <%
+     * %\> is always unquoted to %>
+     */
+    private char nextChar() {
+        lastChEscaped = false;
+        char ch = input.charAt(i);
+
+        if (ch == '&') {
+            if (i + 5 < size && input.charAt(i + 1) == 'a' &&
+                    input.charAt(i + 2) == 'p' && input.charAt(i + 3) == 'o' &&
+                    input.charAt(i + 4) == 's' && input.charAt(i + 5) == ';') {
+                ch = '\'';
+                i += 6;
+            } else if (i + 5 < size && input.charAt(i + 1) == 'q' &&
+                    input.charAt(i + 2) == 'u' && input.charAt(i + 3) == 'o' &&
+                    input.charAt(i + 4) == 't' && input.charAt(i + 5) == ';') {
+                ch = '\"';
+                i += 6;
+            } else {
+                ++i;
+            }
+        } else if (ch == '\\' && i + 1 < size) {
+            ch = input.charAt(i + 1);
+            if (ch == '\\' || ch == '\"' || ch == '\'' ||
+                    (!isELIgnored && (ch == '$' || ch == '#'))) {
+                i += 2;
+                lastChEscaped = true;
+            } else {
+                ch = '\\';
+                ++i;
+            }
+        } else if (ch == '<' && (i + 2 < size) && input.charAt(i + 1) == '\\' &&
+                input.charAt(i + 2) == '%') {
+            // Note this is a hack since nextChar only returns a single char
+            // It is safe since <% does not require special treatment for EL
+            // or for literals
+            result.append('<');
+            i+=3;
+            return '%';
+        } else if (ch == '%' && i + 2 < size && input.charAt(i + 1) == '\\' &&
+                input.charAt(i + 2) == '>') {
+            // Note this is a hack since nextChar only returns a single char
+            // It is safe since %> does not require special treatment for EL
+            // or for literals
+            result.append('%');
+            i+=3;
+            return '>';
+        } else if (ch == quote && strict) {
+            String msg = Localizer.getMessage("jsp.error.attribute.noescape",
+                    input, ""+ quote);
+            throw new IllegalArgumentException(msg);
+        } else {
+            ++i;
+        }
+
+        return ch;
+    }
+
+    /*
+     * Determines the type of expression by looking for the first unquoted ${
+     * or #{.
+     */
+    private char getType(String value) {
+        if (value == null) {
+            return 0;
+        }
+
+        if (isELIgnored) {
+            return 0;
+        }
+
+        int j = 0;
+        int len = value.length();
+        char current;
+
+        while (j < len) {
+            current = value.charAt(j);
+            if (current == '\\') {
+                // Escape character - skip a character
+                j++;
+            } else if (current == '#') {
+                if (j < (len -1) && value.charAt(j + 1) == '{') {
+                    return '#';
+                }
+            } else if (current == '$') {
+                if (j < (len - 1) && value.charAt(j + 1) == '{') {
+                    return '$';
+                }
+            }
+            j++;
+        }
+        return 0;
+    }
+}
Propchange: tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java ------------------------------------------------------------------------------ svn:keywords = Date Author Id Revision Modified: tomcat/trunk/java/org/apache/jasper/compiler/Parser.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/compiler/Parser.java?rev=897614&r1=897613&r2=897614&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/jasper/compiler/Parser.java (original) +++ tomcat/trunk/java/org/apache/jasper/compiler/Parser.java Sun Jan 10 11:10:36 2010 @@ -75,11 +75,6 @@ private static final String JAVAX_BODY_CONTENT_TEMPLATE_TEXT = "JAVAX_BODY_CONTENT_TEMPLATE_TEXT"; - private static final boolean STRICT_QUOTE_ESCAPING = Boolean.valueOf( - System.getProperty( - "org.apache.jasper.compiler.Parser.STRICT_QUOTE_ESCAPING", - "true")).booleanValue(); - /** * The constructor */ @@ -247,66 +242,25 @@ err.jspError(start, "jsp.error.attribute.unterminated", watch); } - String ret = parseQuoted(start, reader.getText(start, stop), - watch.charAt(watch.length() - 1)); + String ret = null; + try { + char quote = 0; + if (watch.length() == 1) { + quote = watch.charAt(0); + } + ret =
AttributeParser.getUnquoted(reader.getText(start, stop), + quote, pageInfo.isELIgnored()); + } catch (IllegalArgumentException iae) { + err.jspError(start, iae.getMessage()); + } if (watch.length() == 1) // quote return ret; - // putback delimiter '<%=' and '%>', since they are needed if the + // Put back delimiter '<%=' and '%>', since they are needed if the // attribute does not allow RTexpression. return "<%=" + ret + "%>"; } - /** - * QuotedChar ::= ''' | '"' | '\\' | '\"' | "\'" | '\>' | '\$' | - * Char - */ - private String parseQuoted(Mark start, String tx, char quote) - throws JasperException { - StringBuilder buf = new StringBuilder(); - int size = tx.length(); - int i = 0; - while (i < size) { - char ch = tx.charAt(i); - if (ch == '&') { - if (i + 5 < size && tx.charAt(i + 1) == 'a' - && tx.charAt(i + 2) == 'p' && tx.charAt(i + 3) == 'o' - && tx.charAt(i + 4) == 's' && tx.charAt(i + 5) == ';') { - buf.append('\''); - i += 6; - } else if (i + 5 < size && tx.charAt(i + 1) == 'q' - && tx.charAt(i + 2) == 'u' && tx.charAt(i + 3) == 'o' - && tx.charAt(i + 4) == 't' && tx.charAt(i + 5) == ';') { - buf.append('"'); - i += 6; - } else { - buf.append(ch); - ++i; - } - } else if (ch == '\\' && i + 1 < size) { - ch = tx.charAt(i + 1); - if (ch == '\\' || ch == '\"' || ch == '\'' || ch == '>') { - // \ " and ' are always unescaped regardless of if they are - // inside or outside of an EL expression. JSP.1.6 takes - // precedence over JSP.1.3.10 (confirmed with EG). 
- buf.append(ch); - i += 2; - } else { - buf.append('\\'); - ++i; - } - } else if (ch == quote && STRICT_QUOTE_ESCAPING) { - // Unescaped quote character - err.jspError(start, "jsp.error.attribute.noescape", tx, - "" + quote); - } else { - buf.append(ch); - ++i; - } - } - return buf.toString(); - } - private String parseScriptText(String tx) { CharArrayWriter cw = new CharArrayWriter(); int size = tx.length(); --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org