Author: markt
Date: Sun Jan 10 11:10:36 2010
New Revision: 897614

URL: http://svn.apache.org/viewvc?rev=897614&view=rev
Log:
Re-work EL attribute parsing. The underlying issue was the complete
independence of attribute and EL parsing. The attribute parser would generate
the same result - ${1+1} - after parsing ${1+1} and \${1+1} and the EL parser
had no way to differentiate between the first (that should be treated as an
expression) and the second (that should be treated as a literal). The attribute
parser has been modified to output any literals that would be misinterpreted by
the EL parser as EL literals, i.e. \ is output as ${'\\'} or #{'\\'}, $ as
${'$'} or #{'$'} and # as ${'#'} or #{'#'}.

Added:
    tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java   (with 
props)
Modified:
    tomcat/trunk/java/org/apache/jasper/compiler/Parser.java

Added: tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java?rev=897614&view=auto
==============================================================================
--- tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java (added)
+++ tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java Sun Jan 
10 11:10:36 2010
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jasper.compiler;
+
+/**
+ * Converts a JSP attribute value into the unquoted equivalent. The attribute
+ * may contain EL expressions, in which case care needs to be taken to avoid
+ * any ambiguities. For example, consider the attribute values "${1+1}" and
+ * "\${1+1}". After unquoting, both appear as "${1+1}" but the first should
+ * evaluate to "2" and the second to "${1+1}". Literal \, $ and # need special
+ * treatment to ensure there is no ambiguity. The JSP attribute unquoting
+ * covers \\, \", \', \$, \#, %\>, <\%, &apos; and &quot;
+ */
+public class AttributeParser {
+
+    /* System property that controls whether strict quoting rules apply. */
+    private static final boolean STRICT_QUOTE_ESCAPING = Boolean.parseBoolean(
+            System.getProperty(
+                    "org.apache.jasper.compiler.Parser.STRICT_QUOTE_ESCAPING",
+                    "true"));
+
+    /**
+     * Parses the provided input String as a JSP attribute and returns an
+     * unquoted value.
+     *
+     * @param input         The input.
+     * @param quote         The quote character for the attribute or 0 for
+     *                      scripting expressions.
+     * @param isELIgnored   Is expression language being ignored on the page
+     *                      where the JSP attribute is defined.
+     * @return              An unquoted JSP attribute that, if it contains
+     *                      expression language can be safely passed to the EL
+     *                      processor without fear of ambiguity.
+     * @throws IllegalArgumentException if strict quoting is enabled and the
+     *                      input contains an unescaped quote character.
+     */
+    public static String getUnquoted(String input, char quote,
+            boolean isELIgnored) {
+        return (new AttributeParser(input, quote, isELIgnored,
+                STRICT_QUOTE_ESCAPING)).getUnquoted();
+    }
+
+    /**
+     * Provided solely for unit test purposes and allows per call overriding of
+     * the STRICT_QUOTE_ESCAPING system property.
+     *
+     * @param input         The input.
+     * @param quote         The quote character for the attribute or 0 for
+     *                      scripting expressions.
+     * @param isELIgnored   Is expression language being ignored on the page
+     *                      where the JSP attribute is defined.
+     * @param strict        The value to use for STRICT_QUOTE_ESCAPING.
+     * @return              An unquoted JSP attribute that, if it contains
+     *                      expression language can be safely passed to the EL
+     *                      processor without fear of ambiguity.
+     */
+    protected static String getUnquoted(String input, char quote,
+            boolean isELIgnored, boolean strict) {
+        return (new AttributeParser(input, quote, isELIgnored,
+                strict)).getUnquoted();
+    }
+
+    /* The quoted input string. */
+    private final String input;
+
+    /* The quote used for the attribute - 0 for scripting expressions. */
+    private final char quote;
+
+    /* Is expression language being ignored - affects unquoting. \$ and \# are
+     * treated as literals rather than quoted values. */
+    private final boolean isELIgnored;
+
+    /* Overrides the STRICT_QUOTE_ESCAPING. Used for Unit tests only. */
+    private final boolean strict;
+
+    /* The type ($ or #) of expression. Literals have a type of 0. */
+    private char type;
+
+    /* The length of the quoted input string. */
+    private final int size;
+
+    /* Tracks the current position of the parser in the input String. */
+    private int i = 0;
+
+    /* Indicates if the last character returned by nextChar() was escaped. */
+    private boolean lastChEscaped = false;
+
+    /* The unquoted result. */
+    private final StringBuilder result;
+
+
+    /**
+     * Creates a parser for a single attribute value. Only used by the static
+     * getUnquoted() methods.
+     */
+    private AttributeParser(String input, char quote,
+            boolean isELIgnored, boolean strict) {
+        this.input = input;
+        this.quote = quote;
+        // If quote is 0 this is a scripting expression and any EL syntax
+        // should be ignored
+        this.isELIgnored = isELIgnored || (quote == 0);
+        this.strict = strict;
+        this.type = getType(input);
+        this.size = input.length();
+        result = new StringBuilder(size);
+    }
+
+    /*
+     * Work through input looking for literals and expressions until the input
+     * has all been read.
+     */
+    private String getUnquoted() {
+        while (i < size) {
+            parseLiteral();
+            parseEL();
+        }
+        return result.toString();
+    }
+
+    /*
+     * This method gets the next unquoted character and looks for
+     * - literals that need to be converted for EL processing
+     *   \ -> type{'\\'}
+     *   $ -> type{'$'}
+     *   # -> type{'#'}
+     * - start of EL
+     *   ${
+     *   #{
+     * Note all the examples above *do not* include the escaping required to
+     * use the values in Java code.
+     */
+    private void parseLiteral() {
+        boolean foundEL = false;
+        while (i < size && !foundEL) {
+            char ch = nextChar();
+            if (!isELIgnored && ch == '\\') {
+                if (type == 0) {
+                    type = '$';
+                }
+                result.append(type);
+                result.append("{'\\\\'}");
+            } else if (!isELIgnored && ch == '$' && lastChEscaped) {
+                if (type == 0) {
+                    type = '$';
+                }
+                result.append(type);
+                result.append("{'$'}");
+            } else if (!isELIgnored && ch == '#' && lastChEscaped) {
+                if (type == 0) {
+                    type = '$';
+                }
+                result.append(type);
+                result.append("{'#'}");
+            } else if (ch == type) {
+                if (i < size) {
+                    char next = input.charAt(i);
+                    if (next == '{') {
+                        foundEL = true;
+                        // Move back to start of EL
+                        i--;
+                    } else {
+                        result.append(ch);
+                    }
+                } else {
+                    result.append(ch);
+                }
+            } else {
+                result.append(ch);
+            }
+        }
+    }
+
+    /*
+     * For EL need to unquote everything but no need to convert anything. The
+     * EL is terminated by '}'. The only other valid location for '}' is inside
+     * a StringLiteral. The literals are delimited by '\'' or '\"'. The only
+     * other valid location for '\'' or '\"' is also inside a StringLiteral. A
+     * quote character inside a StringLiteral must be escaped if the same quote
+     * character is used to delimit the StringLiteral.
+     */
+    private void parseEL() {
+        boolean endEL = false;
+        boolean insideLiteral = false;
+        // Declared outside the loop so that the quote which opened the current
+        // literal is remembered until the matching closing quote is found.
+        char literalQuote = 0;
+        while (i < size && !endEL) {
+            char ch = nextChar();
+            if (ch == '\'' || ch == '\"') {
+                if (insideLiteral) {
+                    if (literalQuote == ch) {
+                        insideLiteral = false;
+                    }
+                } else {
+                    insideLiteral = true;
+                    literalQuote = ch;
+                }
+                result.append(ch);
+            } else if (ch == '\\') {
+                result.append(ch);
+                if (insideLiteral && i < size) {
+                    // Copy the escaped character as-is so that an escaped
+                    // quote does not terminate the literal.
+                    ch = nextChar();
+                    result.append(ch);
+                }
+            } else if (ch == '}') {
+                if (!insideLiteral) {
+                    endEL = true;
+                }
+                result.append(ch);
+            } else {
+                result.append(ch);
+            }
+        }
+    }
+
+    /*
+     * Returns the next unquoted character and sets the lastChEscaped flag to
+     * indicate if it was quoted/escaped or not.
+     * &apos; is always unquoted to '
+     * &quot; is always unquoted to "
+     * \" is always unquoted to "
+     * \' is always unquoted to '
+     * \\ is always unquoted to \
+     * \$ is unquoted to $ if EL is not being ignored
+     * \# is unquoted to # if EL is not being ignored
+     * <\% is always unquoted to <%
+     * %\> is always unquoted to %>
+     */
+    private char nextChar() {
+        lastChEscaped = false;
+        char ch = input.charAt(i);
+
+        if (ch == '&') {
+            if (i + 5 < size && input.charAt(i + 1) == 'a' &&
+                    input.charAt(i + 2) == 'p' && input.charAt(i + 3) == 'o' &&
+                    input.charAt(i + 4) == 's' && input.charAt(i + 5) == ';') {
+                ch = '\'';
+                i += 6;
+            } else if (i + 5 < size && input.charAt(i + 1) == 'q' &&
+                    input.charAt(i + 2) == 'u' && input.charAt(i + 3) == 'o' &&
+                    input.charAt(i + 4) == 't' && input.charAt(i + 5) == ';') {
+                ch = '\"';
+                i += 6;
+            } else {
+                ++i;
+            }
+        } else if (ch == '\\' && i + 1 < size) {
+            ch = input.charAt(i + 1);
+            if (ch == '\\' || ch == '\"' || ch == '\'' ||
+                    (!isELIgnored && (ch == '$' || ch == '#'))) {
+                i += 2;
+                lastChEscaped = true;
+            } else {
+                ch = '\\';
+                ++i;
+            }
+        } else if (ch == '<' && i + 2 < size &&
+                input.charAt(i + 1) == '\\' && input.charAt(i + 2) == '%') {
+            // Note this is a hack since nextChar only returns a single char
+            // It is safe since <% does not require special treatment for EL
+            // or for literals
+            result.append('<');
+            i+=3;
+            return '%';
+        } else if (ch == '%' && i + 2 < size && input.charAt(i + 1) == '\\' &&
+                input.charAt(i + 2) == '>') {
+            // Note this is a hack since nextChar only returns a single char
+            // It is safe since %> does not require special treatment for EL
+            // or for literals
+            result.append('%');
+            i+=3;
+            return '>';
+        } else if (ch == quote && strict) {
+            String msg = Localizer.getMessage("jsp.error.attribute.noescape",
+                    input, ""+ quote);
+            throw new IllegalArgumentException(msg);
+        } else {
+            ++i;
+        }
+
+        return ch;
+    }
+
+    /*
+     * Determines the type of expression by looking for the first unquoted ${
+     * or #{.
+     */
+    private char getType(String value) {
+        if (value == null || isELIgnored) {
+            return 0;
+        }
+
+        int j = 0;
+        int len = value.length();
+        char current;
+
+        while (j < len) {
+            current = value.charAt(j);
+            if (current == '\\') {
+                // Escape character - skip a character
+                j++;
+            } else if (current == '#') {
+                if (j < (len -1) && value.charAt(j + 1) == '{') {
+                    return '#';
+                }
+            } else if (current == '$') {
+                if (j < (len - 1) && value.charAt(j + 1) == '{') {
+                    return '$';
+                }
+            }
+            j++;
+        }
+        return 0;
+    }
+}

Propchange: tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: tomcat/trunk/java/org/apache/jasper/compiler/AttributeParser.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision

Modified: tomcat/trunk/java/org/apache/jasper/compiler/Parser.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/compiler/Parser.java?rev=897614&r1=897613&r2=897614&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/jasper/compiler/Parser.java (original)
+++ tomcat/trunk/java/org/apache/jasper/compiler/Parser.java Sun Jan 10 
11:10:36 2010
@@ -75,11 +75,6 @@
     private static final String JAVAX_BODY_CONTENT_TEMPLATE_TEXT =
         "JAVAX_BODY_CONTENT_TEMPLATE_TEXT";
 
-    private static final boolean STRICT_QUOTE_ESCAPING = Boolean.valueOf(
-            System.getProperty(
-                    "org.apache.jasper.compiler.Parser.STRICT_QUOTE_ESCAPING",
-                    "true")).booleanValue();
-
     /**
      * The constructor
      */
@@ -247,66 +242,25 @@
             err.jspError(start, "jsp.error.attribute.unterminated", watch);
         }
 
-        String ret = parseQuoted(start, reader.getText(start, stop),
-                watch.charAt(watch.length() - 1));
+        String ret = null;
+        try {
+            char quote = 0;
+            if (watch.length() == 1) {
+                quote = watch.charAt(0);
+            }
+            ret = AttributeParser.getUnquoted(reader.getText(start, stop),
+                    quote, pageInfo.isELIgnored());
+        } catch (IllegalArgumentException iae) {
+            err.jspError(start, iae.getMessage());
+        }
         if (watch.length() == 1) // quote
             return ret;
 
-        // putback delimiter '<%=' and '%>', since they are needed if the
+        // Put back delimiter '<%=' and '%>', since they are needed if the
         // attribute does not allow RTexpression.
         return "<%=" + ret + "%>";
     }
 
-    /**
-     * QuotedChar ::= '&apos;' | '&quot;' | '\\' | '\"' | "\'" | '\>' | '\$' |
-     * Char
-     */
-    private String parseQuoted(Mark start, String tx, char quote)
-            throws JasperException {
-        StringBuilder buf = new StringBuilder();
-        int size = tx.length();
-        int i = 0;
-        while (i < size) {
-            char ch = tx.charAt(i);
-            if (ch == '&') {
-                if (i + 5 < size && tx.charAt(i + 1) == 'a'
-                        && tx.charAt(i + 2) == 'p' && tx.charAt(i + 3) == 'o'
-                        && tx.charAt(i + 4) == 's' && tx.charAt(i + 5) == ';') 
{
-                    buf.append('\'');
-                    i += 6;
-                } else if (i + 5 < size && tx.charAt(i + 1) == 'q'
-                        && tx.charAt(i + 2) == 'u' && tx.charAt(i + 3) == 'o'
-                        && tx.charAt(i + 4) == 't' && tx.charAt(i + 5) == ';') 
{
-                    buf.append('"');
-                    i += 6;
-                } else {
-                    buf.append(ch);
-                    ++i;
-                }
-            } else if (ch == '\\' && i + 1 < size) {
-                ch = tx.charAt(i + 1);
-                if (ch == '\\' || ch == '\"' || ch == '\'' || ch == '>') {
-                    // \ " and ' are always unescaped regardless of if they are
-                    // inside or outside of an EL expression. JSP.1.6 takes
-                    // precedence over JSP.1.3.10 (confirmed with EG).
-                    buf.append(ch);
-                    i += 2;
-                } else {
-                    buf.append('\\');
-                    ++i;
-                }
-            } else if (ch == quote && STRICT_QUOTE_ESCAPING) {
-                // Unescaped quote character
-                err.jspError(start, "jsp.error.attribute.noescape", tx,
-                        "" + quote);
-            } else {
-                buf.append(ch);
-                ++i;
-            }
-        }
-        return buf.toString();
-    }
-
     private String parseScriptText(String tx) {
         CharArrayWriter cw = new CharArrayWriter();
         int size = tx.length();



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to