Revision: 4238
Author: [email protected]
Date: Tue Aug 10 14:40:28 2010
Log: Add an HTML compatibility rendering mode to fix rendering for
older user-agents
http://codereview.appspot.com/1848059
http://www.w3.org/TR/html401/intro/sgmltut.html#didx-boolean_attribute says
Authors should be aware that many user agents only recognize the minimized
form of boolean attributes and not the full form.
[email protected]
http://code.google.com/p/google-caja/source/detail?r=4238
Added:
/trunk/src/com/google/caja/reporting/MarkupRenderMode.java
Modified:
/trunk/src/com/google/caja/ancillary/servlet/Processor.java
/trunk/src/com/google/caja/opensocial/GadgetParser.java
/trunk/src/com/google/caja/parser/html/Nodes.java
/trunk/src/com/google/caja/plugin/templates/Localizer.java
/trunk/src/com/google/caja/reporting/RenderContext.java
/trunk/tests/com/google/caja/parser/html/NodesTest.java
/trunk/tests/com/google/caja/util/RhinoTestBed.java
=======================================
--- /dev/null
+++ /trunk/src/com/google/caja/reporting/MarkupRenderMode.java Tue Aug 10
14:40:28 2010
@@ -0,0 +1,39 @@
+// Copyright (C) 2010 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.caja.reporting;
+
+/**
+ * Explains the dialect of markup to use for output.
+ */
+public enum MarkupRenderMode {
+ /**
+ * Render as HTML, assuming that CDATA tags like {@code <script>} have raw
+ * content.
+ */
+ HTML,
+ /**
+ * Render as XML, either encoding or using CDATA sections
+ * ({@code <![CDATA[...]]>}) for content that would be raw in HTML, and
+ * not using any entities not defined for all of XML such as {@code '}.
+ */
+ XML,
+ /**
+ * Render as HTML, but make sure to be backwards compatible with HTML4
+ * quirks such as disallowing values on boolean attributes like
+ * the {@code checked} attribute of {@code <input>}.
+ */
+ HTML4_BACKWARDS_COMPAT,
+ ;
+}
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/Processor.java Tue Jun 1
16:47:13 2010
+++ /trunk/src/com/google/caja/ancillary/servlet/Processor.java Tue Aug 10
14:40:28 2010
@@ -64,6 +64,7 @@
import com.google.caja.render.JsMinimalPrinter;
import com.google.caja.render.JsPrettyPrinter;
import com.google.caja.reporting.DevNullMessageQueue;
+import com.google.caja.reporting.MarkupRenderMode;
import com.google.caja.reporting.Message;
import com.google.caja.reporting.MessageLevel;
import com.google.caja.reporting.MessagePart;
@@ -381,7 +382,9 @@
throw new AssertionError(ot.name());
}
RenderContext rc = new RenderContext(tc);
- rc = rc.withAsXml(ot == ContentType.XML);
+ rc = rc.withMarkupRenderMode(
+ ot == ContentType.XML
+ ? MarkupRenderMode.XML : MarkupRenderMode.HTML);
rc = rc.withAsciiOnly(req.asciiOnly);
rc = rc.withJson(ot == ContentType.JSON);
rc = rc.withRawObjKeys(req.minify);
=======================================
--- /trunk/src/com/google/caja/opensocial/GadgetParser.java Thu Jul 22
10:23:41 2010
+++ /trunk/src/com/google/caja/opensocial/GadgetParser.java Tue Aug 10
14:40:28 2010
@@ -27,6 +27,7 @@
import com.google.caja.parser.html.Namespaces;
import com.google.caja.parser.html.Nodes;
import com.google.caja.render.Concatenator;
+import com.google.caja.reporting.MarkupRenderMode;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.RenderContext;
import com.google.caja.util.Callback;
@@ -192,7 +193,8 @@
throw new RenderFailure(e);
}
});
- RenderContext rc = new RenderContext(tc).withAsXml(true);
+ RenderContext rc = new RenderContext(tc).withMarkupRenderMode(
+ MarkupRenderMode.XML);
Nodes.render(rootElement, MODULE_NS, rc);
tc.noMoreTokens();
}
=======================================
--- /trunk/src/com/google/caja/parser/html/Nodes.java Thu Jul 22 10:23:41
2010
+++ /trunk/src/com/google/caja/parser/html/Nodes.java Tue Aug 10 14:40:28
2010
@@ -21,12 +21,15 @@
import com.google.caja.lexer.TokenConsumer;
import com.google.caja.lexer.escaping.Escaping;
import com.google.caja.render.Concatenator;
+import com.google.caja.reporting.MarkupRenderMode;
import com.google.caja.reporting.RenderContext;
+import com.google.caja.util.Sets;
import com.google.caja.util.SparseBitSet;
import com.google.caja.util.Strings;
import java.util.Iterator;
import java.util.NoSuchElementException;
+import java.util.Set;
import org.w3c.dom.Attr;
import org.w3c.dom.Element;
@@ -215,7 +218,7 @@
*/
public static void render(Node node, Namespaces ns, RenderContext rc) {
StringBuilder sb = new StringBuilder(1 << 18);
- new Renderer(sb, rc.asXml(), rc.isAsciiOnly()).render(node, ns);
+ new Renderer(sb, rc.markupRenderMode(), rc.isAsciiOnly()).render(node,
ns);
TokenConsumer out = rc.getOut();
FilePosition pos = getFilePositionFor(node);
out.mark(FilePosition.startOf(pos));
@@ -237,9 +240,13 @@
}
public static String render(Node node, boolean asXml) {
+ return render(node, asXml ? MarkupRenderMode.XML :
MarkupRenderMode.HTML);
+ }
+
+ public static String render(Node node, MarkupRenderMode renderMode) {
StringBuilder sb = new StringBuilder();
RenderContext rc = new RenderContext(new Concatenator(sb, null))
- .withAsXml(asXml);
+ .withMarkupRenderMode(renderMode);
render(node, rc);
rc.getOut().noMoreTokens();
return sb.toString();
@@ -250,12 +257,14 @@
final class Renderer {
final StringBuilder out;
+ final MarkupRenderMode mode;
final boolean asXml;
final boolean isAsciiOnly;
- Renderer(StringBuilder out, boolean asXml, boolean isAsciiOnly) {
+ Renderer(StringBuilder out, MarkupRenderMode mode, boolean isAsciiOnly) {
this.out = out;
- this.asXml = asXml;
+ this.mode = mode;
+ this.asXml = mode == MarkupRenderMode.XML;
this.isAsciiOnly = isAsciiOnly;
}
@@ -334,7 +343,8 @@
// This is safe regardless of whether the output is XML or HTML
since
// we only skip the end tag for HTML elements that don't require
one,
// and the slash will cause XML to treat it as a void tag.
- out.append(" />");
+ out.append(
+ mode != MarkupRenderMode.HTML4_BACKWARDS_COMPAT ? "
/>" : ">");
} else {
out.append('>');
if (!asXml) {
@@ -456,10 +466,16 @@
throw new IllegalStateException();
}
}
- emitLocalName(localName, isHtml);
- out.append("=\"");
- Escaping.escapeXml(a.getValue(), isAsciiOnly, out);
- out.append("\"");
+ localName = emitLocalName(localName, isHtml);
+ //
http://www.w3.org/TR/html401/intro/sgmltut.html#didx-boolean_attribute:
+ // Authors should be aware that many user agents only recognize the
+ // minimized form of boolean attributes and not the full form.
+ if (!(isHtml && mode == MarkupRenderMode.HTML4_BACKWARDS_COMPAT
+ && BooleanAttrs.isBooleanAttr(localName))) {
+ out.append("=\"");
+ Escaping.escapeXml(a.getValue(), isAsciiOnly, out);
+ out.append("\"");
+ }
}
private static final boolean[] CASE_SENS_NAME_CHARS = new boolean['z' +
1];
@@ -476,7 +492,7 @@
}
}
- private void emitLocalName(String name, boolean isHtml) {
+ private String emitLocalName(String name, boolean isHtml) {
// speed up common case where we already have lower-cased letters and
// digits.
boolean[] simple = isHtml ? CASE_INSENS_NAME_CHARS :
CASE_SENS_NAME_CHARS;
@@ -485,10 +501,11 @@
if (ch > 'z' || !simple[ch]) {
if (isHtml) { name = Strings.toLowerCase(name); }
Escaping.escapeXml(name, isAsciiOnly, out);
- return;
+ return name;
}
}
out.append(name);
+ return name;
}
private static boolean containsEndTag(StringBuilder sb) {
@@ -545,3 +562,20 @@
0x203f, 0x2041, 0x2070, 0x2190, 0x2c00, 0x2ff0, 0x3001, 0xd800,
0xf900, 0xfdd0, 0xfdf0, 0xfffe, 0x10000, 0xf0000);
}
+
+final class BooleanAttrs {
+ /**
+ * The set of HTML4.01 attributes that have the sole value {@code (<name>)}
+ * where {@code <name>} is the attribute name and that are #IMPLIED.
+ * @see <a href="http://www.w3.org/TR/html401/index/attributes.html">
+ * the HTML4.01 attributes index</a>
+ */
+ private static final Set<String> BOOLEAN_ATTR_NAMES = Sets.immutableSet(
+ "checked", "compact", "declare", "defer", "disabled", "ismap",
"multiple",
+ "nohref", "noresize", "noshade", "nowrap", "readonly", "selected");
+
+ // http://www.w3.org/TR/html401/index/attributes.html
+ static boolean isBooleanAttr(String htmlAttrLocalName) {
+ return BOOLEAN_ATTR_NAMES.contains(htmlAttrLocalName);
+ }
+}
=======================================
--- /trunk/src/com/google/caja/plugin/templates/Localizer.java Thu Dec 10
17:39:38 2009
+++ /trunk/src/com/google/caja/plugin/templates/Localizer.java Tue Aug 10
14:40:28 2010
@@ -24,6 +24,7 @@
import com.google.caja.parser.html.Namespaces;
import com.google.caja.parser.html.Nodes;
import com.google.caja.render.Concatenator;
+import com.google.caja.reporting.MarkupRenderMode;
import com.google.caja.reporting.MessagePart;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.RenderContext;
@@ -184,7 +185,7 @@
// <ph> elements.
StringBuilder xhtml = new StringBuilder();
RenderContext rc = new RenderContext(new Concatenator(xhtml))
- .withAsXml(true).withAsciiOnly(true);
+ .withMarkupRenderMode(MarkupRenderMode.XML).withAsciiOnly(true);
for (Node c : Nodes.childrenOf(message)) {
Nodes.render(c, rc);
}
=======================================
--- /trunk/src/com/google/caja/reporting/RenderContext.java Mon Nov 2
13:56:19 2009
+++ /trunk/src/com/google/caja/reporting/RenderContext.java Tue Aug 10
14:40:28 2010
@@ -29,7 +29,7 @@
/** Should javascript output be rendered using JSON conventions. */
private final boolean json;
/** True iff DOM tree nodes should be rendered as XML. */
- private final boolean asXml;
+ private final MarkupRenderMode markupMode;
/**
* True iff object ctor keys that are JS identifiers can be rendered
without
* quotes.
@@ -38,17 +38,17 @@
private final TokenConsumer out;
public RenderContext(TokenConsumer out) {
- this(true, false, false, false, false, out);
+ this(true, false, false, MarkupRenderMode.HTML, false, out);
}
private RenderContext(
- boolean asciiOnly, boolean embeddable, boolean json, boolean asXml,
- boolean rawObjKeys, TokenConsumer out) {
+ boolean asciiOnly, boolean embeddable, boolean json,
+ MarkupRenderMode markupMode, boolean rawObjKeys, TokenConsumer out) {
if (null == out) { throw new NullPointerException(); }
this.embeddable = embeddable;
this.asciiOnly = asciiOnly;
this.json = json;
- this.asXml = asXml;
+ this.markupMode = markupMode;
this.rawObjKeys = rawObjKeys;
this.out = out;
}
@@ -65,30 +65,41 @@
public final boolean isAsciiOnly() { return asciiOnly; }
public final boolean asJson() { return json; }
/** True iff DOM tree nodes should be rendered as XML. */
- public final boolean asXml() { return asXml; }
+ public final boolean asXml() { return markupMode ==
MarkupRenderMode.XML; }
+ public final MarkupRenderMode markupRenderMode() { return markupMode; }
public final boolean rawObjKeys() { return rawObjKeys; }
public final TokenConsumer getOut() { return out; }
public RenderContext withAsciiOnly(boolean b) {
return b != asciiOnly
- ? new RenderContext(b, embeddable, json, asXml, rawObjKeys, out) :
this;
+ ? new RenderContext(b, embeddable, json, markupMode, rawObjKeys,
out)
+ : this;
}
public RenderContext withEmbeddable(boolean b) {
return b != embeddable
- ? new RenderContext(asciiOnly, b, json, asXml, rawObjKeys, out) :
this;
+ ? new RenderContext(asciiOnly, b, json, markupMode, rawObjKeys,
out)
+ : this;
}
public RenderContext withJson(boolean b) {
return b != json
- ? new RenderContext(asciiOnly, embeddable, b, asXml, rawObjKeys,
out)
+ ? new RenderContext(
+ asciiOnly, embeddable, b, markupMode, rawObjKeys, out)
: this;
}
- public RenderContext withAsXml(boolean b) {
- return b != this.asXml
- ? new RenderContext(asciiOnly, embeddable, json, b, rawObjKeys,
out)
+ public RenderContext withMarkupRenderMode(MarkupRenderMode markupMode) {
+ return markupMode != this.markupMode
+ ? new RenderContext(
+ asciiOnly, embeddable, json, markupMode, rawObjKeys, out)
: this;
}
+ @Deprecated
+ public RenderContext withAsXml(boolean b) {
+ return withMarkupRenderMode(
+ b ? MarkupRenderMode.XML : MarkupRenderMode.HTML);
+ }
public RenderContext withRawObjKeys(boolean b) {
- return b != this.asXml
- ? new RenderContext(asciiOnly, embeddable, json, asXml, b, out) :
this;
+ return b != this.rawObjKeys
+ ? new RenderContext(asciiOnly, embeddable, json, markupMode, b,
out)
+ : this;
}
}
=======================================
--- /trunk/tests/com/google/caja/parser/html/NodesTest.java Tue May 25
12:11:32 2010
+++ /trunk/tests/com/google/caja/parser/html/NodesTest.java Tue Aug 10
14:40:28 2010
@@ -15,6 +15,7 @@
package com.google.caja.parser.html;
import com.google.caja.render.Concatenator;
+import com.google.caja.reporting.MarkupRenderMode;
import com.google.caja.reporting.RenderContext;
import com.google.caja.util.CajaTestCase;
@@ -153,7 +154,8 @@
Namespaces ns = new Namespaces(
Namespaces.HTML_DEFAULT, "svg", Namespaces.XML_NAMESPACE_URI);
StringBuilder sb = new StringBuilder();
- RenderContext rc = new RenderContext(new
Concatenator(sb)).withAsXml(true);
+ RenderContext rc = new RenderContext(new Concatenator(sb))
+ .withMarkupRenderMode(MarkupRenderMode.XML);
Nodes.render(fragment, ns, rc);
rc.getOut().noMoreTokens();
assertEquals(
@@ -173,7 +175,8 @@
Namespaces ns = new Namespaces(
Namespaces.HTML_DEFAULT, "html", Namespaces.HTML_NAMESPACE_URI);
StringBuilder sb = new StringBuilder();
- RenderContext rc = new RenderContext(new
Concatenator(sb)).withAsXml(false);
+ RenderContext rc = new RenderContext(new Concatenator(sb))
+ .withMarkupRenderMode(MarkupRenderMode.HTML);
Nodes.render(el, ns, rc);
rc.getOut().noMoreTokens();
assertEquals("<html:span title=\"Howdy\"></html:span>", sb.toString());
@@ -282,6 +285,21 @@
Nodes.render(new NullLocalNameMembrane().wrap(el, Element.class)));
}
+ public final void testRenderModes() throws Exception {
+ DocumentFragment f = htmlFragment(fromString(
+ "<input checked name=foo type=checkbox>"));
+ assertEquals(
+ "<input checked=\"checked\" name=\"foo\" type=\"checkbox\" />",
+ Nodes.render(f, MarkupRenderMode.XML));
+ assertEquals(
+ "<input checked=\"checked\" name=\"foo\" type=\"checkbox\" />",
+ Nodes.render(f, MarkupRenderMode.HTML));
+ assertEquals(
+ "<input checked name=\"foo\" type=\"checkbox\">",
+ Nodes.render(f, MarkupRenderMode.HTML4_BACKWARDS_COMPAT));
+ }
+
+
public final void testRenderSpeed() throws Exception {
Element doc = html(fromResource("amazon.com.html"));
benchmark(100, doc); // prime the JIT
=======================================
--- /trunk/tests/com/google/caja/util/RhinoTestBed.java Thu Dec 10 17:39:38
2009
+++ /trunk/tests/com/google/caja/util/RhinoTestBed.java Tue Aug 10 14:40:28
2010
@@ -35,6 +35,7 @@
import com.google.caja.parser.quasiliteral.CajitaRewriter;
import com.google.caja.parser.quasiliteral.DefaultValijaRewriter;
import com.google.caja.reporting.EchoingMessageQueue;
+import com.google.caja.reporting.MarkupRenderMode;
import com.google.caja.reporting.MessageContext;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.RenderContext;
@@ -211,7 +212,7 @@
private static String render(ParseTreeNode n) {
StringBuilder sb = new StringBuilder();
TokenConsumer tc = n.makeRenderer(sb, null);
- n.render(new RenderContext(tc).withAsXml(true));
+ n.render(new
RenderContext(tc).withMarkupRenderMode(MarkupRenderMode.XML));
tc.noMoreTokens();
return sb.toString();
}