Revision: 4238
Author: [email protected]
Date: Tue Aug 10 14:40:28 2010
Log: Add an HTML compatibility rendering mode to fix rendering for older user-agents
http://codereview.appspot.com/1848059

http://www.w3.org/TR/html401/intro/sgmltut.html#didx-boolean_attribute says

Authors should be aware that many user agents only recognize the minimized
form of boolean attributes and not the full form.

[email protected]

http://code.google.com/p/google-caja/source/detail?r=4238

Added:
 /trunk/src/com/google/caja/reporting/MarkupRenderMode.java
Modified:
 /trunk/src/com/google/caja/ancillary/servlet/Processor.java
 /trunk/src/com/google/caja/opensocial/GadgetParser.java
 /trunk/src/com/google/caja/parser/html/Nodes.java
 /trunk/src/com/google/caja/plugin/templates/Localizer.java
 /trunk/src/com/google/caja/reporting/RenderContext.java
 /trunk/tests/com/google/caja/parser/html/NodesTest.java
 /trunk/tests/com/google/caja/util/RhinoTestBed.java

=======================================
--- /dev/null
+++ /trunk/src/com/google/caja/reporting/MarkupRenderMode.java Tue Aug 10 14:40:28 2010
@@ -0,0 +1,39 @@
+// Copyright (C) 2010 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.caja.reporting;
+
+/**
+ * Selects the dialect of markup to emit when rendering DOM nodes.
+ */
+public enum MarkupRenderMode {
+  /**
+   * Render as HTML, assuming that CDATA tags like {@code <script>} have raw
+   * content.
+   */
+  HTML,
+  /**
+   * Render as XML, either encoding or using CDATA sections
+   * ({@code <![CDATA[...]]>}) for content that would be raw in HTML, and
+   * not using any entities not defined for all of XML such as {@code &apos;}.
+   */
+  XML,
+  /**
+   * Render as HTML, but make sure to be backwards compatible with HTML4
+   * quirks such as disallowing values on boolean attributes like
+   * the {@code checked} attribute of {@code <input>}.
+   */
+  HTML4_BACKWARDS_COMPAT,
+  ;
+}
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/Processor.java Tue Jun 1 16:47:13 2010
+++ /trunk/src/com/google/caja/ancillary/servlet/Processor.java Tue Aug 10 14:40:28 2010
@@ -64,6 +64,7 @@
 import com.google.caja.render.JsMinimalPrinter;
 import com.google.caja.render.JsPrettyPrinter;
 import com.google.caja.reporting.DevNullMessageQueue;
+import com.google.caja.reporting.MarkupRenderMode;
 import com.google.caja.reporting.Message;
 import com.google.caja.reporting.MessageLevel;
 import com.google.caja.reporting.MessagePart;
@@ -381,7 +382,9 @@
         throw new AssertionError(ot.name());
     }
     RenderContext rc = new RenderContext(tc);
-    rc = rc.withAsXml(ot == ContentType.XML);
+    rc = rc.withMarkupRenderMode(
+        ot == ContentType.XML
+        ? MarkupRenderMode.XML : MarkupRenderMode.HTML);
     rc = rc.withAsciiOnly(req.asciiOnly);
     rc = rc.withJson(ot == ContentType.JSON);
     rc = rc.withRawObjKeys(req.minify);
=======================================
--- /trunk/src/com/google/caja/opensocial/GadgetParser.java Thu Jul 22 10:23:41 2010
+++ /trunk/src/com/google/caja/opensocial/GadgetParser.java Tue Aug 10 14:40:28 2010
@@ -27,6 +27,7 @@
 import com.google.caja.parser.html.Namespaces;
 import com.google.caja.parser.html.Nodes;
 import com.google.caja.render.Concatenator;
+import com.google.caja.reporting.MarkupRenderMode;
 import com.google.caja.reporting.MessageQueue;
 import com.google.caja.reporting.RenderContext;
 import com.google.caja.util.Callback;
@@ -192,7 +193,8 @@
         throw new RenderFailure(e);
       }
     });
-    RenderContext rc = new RenderContext(tc).withAsXml(true);
+    RenderContext rc = new RenderContext(tc).withMarkupRenderMode(
+        MarkupRenderMode.XML);
     Nodes.render(rootElement, MODULE_NS, rc);
     tc.noMoreTokens();
   }
=======================================
--- /trunk/src/com/google/caja/parser/html/Nodes.java Thu Jul 22 10:23:41 2010
+++ /trunk/src/com/google/caja/parser/html/Nodes.java Tue Aug 10 14:40:28 2010
@@ -21,12 +21,15 @@
 import com.google.caja.lexer.TokenConsumer;
 import com.google.caja.lexer.escaping.Escaping;
 import com.google.caja.render.Concatenator;
+import com.google.caja.reporting.MarkupRenderMode;
 import com.google.caja.reporting.RenderContext;
+import com.google.caja.util.Sets;
 import com.google.caja.util.SparseBitSet;
 import com.google.caja.util.Strings;

 import java.util.Iterator;
 import java.util.NoSuchElementException;
+import java.util.Set;

 import org.w3c.dom.Attr;
 import org.w3c.dom.Element;
@@ -215,7 +218,7 @@
    */
   public static void render(Node node, Namespaces ns, RenderContext rc) {
     StringBuilder sb = new StringBuilder(1 << 18);
-    new Renderer(sb, rc.asXml(), rc.isAsciiOnly()).render(node, ns);
+ new Renderer(sb, rc.markupRenderMode(), rc.isAsciiOnly()).render(node, ns);
     TokenConsumer out = rc.getOut();
     FilePosition pos = getFilePositionFor(node);
     out.mark(FilePosition.startOf(pos));
@@ -237,9 +240,13 @@
   }

   public static String render(Node node, boolean asXml) {
+ return render(node, asXml ? MarkupRenderMode.XML : MarkupRenderMode.HTML);
+  }
+
+  public static String render(Node node, MarkupRenderMode renderMode) {
     StringBuilder sb = new StringBuilder();
     RenderContext rc = new RenderContext(new Concatenator(sb, null))
-        .withAsXml(asXml);
+        .withMarkupRenderMode(renderMode);
     render(node, rc);
     rc.getOut().noMoreTokens();
     return sb.toString();
@@ -250,12 +257,14 @@

 final class Renderer {
   final StringBuilder out;
+  final MarkupRenderMode mode;
   final boolean asXml;
   final boolean isAsciiOnly;

-  Renderer(StringBuilder out, boolean asXml, boolean isAsciiOnly) {
+  Renderer(StringBuilder out, MarkupRenderMode mode, boolean isAsciiOnly) {
     this.out = out;
-    this.asXml = asXml;
+    this.mode = mode;
+    this.asXml = mode == MarkupRenderMode.XML;
     this.isAsciiOnly = isAsciiOnly;
   }

@@ -334,7 +343,8 @@
           // This is safe regardless of whether the output is XML or HTML since
           // we only skip the end tag for HTML elements that don't require one,
           // and the slash will cause XML to treat it as a void tag.
-          out.append(" />");
+          out.append(
+ mode != MarkupRenderMode.HTML4_BACKWARDS_COMPAT ? " />" : ">");
         } else {
           out.append('>');
           if (!asXml) {
@@ -456,10 +466,16 @@
         throw new IllegalStateException();
       }
     }
-    emitLocalName(localName, isHtml);
-    out.append("=\"");
-    Escaping.escapeXml(a.getValue(), isAsciiOnly, out);
-    out.append("\"");
+    localName = emitLocalName(localName, isHtml);
+ // http://www.w3.org/TR/html401/intro/sgmltut.html#didx-boolean_attribute:
+    // Authors should be aware that many user agents only recognize the
+    // minimized form of boolean attributes and not the full form.
+    if (!(isHtml && mode == MarkupRenderMode.HTML4_BACKWARDS_COMPAT
+          && BooleanAttrs.isBooleanAttr(localName))) {
+      out.append("=\"");
+      Escaping.escapeXml(a.getValue(), isAsciiOnly, out);
+      out.append("\"");
+    }
   }

private static final boolean[] CASE_SENS_NAME_CHARS = new boolean['z' + 1];
@@ -476,7 +492,7 @@
     }
   }

-  private void emitLocalName(String name, boolean isHtml) {
+  private String emitLocalName(String name, boolean isHtml) {
     // speed up common case where we already have lower-cased letters and
     // digits.
boolean[] simple = isHtml ? CASE_INSENS_NAME_CHARS : CASE_SENS_NAME_CHARS;
@@ -485,10 +501,11 @@
       if (ch > 'z' || !simple[ch]) {
         if (isHtml) { name = Strings.toLowerCase(name); }
         Escaping.escapeXml(name, isAsciiOnly, out);
-        return;
+        return name;
       }
     }
     out.append(name);
+    return name;
   }

   private static boolean containsEndTag(StringBuilder sb) {
@@ -545,3 +562,20 @@
       0x203f, 0x2041, 0x2070, 0x2190, 0x2c00, 0x2ff0, 0x3001, 0xd800,
       0xf900, 0xfdd0, 0xfdf0, 0xfffe, 0x10000, 0xf0000);
 }
+
+final class BooleanAttrs {
+  /**
+   * The set of HTML4.01 attributes that have the sole value {@code (<name>)}
+   * where {@code <name>} is the attribute name and that are #IMPLIED.
+   * @see <a href="http://www.w3.org/TR/html401/index/attributes.html">
+   *    the HTML4.01 attributes index</a>
+   */
+  private static final Set<String> BOOLEAN_ATTR_NAMES = Sets.immutableSet(
+      "checked", "compact", "declare", "defer", "disabled", "ismap", "multiple",
+      "nohref", "noresize", "noshade", "nowrap", "readonly", "selected");
+
+  // http://www.w3.org/TR/html401/index/attributes.html
+  static boolean isBooleanAttr(String htmlAttrLocalName) {
+    return BOOLEAN_ATTR_NAMES.contains(htmlAttrLocalName);
+  }
+}
=======================================
--- /trunk/src/com/google/caja/plugin/templates/Localizer.java Thu Dec 10 17:39:38 2009
+++ /trunk/src/com/google/caja/plugin/templates/Localizer.java Tue Aug 10 14:40:28 2010
@@ -24,6 +24,7 @@
 import com.google.caja.parser.html.Namespaces;
 import com.google.caja.parser.html.Nodes;
 import com.google.caja.render.Concatenator;
+import com.google.caja.reporting.MarkupRenderMode;
 import com.google.caja.reporting.MessagePart;
 import com.google.caja.reporting.MessageQueue;
 import com.google.caja.reporting.RenderContext;
@@ -184,7 +185,7 @@
       // <ph> elements.
       StringBuilder xhtml = new StringBuilder();
       RenderContext rc = new RenderContext(new Concatenator(xhtml))
-          .withAsXml(true).withAsciiOnly(true);
+          .withMarkupRenderMode(MarkupRenderMode.XML).withAsciiOnly(true);
       for (Node c : Nodes.childrenOf(message)) {
         Nodes.render(c, rc);
       }
=======================================
--- /trunk/src/com/google/caja/reporting/RenderContext.java Mon Nov 2 13:56:19 2009
+++ /trunk/src/com/google/caja/reporting/RenderContext.java Tue Aug 10 14:40:28 2010
@@ -29,7 +29,7 @@
   /** Should javascript output be rendered using JSON conventions. */
   private final boolean json;
   /** True iff DOM tree nodes should be rendered as XML. */
-  private final boolean asXml;
+  private final MarkupRenderMode markupMode;
   /**
* True iff object ctor keys that are JS identifiers can be rendered without
    * quotes.
@@ -38,17 +38,17 @@
   private final TokenConsumer out;

   public RenderContext(TokenConsumer out) {
-    this(true, false, false, false, false, out);
+    this(true, false, false, MarkupRenderMode.HTML, false, out);
   }

   private RenderContext(
-      boolean asciiOnly, boolean embeddable, boolean json, boolean asXml,
-      boolean rawObjKeys, TokenConsumer out) {
+      boolean asciiOnly, boolean embeddable, boolean json,
+      MarkupRenderMode markupMode, boolean rawObjKeys, TokenConsumer out) {
     if (null == out) { throw new NullPointerException(); }
     this.embeddable = embeddable;
     this.asciiOnly = asciiOnly;
     this.json = json;
-    this.asXml = asXml;
+    this.markupMode = markupMode;
     this.rawObjKeys = rawObjKeys;
     this.out = out;
   }
@@ -65,30 +65,41 @@
   public final boolean isAsciiOnly() { return asciiOnly; }
   public final boolean asJson() { return json; }
   /** True iff DOM tree nodes should be rendered as XML. */
-  public final boolean asXml() { return asXml; }
+ public final boolean asXml() { return markupMode == MarkupRenderMode.XML; }
+  public final MarkupRenderMode markupRenderMode() { return markupMode; }
   public final boolean rawObjKeys() { return rawObjKeys; }
   public final TokenConsumer getOut() { return out; }

   public RenderContext withAsciiOnly(boolean b) {
     return b != asciiOnly
-        ? new RenderContext(b, embeddable, json, asXml, rawObjKeys, out) : this;
+        ? new RenderContext(b, embeddable, json, markupMode, rawObjKeys, out)
+        : this;
   }
   public RenderContext withEmbeddable(boolean b) {
     return b != embeddable
-        ? new RenderContext(asciiOnly, b, json, asXml, rawObjKeys, out) : this;
+        ? new RenderContext(asciiOnly, b, json, markupMode, rawObjKeys, out)
+        : this;
   }
   public RenderContext withJson(boolean b) {
     return b != json
- ? new RenderContext(asciiOnly, embeddable, b, asXml, rawObjKeys, out)
+        ? new RenderContext(
+            asciiOnly, embeddable, b, markupMode, rawObjKeys, out)
         : this;
   }
-  public RenderContext withAsXml(boolean b) {
-    return b != this.asXml
- ? new RenderContext(asciiOnly, embeddable, json, b, rawObjKeys, out)
+  public RenderContext withMarkupRenderMode(MarkupRenderMode markupMode) {
+    return markupMode != this.markupMode
+        ? new RenderContext(
+            asciiOnly, embeddable, json, markupMode, rawObjKeys, out)
         : this;
   }
+  @Deprecated
+  public RenderContext withAsXml(boolean b) {
+    return withMarkupRenderMode(
+        b ? MarkupRenderMode.XML : MarkupRenderMode.HTML);
+  }
   public RenderContext withRawObjKeys(boolean b) {
-    return b != this.asXml
- ? new RenderContext(asciiOnly, embeddable, json, asXml, b, out) : this;
+    return b != this.rawObjKeys
+ ? new RenderContext(asciiOnly, embeddable, json, markupMode, b, out)
+        : this;
   }
 }
=======================================
--- /trunk/tests/com/google/caja/parser/html/NodesTest.java Tue May 25 12:11:32 2010
+++ /trunk/tests/com/google/caja/parser/html/NodesTest.java Tue Aug 10 14:40:28 2010
@@ -15,6 +15,7 @@
 package com.google.caja.parser.html;

 import com.google.caja.render.Concatenator;
+import com.google.caja.reporting.MarkupRenderMode;
 import com.google.caja.reporting.RenderContext;
 import com.google.caja.util.CajaTestCase;

@@ -153,7 +154,8 @@
     Namespaces ns = new Namespaces(
         Namespaces.HTML_DEFAULT, "svg", Namespaces.XML_NAMESPACE_URI);
     StringBuilder sb = new StringBuilder();
- RenderContext rc = new RenderContext(new Concatenator(sb)).withAsXml(true);
+    RenderContext rc = new RenderContext(new Concatenator(sb))
+        .withMarkupRenderMode(MarkupRenderMode.XML);
     Nodes.render(fragment, ns, rc);
     rc.getOut().noMoreTokens();
     assertEquals(
@@ -173,7 +175,8 @@
     Namespaces ns = new Namespaces(
         Namespaces.HTML_DEFAULT, "html", Namespaces.HTML_NAMESPACE_URI);
     StringBuilder sb = new StringBuilder();
- RenderContext rc = new RenderContext(new Concatenator(sb)).withAsXml(false);
+    RenderContext rc = new RenderContext(new Concatenator(sb))
+        .withMarkupRenderMode(MarkupRenderMode.HTML);
     Nodes.render(el, ns, rc);
     rc.getOut().noMoreTokens();
     assertEquals("<html:span title=\"Howdy\"></html:span>", sb.toString());
@@ -282,6 +285,21 @@
         Nodes.render(new NullLocalNameMembrane().wrap(el, Element.class)));
   }

+  public final void testRenderModes() throws Exception {
+    DocumentFragment f = htmlFragment(fromString(
+        "<input checked name=foo type=checkbox>"));
+    assertEquals(
+        "<input checked=\"checked\" name=\"foo\" type=\"checkbox\" />",
+        Nodes.render(f, MarkupRenderMode.XML));
+    assertEquals(
+        "<input checked=\"checked\" name=\"foo\" type=\"checkbox\" />",
+        Nodes.render(f, MarkupRenderMode.HTML));
+    assertEquals(
+        "<input checked name=\"foo\" type=\"checkbox\">",
+        Nodes.render(f, MarkupRenderMode.HTML4_BACKWARDS_COMPAT));
+  }
+
+
   public final void testRenderSpeed() throws Exception {
     Element doc = html(fromResource("amazon.com.html"));
     benchmark(100, doc);  // prime the JIT
=======================================
--- /trunk/tests/com/google/caja/util/RhinoTestBed.java Thu Dec 10 17:39:38 2009
+++ /trunk/tests/com/google/caja/util/RhinoTestBed.java Tue Aug 10 14:40:28 2010
@@ -35,6 +35,7 @@
 import com.google.caja.parser.quasiliteral.CajitaRewriter;
 import com.google.caja.parser.quasiliteral.DefaultValijaRewriter;
 import com.google.caja.reporting.EchoingMessageQueue;
+import com.google.caja.reporting.MarkupRenderMode;
 import com.google.caja.reporting.MessageContext;
 import com.google.caja.reporting.MessageQueue;
 import com.google.caja.reporting.RenderContext;
@@ -211,7 +212,7 @@
   private static String render(ParseTreeNode n) {
     StringBuilder sb = new StringBuilder();
     TokenConsumer tc = n.makeRenderer(sb, null);
-    n.render(new RenderContext(tc).withAsXml(true));
+ n.render(new RenderContext(tc).withMarkupRenderMode(MarkupRenderMode.XML));
     tc.noMoreTokens();
     return sb.toString();
   }

Reply via email to