Revision: 4789
Author:   [email protected]
Date:     Mon Feb 27 09:52:24 2012
Log:      Draft of a sanitizer for CSS selectors in JavaScript
http://codereview.appspot.com/5649058

sanitizeCssSelectors(selectors) performs the same selector sanitization
as CssRewriter.  This CL is just a test file that contains both the code
under test and the test cases in CssRewriterTest and does not attempt
to move the code into its final location since I want to discuss
where this should fit as part of this CL.

One issue is whether CssRewriterTest.java should run its tests against the
JavaScript version.  To do that, we would need to

1. either load html-emitter.js and domado.js into Rhino to get the property
sanitization function in domado.js, OR

2. move the property sanitization out of domado.js so that less needs
to be pulled into Rhino to test equivalence of the java and javascript
CSS rewritings.

I will follow this CL shortly with another that wires
sanitizeCssSelectors into html-emitter.js.

[email protected]

http://code.google.com/p/google-caja/source/detail?r=4789

Added:
 /trunk/tests/com/google/caja/plugin/CssSelectorTest.java
 /trunk/tests/com/google/caja/plugin/css-selector-test.html
 /trunk/tests/com/google/caja/plugin/css-selector-test.js
Modified:
 /trunk/src/com/google/caja/plugin/domado.js
 /trunk/src/com/google/caja/plugin/es53-frame-group.js
 /trunk/src/com/google/caja/plugin/html-emitter.js
 /trunk/src/com/google/caja/plugin/sanitizecss.js

=======================================
--- /dev/null
+++ /trunk/tests/com/google/caja/plugin/CssSelectorTest.java Mon Feb 27 09:52:24 2012
@@ -0,0 +1,30 @@
+// Copyright (C) 2012 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.caja.plugin;
+
+import com.google.caja.util.CajaTestCase;
+import com.google.caja.util.RhinoTestBed;
+
+/**
+ * JUnit wrapper for CSS selector JSUnit unittests.
+ *
+ * @author [email protected]
+ */
+public final class CssSelectorTest extends CajaTestCase {
+  public final void testHtmlSanitizer() throws Exception {
+    RhinoTestBed.runJsUnittestFromHtml(
+        html(fromResource("css-selector-test.html")));
+  }
+}
=======================================
--- /dev/null
+++ /trunk/tests/com/google/caja/plugin/css-selector-test.html Mon Feb 27 09:52:24 2012
@@ -0,0 +1,29 @@
+<!--
+ - Copyright (C) 2012 Google Inc.
+ -
+ - Licensed under the Apache License, Version 2.0 (the "License");
+ - you may not use this file except in compliance with the License.
+ - You may obtain a copy of the License at
+ -
+ -      http://www.apache.org/licenses/LICENSE-2.0
+ -
+ - Unless required by applicable law or agreed to in writing, software
+ - distributed under the License is distributed on an "AS IS" BASIS,
+ - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ - See the License for the specific language governing permissions and
+ - limitations under the License.
+-->
+<title>CSS Selector Test</title>
+
+<style>
+p { font-family: monospace; white-space: pre-wrap }
+</style>
+<script type="text/javascript" src="../../../../js/jsunit/2.2/jsUnitCore.js"
+ ></script>
+<script type="text/javascript" src="html4-defs.js"></script>
+<script type="text/javascript" src="jsunit.js"></script>
+<script type="text/javascript" src="csslexer.js"></script>
+<script type="text/javascript" src="cssparser.js"></script>
+<script type="text/javascript" src="sanitizecss.js"></script>
+<script type="text/javascript" src="css-selector-test.js"></script>
+<script>jsunitRun()</script>
=======================================
--- /dev/null
+++ /trunk/tests/com/google/caja/plugin/css-selector-test.js Mon Feb 27 09:52:24 2012
@@ -0,0 +1,68 @@
+// Copyright (C) 2012 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+assertSelector("#c__", [[], []]);
+assertSelector("#foo .bar", [["#foo .bar"], []]);
+assertSelector("#foo > #bar", [["#foo > #bar"], []]);
+assertSelector("#foo", [["#foo"], []]);
+assertSelector("#foo:visited", [[], ["a#foo:visited"]]);
+assertSelector("#foo:visited, div, .bar:link, p",
+               [["div", "p"], ["a#foo:visited", "a.bar:link"]]);
+assertSelector("* html > * > p", [[], []]);
+assertSelector("* html p", [["* html p"], []]);
+assertSelector("* html", [[], []]);
+assertSelector("*:visited", [[], ["a:visited"]]);
+assertSelector(".c__", [[], []]);
+assertSelector(".foo:link", [[], ["a.foo:link"]]);
+assertSelector("a", [["a"], []]);
+assertSelector("a#_c", [[], []]);
+assertSelector("a#foo-bank", [["a#foo-bank"], []]);
+assertSelector("a#foo-bank:visited", [[], ["a#foo-bank:visited"]]);
+assertSelector("a, bogus, i", [["a", "i"], []]);
+assertSelector("a._c", [[], []]);
+assertSelector("a.c", [["a.c"], []]);
+assertSelector("a.foo", [["a.foo"], []]);
+assertSelector("a.foo, ._c", [["a.foo"], []]);
+assertSelector("a.foo, .b_c", [["a.foo", ".b_c"], []]);
+assertSelector("a.foo, b#c\2c d, .e", [["a.foo", ".e"], []]);
+assertSelector("a:attr(href)", [[], []]);
+assertSelector("a:attr(href), b", [["b"], []]);
+assertSelector("a:link, a:badness", [[], ["a:link"]]);
+assertSelector("a:visited", [[], ["a:visited"]]);
+assertSelector("body", [[], []]);
+assertSelector("body.ie6 p", [["body.ie6 p"], []]);
+assertSelector("body.ie6", [[], []]);
+assertSelector("bogus", [[], []]);
+assertSelector("div *", [["div *"], []]);
+assertSelector("div", [["div"], []]);
+assertSelector("div[zwop]", [[], []]);
+assertSelector("p", [["p"], []]);
+assertSelector("script", [[], []]);
+assertSelector("strike, script, strong", [["strike", "strong"], []]);
+
+function assertSelector(selectorText, safeSelectors) {
+  var selectorTokens = [];
+  parseCssStylesheet(
+      selectorText + ' {}',
+      {
+        startRuleset: function (selector) {
+          selectorTokens.push.apply(selectorTokens, selector);
+        }
+      });
+  var selectors = sanitizeCssSelectors(selectorTokens);
+  assertEquals(
+      selectorText,
+      JSON.stringify(safeSelectors),
+      JSON.stringify(selectors));
+}
=======================================
--- /trunk/src/com/google/caja/plugin/domado.js Sun Feb 26 21:57:19 2012
+++ /trunk/src/com/google/caja/plugin/domado.js Mon Feb 27 09:52:24 2012
@@ -4872,8 +4872,8 @@
         if (typeof domicile.writeHook !== 'function') {
throw new Error('document.writeln not provided for this document');
         }
- // We don't write the \n separately rather than copying args, because the
-        // HTML parser would rather get fewer larger chunks.
+ // We don't write the \n separately rather than copying args, because
+        // the HTML parser would rather get fewer larger chunks.
         var args = Array.slice.call(arguments, 0);
         args.push("\n");
         domicile.writeHook.apply(undefined, args);
@@ -4899,7 +4899,7 @@
           window.setTimeout(listeners[+i], 0);
         }
       });
-
+
       // For JavaScript handlers.  See function dispatchEvent below
       domicile.handlers = [];
       domicile.TameHTMLDocument = TameHTMLDocument;  // Exposed for testing
=======================================
--- /trunk/src/com/google/caja/plugin/es53-frame-group.js Mon Jan 30 12:14:52 2012
+++ /trunk/src/com/google/caja/plugin/es53-frame-group.js Mon Feb 27 09:52:24 2012
@@ -442,7 +442,7 @@
     // functions. makeDOMAccessible does not make functions callable.

     // Testing for own properties, not 'in', because some quirk of Firefox
-    // makes  event objects appear as if they have the taming frame's
+    // makes event objects appear as if they have the taming frame's
     // prototype after being passed into taming frame code (!), so we want
     // to be able to override Object.prototype.v___ etc. Except for that,
     // it would be safer to not allow applying this to apparently defined-
=======================================
--- /trunk/src/com/google/caja/plugin/html-emitter.js Mon Jan 30 12:14:52 2012
+++ /trunk/src/com/google/caja/plugin/html-emitter.js Mon Feb 27 09:52:24 2012
@@ -24,7 +24,8 @@
  *
  * @author [email protected]
  * @provides HtmlEmitter
- * @requires bridalMaker html html4 cajaVM
+ * @requires bridalMaker html html4 cajaVM parseCssStylesheet console
+ * @requires cssSchema sanitizeCssProperty sanitizeCssSelectors
  */

 /**
@@ -366,9 +367,166 @@
         // Ignore problems dispatching error.
       }
     }
+
+    var allowed = {};
+    var cssMediaTypeWhitelist = {
+      'braille': allowed,
+      'embossed': allowed,
+      'handheld': allowed,
+      'print': allowed,
+      'projection': allowed,
+      'screen': allowed,
+      'speech': allowed,
+      'tty': allowed,
+      'tv': allowed
+    };
+
+    function sanitizeHistorySensitive(blockOfProperties) {
+      return '{}';  // TODO: implement me.
+    }

     function defineUntrustedStylesheet(cssText) {
-      // TODO(mikesamuel): Implement client side CSS sanitizing.
+      var safeCss = void 0;
+      // A stack describing the { ... } regions.
+      // Null elements indicate blocks that should not be emitted.
+      var blockStack = [];
+ // True when the content of the current block should be left off safeCss. + // If we don't have a domicile then we don't have a way to sanitize CSS
+      // properties.
+      var elide = !domicile;
+      parseCssStylesheet(
+          cssText,
+          {
+            startStylesheet: function () {
+              safeCss = [];
+            },
+            endStylesheet: function () {
+            },
+            startAtrule: function (atIdent, headerArray) {
+              if (elide) {
+                atIdent = null;
+              } else if (atIdent === '@media') {
+                headerArray = headerArray.filter(
+                  function (mediaType) {
+                    return cssMediaTypeWhitelist[mediaType] == allowed;
+                  });
+                if (headerArray.length) {
+                  safeCss.push(atIdent, headerArray.join(','), '{');
+                } else {
+                  atIdent = null;
+                }
+              } else {
+                if (atIdent === '@import') {
+                  if ('undefined' !== typeof console) {
+ console.log('@import ' + headerArray.join(' ') + ' elided');
+                  }
+                }
+                atIdent = null;  // Elide the block.
+              }
+              elide = !atIdent;
+              blockStack.push(atIdent);
+            },
+            endAtrule: function () {
+              var atIdent = blockStack.pop();
+              if (!elide) {
+                safeCss.push(';');
+              }
+              checkElide();
+            },
+            startBlock: function () {
+              // There are no bare blocks in CSS, so we do not change the
+              // block stack here, but instead in the events that bracket
+              // blocks.
+              if (!elide) {
+                safeCss.push('{');
+              }
+            },
+            endBlock: function () {
+              if (!elide) {
+                safeCss.push('}');
+                elide = true;  // skip any semicolon from endAtRule.
+              }
+            },
+            startRuleset: function (selectorArray) {
+              var historySensitiveSelectors = void 0;
+              var removeHistoryInsensitiveSelectors = false;
+              if (!elide) {
+                var selectors = sanitizeCssSelectors(selectorArray);
+                var historyInsensitiveSelectors = selectors[0];
+                historySensitiveSelectors = selectors[1];
+                if (!historyInsensitiveSelectors.length
+                    && !historySensitiveSelectors.length) {
+                  elide = true;
+                } else {
+                  var selector = historyInsensitiveSelectors.join(', ');
+                  if (!selector) {
+                    // If we have only history sensitive selectors,
+ // use an impossible rule so that we can capture the content
+                    // for later processing by
+                    // history insenstive content for use below.
+                    selector = 'head > html';
+                    removeHistoryInsensitiveSelectors = true;
+                  }
+                  safeCss.push(selector);
+                }
+              }
+              blockStack.push(
+                  elide
+                  ? null
+                  // Sometimes a single list of selectors is split in two,
+                  //   div, a:visited
+                  // because we want to allow some properties for DIV that
+                  // we don't want to allow for A:VISITED to avoid leaking
+                  // user history.
+                  // Store the history sensitive selectors and the position
+                  // where the block starts so we can later create a copy
+                  // of the permissive tokens, and filter it to handle the
+                  // history sensitive case.
+                  : {
+                      historySensitiveSelectors: historySensitiveSelectors,
+                      endOfSelecctors: safeCss.length,
+                      removeHistoryInsensitiveSelectors:
+                         removeHistoryInsensitiveSelectors
+                    });
+            },
+            endRuleset: function () {
+              var rules = blockStack.pop();
+              var propertiesEnd = safeCss.length;
+              if (!elide && rules) {
+                var extraSelectors = rules.historySensitiveSelectors;
+                if (extraSelectors.length) {
+ var propertyGroupTokens = safeCss.slice(rules.endOfSelectors);
+                  safeCss.push(extraSelectors.join(', '));
+                  safeCss.push.apply(
+ safeCss, sanitizeHistorySensitive(propertyGroupTokens));
+                }
+              }
+              if (rules && rules.removeHistoryInsensitiveSelectors) {
+                safeCss.splice(rules.endOfSelectors - 1, propertiesEnd);
+              }
+              checkElide();
+            },
+            declaration: function (property, valueArray) {
+              if (!elide && domicile) {
+                var schema = cssSchema[property];
+                var sanitizeUri = void 0;  // TODO
+                if (schema) {
+                  sanitizeCssProperty(property, valueArray, sanitizeUri);
+                  if (valueArray.length) {
+                    safeCss.push(property, ':', valueArray.join(' '), ';');
+                  }
+                }
+              }
+            }
+          });
+      function checkElide() {
+        elide = blockStack.length === 0
+            || blockStack[blockStack.length-1] !== null;
+      }
+      var document = insertionPoint.ownerDocument;
+      var safeCssText = safeCss.join('');
+      document.getElementsByTagName('head')[0].appendChild(
+          bridal.createStyleSheet(document, safeCssText));
     }

// Zero or one of the html4.eflags constants that captures the content type
@@ -446,7 +604,7 @@
         }
       }
     };
-     documentWriter.rcdata = documentWriter.pcdata;
+    documentWriter.rcdata = documentWriter.pcdata;

     var htmlParser = html.makeSaxParser(documentWriter);

=======================================
--- /trunk/src/com/google/caja/plugin/sanitizecss.js Thu Jan 19 09:04:11 2012
+++ /trunk/src/com/google/caja/plugin/sanitizecss.js Mon Feb 27 09:52:24 2012
@@ -14,7 +14,7 @@

 /**
  * @fileoverview
- * JavaScript support for client-side CSS schema.
+ * JavaScript support for client-side CSS sanitization.
  * The CSS property schema API is defined in CssPropertyPatterns.java which
  * is used to generate css-defs.js.
  *
@@ -25,7 +25,9 @@
  * @requires CSS_PROP_BIT_QSTRING_URL
  * @requires CSS_PROP_BIT_QUANTITY
  * @requires decodeCss
+ * @requires html4
  * @provides sanitizeCssProperty
+ * @provides sanitizeCssSelectors
  */

 /**
@@ -211,3 +213,135 @@
     tokens.length = k;
   };
 })();
+
+/**
+ * Given a series of tokens, returns two lists of sanitized selectors.
+ * @param {Array.<string>}selectors In the form produces by csslexer.js.
+ * @return {Array.<Array.<string>>} an array of length 2 where the zeroeth
+ *    element contains history-insensitive selectors and the first element
+ *    contains history-sensitive selectors.
+ */
+function sanitizeCssSelectors(selectors) {
+ // Produce two distinct lists of selectors to sequester selectors that are + // history sensitive (:visited), so that we can disallow properties in the
+  // property groups for the history sensitive ones.
+  var historySensitiveSelectors = [];
+  var historyInsensitiveSelectors = [];
+
+  // Remove any spaces that are not operators.
+  var k = 0, i;
+  for (i = 0; i < selectors.length; ++i) {
+    if (!(selectors[i] == ' '
+          && (selectors[i-1] == '>' || selectors[i+1] == '>'))) {
+      selectors[k++] = selectors[i];
+    }
+  }
+  selectors.length = k;
+
+ // Split around commas. If there is an error in one of the comma separated + // bits, we throw the whole away, but the failure of one selector does not
+  // affect others.
+  var n = selectors.length, start = 0;
+  for (i = 0; i < n; ++i) {
+    if (selectors[i] == ',') {
+      processSelector(start, i);
+      start = i+1;
+    }
+  }
+  processSelector(start, n);
+
+
+  function processSelector(start, end) {
+    var historySensitive = false;
+
+    // Space around commas is not an operator.
+    if (selectors[start] === ' ') { ++start; }
+    if (end-1 !== start && selectors[end] === ' ') { --end; }
+
+    // Split the selector into element selectors, content around
+    // space (ancestor operator) and '>' (descendant operator).
+    var out = [];
+    var lastOperator = start;
+    var elSelector = '';
+    for (var i = start; i < end; ++i) {
+      var tok = selectors[i];
+      var isChild = (tok === '>');
+      if (isChild || tok === ' ') {
+        // We've found the end of a single link in the selector chain.
+        // We disallow absolute positions relative to html.
+        elSelector = processElementSelector(lastOperator, i, false);
+        if (!elSelector || (isChild && /^html/i.test(elSelector))) {
+          return;
+        }
+        lastOperator = i+1;
+        out.push(elSelector, isChild ? ' > ' : ' ');
+      }
+    }
+    elSelector = processElementSelector(lastOperator, end, true);
+    if (!elSelector) { return; }
+    out.push(elSelector);
+
+    function processElementSelector(start, end, last) {
+      var debugStart = start, debugEnd = end;
+
+      // Split the element selector into three parts.
+      // DIV.foo#bar:hover
+      //    ^       ^
+      // el classes pseudo
+      var element, classId, pseudoSelector, tok, elType;
+      element = '';
+      if (start < end) {
+        tok = selectors[start].toLowerCase();
+        if (tok === '*' || (tok === 'html' && !last)
+            || (tok === 'body' && start+1 !== end && !last)
+            || ('number' === typeof (elType = html4.ELEMENTS[tok])
+                && !(elType & html4.eflags.UNSAFE))) {
+          ++start;
+          element = tok;
+        }
+      }
+      classId = '';
+      while (start < end) {
+        tok = selectors[start];
+        if (tok.charAt(0) === '#') {
+          if (/^#_|__$/.test(tok)) { return null; }
+          classId += tok;
+        } else if (tok === '.') {
+          if (++start < end
+              && /^[0-9A-Za-z:_\-]+$/.test(tok = selectors[start])
+              && !/^_|__$/.test(tok)) {
+            classId += '.' + tok;
+          } else {
+            return null;
+          }
+        } else {
+          break;
+        }
+        ++start;
+      }
+      pseudoSelector = '';
+      if (start < end && selectors[start] === ':') {
+        tok = selectors[++start];
+        if (tok === 'visited' || tok === 'link') {
+          if (!/^[a*]?$/.test(element)) {
+            return null;
+          }
+          historySensitive = true;
+          pseudoSelector = ':' + tok;
+          element = 'a';
+          ++start;
+        }
+      }
+      if (start === end) {
+        return element + classId + pseudoSelector;
+      }
+      return null;
+    }
+
+    (historySensitive
+     ? historySensitiveSelectors
+     : historyInsensitiveSelectors).push(out.join(''));
+  }
+
+  return [historyInsensitiveSelectors, historySensitiveSelectors];
+}

Reply via email to