Revision: 4960
Author:   jasvir
Date:     Tue Jul  3 08:29:01 2012
Log:      Add optional logging to html sanitizer
http://codereview.appspot.com/6343069

Adds an optional logger which gets called every time tags, attributes or
values are rewritten or elided by the sanitizer.

[email protected]

http://code.google.com/p/google-caja/source/detail?r=4960

Modified:
 /trunk/src/com/google/caja/plugin/html-sanitizer.js
 /trunk/tests/com/google/caja/plugin/html-sanitizer-test.js

=======================================
--- /trunk/src/com/google/caja/plugin/html-sanitizer.js Fri Jun 22 11:39:33 2012
+++ /trunk/src/com/google/caja/plugin/html-sanitizer.js Tue Jul 3 08:29:01 2012
@@ -741,6 +741,31 @@
     }
   }

+  function log(opt_logger, tagName, attribName, oldValue, newValue) {
+    if (!attribName) {
+      opt_logger(tagName + " removed", {
+        change: "removed",
+        tagName: tagName
+      });
+    }
+    if (oldValue !== newValue) {
+      var changed = "changed";
+      if (oldValue && !newValue) {
+        changed = "removed";
+      } else if (!oldValue && newValue)  {
+        changed = "added";
+      }
+      opt_logger(tagName + "." + attribName + " " + changed, {
+        change: changed,
+        tagName: tagName,
+        attribName: attribName,
+        oldValue: oldValue,
+        newValue: newValue
+      });
+    }
+  }
+
+
   /**
    * Sanitizes attributes on an HTML tag.
    * @param {string} tagName An HTML tag name in lowercase.
@@ -755,11 +780,12 @@
    * @return {Array.<?string>} The sanitized attributes as a list of alternating
    *     names and values, where a null value means to omit the attribute.
    */
-  function sanitizeAttribs(
-      tagName, attribs, opt_naiveUriRewriter, opt_nmTokenPolicy) {
+  function sanitizeAttribs(tagName, attribs,
+    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
     for (var i = 0; i < attribs.length; i += 2) {
       var attribName = attribs[i];
       var value = attribs[i + 1];
+      var oldValue = value;
       var atype = null, attribKey;
       if ((attribKey = tagName + '::' + attribName,
            html4.ATTRIBS.hasOwnProperty(attribKey)) ||
@@ -772,10 +798,16 @@
           case html4.atype['NONE']: break;
           case html4.atype['SCRIPT']:
             value = null;
+            if (opt_logger) {
+              log(opt_logger, tagName, attribName, oldValue, value);
+            }
             break;
           case html4.atype['STYLE']:
             if ('undefined' === typeof parseCssDeclarations) {
               value = null;
+              if (opt_logger) {
+                log(opt_logger, tagName, attribName, oldValue, value);
+             }
               break;
             }
             var sanitizedDeclarations = [];
@@ -794,7 +826,11 @@
                     sanitizedDeclarations.push(property + ': ' + tokens.join(' '));
                   }
                 });
- value = sanitizedDeclarations.length > 0 ? sanitizedDeclarations.join(' ; ') : null;
+            value = sanitizedDeclarations.length > 0 ?
+              sanitizedDeclarations.join(' ; ') : null;
+            if (opt_logger) {
+              log(opt_logger, tagName, attribName, oldValue, value);
+            }
             break;
           case html4.atype['ID']:
           case html4.atype['IDREF']:
@@ -803,9 +839,15 @@
           case html4.atype['LOCAL_NAME']:
           case html4.atype['CLASSES']:
             value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
+            if (opt_logger) {
+              log(opt_logger, tagName, attribName, oldValue, value);
+            }
             break;
           case html4.atype['URI']:
             value = safeUri(value, opt_naiveUriRewriter);
+            if (opt_logger) {
+              log(opt_logger, tagName, attribName, oldValue, value);
+            }
             break;
           case html4.atype['URI_FRAGMENT']:
             if (value && '#' === value.charAt(0)) {
@@ -817,13 +859,22 @@
             } else {
               value = null;
             }
+            if (opt_logger) {
+              log(opt_logger, tagName, attribName, oldValue, value);
+            }
             break;
           default:
             value = null;
+            if (opt_logger) {
+              log(opt_logger, tagName, attribName, oldValue, value);
+            }
             break;
         }
       } else {
         value = null;
+        if (opt_logger) {
+          log(opt_logger, tagName, attribName, oldValue, value);
+        }
       }
       attribs[i + 1] = value;
     }
@@ -842,11 +893,16 @@
    * @return {function(string, Array.<?string>)} A tagPolicy suitable for
    *     passing to html.sanitize.
    */
-  function makeTagPolicy(opt_naiveUriRewriter, opt_nmTokenPolicy) {
+  function makeTagPolicy(
+    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
     return function(tagName, attribs) {
       if (!(html4.ELEMENTS[tagName] & html4.eflags['UNSAFE'])) {
-        return sanitizeAttribs(
-            tagName, attribs, opt_naiveUriRewriter, opt_nmTokenPolicy);
+        return sanitizeAttribs(tagName, attribs,
+          opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger);
+      } else {
+        if (opt_logger) {
+          log(opt_logger, tagName, undefined, undefined, undefined);
+        }
       }
     };
   }
@@ -874,8 +930,10 @@
    *     to attributes containing HTML names, element IDs, and space-separated
    *     lists of classes. If not given, such attributes are left unchanged.
    */
-  function sanitize(inputHtml, opt_naiveUriRewriter, opt_nmTokenPolicy) {
-    var tagPolicy = makeTagPolicy(opt_naiveUriRewriter, opt_nmTokenPolicy);
+  function sanitize(inputHtml,
+    opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
+    var tagPolicy = makeTagPolicy(
+      opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger);
     return sanitizeWithPolicy(inputHtml, tagPolicy);
   }

=======================================
--- /trunk/tests/com/google/caja/plugin/html-sanitizer-test.js Fri Jun 22 09:20:54 2012
+++ /trunk/tests/com/google/caja/plugin/html-sanitizer-test.js Tue Jul 3 08:29:01 2012
@@ -1,8 +1,14 @@
 function uriPolicy(value) {
+  if ("specialurl" === value) {
+    return value;
+  }
   return 'u:' + value;
 }

 function nmTokenPolicy(nmTokens) {
+  if ("specialtoken" === nmTokens) {
+    return nmTokens;
+  }
   if (/[^a-z\t\n\r ]/i.test(nmTokens)) {
     return null;
   } else {
@@ -13,6 +19,11 @@
       });
   }
 }
+
+var logMessages = [];
+function logPolicy(msg, detail) {
+  logMessages.push(msg);
+}

 function check1(original, opt_result) {
   if (opt_result === void 0) {
@@ -287,6 +298,41 @@
   jsunit.pass();
 });

+function assertSanitizerMessages(input, expected, messages) {
+  logMessages = [];
+  var actual = html.sanitize(input, uriPolicy, nmTokenPolicy, logPolicy);
+  assertEquals(expected, actual);
+  // legacy sanitizer does not support logging
+  if (!html.isLegacy) {
+    assertEquals(messages.length, logMessages.length);
+    logMessages.forEach(function (val, i) {
+      assertEquals(messages[i], val);
+    });
+  }
+}
+
+jsunitRegister('testLogger',
+               function testLogger() {
+  assertSanitizerMessages('<a href="http://www.example.com/";>hi</a>',
+    '<a href=\"u:http://www.example.com/\";>hi</a>',
+    ["a.href changed"]);
+  assertSanitizerMessages('<a href="specialurl">hi</a>',
+    '<a href=\"specialurl\">hi</a>',
+    []);
+  assertSanitizerMessages('<div onclick="foo()"></div>',
+    '<div></div>',
+    ["div.onclick removed"]);
+  assertSanitizerMessages(
+    '<div onclick="foo()" class="specialtoken" id=baz></div>',
+    '<div class="specialtoken" id="p-baz"></div>',
+    ["div.onclick removed", "div.id changed"]);
+  assertSanitizerMessages(
+    '<script>alert(1);</script>',
+    '',
+    ["script removed"]);
+  jsunit.pass();
+});
+
 function assertSAXEvents(htmlSource, param, varargs_golden) {
   // events is a flat array of triples (type, data, param)
   var events = [];

Reply via email to