Revision: 4960
Author: jasvir
Date: Tue Jul 3 08:29:01 2012
Log: Add optional logging to html sanitizer
http://codereview.appspot.com/6343069
Adds an optional logger that is called every time tags, attributes, or
values are rewritten or elided by the sanitizer.
[email protected]
http://code.google.com/p/google-caja/source/detail?r=4960
Modified:
/trunk/src/com/google/caja/plugin/html-sanitizer.js
/trunk/tests/com/google/caja/plugin/html-sanitizer-test.js
=======================================
--- /trunk/src/com/google/caja/plugin/html-sanitizer.js Fri Jun 22 11:39:33 2012
+++ /trunk/src/com/google/caja/plugin/html-sanitizer.js Tue Jul 3 08:29:01 2012
@@ -741,6 +741,31 @@
}
}
+ function log(opt_logger, tagName, attribName, oldValue, newValue) {
+ if (!attribName) {
+ opt_logger(tagName + " removed", {
+ change: "removed",
+ tagName: tagName
+ });
+ }
+ if (oldValue !== newValue) {
+ var changed = "changed";
+ if (oldValue && !newValue) {
+ changed = "removed";
+ } else if (!oldValue && newValue) {
+ changed = "added";
+ }
+ opt_logger(tagName + "." + attribName + " " + changed, {
+ change: changed,
+ tagName: tagName,
+ attribName: attribName,
+ oldValue: oldValue,
+ newValue: newValue
+ });
+ }
+ }
+
+
/**
* Sanitizes attributes on an HTML tag.
* @param {string} tagName An HTML tag name in lowercase.
@@ -755,11 +780,12 @@
* @return {Array.<?string>} The sanitized attributes as a list of
alternating
* names and values, where a null value means to omit the attribute.
*/
- function sanitizeAttribs(
- tagName, attribs, opt_naiveUriRewriter, opt_nmTokenPolicy) {
+ function sanitizeAttribs(tagName, attribs,
+ opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
for (var i = 0; i < attribs.length; i += 2) {
var attribName = attribs[i];
var value = attribs[i + 1];
+ var oldValue = value;
var atype = null, attribKey;
if ((attribKey = tagName + '::' + attribName,
html4.ATTRIBS.hasOwnProperty(attribKey)) ||
@@ -772,10 +798,16 @@
case html4.atype['NONE']: break;
case html4.atype['SCRIPT']:
value = null;
+ if (opt_logger) {
+ log(opt_logger, tagName, attribName, oldValue, value);
+ }
break;
case html4.atype['STYLE']:
if ('undefined' === typeof parseCssDeclarations) {
value = null;
+ if (opt_logger) {
+ log(opt_logger, tagName, attribName, oldValue, value);
+ }
break;
}
var sanitizedDeclarations = [];
@@ -794,7 +826,11 @@
sanitizedDeclarations.push(property + ': ' +
tokens.join(' '));
}
});
- value = sanitizedDeclarations.length > 0 ?
sanitizedDeclarations.join(' ; ') : null;
+ value = sanitizedDeclarations.length > 0 ?
+ sanitizedDeclarations.join(' ; ') : null;
+ if (opt_logger) {
+ log(opt_logger, tagName, attribName, oldValue, value);
+ }
break;
case html4.atype['ID']:
case html4.atype['IDREF']:
@@ -803,9 +839,15 @@
case html4.atype['LOCAL_NAME']:
case html4.atype['CLASSES']:
value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
+ if (opt_logger) {
+ log(opt_logger, tagName, attribName, oldValue, value);
+ }
break;
case html4.atype['URI']:
value = safeUri(value, opt_naiveUriRewriter);
+ if (opt_logger) {
+ log(opt_logger, tagName, attribName, oldValue, value);
+ }
break;
case html4.atype['URI_FRAGMENT']:
if (value && '#' === value.charAt(0)) {
@@ -817,13 +859,22 @@
} else {
value = null;
}
+ if (opt_logger) {
+ log(opt_logger, tagName, attribName, oldValue, value);
+ }
break;
default:
value = null;
+ if (opt_logger) {
+ log(opt_logger, tagName, attribName, oldValue, value);
+ }
break;
}
} else {
value = null;
+ if (opt_logger) {
+ log(opt_logger, tagName, attribName, oldValue, value);
+ }
}
attribs[i + 1] = value;
}
@@ -842,11 +893,16 @@
* @return {function(string, Array.<?string>)} A tagPolicy suitable for
* passing to html.sanitize.
*/
- function makeTagPolicy(opt_naiveUriRewriter, opt_nmTokenPolicy) {
+ function makeTagPolicy(
+ opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
return function(tagName, attribs) {
if (!(html4.ELEMENTS[tagName] & html4.eflags['UNSAFE'])) {
- return sanitizeAttribs(
- tagName, attribs, opt_naiveUriRewriter, opt_nmTokenPolicy);
+ return sanitizeAttribs(tagName, attribs,
+ opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger);
+ } else {
+ if (opt_logger) {
+ log(opt_logger, tagName, undefined, undefined, undefined);
+ }
}
};
}
@@ -874,8 +930,10 @@
* to attributes containing HTML names, element IDs, and
space-separated
* lists of classes. If not given, such attributes are left
unchanged.
*/
- function sanitize(inputHtml, opt_naiveUriRewriter, opt_nmTokenPolicy) {
- var tagPolicy = makeTagPolicy(opt_naiveUriRewriter, opt_nmTokenPolicy);
+ function sanitize(inputHtml,
+ opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger) {
+ var tagPolicy = makeTagPolicy(
+ opt_naiveUriRewriter, opt_nmTokenPolicy, opt_logger);
return sanitizeWithPolicy(inputHtml, tagPolicy);
}
=======================================
--- /trunk/tests/com/google/caja/plugin/html-sanitizer-test.js Fri Jun 22 09:20:54 2012
+++ /trunk/tests/com/google/caja/plugin/html-sanitizer-test.js Tue Jul 3 08:29:01 2012
@@ -1,8 +1,14 @@
function uriPolicy(value) {
+ if ("specialurl" === value) {
+ return value;
+ }
return 'u:' + value;
}
function nmTokenPolicy(nmTokens) {
+ if ("specialtoken" === nmTokens) {
+ return nmTokens;
+ }
if (/[^a-z\t\n\r ]/i.test(nmTokens)) {
return null;
} else {
@@ -13,6 +19,11 @@
});
}
}
+
+var logMessages = [];
+function logPolicy(msg, detail) {
+ logMessages.push(msg);
+}
function check1(original, opt_result) {
if (opt_result === void 0) {
@@ -287,6 +298,41 @@
jsunit.pass();
});
+function assertSanitizerMessages(input, expected, messages) {
+ logMessages = [];
+ var actual = html.sanitize(input, uriPolicy, nmTokenPolicy, logPolicy);
+ assertEquals(expected, actual);
+ // legacy sanitizer does not support logging
+ if (!html.isLegacy) {
+ assertEquals(messages.length, logMessages.length);
+ logMessages.forEach(function (val, i) {
+ assertEquals(messages[i], val);
+ });
+ }
+}
+
+jsunitRegister('testLogger',
+ function testLogger() {
+ assertSanitizerMessages('<a href="http://www.example.com/">hi</a>',
+ '<a href=\"u:http://www.example.com/\">hi</a>',
+ ["a.href changed"]);
+ assertSanitizerMessages('<a href="specialurl">hi</a>',
+ '<a href=\"specialurl\">hi</a>',
+ []);
+ assertSanitizerMessages('<div onclick="foo()"></div>',
+ '<div></div>',
+ ["div.onclick removed"]);
+ assertSanitizerMessages(
+ '<div onclick="foo()" class="specialtoken" id=baz></div>',
+ '<div class="specialtoken" id="p-baz"></div>',
+ ["div.onclick removed", "div.id changed"]);
+ assertSanitizerMessages(
+ '<script>alert(1);</script>',
+ '',
+ ["script removed"]);
+ jsunit.pass();
+});
+
function assertSAXEvents(htmlSource, param, varargs_golden) {
// events is a flat array of triples (type, data, param)
var events = [];