jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/332711 )

Change subject: T58846: Port sanitizer changes from core commit feb23b46
......................................................................


T58846: Port sanitizer changes from core commit feb23b46

Change-Id: I59163d62fface3a77ba223cb0dd49bc8602ba49a
---
M lib/utils/Util.js
M lib/wt2html/tt/Sanitizer.js
M tests/parserTests-blacklist.js
M tests/parserTests.txt
4 files changed, 133 insertions(+), 89 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/utils/Util.js b/lib/utils/Util.js
index 93d54ec..c308abc 100644
--- a/lib/utils/Util.js
+++ b/lib/utils/Util.js
@@ -1215,14 +1215,14 @@
                return src.substring(tagWidths[0], src.length - tagWidths[1]);
        },
 
-       // Returns the utf8 encoding of the code point
+       // Returns a JS string from the provided code point
        codepointToUtf8: function(cp) {
-               try {
-                       return String.fromCharCode(cp);
-               } catch (e) {
-                       // Return a tofu?
-                       return cp.toString();
-               }
+               return String.fromCodePoint(cp);
+       },
+
+       // Returns the code point at the first position of the string
+       utf8ToCodepoint: function(str) {
+               return str.codePointAt(0);
        },
 
        // Returns true if a given Unicode codepoint is a valid character in 
XML.
diff --git a/lib/wt2html/tt/Sanitizer.js b/lib/wt2html/tt/Sanitizer.js
index 60fd0a0..2c48739 100644
--- a/lib/wt2html/tt/Sanitizer.js
+++ b/lib/wt2html/tt/Sanitizer.js
@@ -818,52 +818,76 @@
        });
 };
 
-Sanitizer.prototype.checkCss = function(text) {
-       function removeMismatchedQuoteChar(str, quoteChar) {
-               var re1, re2;
-               if (quoteChar === "'") {
-                       re1 = /'/g;
-                       re2 = /'([^'\n\r\f]*)$/;
-               } else {
-                       re1 = /"/g;
-                       re2 = /"([^"\n\r\f]*)$/;
-               }
-
-               var mismatch = ((str.match(re1) || []).length) % 2 === 1;
-               if (mismatch) {
-                       str = str.replace(re2, function() {
-                               // replace the mismatched quoteChar with a space
-                               return " " + arguments[1];
-                       });
-               }
-
-               return str;
+function removeMismatchedQuoteChar(str, quoteChar) {
+       var re1, re2;
+       if (quoteChar === "'") {
+               re1 = /'/g;
+               re2 = /'([^'\n\r\f]*)$/;
+       } else {
+               re1 = /"/g;
+               re2 = /"([^"\n\r\f]*)$/;
        }
+       var mismatch = ((str.match(re1) || []).length) % 2 === 1;
+       if (mismatch) {
+               str = str.replace(re2, function() {
+                       // replace the mismatched quoteChar with a space
+                       return " " + arguments[1];
+               });
+       }
+       return str;
+}
 
+var ieReplace = new Map(Object.entries({
+       'ʀ': 'r',
+       'ɴ': 'n',
+       'ⁿ': 'n',
+       'ʟ': 'l',
+       'ɪ': 'i',
+       '⁽': '(',
+       '₍': '(',
+}));
+
+Sanitizer.prototype.normalizeCss = function(text) {
        // Decode character references like &#123;
        text = Sanitizer.decodeCharReferences(text);
-       text = text.replace(this.constants.cssDecodeRE, function() {
-                               var c;
-                               if (arguments[1] !== undefined) {
-                                       // Line continuation
-                                       return '';
-                               } else if (arguments[2] !== undefined) {
-                                       c = 
Util.codepointToUtf8(parseInt(arguments[2], 16));
-                               } else if (arguments[3] !== undefined) {
-                                       c = arguments[3];
-                               } else {
-                                       c = '\\';
-                               }
 
-                               if (c === "\n" || c === '"' || c === "'" || c 
=== '\\') {
-                                       // These characters need to be escaped 
in strings
-                                       // Clean up the escape sequence to 
avoid parsing errors by clients
-                                       return '\\' + 
(c.charCodeAt(0)).toString(16) + ' ';
-                               } else {
-                                       // Decode unnecessary escape
-                                       return c;
-                               }
-                       });
+       text = text.replace(this.constants.cssDecodeRE, function 
cssDecodeCallback() {
+               var c;
+               if (arguments[1] !== undefined) {
+                       // Line continuation
+                       return '';
+               } else if (arguments[2] !== undefined) {
+                       c = Util.codepointToUtf8(parseInt(arguments[2], 16));
+               } else if (arguments[3] !== undefined) {
+                       c = arguments[3];
+               } else {
+                       c = '\\';
+               }
+
+               if (c === "\n" || c === '"' || c === "'" || c === '\\') {
+                       // These characters need to be escaped in strings
+                       // Clean up the escape sequence to avoid parsing errors 
by clients
+                       return '\\' + (c.charCodeAt(0)).toString(16) + ' ';
+               } else {
+                       // Decode unnecessary escape
+                       return c;
+               }
+       });
+
+       // Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat 
as ascii
+       text = text.replace(/[\uFF00-\uFFEF]/g, function(u) {
+               if (/\uFF3c/.test(u)) {
+                       return u;
+               } else {
+                       var cp = Util.utf8ToCodepoint(u);
+                       return String.fromCodePoint(cp - 65248);  // ASCII 
range \x21-\x7A
+               }
+       });
+
+       // Convert more characters IE6 might treat as ascii
+       text = 
text.replace(/\u0280|\u0274|\u207F|\u029F|\u026A|\u207D|\u208D/g, function(u) {
+               return ieReplace.get(u) || u;
+       });
 
        // Remove any comments; IE gets token splitting wrong
        // This must be done AFTER decoding character references and
@@ -894,20 +918,40 @@
                text = text.substr(0, commentPos);
        }
 
-       // SSS FIXME: Looks like the HTML5 library normalizes attributes
-       // and gets rid of these attribute values -- something that needs
-       // investigation and fixing.
-       //
-       // So, style="/* insecure input */" comes out as style=""
-       if (/[\000-\010\016-\037\177]/.test(text)) {
-               return '/* invalid control char */';
-       }
-       if (/expression|filter\s*:|accelerator\s*:|url\s*\(/i.test(text)) {
-               return '/* insecure input */';
-       }
+       // S followed by repeat, iteration, or prolonged sound marks,
+       // which IE will treat as "ss"
+       text = 
text.replace(/s(?:\u3031|\u309D|\u30FC|\u30FD|\uFE7C|\uFE7D|\uFF70)/ig, 'ss');
+
        return text;
 };
 
+var insecureRE = new RegExp(
+       "expression" +
+               "|filter\\s*:" +
+               "|accelerator\\s*:" +
+               "|-o-link\\s*:" +
+               "|-o-link-source\\s*:" +
+               "|-o-replace\\s*:" +
+               "|url\\s*\\(" +
+               "|image\\s*\\(" +
+               "|image-set\\s*\\(" +
+               "|attr\\s*\\([^)]+[\\s,]+url",
+       "i"
+);
+
+Sanitizer.prototype.checkCss = function(text) {
+       text = this.normalizeCss(text);
+       // \000-\010\013\016-\037\177 are the octal escape sequences
+       if (/[\u0000-\u0008\u000B\u000E-\u001F\u007F]/.test(text) ||
+                       text.indexOf(SanitizerConstants.UTF8_REPLACEMENT) > -1) 
{
+               return '/* invalid control char */';
+       } else if (insecureRE.test(text)) {
+               return '/* insecure input */';
+       } else {
+               return text;
+       }
+};
+
 Sanitizer.normalizeSectionIdWhiteSpace = function(id) {
        return id.replace(/[ _]+/g, ' ').trim();
 };
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index bc414d2..f6b670a 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -153,12 +153,6 @@
 add("wt2html", "(bug 19451) Links should refer to the normalized form.", "<p 
data-parsoid='{\"dsr\":[0,64,0,0]}'><a rel=\"mw:WikiLink\" href=\"./אַ\" 
title=\"אַ\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./אַ\"},\"sa\":{\"href\":\"&amp;#xFB2E;\"},\"dsr\":[0,12,2,2]}'>אַ</a>\n<a
 rel=\"mw:WikiLink\" href=\"./אַ\" title=\"אַ\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./אַ\"},\"sa\":{\"href\":\"&amp;#x5d0;&amp;#x5b7;\"},\"dsr\":[13,31,2,2]}'>אַ</a>\n<a
 rel=\"mw:WikiLink\" href=\"./אַ\" title=\"אַ\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./אַ\"},\"sa\":{\"href\":\"&amp;#x5d0;ַ\"},\"dsr\":[32,44,2,2]}'>אַ</a>\n<a
 rel=\"mw:WikiLink\" href=\"./אַ\" title=\"אַ\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./אַ\"},\"sa\":{\"href\":\"א&amp;#x5b7;\"},\"dsr\":[45,57,2,2]}'>אַ</a>\n<a
 rel=\"mw:WikiLink\" href=\"./אַ\" title=\"אַ\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./אַ\"},\"sa\":{\"href\":\"אַ\"},\"dsr\":[58,64,2,2]}'>אַ</a></p>");
 add("wt2html", "Bug 2304: HTML attribute safety (unsafe breakout parameter; 
2309)", "<div style=\"float: right;  >alert(document.cookie)\" about=\"#mwt1\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\"float: right;  
>alert(document.cookie)\"},\"sa\":{\"style\":\"float: right; 
{{{1}}}\"},\"dsr\":[0,55,null,null],\"pi\":[[{\"k\":\"1\"}]]}' 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"div 
style\",\"href\":\"./Template:Div_style\"},\"params\":{\"1\":{\"wt\":\"\\\">&lt;script>alert(document.cookie)&lt;/script>\"}},\"i\":0}}]}'>Magic
 div</div>");
 add("wt2html", "Bug 2304: HTML attribute safety (unsafe breakout parameter 2; 
2309)", "<div style=\"float: right;   >alert(document.cookie)\" about=\"#mwt1\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\"float: right;   
>alert(document.cookie)\"},\"sa\":{\"style\":\"float: right; 
{{{1}}}\"},\"dsr\":[0,56,null,null],\"pi\":[[{\"k\":\"1\"}]]}' 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"div 
style\",\"href\":\"./Template:Div_style\"},\"params\":{\"1\":{\"wt\":\"\\\" 
>&lt;script>alert(document.cookie)&lt;/script>\"}},\"i\":0}}]}'>Magic 
div</div>");
-add("wt2html", "CSS safety test (all browsers): vertical tab (bug 55332 / 
CVE-2013-4567)", "<p style=\"/* insecure input */\" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\"/* insecure input 
*/\"},\"sa\":{\"style\":\"font-size: 100px; 
background-image:url\\\\b(https://www.google.com/images/srpr/logo6w.png)\"},\"dsr\":[0,104,99,4]}'>A</p>");
-add("wt2html", "MSIE 6 CSS safety test: Fullwidth (bug 55332)", "<p 
style=\"font-size: 100px; color: expression((title='XSSed'),'red')\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,75,70,4]}'>A</p>\n<div 
style=\"top:EXPRESSION(alert())\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[76,120,37,6]}'>B</div>");
-add("wt2html", "MSIE 6 CSS safety test: IPA extensions (bug 55332)", "<div 
style=\"background-image:uʀʟ(javascript:alert())\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,61,54,6]}'>A</div>\n<p 
style=\"font-size: 100px; color: expʀessɪoɴ((title='XSSed'),'red')\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[62,137,70,4]}'>B</p>");
-add("wt2html", "MSIE 6 CSS safety test: sup/sub script (bug 55332)", "<div 
style=\"background-image:url⁽javascript:alert())\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,61,54,6]}'>A</div>\n<div 
style=\"background-image:url₍javascript:alert())\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[62,123,54,6]}'>B</div>\n<p 
style=\"font-size: 100px; color: expressioⁿ((title='XSSed'),'red')\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[124,199,70,4]}'>C</p>");
-add("wt2html", "Opera -o-link CSS", "<div title=\"data:text/html,&lt;img src=1 
onerror=alert(1)>\" style=\"-o-link:attr(title);-o-link-source:current\" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"title\":\"data:text/html,&lt;img src=1 
onerror=alert(1)>\"},\"sa\":{\"title\":\"&amp;#100;&amp;#97;&amp;#116;&amp;#97;&amp;#58;&amp;#116;&amp;#101;&amp;#120;&amp;#116;&amp;#47;&amp;#104;&amp;#116;&amp;#109;&amp;#108;&amp;#44;&amp;#60;&amp;#105;&amp;#109;&amp;#103;&amp;#32;&amp;#115;&amp;#114;&amp;#99;&amp;#61;&amp;#49;&amp;#32;&amp;#111;&amp;#110;&amp;#101;&amp;#114;&amp;#114;&amp;#111;&amp;#114;&amp;#61;&amp;#97;&amp;#108;&amp;#101;&amp;#114;&amp;#116;&amp;#40;&amp;#49;&amp;#41;&amp;#62;\"},\"dsr\":[0,313,306,6]}'>X</div>");
-add("wt2html", "MSIE 6 CSS safety test: Repetition markers (bug 55332)", "<p 
style=\"font-size: 100px; color: expres〱ion((title='XSSed'),'red')\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,75,70,4]}'>A</p>\n<p 
style=\"font-size: 100px; color: expresゝion((title='XSSed'),'red')\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[76,151,70,4]}'>B</p>\n<p 
style=\"font-size: 100px; color: expresーion((title='XSSed'),'red')\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[152,227,70,4]}'>C</p>\n<p 
style=\"font-size: 100px; color: expresヽion((title='XSSed'),'red')\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[228,303,70,4]}'>D</p>\n<p 
style=\"font-size: 100px; color: expresﹽion((title='XSSed'),'red')\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[304,379,70,4]}'>E</p>\n<p 
style=\"font-size: 100px; color: expresﹼion((title='XSSed'),'red')\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[380,455,70,4]}'>F</p>\n<p 
style=\"font-size: 100px; color: expresーion((title='XSSed'),'red')\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[456,531,70,4]}'>G</p>");
 add("wt2html", "CSS line continuation 2", "<div style=\"/* insecure input */\" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\"/* insecure input 
*/\"},\"sa\":{\"style\":\"background-image: u\\\\&amp;#13;rl(test.jpg); 
\"},\"dsr\":[0,59,53,6]}'></div>");
 add("wt2html", "Sanitizer: Closing of closed but not open table tags", "Table 
not started");
 add("wt2html", "Sanitizer: Validating that <meta> and <link> work, but only 
for Microdata", "<div itemscope=\"\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,308,15,6]}'>\n\t<p 
data-parsoid='{\"dsr\":[17,301,0,0]}'>&lt;meta itemprop=\"hello\" 
content=\"world\">\n\t&lt;meta http-equiv=\"refresh\" 
content=\"5\">\n\t&lt;meta itemprop=\"hello\" http-equiv=\"refresh\" 
content=\"5\">\n\t&lt;link itemprop=\"hello\" href=\"{{SERVER}}\">\n\t&lt;link 
rel=\"stylesheet\" href=\"{{SERVER}}\">\n\t&lt;link rel=\"stylesheet\" 
itemprop=\"hello\" href=\"{{SERVER}}\"></p>\n</div>");
@@ -329,7 +323,6 @@
 add("wt2wt", "Empty attribute crash test single-quotes (bug 2067)", "<font 
color=\"\">foo</font>\n");
 add("wt2wt", "Attribute test: unquoted but illegal value (hash)", "<font 
color=\"#x\">foo</font>\n");
 add("wt2wt", "Bug 2095: link with pipe and three closing brackets, version 2", 
"[[Main Page|<nowiki>[http://example.com/]</nowiki>]]\n");
-add("wt2wt", "Opera -o-link CSS", "<div 
title=\"&#100;&#97;&#116;&#97;&#58;&#116;&#101;&#120;&#116;&#47;&#104;&#116;&#109;&#108;&#44;&#60;&#105;&#109;&#103;&#32;&#115;&#114;&#99;&#61;&#49;&#32;&#111;&#110;&#101;&#114;&#114;&#111;&#114;&#61;&#97;&#108;&#101;&#114;&#116;&#40;&#49;&#41;&#62;\"
 style=\"-o-link:attr(title);-o-link-source:current\">X</div>");
 add("wt2wt", "Table attribute legitimate extension", "{|\n! 
style=\"<nowiki>color:blue</nowiki>\" + | status\n|}");
 add("wt2wt", "Table attribute safety", "{|\n! 
style=\"<nowiki>border-width:expression(0+alert(document.cookie))</nowiki>\" + 
| status\n|}");
 add("wt2wt", "Parser hook: nested tags", 
"<tag><tag></tag><nowiki></tag></nowiki>");
@@ -508,12 +501,6 @@
 add("html2html", "Bug 3244: HTML attribute safety (extension; unsafe)", "<div 
style=\" \" data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\" 
\"},\"sa\":{\"style\":\"/* insecure input 
*/\"},\"dsr\":[0,40,34,6]}'></div>\n");
 add("html2html", "MSIE CSS safety test: spurious slash", "<div style=\" \" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\" \"},\"sa\":{\"style\":\"/* 
insecure input */\"},\"dsr\":[0,44,34,6]}'>evil</div>\n");
 add("html2html", "MSIE CSS safety test: hex code", "<div style=\" \" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\" \"},\"sa\":{\"style\":\"/* 
insecure input */\"},\"dsr\":[0,44,34,6]}'>evil</div>\n");
-add("html2html", "CSS safety test (all browsers): vertical tab (bug 55332 / 
CVE-2013-4567)", "<p data-parsoid='{\"dsr\":[0,1,0,0]}'>A</p>\n");
-add("html2html", "MSIE 6 CSS safety test: Fullwidth (bug 55332)", "<p 
data-parsoid='{\"dsr\":[0,1,0,0]}'>A</p>\n<div style=\" \" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\" \"},\"sa\":{\"style\":\"/* 
insecure input */\"},\"dsr\":[2,43,34,6]}'>B</div>\n");
-add("html2html", "MSIE 6 CSS safety test: IPA extensions (bug 55332)", "<div 
style=\" \" data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\" 
\"},\"sa\":{\"style\":\"/* insecure input 
*/\"},\"dsr\":[0,41,34,6]}'>A</div>\n<p 
data-parsoid='{\"dsr\":[42,43,0,0]}'>B</p>\n");
-add("html2html", "MSIE 6 CSS safety test: sup/sub script (bug 55332)", "<div 
style=\" \" data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\" 
\"},\"sa\":{\"style\":\"/* insecure input 
*/\"},\"dsr\":[0,41,34,6]}'>A</div>\n<div style=\" \" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\" \"},\"sa\":{\"style\":\"/* 
insecure input */\"},\"dsr\":[42,83,34,6]}'>B</div>\n<p 
data-parsoid='{\"dsr\":[84,85,0,0]}'>C</p>\n");
-add("html2html", "Opera -o-link CSS", "<div title=\"data:text/html,&lt;img 
src=1 onerror=alert(1)\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,93,55,6]}'>\" style=\"/* insecure 
input */\">X</div>\n");
-add("html2html", "MSIE 6 CSS safety test: Repetition markers (bug 55332)", "<p 
data-parsoid='{\"dsr\":[0,1,0,0]}'>A</p>\n\n<p 
data-parsoid='{\"dsr\":[3,4,0,0]}'>B</p>\n\n<p 
data-parsoid='{\"dsr\":[6,7,0,0]}'>C</p>\n\n<p 
data-parsoid='{\"dsr\":[9,10,0,0]}'>D</p>\n\n<p 
data-parsoid='{\"dsr\":[12,13,0,0]}'>E</p>\n\n<p 
data-parsoid='{\"dsr\":[15,16,0,0]}'>F</p>\n\n<p 
data-parsoid='{\"dsr\":[18,19,0,0]}'>G</p>\n");
 add("html2html", "Table attribute safety", "<table 
data-parsoid='{\"dsr\":[0,45,2,2]}'>\n<tbody 
data-parsoid='{\"dsr\":[3,43,0,0]}'><tr 
data-parsoid='{\"autoInsertedEnd\":true,\"autoInsertedStart\":true,\"dsr\":[3,42,0,0]}'><th
 style=\" \" data-parsoid='{\"a\":{\"style\":\" \"},\"sa\":{\"style\":\"/* 
insecure input */\"},\"autoInsertedEnd\":true,\"dsr\":[3,42,32,0]}'> 
status</th></tr>\n</tbody></table>\n");
 add("html2html", "CSS line continuation 1", "<div style=\" \" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\" \"},\"sa\":{\"style\":\"/* 
insecure input */\"},\"dsr\":[0,40,34,6]}'></div>\n");
 add("html2html", "CSS line continuation 2", "<div style=\" \" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\" \"},\"sa\":{\"style\":\"/* 
invalid control char */\"},\"dsr\":[0,46,40,6]}'></div>\n");
@@ -1046,12 +1033,6 @@
 add("html2wt", "MSIE CSS safety test: hex code", "<div style=\"/* insecure 
input */\">evil</div>\n");
 add("html2wt", "MSIE CSS safety test: comment in url", "<div 
style=\"background-image:u rl(javascript:alert('boo'))\">evil</div>\n");
 add("html2wt", "MSIE CSS safety test: comment in expression", "<div 
style=\"background-image:expres sion(alert('boo4'))\">evil4</div>\n");
-add("html2wt", "CSS safety test (all browsers): vertical tab (bug 55332 / 
CVE-2013-4567)", "A\n");
-add("html2wt", "MSIE 6 CSS safety test: Fullwidth (bug 55332)", "A\n<div 
style=\"/* insecure input */\">B</div>\n");
-add("html2wt", "MSIE 6 CSS safety test: IPA extensions (bug 55332)", "<div 
style=\"/* insecure input */\">A</div>\nB\n");
-add("html2wt", "MSIE 6 CSS safety test: sup/sub script (bug 55332)", "<div 
style=\"/* insecure input */\">A</div>\n<div style=\"/* insecure input 
*/\">B</div>\nC\n");
-add("html2wt", "Opera -o-link CSS", "<div title=\"data:text/html,<img src=1 
onerror=alert(1)>\" style=\"/* insecure input */\">X</div>\n");
-add("html2wt", "MSIE 6 CSS safety test: Repetition markers (bug 55332)", 
"A\n\nB\n\nC\n\nD\n\nE\n\nF\n\nG\n");
 add("html2wt", "Table attribute legitimate extension", "{|\n! 
style=\"color:blue\" | status\n|}\n");
 add("html2wt", "Table attribute safety", "{|\n! style=\"/* insecure input */\" 
| status\n|}\n");
 add("html2wt", "CSS line continuation 1", "<div style=\"/* insecure input 
*/\"></div>\n");
@@ -1846,10 +1827,6 @@
 add("selser", "Bug 2095: link with pipe and three closing brackets, version 2 
[1]", "[[Main Page|[http://example.com/]]]");
 add("selser", "Bug 2095: link with pipe and three closing brackets, version 2 
[2]", "mfl7xqc4xioa8aor\n\n[[Main Page|[http://example.com/]]]");
 add("selser", "Bug 2095: link with pipe and three closing brackets, version 2 
[[2]]", "h4wq9ivlle1xlxr[[Main Page|[http://example.com/]]]");
-add("selser", "Opera -o-link CSS [[2]]", 
"<div\ntitle=\"&#100;&#97;&#116;&#97;&#58;&#116;&#101;&#120;&#116;&#47;&#104;&#116;&#109;&#108;&#44;&#60;&#105;&#109;&#103;&#32;&#115;&#114;&#99;&#61;&#49;&#32;&#111;&#110;&#101;&#114;&#114;&#111;&#114;&#61;&#97;&#108;&#101;&#114;&#116;&#40;&#49;&#41;&#62;\"\nstyle=\"-o-link:attr(title);-o-link-source:current\">e1v85xt7j899hpviX</div>");
-add("selser", "Opera -o-link CSS [2]", 
"s1oe4611wh035wmi<div\ntitle=\"&#100;&#97;&#116;&#97;&#58;&#116;&#101;&#120;&#116;&#47;&#104;&#116;&#109;&#108;&#44;&#60;&#105;&#109;&#103;&#32;&#115;&#114;&#99;&#61;&#49;&#32;&#111;&#110;&#101;&#114;&#114;&#111;&#114;&#61;&#97;&#108;&#101;&#114;&#116;&#40;&#49;&#41;&#62;\"\nstyle=\"-o-link:attr(title);-o-link-source:current\">X</div>");
-add("selser", "Opera -o-link CSS [[4]]", 
"<div\ntitle=\"&#100;&#97;&#116;&#97;&#58;&#116;&#101;&#120;&#116;&#47;&#104;&#116;&#109;&#108;&#44;&#60;&#105;&#109;&#103;&#32;&#115;&#114;&#99;&#61;&#49;&#32;&#111;&#110;&#101;&#114;&#114;&#111;&#114;&#61;&#97;&#108;&#101;&#114;&#116;&#40;&#49;&#41;&#62;\"\nstyle=\"-o-link:attr(title);-o-link-source:current\">bsf73uag1j9ssjor</div>");
-add("selser", "Opera -o-link CSS [[3]]", 
"<div\ntitle=\"&#100;&#97;&#116;&#97;&#58;&#116;&#101;&#120;&#116;&#47;&#104;&#116;&#109;&#108;&#44;&#60;&#105;&#109;&#103;&#32;&#115;&#114;&#99;&#61;&#49;&#32;&#111;&#110;&#101;&#114;&#114;&#111;&#114;&#61;&#97;&#108;&#101;&#114;&#116;&#40;&#49;&#41;&#62;\"\nstyle=\"-o-link:attr(title);-o-link-source:current\"></div>");
 add("selser", "Table attribute legitimate extension [[0,1]]", "{|\n!+ 
style=\"<nowiki>color:blue</nowiki>\"| status\n|}");
 add("selser", "Table attribute legitimate extension [[0,[1,4]]]", "{|\n!+ 
style=\"<nowiki>color:blue</nowiki>\"| status<!--96xa8rclnmblc8fr-->\n|}");
 add("selser", "Table attribute legitimate extension [2]", 
"ttlfw8y9so9wwmi\n{|\n!+ style=\"<nowiki>color:blue</nowiki>\"| status\n|}");
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 34083e0..a93ce62 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -16682,9 +16682,11 @@
 CSS safety test (all browsers): vertical tab (bug 55332 / CVE-2013-4567)
 !! wikitext
 <p style="font-size: 100px; 
background-image:url\b(https://www.google.com/images/srpr/logo6w.png)">A</p>
-!! html
+!! html/php
 <p style="/* invalid control char */">A</p>
 
+!! html/parsoid
+<p style="/* invalid control char */" 
data-parsoid='{"stx":"html","a":{"style":"/* invalid control char 
*/"},"sa":{"style":"font-size: 100px; 
background-image:url\\b(https://www.google.com/images/srpr/logo6w.png)"}}'>A</p>
 !! end
 
 !! test
@@ -16692,10 +16694,13 @@
 !! wikitext
 <p style="font-size: 100px; color: expression((title='XSSed'),'red')">A</p>
 <div style="top:EXPRESSION(alert())">B</div>
-!! html
+!! html/php
 <p style="/* insecure input */">A</p>
 <div style="/* insecure input */">B</div>
 
+!! html/parsoid
+<p style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input */"},"sa":{"style":"font-size: 100px; color: 
expression((title=&#39;XSSed&#39;),&#39;red&#39;)"}}'>A</p>
+<div style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input */"},"sa":{"style":"top:EXPRESSION(alert())"}}'>B</div>
 !! end
 
 !! test
@@ -16703,10 +16708,13 @@
 !! wikitext
 <div style="background-image:uʀʟ(javascript:alert())">A</div>
 <p style="font-size: 100px; color: expʀessɪoɴ((title='XSSed'),'red')">B</p>
-!! html
+!! html/php
 <div style="/* insecure input */">A</div>
 <p style="/* insecure input */">B</p>
 
+!! html/parsoid
+<div style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input 
*/"},"sa":{"style":"background-image:uʀʟ(javascript:alert())"}}'>A</div>
+<p style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input */"},"sa":{"style":"font-size: 100px; color: 
expʀessɪoɴ((title=&#39;XSSed&#39;),&#39;red&#39;)"}}'>B</p>
 !! end
 
 !! test
@@ -16715,23 +16723,30 @@
 <div style="background-image:url⁽javascript:alert())">A</div>
 <div style="background-image:url₍javascript:alert())">B</div>
 <p style="font-size: 100px; color: expressioⁿ((title='XSSed'),'red')">C</p>
-!! html
+!! html/php
 <div style="/* insecure input */">A</div>
 <div style="/* insecure input */">B</div>
 <p style="/* insecure input */">C</p>
 
+!! html/parsoid
+<div style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input 
*/"},"sa":{"style":"background-image:url⁽javascript:alert())"}}'>A</div>
+<div style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input 
*/"},"sa":{"style":"background-image:url₍javascript:alert())"}}'>B</div>
+<p style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input */"},"sa":{"style":"font-size: 100px; color: 
expressioⁿ((title=&#39;XSSed&#39;),&#39;red&#39;)"}}'>C</p>
 !! end
 
-# FIXME: Parsoid fails to sanitize this! See T58846.
 !! test
 Opera -o-link CSS
+!! options
+parsoid=wt2html,html2html
 !! wikitext
 <div
 
title="&#100;&#97;&#116;&#97;&#58;&#116;&#101;&#120;&#116;&#47;&#104;&#116;&#109;&#108;&#44;&#60;&#105;&#109;&#103;&#32;&#115;&#114;&#99;&#61;&#49;&#32;&#111;&#110;&#101;&#114;&#114;&#111;&#114;&#61;&#97;&#108;&#101;&#114;&#116;&#40;&#49;&#41;&#62;"
 style="-o-link:attr(title);-o-link-source:current">X</div>
-!! html
+!! html/php
 <div title="data:text/html,&lt;img src=1 onerror=alert(1)&gt;" style="/* 
insecure input */">X</div>
 
+!! html/parsoid
+<div title="data:text/html,&lt;img src=1 onerror=alert(1)>" style="/* insecure 
input */" data-parsoid='{"stx":"html","a":{"title":"data:text/html,&lt;img 
src=1 onerror=alert(1)>","style":"/* insecure input 
*/"},"sa":{"title":"&amp;#100;&amp;#97;&amp;#116;&amp;#97;&amp;#58;&amp;#116;&amp;#101;&amp;#120;&amp;#116;&amp;#47;&amp;#104;&amp;#116;&amp;#109;&amp;#108;&amp;#44;&amp;#60;&amp;#105;&amp;#109;&amp;#103;&amp;#32;&amp;#115;&amp;#114;&amp;#99;&amp;#61;&amp;#49;&amp;#32;&amp;#111;&amp;#110;&amp;#101;&amp;#114;&amp;#114;&amp;#111;&amp;#114;&amp;#61;&amp;#97;&amp;#108;&amp;#101;&amp;#114;&amp;#116;&amp;#40;&amp;#49;&amp;#41;&amp;#62;","style":"-o-link:attr(title);-o-link-source:current"}}'>X</div>
 !! end
 
 !! test
@@ -16744,7 +16759,7 @@
 <p style="font-size: 100px; color: expresﹽion((title='XSSed'),'red')">E</p>
 <p style="font-size: 100px; color: expresﹼion((title='XSSed'),'red')">F</p>
 <p style="font-size: 100px; color: expresーion((title='XSSed'),'red')">G</p>
-!! html
+!! html/php
 <p style="/* insecure input */">A</p>
 <p style="/* insecure input */">B</p>
 <p style="/* insecure input */">C</p>
@@ -16753,6 +16768,14 @@
 <p style="/* insecure input */">F</p>
 <p style="/* insecure input */">G</p>
 
+!! html/parsoid
+<p style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input */"},"sa":{"style":"font-size: 100px; color: 
expres〱ion((title=&#39;XSSed&#39;),&#39;red&#39;)"}}'>A</p>
+<p style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input */"},"sa":{"style":"font-size: 100px; color: 
expresゝion((title=&#39;XSSed&#39;),&#39;red&#39;)"}}'>B</p>
+<p style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input */"},"sa":{"style":"font-size: 100px; color: 
expresーion((title=&#39;XSSed&#39;),&#39;red&#39;)"}}'>C</p>
+<p style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input */"},"sa":{"style":"font-size: 100px; color: 
expresヽion((title=&#39;XSSed&#39;),&#39;red&#39;)"}}'>D</p>
+<p style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input */"},"sa":{"style":"font-size: 100px; color: 
expresﹽion((title=&#39;XSSed&#39;),&#39;red&#39;)"}}'>E</p>
+<p style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input */"},"sa":{"style":"font-size: 100px; color: 
expresﹼion((title=&#39;XSSed&#39;),&#39;red&#39;)"}}'>F</p>
+<p style="/* insecure input */" data-parsoid='{"stx":"html","a":{"style":"/* 
insecure input */"},"sa":{"style":"font-size: 100px; color: 
expresーion((title=&#39;XSSed&#39;),&#39;red&#39;)"}}'>G</p>
 !! end
 
 !! test

-- 
To view, visit https://gerrit.wikimedia.org/r/332711
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I59163d62fface3a77ba223cb0dd49bc8602ba49a
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: C. Scott Ananian <canan...@wikimedia.org>
Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to