Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/226032

Change subject: Sanitize link fragments
......................................................................

Sanitize link fragments

 * Implements Sanitizer's escapeId.

 * Moves some utilities from the Sanitizer's prototype to the constructor
   for use without instantiation.

 * 36053d3b568145d991d5088e1dbe561d60241f02 mentions T59252 (and some
   comments here) where we are forgoing id munging. That is probably
   less of an issue for editability than it is for Flow. Flow's stored
   link fragments should point to the right place on wiki.

 * I imagine there may be some complications here with copy/paste.

 * Fixes a little edge case in roundtripping unedited interwiki links.

Bug: T94949
Change-Id: I8758a7af37788a93d2326c7930bb82b9ad138f27
---
M lib/ext.core.LinkHandler.js
M lib/ext.core.Sanitizer.js
M lib/mediawiki.Util.js
M lib/wts.LinkHandler.js
M tests/parserTests-blacklist.js
M tests/parserTests.txt
6 files changed, 77 insertions(+), 43 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/32/226032/1

diff --git a/lib/ext.core.LinkHandler.js b/lib/ext.core.LinkHandler.js
index e80e892..584a6df 100644
--- a/lib/ext.core.LinkHandler.js
+++ b/lib/ext.core.LinkHandler.js
@@ -539,7 +539,7 @@
        if (target.language.protorel !== undefined) {
                absHref = absHref.replace( /^https?:/, '');
        }
-       newTk.addNormalizedAttribute('href', absHref, target.hrefSrc);
+       newTk.addNormalizedAttribute('href', Util.sanitizeURI(absHref), 
target.hrefSrc);
 
        // Change the rel to be mw:PageProp/Language
        Util.lookupKV( newTk.attribs, 'rel' ).v = 'mw:PageProp/Language';
@@ -562,7 +562,8 @@
        if (target.interwiki.protorel !== undefined) {
                absHref = absHref.replace( /^https?:/, '');
        }
-       newTk.addNormalizedAttribute('href', absHref, target.hrefSrc);
+
+       newTk.addNormalizedAttribute('href', Util.sanitizeURI(absHref), 
target.hrefSrc);
 
        // Change the rel to be mw:ExtLink
        Util.lookupKV( newTk.attribs, 'rel' ).v = 'mw:ExtLink';
diff --git a/lib/ext.core.Sanitizer.js b/lib/ext.core.Sanitizer.js
index 1b78f64..a88f3e3 100644
--- a/lib/ext.core.Sanitizer.js
+++ b/lib/ext.core.Sanitizer.js
@@ -807,12 +807,11 @@
  *
  * gwicke: Use Util.decodeEntities instead?
  */
-Sanitizer.prototype.decodeEntity = function(name) {
-       if (this.constants.htmlEntityAliases[name]) {
-               name = this.constants.htmlEntityAliases[name];
+Sanitizer.decodeEntity = function(name) {
+       if (SanitizerConstants.htmlEntityAliases[name]) {
+               name = SanitizerConstants.htmlEntityAliases[name];
        }
-
-       var e = this.constants.htmlEntities[name];
+       var e = SanitizerConstants.htmlEntities[name];
        return e ? Util.codepointToUtf8(e) : "&" + name + ";";
 };
 
@@ -820,11 +819,11 @@
  * Return UTF-8 string for a codepoint if that is a valid
  * character reference, otherwise U+FFFD REPLACEMENT CHARACTER.
  */
-Sanitizer.prototype.decodeChar = function(codepoint) {
+Sanitizer.decodeChar = function(codepoint) {
        if (Util.validateCodepoint(codepoint)) {
                return Util.codepointToUtf8(codepoint);
        } else {
-               return this.constants.UTF8_REPLACEMENT;
+               return SanitizerConstants.UTF8_REPLACEMENT;
        }
 };
 
@@ -832,15 +831,14 @@
  * Decode any character references, numeric or named entities,
  * in the text and return a UTF-8 string.
  */
-Sanitizer.prototype.decodeCharReferences = function( text ) {
-       var sanitizer = this;
-       return text.replace(sanitizer.constants.CHAR_REFS_RE, function() {
+Sanitizer.decodeCharReferences = function(text) {
+       return text.replace(SanitizerConstants.CHAR_REFS_RE, function() {
                if (arguments[1]) {
-                       return sanitizer.decodeEntity(arguments[1]);
+                       return Sanitizer.decodeEntity(arguments[1]);
                } else if (arguments[2]) {
-                       return sanitizer.decodeChar(parseInt(arguments[2], 10));
+                       return Sanitizer.decodeChar(parseInt(arguments[2], 10));
                } else if (arguments[3]) {
-                       return sanitizer.decodeChar(parseInt(arguments[3], 16));
+                       return Sanitizer.decodeChar(parseInt(arguments[3], 16));
                } else {
                        return arguments[4];
                }
@@ -870,7 +868,7 @@
        }
 
        // Decode character references like &#123;
-       text = this.decodeCharReferences(text);
+       text = Sanitizer.decodeCharReferences(text);
        text = text.replace(this.constants.cssDecodeRE, function() {
                                var c;
                                if (arguments[1] !== undefined ) {
@@ -937,8 +935,23 @@
        return text;
 };
 
-Sanitizer.prototype.escapeId = function(id, options) {
-       // SSS: Not ported -- is this relevant for security?
+Sanitizer.escapeId = function(id, options) {
+       options = options || {};
+
+       id = Sanitizer.decodeCharReferences(id);
+
+       // Assume $wgExperimentalHtmlIds is `false` for now.
+
+       id = id.replace(/ /g, '_');
+       id = Util.urlencode(id);
+       id = id.replace(/%3A/g, ':');
+       id = id.replace(/%/g, '.');
+
+       if (!/^[a-zA-Z]/.test(id) && !options.hasOwnProperty('noninitial')) {
+               // Initial character must be a letter!
+               id = 'x' + id;
+       }
+
        return id;
 };
 
@@ -1017,7 +1030,7 @@
                }
 
                if (k === 'id') {
-                       v = this.escapeId(v, ['noninitial']);
+                       v = Sanitizer.escapeId(v, { 'noninitial': true });
                }
 
                // RDFa and microdata properties allow URLs, URIs and/or CURIs.
diff --git a/lib/mediawiki.Util.js b/lib/mediawiki.Util.js
index b5b48f5..3da8672 100644
--- a/lib/mediawiki.Util.js
+++ b/lib/mediawiki.Util.js
@@ -12,6 +12,7 @@
 var TemplateRequest = require('./mediawiki.ApiRequest.js').TemplateRequest;
 var Consts = require('./mediawiki.wikitext.constants.js').WikitextConstants;
 var JSUtils = require('./jsutils.js').JSUtils;
+var Sanitizer;  // Circular definition
 
 
 // This is a circular dependency.  Don't use anything from defines at module
@@ -1011,6 +1012,11 @@
                return num + '';
        },
 
+       // php's urlencode
+       urlencode: function(txt) {
+               return encodeURIComponent(txt).replace(/%20/g, '+');
+       },
+
        decodeURI: function( s ) {
                return s.replace( /(%[0-9a-fA-F][0-9a-fA-F])+/g, function( m ) {
                        try {
@@ -1023,25 +1029,28 @@
        },
 
        sanitizeTitleURI: function(title) {
+               if (!Sanitizer) {
+                       Sanitizer = 
require('./ext.core.Sanitizer.js').Sanitizer;
+               }
                var bits = title.split('#');
                var anchor = null;
-               var sanitize = function(s) {
-                       return s.replace( /[%? \[\]#|<>]/g, function(m) {
-                               return encodeURIComponent( m );
-                       });
-               };
                if (bits.length > 1) { // split at first '#'
                        anchor = title.substring(bits[0].length + 1);
                        title = bits[0];
                }
-               title = sanitize(title);
+               title = title.replace(/[%? \[\]#|<>]/g, function(m) {
+                       return encodeURIComponent(m);
+               });
                if (anchor !== null) {
-                       title += '#' + sanitize(anchor);
+                       title += '#' + Sanitizer.escapeId(anchor, { 
'noninitial': true });
                }
                return title;
        },
 
        sanitizeURI: function(s) {
+               if (!Sanitizer) {
+                       Sanitizer = 
require('./ext.core.Sanitizer.js').Sanitizer;
+               }
                var host = s.match(/^[a-zA-Z]+:\/\/[^\/]+(?:\/|$)/);
                var path = s;
                var anchor = null;
@@ -1064,7 +1073,7 @@
                });
                s = host + path;
                if (anchor !== null) {
-                       s += '#' + anchor;
+                       s += '#' + Sanitizer.escapeId(anchor, { 'noninitial': 
true });
                }
                return s;
        },
diff --git a/lib/wts.LinkHandler.js b/lib/wts.LinkHandler.js
index 6ddfa0f..7549e32 100644
--- a/lib/wts.LinkHandler.js
+++ b/lib/wts.LinkHandler.js
@@ -201,8 +201,10 @@
                                localPrefix += oldPrefix[1] + ':';
                        }
 
-                       // should we preserve the old prefix?
-                       if (oldPrefix && (
+                       if (target.fromsrc && !target.notmodified) {
+                               /* jshint noempty: false */
+                               // Leave the target alone!
+                       } else if (oldPrefix && (  // should we preserve the 
old prefix?
                                        oldPrefix[1].toLowerCase() === 
interWikiMatch[0].toLowerCase() ||
                                        // Check if the old prefix mapped to 
the same URL as
                                        // the new one. Use the old one if 
that's the case.
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 4d5d865..533e8f6 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -58,8 +58,8 @@
 add("wt2html", "External link containing double-single-quotes in text embedded 
in italics (bug 4598 sanity check)", "<p data-parsoid='{\"dsr\":[0,60,0,0]}'><i 
data-parsoid='{\"dsr\":[0,60,2,2]}'>Some <a rel=\"mw:ExtLink\" 
href=\"http://example.com/\" 
data-parsoid='{\"targetOff\":28,\"contentOffsets\":[28,56],\"dsr\":[7,57,21,1]}'>pretty
 <i data-parsoid='{\"dsr\":[35,46,2,2]}'>italics</i> and stuff</a>!</i></p>");
 add("wt2html", "Self-link to numeric title", "<p 
data-parsoid='{\"dsr\":[0,5,0,0]}'><a rel=\"mw:WikiLink\" href=\"./0\" 
title=\"0\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./0\"},\"sa\":{\"href\":\"0\"},\"dsr\":[0,5,2,2]}'>0</a></p>");
 add("wt2html", "<nowiki> inside a link", "<p 
data-parsoid='{\"dsr\":[0,96,0,0]}'><a rel=\"mw:WikiLink\" href=\"./Main_Page\" 
title=\"Main Page\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page\"},\"sa\":{\"href\":\"Main&lt;nowiki>
 Page&lt;/nowiki>\"},\"dsr\":[0,30,2,2]}'>Main Page</a> <a rel=\"mw:WikiLink\" 
href=\"./Main_Page\" title=\"Main Page\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Main_Page\"},\"sa\":{\"href\":\"Main
 Page\"},\"dsr\":[31,96,12,2]}'>the main page <span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[57,94,8,9]}'>[it's not very good]</span></a></p>");
-add("wt2html", "Interlanguage link with spacing", "<p 
data-parsoid='{\"dsr\":[0,14,0,0]}'>Blah blah blah</p>\n<link 
rel=\"mw:PageProp/Language\" href=\"http://zh.wikipedia.org/wiki/    Chinese    
 \" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"http://zh.wikipedia.org/wiki/
    Chinese     \"},\"sa\":{\"href\":\"   zh  :    Chinese     
\"},\"dsr\":[15,43,null,null]}'/>");
-add("wt2html", "Space and question mark encoding in interlanguage links 
(T95473)", "<p data-parsoid='{\"dsr\":[0,14,0,0]}'>Blah blah blah</p>\n<link 
rel=\"mw:PageProp/Language\" href=\"http://es.wikipedia.org/wiki/Foo bar?\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"http://es.wikipedia.org/wiki/Foo
 bar?\"},\"sa\":{\"href\":\"es:Foo bar?\"},\"dsr\":[15,30,null,null]}'/>");
+add("wt2html", "Interlanguage link with spacing", "<p 
data-parsoid='{\"dsr\":[0,14,0,0]}'>Blah blah blah</p>\n<link 
rel=\"mw:PageProp/Language\" 
href=\"http://zh.wikipedia.org/wiki/%20%20%20%20Chinese%20%20%20%20%20\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"http://zh.wikipedia.org/wiki/%20%20%20%20Chinese%20%20%20%20%20\"},\"sa\":{\"href\":\"
   zh  :    Chinese     \"},\"dsr\":[15,43,null,null]}'/>");
+add("wt2html", "Space and question mark encoding in interlanguage links 
(T95473)", "<p data-parsoid='{\"dsr\":[0,14,0,0]}'>Blah blah blah</p>\n<link 
rel=\"mw:PageProp/Language\" href=\"http://es.wikipedia.org/wiki/Foo%20bar?\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"http://es.wikipedia.org/wiki/Foo%20bar?\"},\"sa\":{\"href\":\"es:Foo
 bar?\"},\"dsr\":[15,30,null,null]}'/>");
 add("wt2html", "2. Lists with start-of-line-transparent tokens before bullets: 
Template close", "<ul about=\"#mwt1\" typeof=\"mw:Transclusion\" 
data-parsoid='{\"dsr\":[0,18,0,0],\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]]}'
 data-mw='{\"parts\":[\"*foo 
\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"bar\\n\"}},\"i\":0}}]}'><li>foo
 bar</li></ul><span about=\"#mwt1\">\n</span><p 
data-parsoid='{\"dsr\":[18,22,0,0]}'>*baz</p>");
 add("wt2html", "Multiple list tags generated by templates", "<li 
about=\"#mwt1\" typeof=\"mw:Transclusion\" 
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[0,44,null,null],\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}],[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}],[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]]}'
 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"&lt;li>\"}},\"i\":0}},\"a\\n\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"&lt;li>\"}},\"i\":1}},\"b\\n\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"&lt;li>\"}},\"i\":2}},\"c\"]}'>a\n</li><li
 about=\"#mwt1\">b\n</li><li about=\"#mwt1\" 
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[null,44,null,0]}'>c</li>");
 add("wt2html", "Test the li-hack\n(The PHP parser relies on Tidy for the 
hack)", "<ul data-parsoid='{\"dsr\":[0,114,0,0]}'><li 
data-parsoid='{\"dsr\":[0,5,1,0]}'> foo</li>\n<li 
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[6,19,6,0],\"liHackSrc\":\"*
 \"}'>li-hack</li>\n<li about=\"#mwt1\" typeof=\"mw:Transclusion\" 
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[20,52,2,null],\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]]}'
 data-mw='{\"parts\":[\"* 
\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"&lt;li>templated
 li-hack\"}},\"i\":0}}]}'>templated li-hack</li>\n<li 
data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[53,66,1,0]}'> <!--foo--> 
</li><li 
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[66,114,4,0]}'>
 unsupported li-hack with preceding comments</li></ul>\n\n<ul 
data-parsoid='{\"stx\":\"html\",\"dsr\":[116,154,4,5]}'>\n<li 
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[121,125,4,0]}'></li><li
 data-parsoid='{\"stx\":\"html\",\"dsr\":[125,148,4,5]}'>not a 
li-hack\n</li>\n</ul>");
@@ -117,7 +117,7 @@
 add("wt2html", "Self-closed noinclude, includeonly, onlyinclude tags", "<meta 
typeof=\"mw:Includes/NoInclude\" data-parsoid='{\"src\":\"&lt;noinclude 
/>\",\"dsr\":[0,13,null,null]}'/>\n<meta typeof=\"mw:Includes/IncludeOnly\" 
data-parsoid='{\"src\":\"&lt;includeonly 
/>\",\"dsr\":[14,29,null,null]}'/>\n<meta typeof=\"mw:Includes/OnlyInclude\" 
data-parsoid='{\"src\":\"&lt;onlyinclude />\",\"dsr\":[30,45,null,null]}'/>");
 add("wt2html", "Bug 6563: Section extraction for section shown by 
<includeonly>", "<meta typeof=\"mw:Includes/IncludeOnly\" 
data-parsoid='{\"src\":\"&lt;includeonly>\\n==Includeonly 
section==\\n&lt;/includeonly>\",\"dsr\":[0,52,null,null]}'/><meta 
typeof=\"mw:Includes/IncludeOnly/End\" 
data-parsoid='{\"src\":\"\",\"dsr\":[52,52,null,null]}'/>\n<h2 
data-parsoid='{\"dsr\":[53,68,2,2]}'>Section T-2</h2>");
 add("wt2html", "Bug 6563: Section extraction for section suppressed by 
<includeonly>", "<meta typeof=\"mw:Includes/IncludeOnly\" 
data-parsoid='{\"src\":\"&lt;includeonly>\\n==Includeonly 
section==\\n&lt;/includeonly>\",\"dsr\":[0,52,null,null]}'/><meta 
typeof=\"mw:Includes/IncludeOnly/End\" 
data-parsoid='{\"src\":\"\",\"dsr\":[52,52,null,null]}'/>\n<h2 
data-parsoid='{\"dsr\":[53,66,2,2]}'>Section 1</h2>");
-add("wt2html", "2. includeonly in html attr value", "<p 
data-parsoid='{\"dsr\":[0,155,0,0]}'><span id=\"v1\" about=\"#mwt1\" 
typeof=\"mw:ExpandedAttrs\" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"id\":\"v1\"},\"sa\":{\"id\":\"&lt;noinclude>v1&lt;/noinclude>&lt;includeonly>v2&lt;/includeonly>\"},\"dsr\":[0,76,66,7]}'
 data-mw='{\"attribs\":[[{\"txt\":\"id\"},{\"html\":\"&lt;meta 
typeof=\\\"mw:Includes/NoInclude\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;noinclude>&amp;quot;,&amp;quot;dsr&amp;quot;:[10,21,null,null]}\\\">v1&lt;meta
 typeof=\\\"mw:Includes/NoInclude/End\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;/noinclude>&amp;quot;,&amp;quot;dsr&amp;quot;:[23,35,null,null]}\\\">&lt;meta
 typeof=\\\"mw:Includes/IncludeOnly\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;includeonly>v2&lt;/includeonly>&amp;quot;,&amp;quot;dsr&amp;quot;:[35,64,null,null]}\\\">&lt;meta
 typeof=\\\"mw:Includes/IncludeOnly/End\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&amp;quot;,&amp;quot;dsr&amp;quot;:[64,64,null,null]}\\\">\"}]]}'>bar</span>\n<span
 id='\"v1\"' about=\"#mwt2\" typeof=\"mw:ExpandedAttrs\" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"id\":\"\\\"v1\\\"\"},\"sa\":{\"id\":\"&lt;noinclude>\\\"v1\\\"&lt;/noinclude>&lt;includeonly>\\\"v2\\\"&lt;/includeonly>\"},\"dsr\":[77,155,68,7]}'
 data-mw='{\"attribs\":[[{\"txt\":\"id\"},{\"html\":\"&lt;meta 
typeof=\\\"mw:Includes/NoInclude\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;noinclude>&amp;quot;,&amp;quot;dsr&amp;quot;:[86,97,null,null]}\\\">\\\"v1\\\"&lt;meta
 typeof=\\\"mw:Includes/NoInclude/End\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;/noinclude>&amp;quot;,&amp;quot;dsr&amp;quot;:[101,113,null,null]}\\\">&lt;meta
 typeof=\\\"mw:Includes/IncludeOnly\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;includeonly>\\\\&amp;quot;v2\\\\&amp;quot;&lt;/includeonly>&amp;quot;,&amp;quot;dsr&amp;quot;:[113,144,null,null]}\\\">&lt;meta
 typeof=\\\"mw:Includes/IncludeOnly/End\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&amp;quot;,&amp;quot;dsr&amp;quot;:[144,144,null,null]}\\\">\"}]]}'>bar</span></p>");
+add("wt2html", "2. includeonly in html attr value", "<p 
data-parsoid='{\"dsr\":[0,155,0,0]}'><span id=\"v1\" about=\"#mwt1\" 
typeof=\"mw:ExpandedAttrs\" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"id\":\"v1\"},\"sa\":{\"id\":\"&lt;noinclude>v1&lt;/noinclude>&lt;includeonly>v2&lt;/includeonly>\"},\"dsr\":[0,76,66,7]}'
 data-mw='{\"attribs\":[[{\"txt\":\"id\"},{\"html\":\"&lt;meta 
typeof=\\\"mw:Includes/NoInclude\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;noinclude>&amp;quot;,&amp;quot;dsr&amp;quot;:[10,21,null,null]}\\\">v1&lt;meta
 typeof=\\\"mw:Includes/NoInclude/End\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;/noinclude>&amp;quot;,&amp;quot;dsr&amp;quot;:[23,35,null,null]}\\\">&lt;meta
 typeof=\\\"mw:Includes/IncludeOnly\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;includeonly>v2&lt;/includeonly>&amp;quot;,&amp;quot;dsr&amp;quot;:[35,64,null,null]}\\\">&lt;meta
 typeof=\\\"mw:Includes/IncludeOnly/End\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&amp;quot;,&amp;quot;dsr&amp;quot;:[64,64,null,null]}\\\">\"}]]}'>bar</span>\n<span
 id=\".22v1.22\" about=\"#mwt2\" typeof=\"mw:ExpandedAttrs\" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"id\":\".22v1.22\"},\"sa\":{\"id\":\"&lt;noinclude>\\\"v1\\\"&lt;/noinclude>&lt;includeonly>\\\"v2\\\"&lt;/includeonly>\"},\"dsr\":[77,155,68,7]}'
 data-mw='{\"attribs\":[[{\"txt\":\"id\"},{\"html\":\"&lt;meta 
typeof=\\\"mw:Includes/NoInclude\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;noinclude>&amp;quot;,&amp;quot;dsr&amp;quot;:[86,97,null,null]}\\\">\\\"v1\\\"&lt;meta
 typeof=\\\"mw:Includes/NoInclude/End\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;/noinclude>&amp;quot;,&amp;quot;dsr&amp;quot;:[101,113,null,null]}\\\">&lt;meta
 typeof=\\\"mw:Includes/IncludeOnly\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;includeonly>\\\\&amp;quot;v2\\\\&amp;quot;&lt;/includeonly>&amp;quot;,&amp;quot;dsr&amp;quot;:[113,144,null,null]}\\\">&lt;meta
 typeof=\\\"mw:Includes/IncludeOnly/End\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&amp;quot;,&amp;quot;dsr&amp;quot;:[144,144,null,null]}\\\">\"}]]}'>bar</span></p>");
 add("wt2html", "4. includeonly in table attributes", "<table 
data-parsoid='{\"dsr\":[0,77,2,2]}'>\n<tbody 
data-parsoid='{\"dsr\":[3,75,0,0]}'><tr about=\"#mwt1\" 
typeof=\"mw:ExpandedAttrs\" 
data-parsoid='{\"startTagSrc\":\"|-\",\"a\":{\"&lt;noinclude>\\n|-\\n|a\\n&lt;/noinclude>\":null},\"sa\":{\"&lt;noinclude>\\n|-\\n|a\\n&lt;/noinclude>\":\"\"},\"autoInsertedEnd\":true,\"dsr\":[3,36,33,0]}'
 data-mw='{\"attribs\":[[{\"txt\":\"a\",\"html\":\"&lt;meta 
typeof=\\\"mw:Includes/NoInclude\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;noinclude>&amp;quot;,&amp;quot;dsr&amp;quot;:[6,17,null,null]}\\\">\\n|-\\n|a\\n&lt;meta
 typeof=\\\"mw:Includes/NoInclude/End\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;/noinclude>&amp;quot;,&amp;quot;dsr&amp;quot;:[24,36,null,null]}\\\">\"},{\"html\":\"\"}]]}'></tr>\n<tr
 about=\"#mwt2\" typeof=\"mw:ExpandedAttrs\" 
data-parsoid='{\"startTagSrc\":\"|-\",\"a\":{\"&lt;includeonly>\\n|-\\n|b\\n&lt;/includeonly>\":null},\"sa\":{\"&lt;includeonly>\\n|-\\n|b\\n&lt;/includeonly>\":\"\"},\"autoInsertedEnd\":true,\"dsr\":[37,74,37,0]}'
 data-mw='{\"attribs\":[[{\"txt\":\"\",\"html\":\"&lt;meta 
typeof=\\\"mw:Includes/IncludeOnly\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&lt;includeonly>\\\\n|-\\\\n|b\\\\n&lt;/includeonly>&amp;quot;,&amp;quot;dsr&amp;quot;:[40,74,null,null]}\\\">&lt;meta
 typeof=\\\"mw:Includes/IncludeOnly/End\\\" 
data-parsoid=\\\"{&amp;quot;src&amp;quot;:&amp;quot;&amp;quot;,&amp;quot;dsr&amp;quot;:[74,74,null,null]}\\\">\"},{\"html\":\"\"}]]}'></tr>\n</tbody></table>");
 add("wt2html", "Templates: 2. Inside a block tag", "<div 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,23,5,6]}'><span about=\"#mwt1\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]],\"dsr\":[5,17,null,null]}'
 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"Foo\"}},\"i\":0}}]}'>Foo</span></div>\n<blockquote
 data-parsoid='{\"stx\":\"html\",\"dsr\":[24,61,12,13]}'><span about=\"#mwt2\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]],\"dsr\":[36,48,null,null]}'
 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"Foo\"}},\"i\":0}}]}'>Foo</span></blockquote>");
 add("wt2html", "Templates: HTML Tag: 2. Generation of HTML attr. value", "<div 
style=\"'color:red;'\" about=\"#mwt2\" typeof=\"mw:ExpandedAttrs\" 
data-parsoid='{\"stx\":\"html\",\"a\":{\"style\":\"&#39;color:red;&#39;\"},\"sa\":{\"style\":\"{{echo|&#39;color:red;&#39;}}\"},\"dsr\":[0,42,33,6]}'
 data-mw='{\"attribs\":[[{\"txt\":\"style\"},{\"html\":\"&lt;span 
about=\\\"#mwt1\\\" typeof=\\\"mw:Transclusion\\\" 
data-parsoid=\\\"{&amp;quot;pi&amp;quot;:[[{&amp;quot;k&amp;quot;:&amp;quot;1&amp;quot;,&amp;quot;spc&amp;quot;:[&amp;quot;&amp;quot;,&amp;quot;&amp;quot;,&amp;quot;&amp;quot;,&amp;quot;&amp;quot;]}]],&amp;quot;dsr&amp;quot;:[11,32,null,null]}\\\"
 
data-mw=\\\"{&amp;quot;parts&amp;quot;:[{&amp;quot;template&amp;quot;:{&amp;quot;target&amp;quot;:{&amp;quot;wt&amp;quot;:&amp;quot;echo&amp;quot;,&amp;quot;href&amp;quot;:&amp;quot;./Template:Echo&amp;quot;},&amp;quot;params&amp;quot;:{&amp;quot;1&amp;quot;:{&amp;quot;wt&amp;quot;:&amp;quot;&#39;color:red;&#39;&amp;quot;}},&amp;quot;i&amp;quot;:0}}]}\\\">&#39;color:red;&#39;&lt;/span>\"}]]}'>foo</div>");
@@ -189,7 +189,6 @@
 add("wt2html", "Category with template in sort key and title", "<link 
typeof=\"mw:ExpandedAttrs\" about=\"#mwt4\" rel=\"mw:PageProp/Category\" 
href=\"./Category:Foo#Bar\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Category:Foo\"},\"sa\":{\"href\":\"Category:{{echo|Foo}}\"},\"dsr\":[0,38,null,null]}'
 data-mw='{\"attribs\":[[{\"txt\":\"href\"},{\"html\":\"Category:&lt;span 
about=\\\"#mwt1\\\" typeof=\\\"mw:Transclusion\\\" 
data-parsoid=\\\"{&amp;quot;pi&amp;quot;:[[{&amp;quot;k&amp;quot;:&amp;quot;1&amp;quot;,&amp;quot;spc&amp;quot;:[&amp;quot;&amp;quot;,&amp;quot;&amp;quot;,&amp;quot;&amp;quot;,&amp;quot;&amp;quot;]}]],&amp;quot;dsr&amp;quot;:[11,23,null,null]}\\\"
 
data-mw=\\\"{&amp;quot;parts&amp;quot;:[{&amp;quot;template&amp;quot;:{&amp;quot;target&amp;quot;:{&amp;quot;wt&amp;quot;:&amp;quot;echo&amp;quot;,&amp;quot;href&amp;quot;:&amp;quot;./Template:Echo&amp;quot;},&amp;quot;params&amp;quot;:{&amp;quot;1&amp;quot;:{&amp;quot;wt&amp;quot;:&amp;quot;Foo&amp;quot;}},&amp;quot;i&amp;quot;:0}}]}\\\">Foo&lt;/span>\"}],[{\"txt\":\"mw:sortKey\"},{\"html\":\"&lt;span
 about=\\\"#mwt2\\\" typeof=\\\"mw:Transclusion\\\" 
data-parsoid=\\\"{&amp;quot;pi&amp;quot;:[[{&amp;quot;k&amp;quot;:&amp;quot;1&amp;quot;,&amp;quot;spc&amp;quot;:[&amp;quot;&amp;quot;,&amp;quot;&amp;quot;,&amp;quot;&amp;quot;,&amp;quot;&amp;quot;]}]],&amp;quot;dsr&amp;quot;:[24,36,null,null]}\\\"
 
data-mw=\\\"{&amp;quot;parts&amp;quot;:[{&amp;quot;template&amp;quot;:{&amp;quot;target&amp;quot;:{&amp;quot;wt&amp;quot;:&amp;quot;echo&amp;quot;,&amp;quot;href&amp;quot;:&amp;quot;./Template:Echo&amp;quot;},&amp;quot;params&amp;quot;:{&amp;quot;1&amp;quot;:{&amp;quot;wt&amp;quot;:&amp;quot;Bar&amp;quot;}},&amp;quot;i&amp;quot;:0}}]}\\\">Bar&lt;/span>\"}]]}'/>");
 add("wt2html", "Category / paragraph interactions", "<p 
data-parsoid='{\"dsr\":[0,24,0,0]}'>Foo <link rel=\"mw:PageProp/Category\" 
href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[4,20,null,null]}'/>
 Bar</p>\n\n<p data-parsoid='{\"dsr\":[26,50,0,0]}'>Foo <link 
rel=\"mw:PageProp/Category\" href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[30,46,null,null]}'/>\nBar</p>\n\n<p
 data-parsoid='{\"dsr\":[52,76,0,0]}'>Foo\n<link rel=\"mw:PageProp/Category\" 
href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[56,72,null,null]}'/>\nBar</p>\n\n<p
 data-parsoid='{\"dsr\":[78,102,0,0]}'>Foo\n<link rel=\"mw:PageProp/Category\" 
href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[82,98,null,null]}'/>
 Bar</p>\n\n<p data-parsoid='{\"dsr\":[104,261,0,0]}'>Foo\n<link 
rel=\"mw:PageProp/Category\" href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[108,124,null,null]}'/>\n
 <link rel=\"mw:PageProp/Category\" href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[126,142,null,null]}'/>\n<link
 rel=\"mw:PageProp/Category\" href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[143,159,null,null]}'/>\nBar\n\n<link
 rel=\"mw:PageProp/Category\" href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[165,181,null,null]}'/>\n
 <link rel=\"mw:PageProp/Category\" href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[183,199,null,null]}'/>\n<link
 rel=\"mw:PageProp/Category\" href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[200,216,null,null]}'/>\n\n<link
 rel=\"mw:PageProp/Category\" href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[218,234,null,null]}'/>\n
 <link rel=\"mw:PageProp/Category\" href=\"./Category:Baz\" about=\"#mwt1\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[236,261,null,null],\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]]}'
 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"[[Category:Baz]]\"}},\"i\":0}}]}'/></p>\n<link
 rel=\"mw:PageProp/Category\" href=\"./Category:Baz\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Category:Baz\"},\"sa\":{\"href\":\"Category:Baz\"},\"dsr\":[262,278,null,null]}'/>");
 add("wt2html", "Short headings with trailing space should match behavior of 
Parser::doHeadings (bug 19910)", "<p data-parsoid='{\"dsr\":[0,100,0,0]}'>=== 
\nThe line above must have a trailing space!\n=== <!--\n--> <!-- -->\nBut just 
in case it doesn't...</p>");
-add("wt2html", "Header with space, plus and underscore as entity", "<p 
data-parsoid='{\"dsr\":[0,34,0,0]}'>Id should not contain + for 
spaces</p>\n\n<h2 data-parsoid='{\"dsr\":[36,60,2,2]}'> Space between Text 
</h2>\n<p data-parsoid='{\"dsr\":[61,70,0,0]}'>section 1</p>\n\n<h2 
data-parsoid='{\"dsr\":[72,111,2,2]}'> Space-Entity<span typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#32;\",\"srcContent\":\" 
\",\"dsr\":[87,92,null,null]}'> </span>between<span typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#32;\",\"srcContent\":\" 
\",\"dsr\":[99,104,null,null]}'> </span>Text </h2>\n<p 
data-parsoid='{\"dsr\":[112,121,0,0]}'>section 2</p>\n\n<h2 
data-parsoid='{\"dsr\":[123,146,2,2]}'> Plus+between+Text </h2>\n<p 
data-parsoid='{\"dsr\":[147,156,0,0]}'>section 3</p>\n\n<h2 
data-parsoid='{\"dsr\":[158,196,2,2]}'> Plus-Entity<span typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#43;\",\"srcContent\":\"+\",\"dsr\":[172,177,null,null]}'>+</span>between<span
 typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#43;\",\"srcContent\":\"+\",\"dsr\":[184,189,null,null]}'>+</span>Text
 </h2>\n<p data-parsoid='{\"dsr\":[197,206,0,0]}'>section 4</p>\n\n<h2 
data-parsoid='{\"dsr\":[208,237,2,2]}'> Underscore_between_Text </h2>\n<p 
data-parsoid='{\"dsr\":[238,247,0,0]}'>section 5</p>\n\n<h2 
data-parsoid='{\"dsr\":[249,293,2,2]}'> Underscore-Entity<span 
typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#95;\",\"srcContent\":\"_\",\"dsr\":[269,274,null,null]}'>_</span>between<span
 typeof=\"mw:Entity\" 
data-parsoid='{\"src\":\"&amp;#95;\",\"srcContent\":\"_\",\"dsr\":[281,286,null,null]}'>_</span>Text
 </h2>\n<p data-parsoid='{\"dsr\":[294,303,0,0]}'>section 6</p>\n\n<p 
data-parsoid='{\"dsr\":[305,501,0,0]}'><a rel=\"mw:WikiLink\" 
href=\"./Main%20Page#Space_between_Text\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main%20Page#Space_between_Text\"},\"sa\":{\"href\":\"#Space
 between Text\"},\"dsr\":[305,328,2,2]}'>#Space between Text</a>\n<a 
rel=\"mw:WikiLink\" href=\"./Main%20Page#Space-Entity_between_Text\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main%20Page#Space-Entity_between_Text\"},\"sa\":{\"href\":\"#Space-Entity&amp;#32;between&amp;#32;Text\"},\"dsr\":[329,367,2,2]}'>#Space-Entity
 between Text</a>\n<a rel=\"mw:WikiLink\" 
href=\"./Main%20Page#Plus+between+Text\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main%20Page#Plus+between+Text\"},\"sa\":{\"href\":\"#Plus+between+Text\"},\"dsr\":[368,390,2,2]}'>#Plus+between+Text</a>\n<a
 rel=\"mw:WikiLink\" href=\"./Main%20Page#Plus-Entity+between+Text\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main%20Page#Plus-Entity+between+Text\"},\"sa\":{\"href\":\"#Plus-Entity&amp;#43;between&amp;#43;Text\"},\"dsr\":[391,428,2,2]}'>#Plus-Entity+between+Text</a>\n<a
 rel=\"mw:WikiLink\" href=\"./Main%20Page#Underscore_between_Text\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main%20Page#Underscore_between_Text\"},\"sa\":{\"href\":\"#Underscore_between_Text\"},\"dsr\":[429,457,2,2]}'>#Underscore_between_Text</a>\n<a
 rel=\"mw:WikiLink\" href=\"./Main%20Page#Underscore-Entity_between_Text\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main%20Page#Underscore-Entity_between_Text\"},\"sa\":{\"href\":\"#Underscore-Entity&amp;#95;between&amp;#95;Text\"},\"dsr\":[458,501,2,2]}'>#Underscore-Entity_between_Text</a></p>");
 add("wt2html", "Single-line or multiline-comments can follow headings", "<h2 
data-parsoid='{\"dsr\":[0,7,2,2]}'>foo</h2><!---->\n<h2 
data-parsoid='{\"dsr\":[15,22,2,2]}'>bar</h2><!--c1-->\n<h2 
data-parsoid='{\"dsr\":[32,39,2,2]}'>baz</h2><!--\nc2\nc3-->");
 add("wt2html", "Namespaced link must have a title", "<p 
data-parsoid='{\"dsr\":[0,12,0,0]}'><a rel=\"mw:WikiLink\" href=\"./Base_MW:\" 
title=\"Base MW:\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Base_MW:\"},\"sa\":{\"href\":\"Project:\"},\"dsr\":[0,12,2,2]}'>Project:</a></p>");
 add("wt2html", "Namespaced link must have a title (bad fragment version)", "<p 
data-parsoid='{\"dsr\":[0,21,0,0]}'><a rel=\"mw:WikiLink\" 
href=\"./Base_MW:Main%20Page#fragment\" title=\"Base MW:Main Page\" 
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Base_MW:Main%20Page#fragment\"},\"sa\":{\"href\":\"Project:#fragment\"},\"dsr\":[0,21,2,2]}'>Project:#fragment</a></p>");
@@ -222,7 +221,6 @@
 add("wt2html", "Parser hook: static parser hook not inside a comment", "<p 
data-parsoid='{\"dsr\":[0,61,0,0]}'>&lt;statictag>hello, 
world&lt;/statictag>\n&lt;statictag action=flush/></p>");
 add("wt2html", "Parser hook: static parser hook inside a comment", "<!-- 
<statictag&#x3E;hello, world</statictag&#x3E; -->\n<p 
data-parsoid='{\"dsr\":[45,70,0,0]}'>&lt;statictag action=flush/></p>");
 add("wt2html", "Sanitizer: Closing of closed but not open table tags", "Table 
not started");
-add("wt2html", "Sanitizer: Escaping of spaces, multibyte characters, colons & 
other stuff in id=\"\"", "<p data-parsoid='{\"dsr\":[0,45,0,0]}'><span id=\"æ: 
v\" data-parsoid='{\"stx\":\"html\",\"dsr\":[0,27,16,7]}'>byte</span><a 
rel=\"mw:WikiLink\" href=\"./Main%20Page#æ:_v\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Main%20Page#æ:_v\"},\"sa\":{\"href\":\"#æ:
 v\"},\"dsr\":[27,45,8,2]}'>backlink</a></p>");
 add("wt2html", "Sanitizer: Validating that <meta> and <link> work, but only 
for Microdata", "<div itemscope=\"\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,308,15,6]}'>\n\t<p 
data-parsoid='{\"dsr\":[17,301,0,0]}'>&lt;meta itemprop=\"hello\" 
content=\"world\">\n\t&lt;meta http-equiv=\"refresh\" 
content=\"5\">\n\t&lt;meta itemprop=\"hello\" http-equiv=\"refresh\" 
content=\"5\">\n\t&lt;link itemprop=\"hello\" href=\"{{SERVER}}\">\n\t&lt;link 
rel=\"stylesheet\" href=\"{{SERVER}}\">\n\t&lt;link rel=\"stylesheet\" 
itemprop=\"hello\" href=\"{{SERVER}}\"></p>\n</div>");
 add("wt2html", "Language converter: output gets cut off unexpectedly (bug 
5757)", "<p data-parsoid='{\"dsr\":[0,20,0,0]}'>this bit is safe: }-</p>\n\n<p 
data-parsoid='{\"dsr\":[22,82,0,0]}'>but if we add a conversion instance: 
-{zh-cn:xxx;zh-tw:yyy}-</p>\n\n<p data-parsoid='{\"dsr\":[84,112,0,0]}'>then we 
get cut off here: }-</p>\n\n<p data-parsoid='{\"dsr\":[114,145,0,0]}'>all 
additional text is vanished</p>");
 add("wt2html", "Self closed html pairs (bug 5487)", "<center 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,47,8,9]}'><font id=\"bug\" 
data-parsoid='{\"stx\":\"html\",\"selfClose\":true,\"dsr\":[8,25,17,0]}'></font>Centered
 text</center>\n<div 
data-parsoid='{\"stx\":\"html\",\"dsr\":[48,88,5,6]}'><font id=\"bug2\" 
data-parsoid='{\"stx\":\"html\",\"selfClose\":true,\"dsr\":[53,71,18,0]}'></font>In
 div text</div>");
@@ -584,7 +582,6 @@
 add("html2html", "Link with space in namespace", "<p 
data-parsoid='{\"dsr\":[0,76,0,0]}'>[/index.php?title=User_talk:Foo_bar&amp;action=edit&amp;redlink=1
 User talk:Foo bar]</p>\n");
 add("html2html", "Namespace takes precedence over interwiki link (bug 51680)", 
"<p data-parsoid='{\"dsr\":[0,52,0,0]}'><a rel=\"mw:WikiLink\" 
href=\"./Wiki/MemoryAlpha:AlphaTest\" title=\"Wiki/MemoryAlpha:AlphaTest\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/MemoryAlpha:AlphaTest\"},\"sa\":{\"href\":\"wiki/MemoryAlpha:AlphaTest\"},\"dsr\":[0,52,29,2]}'>MemoryAlpha:AlphaTest</a></p>\n");
 add("html2html", "Piped link to namespace", "<p 
data-parsoid='{\"dsr\":[0,73,0,0]}'>[/index.php?title=Meta:Disclaimers&amp;action=edit&amp;redlink=1
 The disclaimers]</p>\n");
-add("html2html", "Link containing \"#<\" and \"#>\" % as a hex sequences- 
these are valid section anchors\nExample for such a section: == < ==", "<p 
data-parsoid='{\"dsr\":[0,36,0,0]}'><a rel=\"mw:WikiLink\" 
href=\"./Main_Page#%3C\" title=\"Main Page\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Main_Page#%3C\"},\"sa\":{\"href\":\"Main
 Page#&lt;\"},\"dsr\":[0,18,14,2]}'>#&lt;</a><a rel=\"mw:WikiLink\" 
href=\"./Main_Page#%3E\" title=\"Main Page\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Main_Page#%3E\"},\"sa\":{\"href\":\"Main
 Page#>\"},\"dsr\":[18,36,14,2]}'>#></a></p>\n");
 add("html2html", "Link containing double-single-quotes '' in text embedded in 
italics (bug 4598 sanity check)", "<p data-parsoid='{\"dsr\":[0,84,0,0]}'><i 
data-parsoid='{\"dsr\":[0,61,2,2]}'>Some 
[/index.php?title=Link&amp;action=edit&amp;redlink=1 pretty </i>italics<i 
data-parsoid='{\"dsr\":[68,84,2,2]}'> and stuff]!</i></p>\n");
 add("html2html", "Plain link to page with question mark in title", "<p 
data-parsoid='{\"dsr\":[0,16,0,0]}'><a rel=\"mw:WikiLink\" 
href=\"./Wiki/A%3Fb\" title=\"Wiki/A?b\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/A%3Fb\"},\"sa\":{\"href\":\"wiki/A?b\"},\"dsr\":[0,16,11,2]}'>A?b</a></p>\n\n<p
 data-parsoid='{\"dsr\":[18,34,0,0]}'><a rel=\"mw:WikiLink\" 
href=\"./Wiki/A%3Fb\" title=\"Wiki/A?b\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/A%3Fb\"},\"sa\":{\"href\":\"wiki/A?b\"},\"dsr\":[18,34,11,2]}'>Baz</a></p>\n");
 add("html2html", "Self-link to section should not be bold", "<p 
data-parsoid='{\"dsr\":[0,44,0,0]}'><a rel=\"mw:WikiLink\" 
href=\"./Wiki/Main_Page#section\" title=\"Wiki/Main Page\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Main_Page#section\"},\"sa\":{\"href\":\"wiki/Main
 Page#section\"},\"dsr\":[0,44,25,2]}'>Main Page#section</a></p>\n");
@@ -1068,7 +1065,6 @@
 add("html2wt", "Link with space in namespace", 
"[/index.php?title=User_talk:Foo_bar&action=edit&redlink=1 User talk:Foo 
bar]\n");
 add("html2wt", "Namespace takes precedence over interwiki link (bug 51680)", 
"[[wiki/MemoryAlpha:AlphaTest|MemoryAlpha:AlphaTest]]\n");
 add("html2wt", "Piped link to namespace", 
"[/index.php?title=Meta:Disclaimers&action=edit&redlink=1 The disclaimers]\n");
-add("html2wt", "Link containing \"#<\" and \"#>\" % as a hex sequences- these 
are valid section anchors\nExample for such a section: == < ==", "[[Main 
Page#<|#<]][[Main Page#>|#>]]\n");
 add("html2wt", "Link containing double-single-quotes '' in text embedded in 
italics (bug 4598 sanity check)", "''Some 
[/index.php?title=Link&action=edit&redlink=1 pretty ''italics'' and 
stuff]!''\n");
 add("html2wt", "Plain link to page with question mark in title", 
"[[wiki/A?b|A?b]]\n\n[[wiki/A?b|Baz]]\n");
 add("html2wt", "BUG 337: Escaped self-links should be bold", "[[Bug462]] 
[[Bug462]]\n");
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 04c57db..c90d623 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -6912,8 +6912,6 @@
 </p>
 !! end
 
-# Note that parsoid does not munge anchor text; all non-space
-# characters are valid in HTML5 ids.
 !! test
 Anchor containing a #. (bug 63430)
 !! wikitext
@@ -6922,7 +6920,7 @@
 <p><a href="/wiki/Main_Page#And.23Link" title="Main Page">Main 
Page#And#Link</a>
 </p>
 !! html/parsoid
-<p><a rel="mw:WikiLink" href="./Main_Page#And%23Link" title="Main Page">Main 
Page#And#Link</a></p>
+<p><a rel="mw:WikiLink" href="./Main_Page#And.23Link" title="Main Page" 
data-parsoid='{"stx":"simple","a":{"href":"./Main_Page#And.23Link"},"sa":{"href":"Main
 Page#And#Link"}}'>Main Page#And#Link</a></p>
 !! end
 
 !! test
@@ -7031,8 +7029,6 @@
 </p>
 !!end
 
-# note that parsoid does not munge anchor text; all non-space
-# characters are valid in HTML5 anchors.
 !! test
 Link containing "#<" and "#>" % as a hex sequences- these are valid section 
anchors
 Example for such a section: == < ==
@@ -7042,7 +7038,7 @@
 <p><a href="#.3C">#&lt;</a><a href="#.3E">#&gt;</a>
 </p>
 !! html/parsoid
-<p><a rel="mw:WikiLink" href="./Main%20Page#%3C" title="Main Page">#&lt;</a><a 
rel="mw:WikiLink" href="./Main%20Page#%3E" title="Main Page">#></a></p>
+<p><a rel="mw:WikiLink" href="./Main%20Page#.3C" title="Main Page" 
data-parsoid='{"stx":"simple","a":{"href":"./Main%20Page#.3C"},"sa":{"href":"%23%3c"}}'>#&lt;</a><a
 rel="mw:WikiLink" href="./Main%20Page#.3E" title="Main Page" 
data-parsoid='{"stx":"simple","a":{"href":"./Main%20Page#.3E"},"sa":{"href":"%23%3e"}}'>#></a></p>
 !! end
 
 !! test
@@ -7526,7 +7522,7 @@
 <p><a href="/wiki/Foo#.3Cbar.3E" title="Foo">Foo#&lt;bar&gt;</a>
 </p>
 !! html/parsoid
-<p><a rel="mw:WikiLink" href="./Foo#%3Cbar%3E" title="Foo" 
data-parsoid='{"stx":"simple","a":{"href":"./Foo#%3Cbar%3E"},"sa":{"href":"Foo#&lt;bar>"}}'>Foo#&lt;bar></a></p>
+<p><a rel="mw:WikiLink" href="./Foo#.3Cbar.3E" title="Foo" 
data-parsoid='{"stx":"simple","a":{"href":"./Foo#.3Cbar.3E"},"sa":{"href":"Foo#&lt;bar>"}}'>Foo#&lt;bar></a></p>
 !! end
 
 ###
@@ -7584,6 +7580,23 @@
 </p>
 !! end
 
+!! test
+Link scenarios with escaped fragments
+!! wikitext
+[[#Is this great?]]
+[[Foo#Is this great?]]
+[[meatball:Foo#Is this great?]]
+!! html/php
+<p><a href="#Is_this_great.3F">#Is this great?</a>
+<a href="/wiki/Foo#Is_this_great.3F" title="Foo">Foo#Is this great?</a>
+<a href="http://www.usemod.com/cgi-bin/mb.pl?Foo#Is_this_great.3F" 
class="extiw" title="meatball:Foo">meatball:Foo#Is this great?</a>
+</p>
+!! html/parsoid
+<p><a rel="mw:WikiLink" href="./Main%20Page#Is_this_great.3F" 
data-parsoid='{"stx":"simple","a":{"href":"./Main%20Page#Is_this_great.3F"},"sa":{"href":"#Is
 this great?"}}'>#Is this great?</a>
+<a rel="mw:WikiLink" href="./Foo#Is_this_great.3F" title="Foo" 
data-parsoid='{"stx":"simple","a":{"href":"./Foo#Is_this_great.3F"},"sa":{"href":"Foo#Is
 this great?"}}'>Foo#Is this great?</a>
+<a rel="mw:ExtLink" 
href="http://www.usemod.com/cgi-bin/mb.pl?Foo#Is_this_great.3F" 
title="meatball:Foo" 
data-parsoid='{"stx":"simple","a":{"href":"http://www.usemod.com/cgi-bin/mb.pl?Foo#Is_this_great.3F"},"sa":{"href":"meatball:Foo#Is
 this great?"},"isIW":true}'>meatball:Foo#Is this great?</a></p>
+!! end
+
 # Ideally the wikipedia: prefix here should be proto-relative too
 # [CSA]: this is kind of a bogus test, as the PHP parser test doesn't
 # define the 'en' prefix, and originally the test used 'wikipedia',

-- 
To view, visit https://gerrit.wikimedia.org/r/226032
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I8758a7af37788a93d2326c7930bb82b9ad138f27
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to