jenkins-bot has submitted this change and it was merged.
Change subject: Allow non-newline whitespace in RFC/PMID/ISBN autolinks.
......................................................................
Allow non-newline whitespace in RFC/PMID/ISBN autolinks.
This matches core parser behavior. We also add better validation
of ISBNs, both during parsing and during wikitext serialization (WTS),
which again better matches core.
Change-Id: I57033647c5084cf281d735c6f245a5b98a235b38
---
M lib/mediawiki.WikiConfig.js
M lib/pegTokenizer.pegjs.txt
M tests/parserTests-blacklist.js
M tests/parserTests.txt
4 files changed, 91 insertions(+), 92 deletions(-)
Approvals:
Arlolra: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/mediawiki.WikiConfig.js b/lib/mediawiki.WikiConfig.js
index e24d179..983f3ab 100644
--- a/lib/mediawiki.WikiConfig.js
+++ b/lib/mediawiki.WikiConfig.js
@@ -614,9 +614,7 @@
// Default RFC/PMID resource URL patterns
WikiConfig.prototype.ExtResourceURLPatterns = {
- // SSS FIXME: ISBN url checker has to verify that the number is 10/13
digits long
- // We are punting on that right now which can lead to broken HTML
serialization for
- // invalid ISBN urls. For now, we "trust" clients to do the right thing.
+ // ISBN validation is done below in serializer.
'ISBN': {
prefix: "(?:(?:[.][.]?/)*)",
re: 'Special:BookSources/%isbn',
@@ -625,9 +623,16 @@
'PMID': { re: '//www.ncbi.nlm.nih.gov/pubmed/%s?dopt=Abstract' },
};
+var unispace = /[ \u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]+/g;
+
WikiConfig.prototype.ExtResourceSerializer = {
'ISBN': function(hrefWT, href, content) {
- if (hrefWT.join('') === content.replace(/[\- ]/g,
'').toUpperCase()) {
+ var normalized = Util.decodeEntities(content).replace(unispace,
' ')
+ .replace(/[\- \t]/g, '').toUpperCase();
+ // Validate ISBN length and format, so as not to produce magic links
+ // which aren't actually magic (i.e. which would not be parsed as one).
+ var valid = /^ISBN(97[89])?\d{9}(\d|X)$/.test(normalized);
+ if (hrefWT.join('') === normalized && valid) {
return content;
} else {
href = href.replace(/^\.\//, ''); // strip "./" prefix
@@ -635,10 +640,14 @@
}
},
'RFC': function(hrefWT, href, content) {
- return hrefWT.join(' ') === content ? content : '[' + href + '
' + content + ']';
+ var normalized = Util.decodeEntities(content).replace(unispace,
' ')
+ .replace(/[ \t]/g, ' ');
+ return hrefWT.join(' ') === normalized ? content : '[' + href +
' ' + content + ']';
},
'PMID': function(hrefWT, href, content) {
- return hrefWT.join(' ') === content ? content : '[' + href + '
' + content + ']';
+ var normalized = Util.decodeEntities(content).replace(unispace,
' ')
+ .replace(/[ \t]/g, ' ');
+ return hrefWT.join(' ') === normalized ? content : '[' + href +
' ' + content + ']';
},
};
diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt
index 4f59f5a..065f477 100644
--- a/lib/pegTokenizer.pegjs.txt
+++ b/lib/pegTokenizer.pegjs.txt
@@ -405,7 +405,7 @@
// before testing protocol.
return Util.isProtocolValid(target.slice(0, -1), options.env);
}
- sp:$( space / [\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000] )*
+ sp:$( space / unispace )*
targetOff:( "" { return endOffset(); })
content:(
t1:(
@@ -437,7 +437,7 @@
) { return r; }
autoref
- = ref:('RFC' / 'PMID') space_or_newline+ identifier:$[0-9]+ end_of_word
+ = ref:('RFC' / 'PMID') sp:space_or_nbsp+ identifier:$[0-9]+ end_of_word
{
var base_urls = {
'RFC': '//tools.ietf.org/html/rfc%s',
@@ -448,7 +448,7 @@
return [
new SelfclosingTagTk('extlink', [
new KV('href', tu.sprintf(base_urls[ref], identifier)),
- new KV('mw:content', [ref, identifier].join(' ')),
+ new KV('mw:content', tu.flattenString([ref, sp, identifier])),
new KV('typeof', 'mw:ExtLink/' + ref),
],
{ stx: "magiclink", tsr: tsrOffsets() }),
@@ -456,30 +456,31 @@
}
isbn
- = 'ISBN' space_or_newline+
- head:[0-9]
- digits:$([- ] &[0-9] / [0-9])+
- tail:$([- ]? [xX])?
- end_of_word
-{
- // TODO: round-trip non-decimals too!
- var isbn = [head, digits, tail].join('');
- var isbncode = isbn.replace(/[^\dX]/ig, '');
-
- // ISBNs can only be 10 or 13 chars long
- if ([10, 13].indexOf(isbncode.length) === -1) {
- // just return the string
- return [ text() ];
- }
-
- return [
+ = 'ISBN' sp:space_or_nbsp+ isbn:(
+ [0-9]
+ (s:space_or_nbsp_or_dash &[0-9] { return s; } / [0-9])+
+ ((space_or_nbsp_or_dash / "") [xX] / "")
+ ) isbncode:(
+ end_of_word
+ {
+ // Convert isbn token-and-entity array to stripped string.
+ return tu.flattenStringlist(isbn).filter(function(e) {
+ return e.constructor === String;
+ }).join('').replace(/[^\dX]/ig, '').toUpperCase();
+ }
+ ) &{
+ // ISBNs can only be 10 or 13 digits long (with a specific format)
+ return isbncode.length === 10 ||
+ (isbncode.length === 13 && /^97[89]/.test(isbncode));
+ } {
+ return [
new SelfclosingTagTk('extlink', [
- new KV('href', 'Special:BookSources/' + isbncode.toUpperCase()),
- new KV('mw:content', 'ISBN ' + isbn),
+ new KV('href', 'Special:BookSources/' + isbncode),
+ new KV('mw:content', tu.flattenString(['ISBN', sp, isbn])),
new KV('typeof', 'mw:WikiLink/ISBN'),
],
{ stx: "magiclink", tsr: tsrOffsets() }),
- ];
+ ];
}
@@ -1879,6 +1880,23 @@
end_of_word
= eof / ![A-Za-z0-9_]
+// Unicode "separator, space" category. It covers the \u0020 space as well
+// as \u3000 IDEOGRAPHIC SPACE (see bug 19052). In PHP this is \p{Zs}.
+// Keep this up-to-date with the characters tagged ;Zs; in
+// http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
+unispace = [ \u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]
+
+// Non-newline whitespace, including non-breaking spaces. Used for magic
links.
+space_or_nbsp
+ = space // includes \t
+ / unispace
+ / he:htmlentity &{ return Array.isArray(he) && /^\u00A0$/.test(he[1]); }
+ { return he; }
+
+// Used within ISBN magic links
+space_or_nbsp_or_dash
+ = space_or_nbsp / "-"
+
// Extra newlines followed by at least another newline. Usually used to
// compress surplus newlines into a meta tag, so that they don't trigger
// paragraphs.
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 900d471..ae43f87 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -101,9 +101,6 @@
add("wt2html", "Namespace -1 {{ns:-1}}", "<p about=\"#mwt1\"
typeof=\"mw:Transclusion\" data-parsoid='{\"dsr\":[0,9,0,0],\"pi\":[[]]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"ns:-1\",\"function\":\"ns\"},\"params\":{},\"i\":0}}]}'>-1</p>");
add("wt2html", "Namespace (lang=de) Benutzer Diskussion {{ns:3}}", "<p
about=\"#mwt1\" typeof=\"mw:Transclusion\"
data-parsoid='{\"dsr\":[0,8,0,0],\"pi\":[[]]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"ns:3\",\"function\":\"ns\"},\"params\":{},\"i\":0}}]}'>3</p>");
add("wt2html", "Urlencode", "<p data-parsoid='{\"dsr\":[0,115,0,0]}'><span
about=\"#mwt1\" typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[]],\"dsr\":[0,24,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"urlencode:hi
world?!\",\"function\":\"urlencode\"},\"params\":{},\"i\":0}}]}'>hi%20world%3F!</span>\n<span
about=\"#mwt2\" typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]],\"dsr\":[25,54,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"urlencode:hi
world?!\",\"function\":\"urlencode\"},\"params\":{\"1\":{\"wt\":\"WIKI\"}},\"i\":0}}]}'>hi%20world%3F!</span>\n<span
about=\"#mwt3\" typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]],\"dsr\":[55,84,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"urlencode:hi
world?!\",\"function\":\"urlencode\"},\"params\":{\"1\":{\"wt\":\"PATH\"}},\"i\":0}}]}'>hi%20world%3F!</span>\n<span
about=\"#mwt4\" typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]],\"dsr\":[85,115,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"urlencode:hi
world?!\",\"function\":\"urlencode\"},\"params\":{\"1\":{\"wt\":\"QUERY\"}},\"i\":0}}]}'>hi%20world%3F!</span></p>");
-add("wt2html", "Magic links: RFC (w/ non-newline whitespace, bug
28950/29025)", "<p data-parsoid='{\"dsr\":[0,47,0,0]}'>RFC <span
typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&nbsp;\",\"srcContent\":\"
\",\"dsr\":[4,10,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#160;\",\"srcContent\":\"
\",\"dsr\":[10,16,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#0160;\",\"srcContent\":\"
\",\"dsr\":[16,23,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#xA0;\",\"srcContent\":\"
\",\"dsr\":[23,29,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#Xa0;\",\"srcContent\":\"
\",\"dsr\":[29,35,null,null]}'> </span> 822\n<a
href=\"//tools.ietf.org/html/rfc822\" rel=\"mw:ExtLink\"
data-parsoid='{\"stx\":\"magiclink\",\"dsr\":[40,47,0,0]}'>RFC 822</a></p>");
-add("wt2html", "Magic links: ISBN (w/ non-newline whitespace, bug
28950/29025)", "<p data-parsoid='{\"dsr\":[0,114,0,0]}'>ISBN <span
typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&nbsp;\",\"srcContent\":\"
\",\"dsr\":[5,11,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#160;\",\"srcContent\":\"
\",\"dsr\":[11,17,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#0160;\",\"srcContent\":\"
\",\"dsr\":[17,24,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#xA0;\",\"srcContent\":\"
\",\"dsr\":[24,30,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#Xa0;\",\"srcContent\":\"
\",\"dsr\":[30,36,null,null]}'> </span> 978<span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&nbsp;\",\"srcContent\":\"
\",\"dsr\":[40,46,null,null]}'> </span>0<span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#160;\",\"srcContent\":\"
\",\"dsr\":[47,53,null,null]}'> </span>316<span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#0160;\",\"srcContent\":\"
\",\"dsr\":[56,63,null,null]}'> </span>09811<span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#xA0;\",\"srcContent\":\"
\",\"dsr\":[68,74,null,null]}'> </span>3\n<a
href=\"./Special:BookSources/9780316098113\" rel=\"mw:WikiLink\"
data-parsoid='{\"stx\":\"magiclink\",\"dsr\":[76,94,2,2]}'>ISBN
9780316098113</a>\nISBN 978\n0316098113</p>");
-add("wt2html", "Magic links: PMID (w/ non-newline whitespace, bug
28950/29025)", "<p data-parsoid='{\"dsr\":[0,51,0,0]}'>PMID <span
typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&nbsp;\",\"srcContent\":\"
\",\"dsr\":[5,11,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#160;\",\"srcContent\":\"
\",\"dsr\":[11,17,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#0160;\",\"srcContent\":\"
\",\"dsr\":[17,24,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#xA0;\",\"srcContent\":\"
\",\"dsr\":[24,30,null,null]}'> </span><span typeof=\"mw:Entity\"
data-parsoid='{\"src\":\"&#Xa0;\",\"srcContent\":\"
\",\"dsr\":[30,36,null,null]}'> </span> 1234\n<a
href=\"//www.ncbi.nlm.nih.gov/pubmed/1234?dopt=Abstract\" rel=\"mw:ExtLink\"
data-parsoid='{\"stx\":\"magiclink\",\"dsr\":[42,51,0,0]}'>PMID 1234</a></p>");
add("wt2html", "Nonexistent template", "<p
data-parsoid='{\"dsr\":[0,28,0,0]}'><span typeof=\"mw:Transclusion
mw:Placeholder\" about=\"#mwt1\" id=\"mwt1\"
data-parsoid='{\"dsr\":[0,28,null,null],\"pi\":[[]]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"thistemplatedoesnotexist\",\"href\":\"./Template:Thistemplatedoesnotexist\"},\"params\":{},\"i\":0}}]}'>Warning:
Page/template fetching disabled, and no cache for
Template:Thistemplatedoesnotexist</span></p>");
add("wt2html", "Template with invalid target containing tags", "<p
data-parsoid='{\"dsr\":[0,54,0,0]}'>{{a<b
data-parsoid='{\"stx\":\"html\",\"dsr\":[3,11,3,4]}'>b</b>|<span
about=\"#mwt2\" typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]],\"dsr\":[12,24,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"foo\"}},\"i\":0}}]}'>foo</span>|<span
about=\"#mwt3\" typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]],\"dsr\":[25,35,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"a\"}},\"i\":0}}]}'>a</span>=<span
about=\"#mwt4\" typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]],\"dsr\":[36,46,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"b\"}},\"i\":0}}]}'>b</span>|a
=b}}</p>");
add("wt2html", "Template with invalid target containing unclosed tag", "<p
data-parsoid='{\"dsr\":[0,49,0,0]}'>{{a<b
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[3,49,3,0]}'>|<span
about=\"#mwt2\" typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]],\"dsr\":[7,19,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"foo\"}},\"i\":0}}]}'>foo</span>|<span
about=\"#mwt3\" typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]],\"dsr\":[20,30,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"a\"}},\"i\":0}}]}'>a</span>=<span
about=\"#mwt4\" typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\",\"spc\":[\"\",\"\",\"\",\"\"]}]],\"dsr\":[31,41,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"b\"}},\"i\":0}}]}'>b</span>|a
=b}}</b></p>");
@@ -357,9 +354,6 @@
add("wt2wt", "Nested lists 3 (first element empty)", "\n**bar\n");
add("wt2wt", "Nested lists 6 (both elements empty)", "\n**\n");
add("wt2wt", "Unbalanced closing non-block tags don't break a list\n(php
parser relies on Tidy to fix up)", "<span>\n*a<span>\n*b");
-add("wt2wt", "Magic links: RFC (w/ non-newline whitespace, bug 28950/29025)",
"RFC      822\nRFC 822\n");
-add("wt2wt", "Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)",
"ISBN     
978 0 316 09811 3\nISBN 9780316098113\nISBN
978\n0316098113\n");
-add("wt2wt", "Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)",
"PMID      1234\nPMID 1234\n");
add("wt2wt", "Template with invalid target containing tags",
"<nowiki>{{</nowiki>a<b>b</b>|{{echo|foo}}|{{echo|a}}={{echo|b}}|a
=b<nowiki>}}</nowiki>\n");
add("wt2wt", "Template with invalid target containing unclosed tag",
"<nowiki>{{</nowiki>a<b>|{{echo|foo}}|{{echo|a}}={{echo|b}}|a
=b<nowiki>}}</nowiki>\n");
add("wt2wt", "Template with invalid target containing wikilink",
"<nowiki>{{</nowiki>[[Main Page]]<nowiki>}}</nowiki>\n");
@@ -404,8 +398,6 @@
add("wt2wt", "Inline wiki vs wiki block nesting", "'''Bold paragraph'''\n\nNew
wiki paragraph\n");
add("wt2wt", "Mixing markup for italics and bold",
"'<nowiki/>''bold'<nowiki/>'''''bold''bolditalics'''''\n");
add("wt2wt", "Illegal character references (T106578)", "; Null: �\n; FF:
\n; CR: \n; Control (low): \n; Control (high):  Ÿ\n;
Surrogate: ��\n; This is an okay astral character: 💩");
-add("wt2wt", "ISBN code coverage", "ISBN 978-0-1234-56 789\n");
-add("wt2wt", "RFC code coverage", "RFC 983 987\n");
add("wt2wt", "Image with page parameter", "[[File:LoremIpsum.djvu]]\n");
add("wt2wt", "Don't fall for the self-closing div", "<div>hello world</div>");
add("wt2wt", "Parsing of overlapping (improperly nested) inline html tags",
"<span><s>x</span>\n");
@@ -508,9 +500,6 @@
add("html2html", "Unbalanced closing non-block tags don't break a list\n(php
parser relies on Tidy to fix up)", "<p data-parsoid='{\"dsr\":[0,6,0,0]}'><span
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[0,6,6,0]}'></span></p>\n\n<ul
data-parsoid='{\"dsr\":[8,28,0,0]}'><li data-parsoid='{\"dsr\":[8,24,1,0]}'>
a<span data-parsoid='{\"stx\":\"html\",\"dsr\":[11,24,6,7]}'></span></li>\n<li
data-parsoid='{\"dsr\":[25,28,1,0]}'> b</li></ul>\n\n<p
data-parsoid='{\"dsr\":[30,37,0,0]}'></p>");
add("html2html", "2. List embedded in a formatting tag", "<p
data-parsoid='{\"dsr\":[0,15,0,0]}'><small
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,15,7,8]}'></small></p>\n<pre
data-parsoid='{\"dsr\":[16,25,1,0]}'><small
data-parsoid='{\"stx\":\"html\",\"autoInsertedEnd\":true,\"dsr\":[17,25,7,0]}'></small></pre><small
data-parsoid='{\"stx\":\"html\",\"autoInsertedStart\":true,\"dsr\":[25,37,0,8]}'><ul
data-parsoid='{\"dsr\":[25,28,0,0]}'><li data-parsoid='{\"dsr\":[25,28,1,0]}'>
a</li></ul>\n</small>\n<ul data-parsoid='{\"dsr\":[38,56,0,0]}'><li
data-parsoid='{\"dsr\":[38,56,1,0]}'> <small
data-parsoid='{\"stx\":\"html\",\"dsr\":[40,56,7,8]}'>b</small></li></ul>\n");
add("html2html", "Case-sensitive magic words, when cased differently, should
just be template transclusions", "<p
data-parsoid='{\"dsr\":[0,331,0,0]}'>[/index.php?title=Template:CurrentMonth&action=edit&redlink=1
Template:CurrentMonth]\n[/index.php?title=Template:Currentday&action=edit&redlink=1
Template:Currentday]\n[/index.php?title=Template:CURreNTweEK&action=edit&redlink=1
Template:CURreNTweEK]\n[/index.php?title=Template:CurrentHour&action=edit&redlink=1
Template:CurrentHour]</p>\n");
-add("html2html", "Magic links: internal link to RFC (bug 479)", "<p
data-parsoid='{\"dsr\":[0,56,0,0]}'>[/index.php?title=RFC_123&action=edit&redlink=1
<a href=\"//tools.ietf.org/html/rfc123\" rel=\"mw:ExtLink\"
data-parsoid='{\"stx\":\"magiclink\",\"dsr\":[48,55,0,0]}'>RFC 123</a>]</p>\n");
-add("html2html", "Magic links: ISBN (bug 1937)", "<p
data-parsoid='{\"dsr\":[0,58,0,0]}'><a rel=\"mw:WikiLink\"
href=\"./Wiki/Special:BookSources/0306406152\"
title=\"Wiki/Special:BookSources/0306406152\"
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Special:BookSources/0306406152\"},\"sa\":{\"href\":\"wiki/Special:BookSources/0306406152\"},\"dsr\":[0,58,38,2]}'>ISBN
0-306-40615-2</a></p>\n");
-add("html2html", "Magic links: ISBN (w/ non-newline whitespace, bug
28950/29025)", "<p data-parsoid='{\"dsr\":[0,121,0,0]}'><a rel=\"mw:WikiLink\"
href=\"./Wiki/Special:BookSources/9780316098113\"
title=\"Wiki/Special:BookSources/9780316098113\"
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Special:BookSources/9780316098113\"},\"sa\":{\"href\":\"wiki/Special:BookSources/9780316098113\"},\"dsr\":[0,65,41,2]}'>ISBN
978 0 316 09811 3</a>\n<span typeof=\"mw:Nowiki\"
data-parsoid='{\"dsr\":[66,121,8,9]}'>ISBN\n9780316098113\nISBN
978\n0316098113</span></p>\n");
add("html2html", "Nonexistent template", "<p
data-parsoid='{\"dsr\":[0,108,0,0]}'>[/index.php?title=Template:Thistemplatedoesnotexist&action=edit&redlink=1
Template:Thistemplatedoesnotexist]</p>\n");
add("html2html", "Template with invalid target containing wikilink", "<p
data-parsoid='{\"dsr\":[0,17,0,0]}'>{{<a rel=\"mw:WikiLink\"
href=\"./Main_Page\" title=\"Main Page\"
data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"./Main_Page\"},\"sa\":{\"href\":\"Main
Page\"},\"dsr\":[2,15,2,2]}'>Main Page</a>}}</p>\n");
add("html2html", "Template unnamed parameter", "<p
data-parsoid='{\"dsr\":[0,32,0,0]}'><a rel=\"mw:WikiLink\"
href=\"./Wiki/Main_Page\" title=\"Wiki/Main Page\"
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Main_Page\"},\"sa\":{\"href\":\"wiki/Main
Page\"},\"dsr\":[0,32,17,2]}'>the main page</a></p>\n");
@@ -987,11 +976,6 @@
add("html2wt", "Namespace (lang=de) Benutzer {{ns:User}}", "Benutzer\n");
add("html2wt", "Namespace (lang=de) Benutzer Diskussion {{ns:3}}", "Benutzer
Diskussion\n");
add("html2wt", "Urlencode",
"hi+world%3F%21\nhi_world%3F!\nhi%20world%3F%21\nhi+world%3F%21\n");
-add("html2wt", "Magic links: internal link to RFC (bug 479)",
"[/index.php?title=RFC_123&action=edit&redlink=1 RFC 123]\n");
-add("html2wt", "Magic links: RFC (w/ non-newline whitespace, bug
28950/29025)", "RFC 822\n<nowiki>RFC\n822</nowiki>\n");
-add("html2wt", "Magic links: ISBN (bug 1937)",
"[[wiki/Special:BookSources/0306406152|ISBN 0-306-40615-2]]\n");
-add("html2wt", "Magic links: ISBN (w/ non-newline whitespace, bug
28950/29025)", "[[wiki/Special:BookSources/9780316098113|ISBN 978 0 316 09811
3]]\n<nowiki>ISBN\n9780316098113\nISBN 978\n0316098113</nowiki>\n");
-add("html2wt", "Magic links: PMID (w/ non-newline whitespace, bug
28950/29025)", "PMID 1234\n<nowiki>PMID\n1234</nowiki>\n");
add("html2wt", "Nonexistent template",
"[/index.php?title=Template:Thistemplatedoesnotexist&action=edit&redlink=1
Template:Thistemplatedoesnotexist]\n");
add("html2wt", "Template with invalid target containing tags",
"<nowiki>{{</nowiki>a'''b'''|foo|a=b|a = b<nowiki>}}</nowiki>\n");
add("html2wt", "Template with invalid target containing unclosed tag",
"<nowiki>{{</nowiki>a'''|foo|a=b|a = b<nowiki>}}</nowiki>'''\n");
@@ -1813,32 +1797,6 @@
add("selser", "Unbalanced closing non-block tags don't break a list\n(php
parser relies on Tidy to fix up) [[1],0,2]", "<span
data-foobar=\"5ikcial165z8h0k9\">\n\nzsspri1tvvi8jjor\n*a</span><span>\n*b</span>");
add("selser", "Table with missing opening <tr> tag [[2,[[4,3],3]]]",
"<table><!--x5lw2ikhl5cow29-->\n<td>v9n71rpyucvxi529</td></tr>\n</table>");
add("selser", "Table with missing opening <tr> tag [[2,[0,3]]]",
"<table><!--okhghfhpithgp66r-->\n<td>foo</td>\n</tr>\n</table>");
-add("selser", "Magic links: RFC (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,2,2]]", "RFC     6ln0fhswctinewmi
822\n4yp1y2ywzvsd1jor<nowiki/>RFC\n822");
-add("selser", "Magic links: RFC (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,2,0]]", "RFC     07diqn4ioilfpqfr
822\nRFC\n822");
-add("selser", "Magic links: RFC (w/ non-newline whitespace, bug 28950/29025)
[1]", "RFC      822\nRFC\n822");
-add("selser", "Magic links: RFC (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,0,2]]", "RFC     
822\n96kxi90cyxcx47vi<nowiki/>RFC\n822");
-add("selser", "Magic links: RFC (w/ non-newline whitespace, bug 28950/29025)
[2]", "q19jo3fccakmx6r\n\nRFC      822\nRFC\n822");
-add("selser", "Magic links: RFC (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,4,0]]", "RFC
    0ypabtwijz1714i<nowiki/>RFC\n822");
-add("selser", "Magic links: RFC (w/ non-newline whitespace, bug 28950/29025)
[[4,0,0,0,0,0,0,0]]", "14dg5rj21utk2o6r     
822\nRFC\n822");
-add("selser", "Magic links: RFC (w/ non-newline whitespace, bug 28950/29025)
[[2,0,0,0,0,0,3,2]]", "0woxbjz68u3ul3diRFC
    odegaopxctwvzpvi<nowiki/>RFC\n822");
-add("selser", "Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,3,0,0,0,2,0,0,0,2,0,0]]", "ISBN
     0 gz7gaw898do8yqfr316 09811 qmd3iuxu0x80k93\nISBN\n9780316098113\nISBN
978\n0316098113");
-add("selser", "Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)
[1]", "ISBN     
978 0 316 09811 3\nISBN\n9780316098113\nISBN
978\n0316098113");
-add("selser", "Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,0,0,2,0,3,0,0,0,4,0,0]]", "ISBN     
978 0mxumws078wzsemi0  09811 tlz1025s9atf0f6r<nowiki/>ISBN\n9780316098113\nISBN
978\n0316098113");
-add("selser", "Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)
[2]", "l7i6lp3ncw0cnmi\n\nISBN     
978 0 316 09811 3\nISBN\n9780316098113\nISBN
978\n0316098113");
-add("selser", "Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)
[[3,0,0,0,0,0,0,0,3,0,3,0,0,0,4,2,0]]", "     
978   09811 p52pce7ruh257b9i2jg4hs2oexqd7vi<nowiki/>ISBN\n9780316098113\nISBN
978\n0316098113");
-add("selser", "Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)
[[4,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0]]",
"j0de5dehlo1g7gb9     
978 0  09811 3\nISBN\n9780316098113\nISBN
978\n0316098113");
-add("selser", "Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,3]]", "ISBN     
978 0 f5x7dgg6c00h33di316 09811 3\ngngq6yv9i1z6w29<nowiki/>ISBN\n9780316098113\n");
-add("selser", "Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,0,0,4,0,2,0,3,0,3,0,3]]", "ISBN     
978 3j71dhcogb7kqpvi j5j38kguanu92j4i316  ISBN\n9780316098113\n");
-add("selser", "Magic links: ISBN (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,0,0,0,0,2,0,4,0,0,0,0]]", "ISBN     
978 0 zkso2ozkkj1nhfr316 rfut2jysxhudte29 3\nISBN\n9780316098113\nISBN
978\n0316098113");
-add("selser", "Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)
[[2,0,0,0,0,0,3,2]]", "6kdkhed02ila0pb9PMID
    d6bjb5w0jwpwg66r<nowiki/>PMID\n1234");
-add("selser", "Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)
[2]", "pslekom4bo0cc8fr\n\nPMID     
1234\nPMID\n1234");
-add("selser", "Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)
[[2,0,0,0,0,0,0,0]]", "dacraut7uz4u0udiPMID     
1234\nPMID\n1234");
-add("selser", "Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)
[1]", "PMID      1234\nPMID\n1234");
-add("selser", "Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,0,2]]", "PMID     
1234\noxxcnmftm0m86w29<nowiki/>PMID\n1234");
-add("selser", "Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,3,0]]", "PMID     PMID\n1234");
-add("selser", "Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)
[[2,0,0,0,0,0,2,0]]", "imkeqyhw3pi4quxrPMID
    esy5kg9hbewv1jor 1234\nPMID\n1234");
-add("selser", "Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)
[[0,0,0,0,0,0,2,2]]", "PMID     dv2ylk4y02l9dx6r
1234\n3xnk54z12w5ng66r<nowiki/>PMID\n1234");
-add("selser", "Magic links: PMID (w/ non-newline whitespace, bug 28950/29025)
[[3,0,0,0,0,0,2,0]]", "     t09u6ilyu3zkhuxr
1234\nPMID\n1234");
add("selser", "Template with invalid target containing tags [2]",
"ki3gc6j34165stt9\n\n{{a<b>b</b>|{{echo|foo}}|{{echo|a}}={{echo|b}}|a = b}}");
add("selser", "Template with invalid target containing tags
[[3,3,3,0,4,0,2,0,0]]",
"{{echo|foo}}jauo06zjihi1kyb9{{echo|a}}9h10iqritlwvobt9={{echo|b}}|a =b}}");
add("selser", "Template with invalid target containing tags
[[3,[4],2,0,0,0,3,0,0]]",
"<b>5q0b9wq3qgu5l8fr</b>gs6mp3pt36vibe29|{{echo|foo}}|{{echo|a}}{{echo|b}}|a
=b}}");
@@ -2193,18 +2151,6 @@
add("selser", "Illegal character references (T106578)
[[0,4,2,4,0,3,2,0,4,2,[3,0],0,4,[2,0,0,0],0,2,2,4,0,4]]", "; Null\n:
9fwhioeeke0442t9\n: j9rvy22kl04quxr\n: b9r3w66fxy8k6gvi: \n:
y1yftc25vffg8pvi\n; CR: 
\n: 2qqnhtc01zjpds4i\n: h4mtw9i1zzq1714i\n;
Control (low):\n: pbxoidwcwzlac3di:xdk2vbc7v3rf6r  Ÿ\n:
ltyipg2p1w9izfr\n; Surrogate\n: 53dgbaz96yeyu8fr: ��\n:
w4u00ww32t7kqpvi\n; This is an okay astral character\n: ybr70brqekzw7b9");
add("selser", "Illegal character references (T106578)
[[[2],0,0,[4],[2,0],0,[2],0,0,1,4,0,[3],0,0,2,4,4,0,3]]", ";zu5495hxxql1sjor
Null: �\n;5cbqazxlqwbl0udi:pof7ppk4wzmpldi \n;xtwdv922j0xusor CR:

\n; Control (low)\n: 7mds6rq9nzwoecdi\n;:  Ÿ\n:
9k3u42yb3sl6usor\n; Surrogate\n: qb704xdxe3aqbyb9\n: phjue90joss2lnmi\n; This
is an okay astral character");
add("selser", "Illegal character references (T106578)
[[1,3,0,1,4,3,2,0,0,2,[4,0],0,[2],2,4,1,3,0,2,1]]", "; Null\n; FF\n:
xf4ed4b6sfywrk9\n: 7lsfy0qpn1nn4s4i\n; CR: 
\n: uh0ehwxo7xjbrzfr\n; Control
(low):qws1kllvtusj1yvi\n;0bvupxnunsq93sor Control (high)\n:
6d547p71l1ve7b9:  Ÿ\n: swbefkspflsrlik9\n; Surrogate\n:
ulaiulmr6q88h0k9\n; This is an okay astral character: 💩");
-add("selser", "ISBN code coverage [2]", "v9jyhulo7jnka9k9\n\nISBN
978-0-1234-56 789");
-add("selser", "ISBN code coverage [1]", "ISBN 978-0-1234-56 789");
-add("selser", "ISBN code coverage [[0,0,3]]", "ISBN 978-0-1234-56 \n");
-add("selser", "ISBN code coverage [[0,0,4]]", "ISBN
978-0-1234-56 zehty9qfs8umcxr");
-add("selser", "ISBN code coverage [[2,0,4]]", "r17ittrbn6mbzkt9<nowiki/>ISBN
978-0-1234-56 dpf8ccpk587aatt9");
-add("selser", "ISBN code coverage [[2,0,0]]", "hak6gyichz2jfw29<nowiki/>ISBN
978-0-1234-56 789");
-add("selser", "RFC code coverage [2]", "zf4sabjzeusaif6r\n\nRFC
983 987");
-add("selser", "RFC code coverage [[2,0,0]]", "3m8l2kyc9uanhfr<nowiki/>RFC
983 987");
-add("selser", "RFC code coverage [1]", "RFC 983 987");
-add("selser", "RFC code coverage [[0,0,3]]", "RFC 983 \n");
-add("selser", "RFC code coverage [[0,0,4]]", "RFC
983 ry0mhh5cy0fkzkt9");
-add("selser", "RFC code coverage [[2,0,2]]", "8rmi32979htfn7b9<nowiki/>RFC
983 4i0lf243q1j4te29987");
add("selser", "Image with page parameter [2]",
"t1gktsrwi7iizfr\n\n[[File:LoremIpsum.djvu|page=2]]");
add("selser", "Image with page parameter [1]",
"[[File:LoremIpsum.djvu|page=2]]");
add("selser", "Don't fall for the self-closing div [[4]]",
"<div>f7k96jvehbnmte29</div/>");
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 86a829d..04915dc 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -9892,27 +9892,33 @@
Magic links: internal link to RFC (bug 479)
!! wikitext
[[RFC 123]]
-!! html
+!! html/php
<p><a href="/index.php?title=RFC_123&action=edit&redlink=1"
class="new" title="RFC 123 (page does not exist)">RFC 123</a>
</p>
+!! html/parsoid
+<p><a rel="mw:WikiLink" href="./RFC_123" title="RFC 123">RFC 123</a></p>
!! end
!! test
Magic links: RFC (bug 479)
!! wikitext
RFC 822
-!! html
+!! html/php
<p><a class="external mw-magiclink-rfc" rel="nofollow"
href="//tools.ietf.org/html/rfc822">RFC 822</a>
</p>
+!! html/parsoid
+<p><a href="//tools.ietf.org/html/rfc822" rel="mw:ExtLink">RFC 822</a></p>
!! end
!! test
Magic links: RFC (bug 65278)
!! wikitext
This is RFC 822 but thisRFC 822 is not RFC 822linked.
-!! html
+!! html/php
<p>This is <a class="external mw-magiclink-rfc" rel="nofollow"
href="//tools.ietf.org/html/rfc822">RFC 822</a> but thisRFC 822 is not RFC
822linked.
</p>
+!! html/parsoid
+<p>This is <a href="//tools.ietf.org/html/rfc822" rel="mw:ExtLink">RFC 822</a>
but thisRFC 822 is not RFC 822linked.</p>
!! end
!! test
@@ -9921,20 +9927,26 @@
RFC      822
RFC
822
-!! html
+!! html/php
<p><a class="external mw-magiclink-rfc" rel="nofollow"
href="//tools.ietf.org/html/rfc822">RFC 822</a>
RFC
822
</p>
+!! html/parsoid
+<p><a href="//tools.ietf.org/html/rfc822" rel="mw:ExtLink">RFC <span
typeof="mw:Entity" data-parsoid='{"src":"&nbsp;","srcContent":" "}'>
</span><span typeof="mw:Entity"
data-parsoid='{"src":"&#160;","srcContent":" "}'> </span><span
typeof="mw:Entity" data-parsoid='{"src":"&#0160;","srcContent":" "}'>
</span><span typeof="mw:Entity"
data-parsoid='{"src":"&#xA0;","srcContent":" "}'> </span><span
typeof="mw:Entity" data-parsoid='{"src":"&#Xa0;","srcContent":" "}'>
</span> 822</a>
+RFC
+822</p>
!! end
!! test
Magic links: ISBN (bug 1937)
!! wikitext
ISBN 0-306-40615-2
-!! html
+!! html/php
<p><a href="/wiki/Special:BookSources/0306406152" class="internal
mw-magiclink-isbn">ISBN 0-306-40615-2</a>
</p>
+!! html/parsoid
+<p><a href="./Special:BookSources/0306406152" rel="mw:WikiLink">ISBN
0-306-40615-2</a></p>
!! end
!! test
@@ -9956,31 +9968,41 @@
9780316098113
ISBN 978
0316098113
-!! html
+!! html/php
<p><a href="/wiki/Special:BookSources/9780316098113" class="internal
mw-magiclink-isbn">ISBN 978 0 316 09811 3</a>
ISBN
9780316098113
ISBN 978
0316098113
</p>
+!! html/parsoid
+<p><a href="./Special:BookSources/9780316098113" rel="mw:WikiLink">ISBN <span
typeof="mw:Entity" data-parsoid='{"src":"&nbsp;","srcContent":" "}'>
</span><span typeof="mw:Entity"
data-parsoid='{"src":"&#160;","srcContent":" "}'> </span><span
typeof="mw:Entity" data-parsoid='{"src":"&#0160;","srcContent":" "}'>
</span><span typeof="mw:Entity"
data-parsoid='{"src":"&#xA0;","srcContent":" "}'> </span><span
typeof="mw:Entity" data-parsoid='{"src":"&#Xa0;","srcContent":" "}'>
</span> 978<span typeof="mw:Entity"
data-parsoid='{"src":"&nbsp;","srcContent":" "}'> </span>0<span
typeof="mw:Entity" data-parsoid='{"src":"&#160;","srcContent":" "}'>
</span>316<span typeof="mw:Entity"
data-parsoid='{"src":"&#0160;","srcContent":" "}'> </span>09811<span
typeof="mw:Entity" data-parsoid='{"src":"&#xA0;","srcContent":" "}'>
</span>3</a>
+ISBN
+9780316098113
+ISBN 978
+0316098113</p>
!! end
!! test
Magic links: PMID incorrectly converts space to underscore
!! wikitext
PMID 1234
-!! html
+!! html/php
<p><a class="external mw-magiclink-pmid" rel="nofollow"
href="//www.ncbi.nlm.nih.gov/pubmed/1234?dopt=Abstract">PMID 1234</a>
</p>
+!! html/parsoid
+<p><a href="//www.ncbi.nlm.nih.gov/pubmed/1234?dopt=Abstract"
rel="mw:ExtLink">PMID 1234</a></p>
!! end
!! test
Magic links: PMID (bug 65278)
!! wikitext
This is PMID 1234 but thisPMID 1234 is not PMID 1234linked.
-!! html
+!! html/php
<p>This is <a class="external mw-magiclink-pmid" rel="nofollow"
href="//www.ncbi.nlm.nih.gov/pubmed/1234?dopt=Abstract">PMID 1234</a> but
thisPMID 1234 is not PMID 1234linked.
</p>
+!! html/parsoid
+<p>This is <a href="//www.ncbi.nlm.nih.gov/pubmed/1234?dopt=Abstract"
rel="mw:ExtLink">PMID 1234</a> but thisPMID 1234 is not PMID 1234linked.</p>
!! end
!! test
@@ -9989,11 +10011,15 @@
PMID      1234
PMID
1234
-!! html
+!! html/php
<p><a class="external mw-magiclink-pmid" rel="nofollow"
href="//www.ncbi.nlm.nih.gov/pubmed/1234?dopt=Abstract">PMID 1234</a>
PMID
1234
</p>
+!! html/parsoid
+<p><a href="//www.ncbi.nlm.nih.gov/pubmed/1234?dopt=Abstract"
rel="mw:ExtLink">PMID <span typeof="mw:Entity"
data-parsoid='{"src":"&nbsp;","srcContent":" "}'> </span><span
typeof="mw:Entity" data-parsoid='{"src":"&#160;","srcContent":" "}'>
</span><span typeof="mw:Entity"
data-parsoid='{"src":"&#0160;","srcContent":" "}'> </span><span
typeof="mw:Entity" data-parsoid='{"src":"&#xA0;","srcContent":" "}'>
</span><span typeof="mw:Entity"
data-parsoid='{"src":"&#Xa0;","srcContent":" "}'> </span> 1234</a>
+PMID
+1234</p>
!! end
###
--
To view, visit https://gerrit.wikimedia.org/r/134145
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I57033647c5084cf281d735c6f245a5b98a235b38
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Cscott <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Jforrester <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits