jenkins-bot has submitted this change and it was merged. Change subject: Context aware parsing of definition list colon ......................................................................
Context aware parsing of definition list colon * Ignore inside tags to prevent illegal overlapping. * Follows findColonNoLinks in the PHP parser. * Adds a new test to document this behaviour, which exposes a bug in parsing templates in definition lists. Serialization is introducing unnecessary nowikis, which look like they should be fixed by https://gerrit.wikimedia.org/r/#/c/178871/ but probably aren't. Bug: T71219 Change-Id: I2d8403eb732fdb022e8c87a990fd392a62e7f477 --- M lib/ext.core.ListHandler.js M tests/parserTests-blacklist.js M tests/parserTests.txt 3 files changed, 64 insertions(+), 6 deletions(-) Approvals: Subramanya Sastry: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/ext.core.ListHandler.js b/lib/ext.core.ListHandler.js index 5594a34..d93b260 100644 --- a/lib/ext.core.ListHandler.js +++ b/lib/ext.core.ListHandler.js @@ -5,12 +5,13 @@ "use strict"; var Util = require('./mediawiki.Util.js').Util, defines = require('./mediawiki.parser.defines.js'); + // define some constructor shortcuts var NlTk = defines.NlTk, TagTk = defines.TagTk, EndTagTk = defines.EndTagTk; -function ListHandler ( manager ) { +function ListHandler( manager ) { this.manager = manager; this.listFrames = []; this.init(); @@ -42,7 +43,9 @@ endtags : [], // Stack of end tags // Partial DOM building heuristic // # of open block tags encountered within list context - numOpenBlockTags: 0 + numOpenBlockTags: 0, + // # of open tags encountered within list context + numOpenTags: 0 }; } @@ -77,6 +80,14 @@ } return { token: token }; + } + + // Keep track of open tags per list frame in order to prevent colons + // starting lists illegally. Php's findColonNoLinks. 
+ if ( token.constructor === TagTk ) { + this.currListFrame.numOpenTags += 1; + } else if ( token.constructor === EndTagTk && this.currListFrame.numOpenTags > 0 ) { + this.currListFrame.numOpenTags -= 1; } if (token.constructor === EndTagTk) { @@ -125,7 +136,7 @@ if (token.constructor === TagTk) { if (token.name === 'table') { this.listFrames.push(this.currListFrame); - this.currListFrame = null; + this.reset(); } else if (Util.isBlockTag(token.name)) { this.currListFrame.numOpenBlockTags++; } @@ -175,9 +186,15 @@ return tokens; }; -ListHandler.prototype.onListItem = function ( token, frame, prevToken ) { - if (token.constructor === TagTk){ - if (!this.currListFrame) { +ListHandler.prototype.onListItem = function( token, frame, prevToken ) { + if ( token.constructor === TagTk ){ + if ( this.currListFrame ) { + // Ignoring colons inside tags to prevent illegal overlapping. + // Attempts to mimic findColonNoLinks in the php parser. + if ( token.bullets.last() === ":" && this.currListFrame.numOpenTags > 0 ) { + return { token: ":" }; + } + } else { this.currListFrame = newListFrame(); } // convert listItem to list and list item tokens diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js index 157da0b..6b3b958 100644 --- a/tests/parserTests-blacklist.js +++ b/tests/parserTests-blacklist.js @@ -50,6 +50,7 @@ add("wt2html", "Templates: Other wikitext in parameter names (bug 67657)", "<p about=\"#mwt1\" typeof=\"mw:Transclusion\" data-parsoid='{\"dsr\":[0,18,0,0],\"pi\":[[{\"k\":\"''1''\",\"named\":true,\"spc\":[\"\",\"\",\"\",\"\"]}]]}' data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"''1''\":{\"wt\":\"foo\"}},\"i\":0}}]}'>foo</p>"); add("wt2html", "4. 
Indent-Pre and extension tags", "<pre data-parsoid='{\"dsr\":[0,39,1,0]}'>a <span typeof=\"mw:Extension/gallery\" about=\"#mwt1\" data-parsoid='{\"stx\":\"html\",\"src\":\"<gallery>\\nFile:foobar.jpg\\n</gallery>\",\"tagWidths\":[9,10],\"dsr\":[3,39,9,10]}'><gallery>\nFile:foobar.jpg\n</gallery></span></pre>"); add("wt2html", "6. Pre-blocks should extend across lines with leading WS even when there is no wrappable content", "<pre data-parsoid='{\"dsr\":[0,26,1,0]}'>a\n\n <!-- continue -->\nb</pre>\n\n<pre data-parsoid='{\"dsr\":[28,30,1,0]}'>c</pre>\n \n<p data-parsoid='{\"dsr\":[33,34,0,0]}'>d</p>"); +add("wt2html", "Definition Lists: colons occurring in tags", "<dl data-parsoid='{\"dsr\":[0,93,0,0]}'><dt data-parsoid='{\"dsr\":[0,2,1,0]}'>a</dt><dd data-parsoid='{\"stx\":\"row\",\"dsr\":[2,4,1,0]}'>b</dd>\n<dt data-parsoid='{\"dsr\":[5,15,1,0]}'><b data-parsoid='{\"dsr\":[6,15,3,3]}'>a:b</b></dt>\n<dt data-parsoid='{\"dsr\":[16,27,1,0]}'><i data-parsoid='{\"stx\":\"html\",\"dsr\":[17,27,3,4]}'>a:b</i></dt>\n<dt data-parsoid='{\"dsr\":[28,45,1,0]}'><span data-parsoid='{\"stx\":\"html\",\"dsr\":[29,45,6,7]}'>a:b</span></dt>\n<dt data-parsoid='{\"dsr\":[46,61,1,0]}'><div data-parsoid='{\"stx\":\"html\",\"dsr\":[47,61,5,6]}'>a:b</div></dt>\n<dt data-parsoid='{\"dsr\":[62,71,1,0]}'><span typeof=\"mw:Nowiki\" data-parsoid='{\"src\":\"{{\",\"dsr\":[63,65,0,0]}'>{{</span>echo|a</dt><dd data-parsoid='{\"stx\":\"row\",\"dsr\":[71,75,1,0]}'>b<span typeof=\"mw:Nowiki\" data-parsoid='{\"src\":\"}}\",\"dsr\":[73,75,0,0]}'>}}</span></dd>\n<dt data-parsoid='{\"dsr\":[76,93,1,0]}'><span typeof=\"mw:Nowiki\" data-parsoid='{\"src\":\"{{\",\"dsr\":[77,79,0,0]}'>{{</span>echo|<i data-parsoid='{\"dsr\":[84,91,2,2]}'>a:b</i><span typeof=\"mw:Nowiki\" data-parsoid='{\"src\":\"}}\",\"dsr\":[91,93,0,0]}'>}}</span></dt></dl>"); add("wt2html", "External links: Free with trailing punctuation", "<p data-parsoid='{\"dsr\":[0,198,0,0]}'><a rel=\"mw:ExtLink\" href=\"http://example.com\" 
data-parsoid='{\"stx\":\"url\",\"dsr\":[0,18,0,0]}'>http://example.com</a>,\n<a rel=\"mw:ExtLink\" href=\"http://example.com;\" data-parsoid='{\"stx\":\"url\",\"dsr\":[20,39,0,0]}'>http://example.com;</a>\n<a rel=\"mw:ExtLink\" href=\"http://example.com\\\" data-parsoid='{\"stx\":\"url\",\"dsr\":[40,59,0,0]}'>http://example.com\\</a>\n<a rel=\"mw:ExtLink\" href=\"http://example.com\" data-parsoid='{\"stx\":\"url\",\"dsr\":[60,78,0,0]}'>http://example.com</a>.\n<a rel=\"mw:ExtLink\" href=\"http://example.com\" data-parsoid='{\"stx\":\"url\",\"dsr\":[80,98,0,0]}'>http://example.com</a>:\n<a rel=\"mw:ExtLink\" href=\"http://example.com!\" data-parsoid='{\"stx\":\"url\",\"dsr\":[100,119,0,0]}'>http://example.com!</a>\n<a rel=\"mw:ExtLink\" href=\"http://example.com?\" data-parsoid='{\"stx\":\"url\",\"dsr\":[120,139,0,0]}'>http://example.com?</a>\n<a rel=\"mw:ExtLink\" href=\"http://example.com\" data-parsoid='{\"stx\":\"url\",\"dsr\":[140,158,0,0]}'>http://example.com</a>)\n<a rel=\"mw:ExtLink\" href=\"http://example.com/url_with_(brackets)\" data-parsoid='{\"stx\":\"url\",\"dsr\":[160,198,0,0]}'>http://example.com/url_with_(brackets)</a></p>"); add("wt2html", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid", "<p data-parsoid='{\"dsr\":[0,204,0,0]}'><i data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[0,29,2,0]}'><a rel=\"mw:ExtLink\" href=\"http://example.com\" data-parsoid='{\"targetOff\":22,\"contentOffsets\":[22,28],\"dsr\":[2,29,20,1]}'>text<i data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[26,28,2,0]}'></i></a></i>\n<a rel=\"mw:ExtLink\" href=\"http://example.com\" data-parsoid='{\"targetOff\":50,\"contentOffsets\":[50,57],\"dsr\":[30,58,20,1]}'><b data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[50,57,3,0]}'>text</b></a><b data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[58,61,3,0]}'></b>\n<i data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[62,106,2,0]}'>Something <a rel=\"mw:ExtLink\" href=\"http://example.com\" 
data-parsoid='{\"targetOff\":94,\"contentOffsets\":[94,105],\"dsr\":[74,106,20,1]}'>in italic<i data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[103,105,2,0]}'></i></a></i>\n<i data-parsoid='{\"dsr\":[107,164,2,2]}'>Something <a rel=\"mw:ExtLink\" href=\"http://example.com\" data-parsoid='{\"targetOff\":139,\"contentOffsets\":[139,160],\"dsr\":[119,161,20,1]}'>mixed<b data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[144,160,3,0]}'><i data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[147,160,2,0]}'>, even bold</i></b></a>'</i>\n<b data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[165,204,3,0]}'><i data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[168,204,2,0]}'>Now <a rel=\"mw:ExtLink\" href=\"http://example.com\" data-parsoid='{\"targetOff\":194,\"contentOffsets\":[194,203],\"dsr\":[174,204,20,1]}'>both<b data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[198,203,3,0]}'><i data-parsoid='{\"autoInsertedEnd\":true,\"dsr\":[201,203,2,0]}'></i></b></a></i></b></p>"); add("wt2html", "External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)", "<p data-parsoid='{\"dsr\":[0,60,0,0]}'><i data-parsoid='{\"dsr\":[0,60,2,2]}'>Some <a rel=\"mw:ExtLink\" href=\"http://example.com/\" data-parsoid='{\"targetOff\":28,\"contentOffsets\":[28,56],\"dsr\":[7,57,21,1]}'>pretty <i data-parsoid='{\"dsr\":[35,46,2,2]}'>italics</i> and stuff</a>!</i></p>"); @@ -429,6 +430,7 @@ add("wt2wt", "Templates: Handle comments in the target", "{{echo\n<!-- should be ignored -->\n|foo}}\n\n{{echo<!-- should be ignored -->\n|foo}}\n\n{{echo<!-- should be ignored -->|foo}}\n\n{{echo|foo}}"); add("wt2wt", "4. 
Indent-Pre and extension tags", " a <gallery>\n File:foobar.jpg\n </gallery>\n"); add("wt2wt", "Definition lists: self-closed tag", ";one<br />two : two-line fun"); +add("wt2wt", "Definition Lists: colons occurring in tags", ";a:b\n;'''<nowiki>a:b</nowiki>'''\n;<i><nowiki>a:b</nowiki></i>\n;<span><nowiki>a:b</nowiki></span>\n;<div><nowiki>a:b</nowiki></div>\n;{{echo|a:b}}\n;{{echo|''<nowiki>a:b</nowiki>''}}"); add("wt2wt", "BUG 289: \">\"-token in bracketed URL", "[http://www.example.com/ <hello> stuff]\n"); add("wt2wt", "BUG 289: literal \">\"-token in bracketed URL", "[http://www.example.com/ <b>html</b> stuff]\n"); add("wt2wt", "BUG 289: literal double quote in bracketed URL", "[http://www.example.com/ \"hello\" stuff]\n"); @@ -1036,6 +1038,7 @@ add("html2wt", "Definition Lists: Mixed Lists: Test 10", "*#; foo \n*#: bar\n"); add("html2wt", "Definition Lists: Mixed Lists: Test 11", "\n*#*#;*;; foo :bar\n*#*#; boo :baz\n"); add("html2wt", "Definition Lists: Weird Ones: Test 1", "\n*#;*::;; foo : bar (who uses this?)\n"); +add("html2wt", "Definition Lists: colons occurring in tags", "; a\n: b\n; '''<nowiki>a:b</nowiki>'''\n; ''<nowiki>a:b</nowiki>''\n; <span><nowiki>a:b</nowiki></span>\n; <div><nowiki>a:b</nowiki></div>\n; a\n: b\n; ''<nowiki>a:b</nowiki>''\n"); add("html2wt", "External links: URL in text", "URL in text: http://example.com\n"); add("html2wt", "External links: Clickable images", "ja-style clickable images: http://meta.wikimedia.org/upload/f/f1/Ncwikicol.png\n"); add("html2wt", "External links: raw ampersand", "Old & use: http://x&y\n"); @@ -2014,6 +2017,20 @@ add("selser", "Definition Lists: Weird Ones: Test 1 [[[[[[2]]]]]]", "*#: m79ersaiyeqaor\n;*::;; foo : bar (who uses this?)"); add("selser", "Definition Lists: Weird Ones: Test 1 [[[[[[[2]]]]]]]", "*#;q93hogh7bi2ro1or\n*::;; foo : bar (who uses this?)"); add("selser", "Definition Lists: Weird Ones: Test 1 [[[[2]]]]", "*# mug1j4xbiskprpb9\n#;*::;; foo : bar (who uses this?)"); +add("selser", 
"Definition Lists: colons occurring in tags [[0,3,0,[1],2,[[2]],0,0,0,[3],3,3,[4,0],0,1]]", ";a\n;'''<nowiki>a:b</nowiki>'''\n: ys0n0w3d4116ecdi\n;<i><nowiki>fy196dn5diyvte29a:b</nowiki></i>\n;<span>a:b</span>\n;:rvorhd48coapds4i}}\n;{{echo|''a:b''}}"); +add("selser", "Definition Lists: colons occurring in tags [[[4],0,2,3,0,2,3,0,0,[1],0,1,[0,4],4,2]]", ";9gfzwpnyxe5yu8fr:b\n: loymwoag7t4kj4i\n: an61hey1xada38fr\n;<i>a:b</i>\n;<span>a:b</span>\n;<div data-foobar=\"my31jm28o5x3l3di\"><nowiki>a:b</nowiki></div>\n;{{echo|a:bsyjfu7my13fk1emi\n: humc3hu91lhj8aor\n: cxt9vsc9ijrwwmi\n;{{echo|''a:b''}}"); +add("selser", "Definition Lists: colons occurring in tags [2]", "qg79k8v4av6de7b9\n;a:b\n;'''a:b'''\n;<i>a:b</i>\n;<span>a:b</span>\n;<div>a:b</div>\n;{{echo|a:b}}\n;{{echo|''a:b''}}"); +add("selser", "Definition Lists: colons occurring in tags [[[3],0,0,4,4,0,3,1,0,[1],4,3,3,0,3]]", ";:b\n: 51som0s9u4lmzpvi\n: ch4hiy5obuo9a4i\n;<i>a:b</i>\n;<span><nowiki>a:b</nowiki></span>\n;<div data-foobar=\"973qaj55ay2b7qfr\"><nowiki>a:b</nowiki></div>\n: rtjgxstacmrvygb9\n"); +add("selser", "Definition Lists: colons occurring in tags [[0,2,0,2,0,3,2,[[2]],0,2,0,4,2,2,[[3],4,4,0]]]", ";a\n: fj95vp8edfl59udi:b\n: u24q4w5kw2g4aemi\n;'''a:b'''\n: h4vsyritaat3mcxr\n;<span><nowiki>ihs4g0zd6w9daemia:b</nowiki></span>\n: 7jv6vu2zrfx39pb9\n;<div>a:b</div>\n: xedf27pleyuy2e29\n: xftw6z4tzn4jkyb9:b}}\n: dpnbk1jpt1jlwhfr\n;<nowiki></nowiki>6c9jmtk06vx9lik9q75d7mgt3ghhyqfr}}"); +add("selser", "Definition Lists: colons occurring in tags [[3,3,2,2,3,0,2,1,4,1,3,4,1,4,4]]", ": rkfl49pzedp74x6r\n: j4bea9vr60llerk9\n;'''a:b'''\n;<i>a:b</i>\n: mrdchqoarp2pgb9\n;<span><nowiki>a:b</nowiki></span>\n: 6vvs2ldmzswf80k9\n;<div><nowiki>a:b</nowiki></div>\n: vo4ojp5oe96647vi:b}}\n: kh9h26ygii3eg66r\n: j5ih9vueg4l323xr"); +add("selser", "Definition Lists: colons occurring in tags [[[3],0,0,2,3,[2],2,1,0,[[2]],0,0,4,0,[1,0,3,2]]]", ";:b\n: 
6h7k5vwrusmm42t9\n;'''a:b'''\n;8rd6vrl47o0w9udi<i><nowiki>a:b</nowiki></i>\n: 32oaq1raqxb6gvi\n;<span><nowiki>a:b</nowiki></span>\n;<div><nowiki>uqtta9p97bep14ia:b</nowiki></div>\n;{{echo|a\n: dz52ya2iz46j38fr\n;{{echo|yphodo863oapds4i}}"); +add("selser", "Definition Lists: colons occurring in tags [[4,3,2,0,0,[[4]],0,4,4,0,2,4,3,3,[1,0,[3],[2]]]]", ": ou6k8yl61aotuik9\n: s8ck7qu9l7vte29\n;'''a:b'''\n;<i>g4rfjaxfaohqto6r</i>\n: rt1n440zvjn3766r\n: tz22po3agxyzm2t9\n;<div>a:b</div>\n: snxgpqwbdexnipb9\n: ikuc8ebf076fajor\n;{{echo|''<nowiki/>''<nowiki>pm92s2mj26clq5mi}}</nowiki>"); +add("selser", "Definition Lists: colons occurring in tags [[1,1,3,[3],0,[3],0,1,4,[1],0,3,2,0,[4,0,[2],[4]]]]", ";a:b\n;\n;\n;<span>a:b</span>\n: h8hh4v34xh2maemi\n;<div data-foobar=\"u0847t68ar2akyb9\"><nowiki>a:b</nowiki></div>\n: sma0ums92l2z9f6r:b}}\n;8hiue1du4ujcq5miecho|''<nowiki>eesi68mx5ok1emia:b</nowiki>''<nowiki>x6nz2gbsesdfs9k9</nowiki>"); +add("selser", "Definition Lists: colons occurring in tags [[3,[4],0,[[2]],4,0,4,[4],0,[[2]],2,[[4],2],0,0,4]]", ":5lkbigrdwdx11yvi\n;'''<nowiki>cqslr7dvsnd5z5mia:b</nowiki>'''\n: masspab3x9don7b9\n;<i>a:b</i>\n: oa29nopebnoy9zfr\n;98d7as2k961ug14i\n;<div><nowiki>1bwhvaem3l0izfra:b</nowiki></div>\n: vnpj9u2knmmzehfr\n;<nowiki>zv7k43lu5scq5mi</nowiki>1f68p993haorecho|a:b}}\n: hxhtxsqncdi"); +add("selser", "Definition Lists: colons occurring in tags [[0,0,4,[2],3,[4],0,[[3]],4,[[3]],0,2,2,0,[0,0,0,1]]]", ";a:b\n: h3264ujrqrkq33di\n;h56kdmfdrrv34n29'''<nowiki>a:b</nowiki>'''\n;fxo6p8aww4p9zfr\n;<span></span>\n: h438k9e0o3aexw29\n;<div></div>\n: diwu40dj1dims4i\n;{{echo|a\n: v7xj8olgqb4eu3di:b}}\n;{{echo|''a:b''}}"); +add("selser", "Definition Lists: colons occurring in tags [[[2],[4],0,0,2,2,0,0,0,[3],0,[3,0],1,0,[3,3,[3],1]]]", ";2i4uz6v2wrjjdcxra:8rtdy7xxnyjk0529\n;'''a:b'''\n: f7n8b211erhehfr\n: lxtc8w2sd6phw7b9\n;<i>a:b</i>\n;<span>a:b</span>\n;\n;echo|a:b}}\n;''<nowiki/>''}}"); +add("selser", "Definition Lists: colons occurring in tags 
[1]", ";a:b\n;'''a:b'''\n;<i>a:b</i>\n;<span>a:b</span>\n;<div>a:b</div>\n;{{echo|a:b}}\n;{{echo|''a:b''}}"); +add("selser", "Definition Lists: colons occurring in tags [[3,1,0,4,4,3,0,1,0,3,0,4,1,0,2]]", ":b\n: 31ffgp6invtb7qfr\n: 0wohsqpeyj0s5rk9\n;<span><nowiki>a:b</nowiki></span>\n: w56o0bxf1aif6r:b}}\n: wfzg6tcjj6f20529\n;{{echo|''a:b''}}"); add("selser", "BUG 289: \">\"-token in bracketed URL [2]", "ll0lxyd1rfgpsyvi\n\n[http://www.example.com/<hello> stuff]"); add("selser", "BUG 289: \">\"-token in bracketed URL [1]", "[http://www.example.com/<hello> stuff]"); add("selser", "BUG 289: \">\"-token in bracketed URL [[2]]", "kemi29jie4b1emi[http://www.example.com/<hello> stuff]"); diff --git a/tests/parserTests.txt b/tests/parserTests.txt index 77694c2..0fe2502 100644 --- a/tests/parserTests.txt +++ b/tests/parserTests.txt @@ -3966,6 +3966,30 @@ </ul> !! end +!! test +Definition Lists: colons occurring in tags +!! wikitext +;a:b +;'''a:b''' +;<i>a:b</i> +;<span>a:b</span> +;<div>a:b</div> +;{{echo|a:b}} +;{{echo|''a:b''}} +!! html +<dl><dt>a</dt> +<dd>b</dd> +<dt><b>a:b</b></dt> +<dt><i>a:b</i></dt> +<dt><span>a:b</span></dt> +<dt><div>a:b</div></dt> +<dt>a</dt> +<dd>b</dd> +<dt><i>a:b</i></dt></dl> + +!! end + + ### ### External links ### -- To view, visit https://gerrit.wikimedia.org/r/155868 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I2d8403eb732fdb022e8c87a990fd392a62e7f477 Gerrit-PatchSet: 5 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <[email protected]> Gerrit-Reviewer: Arlolra <[email protected]> Gerrit-Reviewer: Cscott <[email protected]> Gerrit-Reviewer: Marcoil <[email protected]> Gerrit-Reviewer: Subramanya Sastry <[email protected]> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
