Subramanya Sastry has uploaded a new change for review. https://gerrit.wikimedia.org/r/179917
Change subject: Implement a heuristic to strip <nowiki/> in some quote scenarios ...................................................................... Implement a heuristic to strip <nowiki/> in some quote scenarios * When <i> and <b> tags are matched up properly on a line, any quote char that precedes a <i> or <b> tag does not need to be protected with a <nowiki/> * Since this is hard to do without a full-line context, the <nowiki/> is stripped after the fact by examining the current line of serialized output. * Lets us get rid of a few <nowiki/> copies of parser tests and fixes some failing tests. Change-Id: I81d15ecce888f6e0104fb089329a9db77828d86c --- M lib/mediawiki.WikitextSerializer.js M lib/wts.SerializerState.js M lib/wts.escapeWikitext.js M tests/parserTests-blacklist.js M tests/parserTests.txt 5 files changed, 51 insertions(+), 56 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/17/179917/1 diff --git a/lib/mediawiki.WikitextSerializer.js b/lib/mediawiki.WikitextSerializer.js index 8ac9133..22e61e3 100644 --- a/lib/mediawiki.WikitextSerializer.js +++ b/lib/mediawiki.WikitextSerializer.js @@ -1240,6 +1240,30 @@ return out; } +// This implements a heuristic to strip a common source of <nowiki/> +// - When <i> and <b> tags are matched up properly, any single ' char +// before <i> or <b> does not need <nowiki/> protection. +function stripUnnecessaryQuoteNowikis(wt) { + // no-quotes OR matched quote segments with 5/3/2 quotes OR single-quote char + var testRE = /^[^']+$|((^|[^']+)('''''[^']+'''''|'''[^']+'''|''[^']+''|'))+$/; + + return wt.split(/\n|$/).map(function(line) { + // If all segments match the above regexp, the <nowiki/>s are unnecessary. 
+ var pieces = line.split(/<nowiki\/>/); + var n = pieces.length; + for (var i = 0; i < n; i++) { + // All but the last piece should end in a single ' char + // since that is the only scenario we are optimizing for here + if (!testRE.test(pieces[i]) || + (i < n-1 && !/(^|[^'])'$/.test(pieces[i]))) + { + return line; + } + } + return pieces.join(''); + }).join("\n"); +} + /** * Serialize an HTML DOM document. */ @@ -1287,6 +1311,10 @@ // Strip excess/useless nowikis out = stripUnnecessaryIndentPreNowikis(out); } + if (state.hasQuoteNowikis) { + // Strip excess/useless nowikis + out = stripUnnecessaryQuoteNowikis(out); + } if (chunkCB) { // Pass entire output in one big chunk diff --git a/lib/wts.SerializerState.js b/lib/wts.SerializerState.js index f65167d..7ec2337 100644 --- a/lib/wts.SerializerState.js +++ b/lib/wts.SerializerState.js @@ -38,6 +38,10 @@ * Did we introduce nowikis for indent-pre protection? * If yes, we might run a post-pass to strip useless ones. * + * hasQuoteNowikis + * Did we introduce nowikis to preserve quote semantics? + * If yes, we might run a post-pass to strip useless ones. + * * wikiTableNesting * Records the nesting level of wikitext tables * @@ -66,6 +70,7 @@ inIndentPre: false, inPHPBlock: false, hasIndentPreNowikis: false, + hasQuoteNowikis: false, wikiTableNesting: 0, wteHandlerStack: [], // XXX: replace with output buffering per line diff --git a/lib/wts.escapeWikitext.js b/lib/wts.escapeWikitext.js index f90a155..37f07ea 100644 --- a/lib/wts.escapeWikitext.js +++ b/lib/wts.escapeWikitext.js @@ -58,7 +58,7 @@ } }; -function hasLeadingEscapableQuoteChar(text, opts) { +function hasLeadingEscapableQuoteChar(state, text, opts) { var node = opts.node; // Use 'node.textContent' to do the tests since it hasn't had newlines // stripped out from it. 
@@ -80,7 +80,7 @@ return false; } -function hasTrailingEscapableQuoteChar(text, opts) { +function hasTrailingEscapableQuoteChar(state, text, opts) { var node = opts.node; // Use 'node.textContent' to do the tests since it hasn't had newlines // stripped out from it. @@ -109,7 +109,7 @@ // wrapper since there are on other quote chars on the line. // // This is checking text-node siblings of i/b tags. -function escapedIBSiblingNodeText(text, opts) { +function escapedIBSiblingNodeText(state, text, opts) { // For a sequence of 2+ quote chars, we have to // fully wrap the sequence in <nowiki>...</nowiki> // <nowiki/> at the start and end doesn't work. @@ -128,11 +128,12 @@ // Check whether the head and/or tail of the text needs <nowiki/> protection. var out = ''; - if (hasTrailingEscapableQuoteChar(text, opts)) { + if (hasTrailingEscapableQuoteChar(state, text, opts)) { + state.hasQuoteNowikis = true; out = text + "<nowiki/>"; } - if (hasLeadingEscapableQuoteChar(text, opts)) { + if (hasLeadingEscapableQuoteChar(state, text, opts)) { out = "<nowiki/>" + (out || text); } @@ -535,8 +536,8 @@ // Quote-escape test if (/''+/.test(text) - || hasLeadingEscapableQuoteChar(text, opts) - || hasTrailingEscapableQuoteChar(text, opts)) + || hasLeadingEscapableQuoteChar(state, text, opts) + || hasTrailingEscapableQuoteChar(state, text, opts)) { // Check if we need full-wrapping <nowiki>..</nowiki> // or selective <nowiki/> escaping for quotes. 
@@ -548,7 +549,7 @@ state.env.log("trace/wt-escape", "---quotes: full nowiki wrap---"); return this.escapedText(state, false, text, true); } else { - var quoteEscapedText = escapedIBSiblingNodeText(text, opts); + var quoteEscapedText = escapedIBSiblingNodeText(state, text, opts); if (quoteEscapedText) { state.env.log("trace/wt-escape", "---sibling of i/b tag---"); return quoteEscapedText; diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js index 2b54a07..67b0486 100644 --- a/tests/parserTests-blacklist.js +++ b/tests/parserTests-blacklist.js @@ -507,7 +507,6 @@ add("wt2wt", "ISBN code coverage", "ISBN 978-0-1234-56 789\n"); add("wt2wt", "RFC code coverage", "RFC 983 987\n"); add("wt2wt", "Image with page parameter", "[[File:LoremIpsum.djvu]]\n"); -add("wt2wt", "Another italics / bold test", " '<nowiki/>'' ''x'\n"); add("wt2wt", "Don't fall for the self-closing div", "<div>hello world</div>"); add("wt2wt", "Parsing of overlapping (improperly nested) inline html tags", "<span><s>x</span>\n"); add("wt2wt", "Don't break table handling if language converter markup is in the cell.", "{|\n|-\n| -{R |B}-\n|}"); @@ -1584,7 +1583,6 @@ add("html2wt", "Width-sized image (using px, with following whitespace - test regression from r39467)", "http://example.com/images/thumb/3/3a/Foobar.jpg/640px-Foobar.jpg\n"); add("html2wt", "Width-sized image (using px, with preceding whitespace - test regression from r39467)", "http://example.com/images/thumb/3/3a/Foobar.jpg/640px-Foobar.jpg\n"); add("html2wt", "Image with page parameter", "[[File:LoremIpsum.djvu]]\n"); -add("html2wt", "Another italics / bold test", " '<nowiki/>'' ''x'\n"); add("html2wt", "HTML with raw HTML ($wgRawHtml==true)", "<script>alert(1);</script>\n"); add("html2wt", "Parents of subpages, one level up", "[/index.php?title=Subpage_test/L1/L2&action=edit&redlink=1 L2]\n"); add("html2wt", "Parents of subpages, one level up, not named", "[/index.php?title=Subpage_test/L1/L2&action=edit&redlink=1 
Subpage test/L1/L2]\n"); @@ -2621,7 +2619,6 @@ add("selser", "RFC code coverage [[2,0,2]]", "8rmi32979htfn7b9RFC 983 4i0lf243q1j4te29987"); add("selser", "Image with page parameter [2]", "t1gktsrwi7iizfr\n\n[[File:LoremIpsum.djvu|page=2]]"); add("selser", "Image with page parameter [1]", "[[File:LoremIpsum.djvu|page=2]]"); -add("selser", "Another italics / bold test [2]", "kzib9uzhe4su4n29\n ''' ''x'"); add("selser", "dt/dd/dl test [[1]]", ";;;::"); add("selser", "dt/dd/dl test [[[1]]]", ";;;::"); add("selser", "dt/dd/dl test [[[[[1]]]]]", ";;::"); @@ -2817,11 +2814,12 @@ add("selser", "Entities in ref name [1]", "<ref name=\"test & me\">hi</ref>"); add("selser", "Headings: 5. Empty headings [[2],3,2,0,4,4,0,2,1,3,0]", "=hnjo2713bjvjwcdi<nowiki/>=\nx2d05yq3dapaxlxr\n==<nowiki/>==\n\nkaovt9si227ta9k9\n\nb2688fvzpaocrf6r\n====<nowiki/>====\nljshl952j0emte29\n\n=====<nowiki/>=====\n\n======<nowiki/>======"); add("selser", "Headings: 6a. Heading chars in SOL context (with trailing spaces) [4,0,1,0,4,4,[2],3]", "o4fu7yz6zedvlsor\n\n<nowiki>=a=</nowiki> \n\nfzi337zwgfmbzkt9\n\nrziif3qr3rspp66r\n\nzm9kz709iqpsnhfr<nowiki>=a=</nowiki> \t"); -add("selser", "1. Quotes inside <b> and <i> [[2,3,[1],4,0,0,[3],0,2,2,0,4,0,0,4,1,0,0,0,0,3,1,4,3,2,4,0,0,0,0,0,0,0,2,1,0,2,[2],0,3,0,0,4,0,0,1]]", "b9ve2r72aq44e7b9''<nowiki/>'foo'<nowiki/>''''<nowiki>''foo''</nowiki>''jduad2jvmauac3di''<nowiki>'''foo'''</nowiki>''\n''<nowiki/>''<nowiki/>hb7ucb9xukofajor's\nimf9p1zrvbbdfgvi'''<nowiki/>'foo'<nowiki/>'''\nd92wpym0cizwu3di\n'''<nowiki>'''foo'''</nowiki>'''4mia6l6nhzolxr'''foo'<nowiki/>''bar'<nowiki/>''baz'''\n'''foo'''<nowiki/>'s\n'''foo''6wag5p5b648l4n29rs74ede4dxiuow29<nowiki/>s455j5uyzl0daemi<nowiki/>''foo''<nowiki/>'\n'<nowiki/>'''foo'''\nc9hosabzbqhbyb9'''foo'''<nowiki/>'\n'1yl4yeqhhtr19k9<nowiki/>'''9k3pgwyb0uvaq0k9foo'''<nowiki/>''fools'<span> errand</span>''\n3mwa3kwctc8r529\na|!*#-:;+-~[]{}b'<nowiki/>''x''"); -add("selser", "1. 
Quotes inside <b> and <i> [[[2,0,3],0,[[3]],4,4,2,[3],3,0,[0,3,1],3,[2],3,[[3]],3,[0,1,2,2],3,3,0,4,4,4,0,[4],0,0,0,0,4,0,2,1,0,3,0,2,0,2,4,2,[3,1],4,1,2,0,4]]", "''rou4oqqj45pwg66r<nowiki/>'foo'''\n''<nowiki></nowiki>''6o82gwbgw9xxn7b9n4fc723rbxtg9zfr2po5rm4z5rhehfr\n''<nowiki/>''<nowiki/>'s\n'''<nowiki/><nowiki/>''''''8vtzkg2u8wnopqfr<nowiki>''foo''</nowiki>''''''<nowiki></nowiki>''''''foo'<nowiki/>ipon7rm9iy0hpvi''bar'<nowiki/>''jsehige2uxov42t9baz'''<nowiki/>dinfburnti0ms4ibximthck3qgaxlxrrbhllev1hoo2mx6r\n''tvi87wnpldblc8fr''<nowiki/>'\n'<nowiki/>''foo''mans72mc33csdcxr'\n'dnvvbq5ayqc6usor<nowiki/>'''foo'''\n<nowiki/>9293k6dvvmoq1tt9'\n'<nowiki/>zs5z7v3q0qaug14i'''foo'''deu4ioyka97fogvii2578yu6svt5ipb9'\n''<span data-foobar=\"ntagvrajvtoqolxr\"> errand</span>''icug3imme9mjkyb9''<span>fool</span>'s errand''wglst0tvcv2w3ik9\na|!*#-:;+-~[]{}b'<nowiki/>i9b91kkaoiggb9"); -add("selser", "1. Quotes inside <b> and <i> [[3,0,[[4]],4,0,0,3,0,0,[2,0,3],0,2,0,[4],0,[0,0,1,2],4,0,0,2,0,1,2,0,0,3,0,[4],0,0,3,[2],3,4,3,4,0,[4],1,3,1,3,3,4,0,2]]", "\n''<nowiki>9du4tukpxr5p14i</nowiki>''a3oxn4o6xuoez5mi''<nowiki>'''foo'''</nowiki>''\n<nowiki/>'s\n'''zb6b8p4dsrgj5rk9<nowiki/>'foo''''\nn5g2nainzi9evcxr'''<nowiki>''foo''</nowiki>'''\n'''fjf9en2rxyw1xlxr'''\n'''foo'<nowiki/>''bar'<nowiki/>''3t058jjruq4wvcxrbaz'''w3assj8fjchm2t9'''foo'''<nowiki/>406bo4oovs3sdcxr's\n'<nowiki/>''foo''l8ur9en9wqe8w7b9\n''foo''<nowiki/><nowiki/>''upyarw5tif1rlik9''<nowiki/>'\n''''xf76q2gb9hmbcsorfoo'''zeaqonfmvysexw295dvwath003dqjjor<nowiki/>'''q5arckx2cdk4vx6r'''<nowiki/>''fools'<span> errand</span>''4prh3f1lk2j7cik9<nowiki/>wyd9n604zjafko6r''x''"); -add("selser", "1. 
Quotes inside <b> and <i> [[4,2,0,0,[2],0,1,0,4,[0,0,3],2,0,2,[[3]],0,[0,1,[3,0],0],0,4,4,0,1,2,0,[3],0,0,1,0,4,4,3,0,4,[2],0,4,4,3,0,4,4,4,[[4],3],0,4,0]]", "acy673rinovfgviplr0nwpq14079zfr\n''<nowiki>''foo''</nowiki>''\n''x9s8roz7iecul3di<nowiki>'''foo'''</nowiki>''\n''foo''<nowiki/>ncaawjaqbiall3di'''<nowiki/>'foo''''7mofib93ncj714i\n'''<nowiki>''foo''</nowiki>'''a0h559r0kaf9a4i\n'''<nowiki></nowiki>'''\n'''foo'<nowiki/>''<nowiki/>''baz'''\n265zz6wn68zg2e29dcfy6wfyuuonipb9's\n'<nowiki/>n0so9m9gubthjjor''foo''\n''<nowiki/>''<nowiki/>'\n'<nowiki/>''foo''eu8bi30kt40lik9enwlc3t7myt4kj4i'''foo'''x6ngpya7ros10pb9'''pil88s26p0mp9zfrfoo'''<nowiki/>guy0jrmki6v18aor79cji1kde00jxlxr<nowiki/>ce9yefc1ixp3nmiudh3zchhff94fgvi41nysayj1hsemi''<span>6nuqrvo7e9qtcsor</span>''\na|!*#-:;+-~[]{}b'j80zfyt47psyvi''x''"); -add("selser", "1. Quotes inside <b> and <i> [[4,0,4,0,0,2,1,4,2,1,3,[3],3,[[2]],3,3,3,0,4,0,3,4,0,[3],0,0,4,[2],3,0,0,[4],0,3,0,0,1,[3],0,0,[0,4],2,[0,4],4,0,3]]", "bcv9t18xvahh0k9\n7j4ebaxa7o9ftj4i\n''<nowiki>'''foo'''</nowiki>''mry7gwh281tt9\n''foo''6z57b2fhe9yn9udi7d4vr8m4oxcivn29's\n'''<nowiki/>'foo'<nowiki/>''''''<nowiki/>''''''<nowiki>x4ljrkxn00jxlxr'''foo'''</nowiki>''''''foo'''eg408kyvbtck57b9's\n'72iu8q6iuihehfr\n''<nowiki/>''<nowiki/>'\n'hs9psbz3fcdw8kt9''hpob0uhvs0885mifoo'''\n'<nowiki/>'''1ikpa9oovowghkt9'''\n<nowiki/>'\n'<nowiki/>'''<nowiki/>'''<nowiki/>'\n''fools'2cecdk1nov927qfr''qgboi2cpqmovquxr\n''<span>fool</span>kx25x3hw1m7vi''8qfgpvlpg7v9ggb9<nowiki/>\n"); +add("selser", "1. 
Quotes inside <b> and <i> [[3,2,1,3,0,3,4,0,2,3,0,1,4,1,3,[2,0,0,0],0,0,0,0,[3],0,0,4,0,0,2,3,3,[2],3,4,3,0,0,3,4,3,3,0,2,0,[4]]]", "3ggp4kwixzloko6r\n''<nowiki>''foo''</nowiki>''''<nowiki>'''foo'''</nowiki>''wednybdch5ixusor<nowiki/>jiu7uxfou9n6d2t9's\n\n'''<nowiki>''foo''</nowiki>'''5yp9smtnl1bgldi'''<nowiki>'''foo'''</nowiki>''''''0adusjelvz3t0529foo'<nowiki/>''bar'<nowiki/>''baz'''\n'''foo'''<nowiki/>'s\n'''<nowiki/>''\n''foo''myh73q54kveljtt9'\n'c10qrc4l4kkjra4i''foo'''''c9meu8765gjpp66rfoo'''1wwik15k156x0f6r'\n'8qu5jiqalmvg3nmi\n191wmp849f7gmn29''<span>fool</span>'s errand''\na|!*#-:;+-~[]{}b'''6zhdeznwfwxko6r''"); +add("selser", "1. Quotes inside <b> and <i> [[[2,0,3],0,[[3]],4,4,2,[3],3,0,[0,3,1],3,[2],3,[[3]],3,[0,1,2,2],3,3,0,4,4,4,0,3,0,0,0,0,4,[3],2,[3],0,2,0,2,4,2,[3,1],4,1,2,[4]]]", "''rou4oqqj45pwg66r<nowiki/>'foo'''\n''<nowiki></nowiki>''6o82gwbgw9xxn7b9n4fc723rbxtg9zfr2po5rm4z5rhehfr\n''<nowiki/>''<nowiki/>'s\n'''<nowiki/><nowiki/>''''''8vtzkg2u8wnopqfr<nowiki>''foo''</nowiki>''''''<nowiki></nowiki>''''''foo'<nowiki/>ipon7rm9iy0hpvi''bar'<nowiki/>''jsehige2uxov42t9baz'''<nowiki/>dinfburnti0ms4ibximthck3qgaxlxrrbhllev1hoo2mx6r''foo''<nowiki/>'\n'<nowiki/>''foo''<nowiki/>tvi87wnpldblc8fr'''<nowiki/>'''mans72mc33csdcxr\n'''<nowiki/>'''<nowiki/>dnvvbq5ayqc6usor'\n'3mtgpe7obxirudi'''foo'''9293k6dvvmoq1tt9zs5z7v3q0qaug14i'\n''<span data-foobar=\"deu4ioyka97fogvi\"> errand</span>''i2578yu6svt5ipb9''<span>fool</span>'s errand''icug3imme9mjkyb9\na|!*#-:;+-~[]{}b'''ir49227rhe4mfgvi''"); +add("selser", "1. 
Quotes inside <b> and <i> [[3,0,[[3]],0,1,0,0,1,4,4,2,2,4,2,0,1,4,4,4,0,[4],4,0,3,0,0,2,0,0,0,3,3,0,4,0,2,0,3,3,0,2,3,[2]]]", "\n''<nowiki></nowiki>''\n''<nowiki>'''foo'''</nowiki>''\n''foo''<nowiki/>zn8e50rixfo2mx6rnwxcb0vltssmj9k971d5b4dgu6av2t9\nb5vbc2u00aoflxr'''<nowiki>''foo''</nowiki>'''2pmyruoajdjcerk9x1opqkhimg77rpb9'''<nowiki>'''foo'''</nowiki>'''\n'''foo'<nowiki/>''bar'<nowiki/>''baz'''q8wvj0qw43x47vio054hnv223wbqpvid0g2obid9akvgqfr's\n'<nowiki/>''dj4fdj44116ecdi''qxeb9eikbg3krzfr''foo''<nowiki/>'\n'<nowiki/>pnvdzbfba10pb9''foo''<nowiki/>'\n''''foo'''<nowiki/>0kybjloy4qf9wwmi<nowiki/>5h2hbokwkouyds4i'''foo'''<nowiki/>\ne7bgdd2wwqy6i529''<span>fool</span>'s errand''''0h73fkwca49ggb9x''"); +add("selser", "1. Quotes inside <b> and <i> [[3,0,[[4]],4,0,0,3,0,0,[2,0,3],0,2,0,[4],0,[0,0,1,2],4,0,0,2,[2],2,0,0,3,0,[4],0,0,3,0,1,2,4,3,4,0,0,4,2,3,2,2]]", "\n''<nowiki>9du4tukpxr5p14i</nowiki>''a3oxn4o6xuoez5mi''<nowiki>'''foo'''</nowiki>''\n<nowiki/>'s\n'''zb6b8p4dsrgj5rk9<nowiki/>'foo''''\nn5g2nainzi9evcxr'''<nowiki>''foo''</nowiki>'''\n'''fjf9en2rxyw1xlxr'''\n'''foo'<nowiki/>''bar'<nowiki/>''3t058jjruq4wvcxrbaz'''w3assj8fjchm2t9'''foo'''<nowiki/>406bo4oovs3sdcxr's\n'<nowiki/>''f7lfw6f9p9442t9foo''l8ur9en9wqe8w7b9\n''foo''<nowiki/><nowiki/>''upyarw5tif1rlik9''<nowiki/>'\n'\n'''foo'''zeaqonfmvysexw29<nowiki/>5dvwath003dqjjorq5arckx2cdk4vx6r<nowiki/>'\nbofwi490po8q6w29cnmgvy3btmcjif6r\n4prh3f1lk2j7cik9\na|!*#-:;+-~[]{}b'wyd9n604zjafko6r''x''"); +add("selser", "1. 
Quotes inside <b> and <i> [[4,2,0,0,[2],0,1,0,4,[0,0,3],2,0,2,[[3]],0,[0,1,[3,0],0],0,4,4,0,1,2,0,2,0,0,1,0,0,4,4,3,0,0,4,[2],0,4,4,3,[4,4],4,0]]", "acy673rinovfgviplr0nwpq14079zfr\n''<nowiki>''foo''</nowiki>''\n''x9s8roz7iecul3di<nowiki>'''foo'''</nowiki>''\n''foo''<nowiki/>ncaawjaqbiall3di'''<nowiki/>'foo''''7mofib93ncj714i\n'''<nowiki>''foo''</nowiki>'''a0h559r0kaf9a4i\n'''<nowiki></nowiki>'''\n'''foo'<nowiki/>''<nowiki/>''baz'''\n265zz6wn68zg2e29dcfy6wfyuuonipb9's\n'<nowiki/>''foo''n0so9m9gubthjjor\n''foo''5mx7467s2pe6d2t9<nowiki/>'\n'<nowiki/>''foo''<nowiki/>'\n'enwlc3t7myt4kj4ix6ngpya7ros10pb9<nowiki/>'\n'pil88s26p0mp9zfr'''guy0jrmki6v18aorfoo'''<nowiki/>79cji1kde00jxlxrce9yefc1ixp3nmi''udh3zchhff94fgvi41nysayj1hsemi''6nuqrvo7e9qtcsor''x''"); +add("selser", "1. Quotes inside <b> and <i> [[[2,0,0],2,[[3]],0,2,0,0,1,0,[2,0,4],0,4,0,[[2]],0,4,3,2,4,3,0,0,[2],2,0,4,1,3,3,1,0,0,0,0,3,1,3,0,0,0,[2,3],0,[3]]]", "''es46exuy3rpv6lxr<nowiki/>'foo'<nowiki/>''9lie3fn2xgzrrudi\n''<nowiki></nowiki>''\nhtda1ot8ovjqncdi''<nowiki>'''foo'''</nowiki>''\n''foo''<nowiki/>'s\n'''k7lgn4pezn2ep14i<nowiki/>'foo'ds664qx68o20529'''\ng9y64rrhdde4gqfr\n'''<nowiki>9zk8vipefj1t6gvi'''foo'''</nowiki>'''\nfr7un8vrsmbfn7b9hxhjdizbj88jv2t9'''foo'''l7d0p06g7tu766r''foo''\n''1ki5kz2uf4wsif6rfoo''00lf2pwa757v6lxr<nowiki/>'\n'uoebnwplbpxecdi''foo'''''foo'''\n'''foo'''<nowiki/>'\n''''foo'''<nowiki/>'\n''fools'<span> errand</span>''\n''gzrgk0lm4e7b9<span>fool</span>''\na|!*#-:;+-~[]{}b'<nowiki/>''<nowiki/>''"); add("selser", "HTML tag with broken attribute value quoting [1]", "<span title=\"Hello world>Foo</span>"); add("selser", "HTML tag with broken attribute value quoting [2]", "zlkpme7jgj76tj4i\n\n<span title=\"Hello world>Foo</span>"); add("selser", "HTML tag with broken attribute value quoting [[2]]", "19ydfukj49zoajor<span title=\"Hello world>Foo</span>"); diff --git a/tests/parserTests.txt b/tests/parserTests.txt index f0a4af2..4e4d0f2 100644 --- a/tests/parserTests.txt +++ 
b/tests/parserTests.txt @@ -615,21 +615,9 @@ !! test Italics and bold: 3-quote opening sequence: (3,2) -!! options -parsoid=wt2html !! wikitext '''foo'' !! html/* -<p>'<i>foo</i> -</p> -!!end - -# same html as previous, but wikitext adjusted to match parsoid html2wt -!! test -Italics and bold: 3-quote opening sequence: (3,2) w/ nowiki -!! wikitext -'<nowiki/>''foo'' -!! html <p>'<i>foo</i> </p> !!end @@ -720,21 +708,9 @@ !! test Italics and bold: 4-quote opening sequence: (4,3) -!! options -parsoid=wt2html !! wikitext ''''foo''' !! html/* -<p>'<b>foo</b> -</p> -!!end - -# same html as previous, but wikitext adjusted to match parsoid html2wt -!! test -Italics and bold: 4-quote opening sequence: (4,3) w/ nowiki -!! wikitext -'<nowiki/>'''foo''' -!! html <p>'<b>foo</b> </p> !!end @@ -1047,22 +1023,9 @@ !! test Italics and bold: other quote tests: (3,2,3,3) -!! options -parsoid=wt2html !! wikitext '''this is about ''foo'''s family''' !! html/* -<p>'<i>this is about </i>foo<b>s family</b> -</p> -!!end - - -# same html as previous, but wikitext adjusted to match parsoid html2wt -!! test -Italics and bold: other quote tests: (3,2,3,3) w/ nowiki -!! wikitext -'<nowiki/>''this is about ''foo'''s family''' -!! html <p>'<i>this is about </i>foo<b>s family</b> </p> !!end @@ -20787,15 +20750,15 @@ '''<nowiki>'''foo'''</nowiki>''' '''foo'<nowiki/>''bar'<nowiki/>''baz''' '''foo'''<nowiki/>'s -'<nowiki/>''foo'' +'''foo'' ''foo''<nowiki/>' '<nowiki/>''foo''<nowiki/>' -'<nowiki/>'''foo''' +''''foo''' '''foo'''<nowiki/>' '<nowiki/>'''foo'''<nowiki/>' ''fools'<span> errand</span>'' ''<span>fool</span>'s errand'' -a|!*#-:;+-~[]{}b'<nowiki/>''x'' +a|!*#-:;+-~[]{}b'''x'' !! 
html/* <p><i>'foo'</i> <i>''foo''</i> -- To view, visit https://gerrit.wikimedia.org/r/179917 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I81d15ecce888f6e0104fb089329a9db77828d86c Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits