jenkins-bot has submitted this change and it was merged. Change subject: Improved wikitext escaping of headings ......................................................................
Improved wikitext escaping of headings * Headings don't need to be escaped unless they are in the extremities. In some cases (Ex: "==bogus== <s>a</s>"), the escaping didn't account for additional non-heading chars in eol position. * The fixes now lead to 2 more wt2wt, 2 more html2wt and 32 more passing selser tests. * Updated parserTests-blacklist Change-Id: I65cfa51c7508145881c6b370d9ed452cc678903b --- M js/lib/mediawiki.DOMUtils.js M js/lib/mediawiki.WikitextSerializer.js M js/tests/parserTests-blacklist.js 3 files changed, 32 insertions(+), 51 deletions(-) Approvals: Cscott: Looks good to me, approved jenkins-bot: Verified diff --git a/js/lib/mediawiki.DOMUtils.js b/js/lib/mediawiki.DOMUtils.js index e5f1a57..fd46824 100644 --- a/js/lib/mediawiki.DOMUtils.js +++ b/js/lib/mediawiki.DOMUtils.js @@ -484,7 +484,7 @@ * Get the first child element or non-IEW text node, ignoring * whitespace-only text nodes and comments. */ - getFirstNonSepChildNode: function(node) { + firstNonSepChildNode: function(node) { var child = node.firstChild; while (child && !this.isContentNode(child)) { child = child.nextSibling; diff --git a/js/lib/mediawiki.WikitextSerializer.js b/js/lib/mediawiki.WikitextSerializer.js index 099d56f..85a2a2c 100644 --- a/js/lib/mediawiki.WikitextSerializer.js +++ b/js/lib/mediawiki.WikitextSerializer.js @@ -246,9 +246,13 @@ } if (!linksOnly && tc === pd.TagTk) { - // mw:Entity tokens + // Ignore mw:Entity tokens if (t.name === 'span' && t.getAttribute('typeof') === 'mw:Entity') { numEntities++; + continue; + } + // Ignore heading tokens + if (t.name.match(/^h\d$/)) { continue; } @@ -256,9 +260,13 @@ } if (!linksOnly && tc === pd.EndTagTk) { - // mw:Entity tokens + // Ignore mw:Entity tokens if (numEntities > 0 && t.name === 'span') { numEntities--; + continue; + } + // Ignore heading tokens + if (t.name.match(/^h\d$/)) { continue; } @@ -615,10 +623,22 @@ text = text.replace(/<(\/?nowiki)>/g, '<$1>'); // Use the tokenizer to see if we have any wikitext tokens + 
// + // Ignores headings & entities -- headings have additional + // EOL matching requirements which are not captured by the + // hasWikitextTokens check if (this.wteHandlers.hasWikitextTokens(state, sol, text) || hasTildes) { // console.warn("---EWT:DBG1---"); return escapedText(text); - } else if (!state.onSOL) { + } else if (state.onSOL) { + if (text.match(/^=+[^=]+=+$/)) { + // console.warn("---EWT:DBG2---"); + return escapedText(text); + } else { + // console.warn("---EWT:DBG3---"); + return text; + } + } else { // Detect if we have open brackets or heading chars -- we use 'processed' flag // as a performance opt. to run this detection only if/when required. // @@ -636,7 +656,7 @@ // - a text node: (Ex: <p>=foo=</p>) // - the first child of a heading node: (Ex: <h1>=foo=</h1>) if (cl.text.match(/^=/) && - (DU.isText(cl.firstNode) || + (DU.isText(DU.firstNonSepChildNode(cl.firstNode.parentNode)) || cl.firstNode.nodeName.match(/^H/) && cl.firstNode.firstChild && DU.isText(cl.firstNode.firstChild))) { cl.hasOpenHeadingChar = true; @@ -659,15 +679,12 @@ cl.hasOpenBrackets && text.match(/^[^\[]*\]/) && this.wteHandlers.hasWikitextTokens(state, sol, cl.text + text, true)) { - // console.warn("---EWT:DBG2---"); + // console.warn("---EWT:DBG4---"); return escapedText(text); } else { - // console.warn("---EWT:DBG3---"); + // console.warn("---EWT:DBG5---"); return text; } - } else { - // console.warn("---EWT:DBG4---"); - return text; } }; @@ -1317,13 +1334,13 @@ return { handle: function (node, state, cb) { - var firstChildElt = DU.getFirstNonSepChildNode(node); + var firstChildElt = DU.firstNonSepChildNode(node); // Skip builder-inserted wrappers // Ex: <ul><s auto-inserted-start-and-end-><li>..</li><li>..</li></s>...</ul> // output from: <s>\n*a\n*b\n*c</s> while (firstChildElt && isBuilderInsertedElt(firstChildElt)) { - firstChildElt = DU.getFirstNonSepChildNode(firstChildElt); + firstChildElt = DU.firstNonSepChildNode(firstChildElt); } if (!firstChildElt || ! 
(firstChildElt.nodeName in firstChildNames)) { @@ -1354,7 +1371,7 @@ li: { handle: function (node, state, cb) { - var firstChildElement = DU.getFirstNonSepChildNode(node); + var firstChildElement = DU.firstNonSepChildNode(node); if (!DU.isList(firstChildElement)) { cb(state.serializer._getListBullets(node), node); } @@ -1384,7 +1401,7 @@ dt: { handle: function (node, state, cb) { - var firstChildElement = DU.getFirstNonSepChildNode(node); + var firstChildElement = DU.firstNonSepChildNode(node); if (!DU.isList(firstChildElement)) { cb(state.serializer._getListBullets(node), node); } @@ -1412,7 +1429,7 @@ dd: { handle: function (node, state, cb) { - var firstChildElement = DU.getFirstNonSepChildNode(node); + var firstChildElement = DU.firstNonSepChildNode(node); if (!DU.isList(firstChildElement)) { // XXX: handle stx: row if (node.data.parsoid.stx === 'row') { diff --git a/js/tests/parserTests-blacklist.js b/js/tests/parserTests-blacklist.js index 1ccff8d..c58aa79 100644 --- a/js/tests/parserTests-blacklist.js +++ b/js/tests/parserTests-blacklist.js @@ -588,8 +588,6 @@ add("wt2wt", "Transclusion of nonexistent MediaWiki message"); add("wt2wt", "Transclusion of MediaWiki message with underscore"); add("wt2wt", "Transclusion of MediaWiki message with space"); -add("wt2wt", "Section extraction test with bogus <nowiki> heading (section 1)"); -add("wt2wt", "Section extraction test with bogus <nowiki> heading (section 2)"); add("wt2wt", "Section extraction, <pre> around bogus header (bug 10309)"); add("wt2wt", "Section replacement, <pre> around bogus header (bug 10309)"); add("wt2wt", "5 quotes, code coverage +1 line (parsoid)"); @@ -1748,8 +1746,6 @@ add("html2wt", "Section replacement test (section 9)"); add("html2wt", "Section replacement test (section 10)"); add("html2wt", "Section replacement test with initial whitespace (bug 13728)"); -add("html2wt", "Section extraction, heading followed by pre with 20 spaces (bug 6398)"); -add("html2wt", "Section extraction, heading 
followed by pre with 19 spaces (bug 6398 sanity check)"); add("html2wt", "Section extraction, <pre> around bogus header (bug 10309)"); add("html2wt", "Section replacement, <pre> around bogus header (bug 10309)"); add("html2wt", "Handling of 
 in URLs"); @@ -2953,38 +2949,6 @@ add("selser", "Transclusion of MediaWiki message with space [1]"); add("selser", "Transclusion of MediaWiki message with space [4]"); add("selser", "Transclusion of MediaWiki message with space [2]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [4,0,[0,1]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [[0]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [1,0,1,0,1]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [2,0,[0,4]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [1,0,[0,[0]]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [2,0,[0,[0]]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [3,0,[0,[0]]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [3,0,2,0,[0]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [3,0,[0,1]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [1,0,1,0,[0]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [1,0,[0,4]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [4,0,[0,[0]]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [2,0,4,0,1]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [4,0,1,0,[0]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 1) [4,0,4,0,1]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [2,0,4,0,1]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [3,0,[0,1]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [[0]]"); -add("selser", "Section extraction test with bogus 
<nowiki> heading (section 2) [1,0,[0,[0]]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [3,0,4,0,[0]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [2,0,[0,[0]]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [4,0,1,0,[0]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [1,0,[0,1]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [4,0,2,0,[0]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [4,0,1,0,4]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [2,0,2,0,2]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [3,0,2,0,2]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [3,0,1,0,[0]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [2,0,1,0,1]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [1,0,1,0,[0]]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [2,0,4,0,2]"); -add("selser", "Section extraction test with bogus <nowiki> heading (section 2) [2,0,4,0,[0]]"); add("selser", "Section extraction, <pre> around bogus header (bug 10309) [1,0,4,0,1,4,0,3]"); add("selser", "Section extraction, <pre> around bogus header (bug 10309) [1,0,[0]]"); add("selser", "Section extraction, <pre> around bogus header (bug 10309) [1,0,2,0,2,[0]]"); -- To view, visit https://gerrit.wikimedia.org/r/60970 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I65cfa51c7508145881c6b370d9ed452cc678903b Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Parsoid Gerrit-Branch: master Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org> Gerrit-Reviewer: Cscott <wikime...@cscott.net> Gerrit-Reviewer: GWicke 
<gwi...@wikimedia.org> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits