jenkins-bot has submitted this change and it was merged. Change subject: T94599: <a> tags with invalid hrefs should serialize to text ......................................................................
T94599: <a> tags with invalid hrefs should serialize to text * Split escaping into escaping of link target and link content * Escape wikilink content only after it is clear what we are going to serialize it to (invalid links => text; valid links => wikilink) * A new parser test specifies functionality for invalid links and tests proper nowiki-ing behaviour. Change-Id: Ifaaf2a399a1443556abf067a9a5fb430abbd02da --- M lib/wts.LinkHandler.js M tests/parserTests.txt 2 files changed, 70 insertions(+), 43 deletions(-) Approvals: Arlolra: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/wts.LinkHandler.js b/lib/wts.LinkHandler.js index 96a7de2..87ba585 100644 --- a/lib/wts.LinkHandler.js +++ b/lib/wts.LinkHandler.js @@ -233,28 +233,29 @@ }); }; -var escapeWikiLinkContentString = function(linkTarget, state, node, suppressLinkTest) { - // First, entity-escape the content. +var escapeLinkTarget = function(linkTarget, state) { + // Entity-escape the content. linkTarget = Util.escapeEntities(linkTarget); + return { + linkTarget: linkTarget, + // Is this an invalid link? + invalidLink: !state.env.isValidLinkTarget(linkTarget) || /\|/.test(linkTarget), + }; +}; + +var escapeLinkContent = function(str, state, solState, node) { + // Entity-escape the content. + str = Util.escapeEntities(str); // Wikitext-escape content. - // - // When processing link text, we are no longer in newline state - // since that will be preceded by "[[" or "[" text in target wikitext. 
- state.onSOL = false; + state.onSOL = solState; state.wteHandlerStack.push(state.serializer.wteHandlers.wikilinkHandler); state.inLink = true; - var res = state.serializer.wteHandlers.escapeWikiText(state, linkTarget, { node: node }); + var res = state.serializer.wteHandlers.escapeWikiText(state, str, { node: node }); state.inLink = false; state.wteHandlerStack.pop(); - if (!suppressLinkTest && - (!state.env.isValidLinkTarget(linkTarget) || /[\|]/.test(linkTarget))) { - linkTarget = "MediaWiki:Badtitletext"; - state.env.log("error", "Bad title text", node.outerHTML); - } - - return { contentSrc: res, linkTarget: linkTarget }; + return res; }; /** @@ -365,8 +366,11 @@ function serializeAsWikiLink(node, state, linkData, cb) { var contentParts, contentSrc = '', + isPiped = false, + requiresEscaping = true, env = state.env, wiki = env.conf.wiki, + oldSOLState = state.onSOL, target = linkData.target, dp = DU.getDataParsoid(node); @@ -435,14 +439,14 @@ } // The string value of the content, if it is plain text. - var linkTarget, escapedRes; + var linkTarget, escapedTgt; if ( linkData.isRedirect ) { linkTarget = target.value; if (target.modified || !target.fromsrc) { linkTarget = linkTarget.replace(/^(\.\.?\/)*/, '').replace(/_/g, ' '); - escapedRes = escapeWikiLinkContentString(linkTarget, state, node); - linkTarget = escapedRes.linkTarget; + escapedTgt = escapeLinkTarget(linkTarget, state); + linkTarget = escapedTgt.linkTarget; // Determine if it's a redirect to a category, in which case // it needs a ':' on front to distingish from a category link. 
var categoryMatch = linkTarget.match(/^([^:]+)[:]/); @@ -460,16 +464,13 @@ } } } - cb( new WikiLinkText( linkData.prefix + '[[' + linkTarget + ']]', node, wiki, linkData.type ), node ); - return; } else if ( isSimpleWikiLink(env, dp, target, linkData) ) { // Simple case if (!target.modified && !linkData.contentModified) { linkTarget = target.value.replace(/^\.\//, ''); } else { - escapedRes = escapeWikiLinkContentString(linkData.content.string, - state, node); - linkTarget = addColonEscape(env, escapedRes.linkTarget, linkData); + escapedTgt = escapeLinkTarget(linkData.content.string, state); + linkTarget = addColonEscape(env, escapedTgt.linkTarget, linkData); if (linkData.isInterwikiLang && !/^[:]/.test(linkTarget) && linkData.type !== 'mw:PageProp/Language') { // ensure interwiki links can't be confused with @@ -477,9 +478,10 @@ linkTarget = ':' + linkTarget; } } - cb( new WikiLinkText( linkData.prefix + '[[' + linkTarget + ']]' + linkData.tail, node, wiki, linkData.type ), node ); - return; } else { + // Emit piped wikilink syntax + isPiped = true; + var usePT = usePipeTrick(env, dp, target, linkData); // First get the content source @@ -494,23 +496,19 @@ linkData.tail = contentParts.tail; dp.prefix = contentParts.prefix; linkData.prefix = contentParts.prefix; + requiresEscaping = false; } else if ( !usePT ) { - if (linkData.content.fromsrc) { - contentSrc = linkData.content.string; - } else { - // Don't test for bad-title text since we are not - // using that escapedRes.linkTarget for the output. 
- escapedRes = escapeWikiLinkContentString(linkData.content.string || '', - state, node, !!"suppressLinkTest"); - contentSrc = escapedRes.contentSrc; - } + contentSrc = linkData.content.string || ''; + requiresEscaping = !linkData.content.fromsrc; } if ( contentSrc === '' && !usePT && linkData.type !== 'mw:PageProp/Category' ) { // Protect empty link content from PST pipe trick contentSrc = '<nowiki/>'; + requiresEscaping = false; } + linkTarget = target.value; if (target.modified || !target.fromsrc) { // Links starting with ./ shouldn't get _ replaced with ' ' @@ -521,17 +519,40 @@ if (!linkData.isInterwiki && !linkContentIsRelative) { linkTarget = linkTarget.replace(/_/g, ' '); } - escapedRes = escapeWikiLinkContentString(linkTarget, - state, node); - linkTarget = escapedRes.linkTarget; + escapedTgt = escapeLinkTarget(linkTarget, state); + linkTarget = escapedTgt.linkTarget; } linkTarget = addColonEscape(env, linkTarget, linkData); + } + var pipedText; + if (escapedTgt && escapedTgt.invalidLink) { + // If the link target was invalid, instead of emitting an invalid link, + // omit the link and serialize just the content instead. But, log the + // invalid html for Parsoid clients to investigate later. + state.env.log("error", "Bad title text", node.outerHTML); + + // For non-piped content, use the original invalid link text + pipedText = isPiped ? contentSrc : linkTarget; + + if (requiresEscaping) { + // Escape the text in the old sol context + pipedText = escapeLinkContent(pipedText, state, oldSOLState, node); + } + cb(linkData.prefix + pipedText + linkData.tail, node); + } else { + if (isPiped && requiresEscaping) { + // We are definitely not in sol context since content + // will be preceded by "[[" or "[" text in target wikitext. 
+ pipedText = '|' + escapeLinkContent(contentSrc, state, false, node); + } else if (isPiped) { + pipedText = '|' + contentSrc; + } else { + pipedText = ''; + } cb( new WikiLinkText( - linkData.prefix + - '[[' + linkTarget + '|' + contentSrc + ']]' + - linkData.tail, node, wiki, linkData.type ), node ); - return; + linkData.prefix + '[[' + linkTarget + pipedText + ']]' + linkData.tail, + node, wiki, linkData.type ), node ); } } diff --git a/tests/parserTests.txt b/tests/parserTests.txt index 15a8c08..b4ed614 100644 --- a/tests/parserTests.txt +++ b/tests/parserTests.txt @@ -5238,13 +5238,19 @@ !! end !! test -Replace invalid link targets when serializing +Serialize <a> tags with invalid link targets as plain text !! options parsoid=html2wt !! html -<a rel="mw:WikiLink" href="./]] foo [[bar">Manual</a> +<a rel="mw:WikiLink" href="[[foo]]">text</a> +<a rel="mw:WikiLink" href="[[foo]]">*text</a> +<a rel="mw:WikiLink" href="[[foo]]">[[foo]]</a> +<a rel="mw:WikiLink" href="[[foo]]">*a [[foo]]</a> !! wikitext -[[MediaWiki:Badtitletext|Manual]] +text +<nowiki>*</nowiki>text +<nowiki>[[foo]]</nowiki> +<nowiki>*a [[foo]]</nowiki> !! end ### -- To view, visit https://gerrit.wikimedia.org/r/201387 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ifaaf2a399a1443556abf067a9a5fb430abbd02da Gerrit-PatchSet: 5 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org> Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: Cscott <canan...@wikimedia.org> Gerrit-Reviewer: Marcoil <marc...@wikimedia.org> Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits