jenkins-bot has submitted this change and it was merged.

Change subject: T94599: <a> tags with invalid hrefs should serialize to text
......................................................................


T94599: <a> tags with invalid hrefs should serialize to text

* Split escaping into escaping of link target and link content
* Escape wikilink content only after it is clear what we are going
  to serialize it to (invalid links => text; valid links => wikilink)
* A new parser test specifics functionality for invalid links
  and tests proper nowiki-ing behaviour.

Change-Id: Ifaaf2a399a1443556abf067a9a5fb430abbd02da
---
M lib/wts.LinkHandler.js
M tests/parserTests.txt
2 files changed, 70 insertions(+), 43 deletions(-)

Approvals:
  Arlolra: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/wts.LinkHandler.js b/lib/wts.LinkHandler.js
index 96a7de2..87ba585 100644
--- a/lib/wts.LinkHandler.js
+++ b/lib/wts.LinkHandler.js
@@ -233,28 +233,29 @@
        });
 };
 
-var escapeWikiLinkContentString = function(linkTarget, state, node, 
suppressLinkTest) {
-       // First, entity-escape the content.
+var escapeLinkTarget = function(linkTarget, state) {
+       // Entity-escape the content.
        linkTarget = Util.escapeEntities(linkTarget);
+       return {
+               linkTarget: linkTarget,
+               // Is this an invalid link?
+               invalidLink: !state.env.isValidLinkTarget(linkTarget) || 
/\|/.test(linkTarget),
+       };
+};
+
+var escapeLinkContent = function(str, state, solState, node) {
+       // Entity-escape the content.
+       str = Util.escapeEntities(str);
 
        // Wikitext-escape content.
-       //
-       // When processing link text, we are no longer in newline state
-       // since that will be preceded by "[[" or "[" text in target wikitext.
-       state.onSOL = false;
+       state.onSOL = solState;
        
state.wteHandlerStack.push(state.serializer.wteHandlers.wikilinkHandler);
        state.inLink = true;
-       var res = state.serializer.wteHandlers.escapeWikiText(state, 
linkTarget, { node: node });
+       var res = state.serializer.wteHandlers.escapeWikiText(state, str, { 
node: node });
        state.inLink = false;
        state.wteHandlerStack.pop();
 
-       if (!suppressLinkTest &&
-               (!state.env.isValidLinkTarget(linkTarget) || 
/[\|]/.test(linkTarget))) {
-               linkTarget = "MediaWiki:Badtitletext";
-               state.env.log("error", "Bad title text", node.outerHTML);
-       }
-
-       return { contentSrc: res, linkTarget: linkTarget };
+       return res;
 };
 
 /**
@@ -365,8 +366,11 @@
 function serializeAsWikiLink(node, state, linkData, cb) {
        var contentParts,
                contentSrc = '',
+               isPiped = false,
+               requiresEscaping = true,
                env = state.env,
                wiki = env.conf.wiki,
+               oldSOLState = state.onSOL,
                target = linkData.target,
                dp = DU.getDataParsoid(node);
 
@@ -435,14 +439,14 @@
        }
 
        // The string value of the content, if it is plain text.
-       var linkTarget, escapedRes;
+       var linkTarget, escapedTgt;
 
        if ( linkData.isRedirect ) {
                linkTarget = target.value;
                if (target.modified || !target.fromsrc) {
                        linkTarget = linkTarget.replace(/^(\.\.?\/)*/, 
'').replace(/_/g, ' ');
-                       escapedRes = escapeWikiLinkContentString(linkTarget, 
state, node);
-                       linkTarget = escapedRes.linkTarget;
+                       escapedTgt = escapeLinkTarget(linkTarget, state);
+                       linkTarget = escapedTgt.linkTarget;
                        // Determine if it's a redirect to a category, in which 
case
                        // it needs a ':' on front to distingish from a 
category link.
                        var categoryMatch = linkTarget.match(/^([^:]+)[:]/);
@@ -460,16 +464,13 @@
                                }
                        }
                }
-               cb( new WikiLinkText( linkData.prefix + '[[' + linkTarget + 
']]', node, wiki, linkData.type ), node );
-               return;
        } else if ( isSimpleWikiLink(env, dp, target, linkData) ) {
                // Simple case
                if (!target.modified && !linkData.contentModified) {
                        linkTarget = target.value.replace(/^\.\//, '');
                } else {
-                       escapedRes = 
escapeWikiLinkContentString(linkData.content.string,
-                                       state, node);
-                       linkTarget = addColonEscape(env, escapedRes.linkTarget, 
linkData);
+                       escapedTgt = escapeLinkTarget(linkData.content.string, 
state);
+                       linkTarget = addColonEscape(env, escapedTgt.linkTarget, 
linkData);
                        if (linkData.isInterwikiLang && 
!/^[:]/.test(linkTarget) &&
                                linkData.type !== 'mw:PageProp/Language') {
                                // ensure interwiki links can't be confused with
@@ -477,9 +478,10 @@
                                linkTarget = ':' + linkTarget;
                        }
                }
-               cb( new WikiLinkText( linkData.prefix + '[[' + linkTarget + 
']]' + linkData.tail, node, wiki, linkData.type ), node );
-               return;
        } else {
+               // Emit piped wikilink syntax
+               isPiped = true;
+
                var usePT = usePipeTrick(env, dp, target, linkData);
 
                // First get the content source
@@ -494,23 +496,19 @@
                        linkData.tail = contentParts.tail;
                        dp.prefix = contentParts.prefix;
                        linkData.prefix = contentParts.prefix;
+                       requiresEscaping = false;
                } else if ( !usePT ) {
-                       if (linkData.content.fromsrc) {
-                               contentSrc = linkData.content.string;
-                       } else {
-                               // Don't test for bad-title text since we are 
not
-                               // using that escapedRes.linkTarget for the 
output.
-                               escapedRes = 
escapeWikiLinkContentString(linkData.content.string || '',
-                                               state, node, 
!!"suppressLinkTest");
-                               contentSrc = escapedRes.contentSrc;
-                       }
+                       contentSrc = linkData.content.string || '';
+                       requiresEscaping = !linkData.content.fromsrc;
                }
 
                if ( contentSrc === '' && !usePT &&
                                linkData.type !== 'mw:PageProp/Category' ) {
                        // Protect empty link content from PST pipe trick
                        contentSrc = '<nowiki/>';
+                       requiresEscaping = false;
                }
+
                linkTarget = target.value;
                if (target.modified || !target.fromsrc) {
                        // Links starting with ./ shouldn't get _ replaced with 
' '
@@ -521,17 +519,40 @@
                        if (!linkData.isInterwiki && !linkContentIsRelative) {
                                linkTarget = linkTarget.replace(/_/g, ' ');
                        }
-                       escapedRes = escapeWikiLinkContentString(linkTarget,
-                               state, node);
-                       linkTarget = escapedRes.linkTarget;
+                       escapedTgt = escapeLinkTarget(linkTarget, state);
+                       linkTarget = escapedTgt.linkTarget;
                }
                linkTarget = addColonEscape(env, linkTarget, linkData);
+       }
 
+       var pipedText;
+       if (escapedTgt && escapedTgt.invalidLink) {
+               // If the link target was invalid, instead of emitting an 
invalid link,
+               // omit the link and serialize just the content instead. But, 
log the
+               // invalid html for Parsoid clients to investigate later.
+               state.env.log("error", "Bad title text", node.outerHTML);
+
+               // For non-piped content, use the original invalid link text
+               pipedText = isPiped ? contentSrc : linkTarget;
+
+               if (requiresEscaping) {
+                       // Escape the text in the old sol context
+                       pipedText = escapeLinkContent(pipedText, state, 
oldSOLState, node);
+               }
+               cb(linkData.prefix + pipedText + linkData.tail, node);
+       } else {
+               if (isPiped && requiresEscaping) {
+                       // We are definitely not in sol context since content
+                       // will be preceded by "[[" or "[" text in target 
wikitext.
+                       pipedText = '|' + escapeLinkContent(contentSrc, state, 
false, node);
+               } else if (isPiped) {
+                       pipedText = '|' + contentSrc;
+               } else {
+                       pipedText = '';
+               }
                cb( new WikiLinkText(
-                       linkData.prefix +
-                       '[[' + linkTarget + '|' + contentSrc + ']]' +
-                       linkData.tail, node, wiki, linkData.type ), node );
-               return;
+                       linkData.prefix + '[[' + linkTarget + pipedText + ']]' 
+ linkData.tail,
+                       node, wiki, linkData.type ), node );
        }
 }
 
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 15a8c08..b4ed614 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -5238,13 +5238,19 @@
 !! end
 
 !! test
-Replace invalid link targets when serializing
+Serialize <a> tags with invalid link targets as plain text
 !! options
 parsoid=html2wt
 !! html
-<a rel="mw:WikiLink" href="./]] foo [[bar">Manual</a>
+<a rel="mw:WikiLink" href="[[foo]]">text</a>
+<a rel="mw:WikiLink" href="[[foo]]">*text</a>
+<a rel="mw:WikiLink" href="[[foo]]">[[foo]]</a>
+<a rel="mw:WikiLink" href="[[foo]]">*a [[foo]]</a>
 !! wikitext
-[[MediaWiki:Badtitletext|Manual]]
+text
+<nowiki>*</nowiki>text
+<nowiki>[[foo]]</nowiki>
+<nowiki>*a [[foo]]</nowiki>
 !! end
 
 ###

-- 
To view, visit https://gerrit.wikimedia.org/r/201387
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ifaaf2a399a1443556abf067a9a5fb430abbd02da
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: Cscott <canan...@wikimedia.org>
Gerrit-Reviewer: Marcoil <marc...@wikimedia.org>
Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to