Subramanya Sastry has uploaded a new change for review.
https://gerrit.wikimedia.org/r/201387
Change subject: WIP: T94599: <a> tags with invalid hrefs serialize to text
......................................................................
WIP: T94599: <a> tags with invalid hrefs serialize to text
* Mostly functional, except for one edge case where link content that
has already been escaped gets escaped again (double nowiki-ing).
Needs some refactoring to fix that case.
Change-Id: Ifaaf2a399a1443556abf067a9a5fb430abbd02da
---
M lib/wts.LinkHandler.js
M tests/parserTests.txt
2 files changed, 56 insertions(+), 27 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid
refs/changes/87/201387/1
diff --git a/lib/wts.LinkHandler.js b/lib/wts.LinkHandler.js
index 6cc6709..dcf0e25 100644
--- a/lib/wts.LinkHandler.js
+++ b/lib/wts.LinkHandler.js
@@ -239,25 +239,28 @@
// First, entity-escape the content.
linkTarget = Util.escapeEntities(linkTarget);
- // Wikitext-escape content.
- //
- // When processing link text, we are no longer in newline state
- // since that will be preceded by "[[" or "[" text in target wikitext.
- state.onSOL = false;
-
state.wteHandlerStack.push(state.serializer.wteHandlers.wikilinkHandler);
- state.inLink = true;
- var res = state.serializer.wteHandlers.escapeWikiText(state,
linkTarget, { node: node });
- state.inLink = false;
- state.wteHandlerStack.pop();
-
+ // Is this an invalid link?
+ var invalidLink = false;
if (!suppressLinkTest &&
(!state.env.isValidLinkTarget(linkTarget) ||
/[\|]/.test(linkTarget)))
{
- linkTarget = "MediaWiki:Badtitletext";
- state.env.log("error", "Bad title text", node.outerHTML);
+ invalidLink = true;
}
- return { contentSrc: res, linkTarget: linkTarget };
+ // Wikitext-escape content.
+ var res;
+ if (!invalidLink) {
+ // When processing link text, we are no longer in newline state
+ // since that will be preceded by "[[" or "[" text in target
wikitext.
+ state.onSOL = false;
+
state.wteHandlerStack.push(state.serializer.wteHandlers.wikilinkHandler);
+ state.inLink = true;
+ res = state.serializer.wteHandlers.escapeWikiText(state,
linkTarget, { node: node });
+ state.inLink = false;
+ state.wteHandlerStack.pop();
+ }
+
+ return { contentSrc: res, linkTarget: linkTarget, invalidLink:
invalidLink };
};
/**
@@ -370,8 +373,10 @@
function serializeAsWikiLink(node, state, linkData, cb) {
var contentParts,
contentSrc = '',
+ isPiped = false,
env = state.env,
wiki = env.conf.wiki,
+ oldSOLState = state.onSOL,
target = linkData.target,
dp = DU.getDataParsoid(node);
@@ -465,8 +470,6 @@
}
}
}
- cb( new WikiLinkText( linkData.prefix + '[[' + linkTarget +
']]', node, wiki, linkData.type ), node );
- return;
} else if ( isSimpleWikiLink(env, dp, target, linkData) ) {
// Simple case
if (!target.modified && !linkData.contentModified) {
@@ -482,9 +485,10 @@
linkTarget = ':' + linkTarget;
}
}
- cb( new WikiLinkText( linkData.prefix + '[[' + linkTarget +
']]' + linkData.tail, node, wiki, linkData.type ), node );
- return;
} else {
+ // Emit piped wikilink syntax
+ isPiped = true;
+
var usePT = usePipeTrick(env, dp, target, linkData);
// First get the content source
@@ -526,17 +530,36 @@
if (!linkData.isInterwiki && !linkContentIsRelative) {
linkTarget = linkTarget.replace(/_/g, ' ');
}
- escapedRes = escapeWikiLinkContentString(linkTarget,
- state, node);
+ escapedRes = escapeWikiLinkContentString(linkTarget,
state, node);
linkTarget = escapedRes.linkTarget;
}
linkTarget = addColonEscape(env, linkTarget, linkData);
+ }
+ // If the link target was invalid, do not emit an invalid link:
+ // omit the link and serialize just the content. But, log the
+ // invalid html for Parsoid clients to investigate later.
+ var pipedText;
+ if (escapedRes && escapedRes.invalidLink) {
+ // Log it
+ state.env.log("error", "Bad title text", node.outerHTML);
+
+ // For non-piped content, use the original invalid link text
+ pipedText = isPiped ? contentSrc : linkTarget;
+
+ // Escape the text in the old sol context
+ //
+ // SSS FIXME: There is one scenario where contentSrc has already
+ // been escaped, so we have to detect that and eliminate
double-escaping.
+ state.onSOL = oldSOLState;
+ pipedText = state.serializer.wteHandlers.escapeWikiText(state,
pipedText, { node: node });
+
+ cb(linkData.prefix + pipedText + linkData.tail, node);
+ } else {
+ pipedText = isPiped ? '|' + contentSrc : '';
cb( new WikiLinkText(
- linkData.prefix +
- '[[' + linkTarget + '|' + contentSrc + ']]' +
- linkData.tail, node, wiki, linkData.type ), node );
- return;
+ linkData.prefix + '[[' + linkTarget + pipedText + ']]'
+ linkData.tail,
+ node, wiki, linkData.type ), node );
}
}
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index dc63a94..6b6d14d 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -5208,13 +5208,19 @@
!! end
!! test
-Replace invalid link targets when serializing
+Serialize <a> tags with invalid link targets as plain text
!! options
parsoid=html2wt
!! html
-<a rel="mw:WikiLink" href="./]] foo [[bar">Manual</a>
+<a rel="mw:WikiLink" href="[[foo]]">text</a>
+<a rel="mw:WikiLink" href="[[foo]]">*text</a>
+<a rel="mw:WikiLink" href="[[foo]]">[[foo]]</a>
+<a rel="mw:WikiLink" href="[[foo]]">*a [[foo]]</a>
!! wikitext
-[[MediaWiki:Badtitletext|Manual]]
+text
+<nowiki>*</nowiki>text
+<nowiki>[[foo]]</nowiki>
+<nowiki>*a [[foo]]</nowiki>
!! end
###
--
To view, visit https://gerrit.wikimedia.org/r/201387
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifaaf2a399a1443556abf067a9a5fb430abbd02da
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits