jenkins-bot has submitted this change and it was merged.
Change subject: nowiki escaping: Reduce use of fullWrap scenarios.
......................................................................
nowiki escaping: Reduce use of fullWrap scenarios.
* This is hangover from old code when we weren't tokenizing the
string to be nowikied. Now that we tokenize and always wrap the
smallest fragments, we are guaranteed to not leave behind any
wikitext-like string in the process.
The only scenario where we might still need full-wrapping is when
the string shows up in a context-specific handler where the
tokenization context is different in each scenario. With some
careful attention, these scenarios could also be pared down
further.
* Updated parser tests.
Change-Id: Id5e9be29ffc327fb23ec5888fdc0636c17835ecc
---
M lib/wts.escapeWikitext.js
M tests/parserTests-blacklist.js
M tests/parserTests.txt
3 files changed, 17 insertions(+), 17 deletions(-)
Approvals:
Arlolra: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/wts.escapeWikitext.js b/lib/wts.escapeWikitext.js
index 576349d..7ed80fb 100644
--- a/lib/wts.escapeWikitext.js
+++ b/lib/wts.escapeWikitext.js
@@ -391,9 +391,9 @@
* text, anything that tokenizes to another construct needs to be
* wrapped.
*
- * Full-wrapping is enabled in the following cases:
- * - origText has url triggers (RFC, ISBN, etc.)
- * - is being escaped within context-specific handlers
+ * Full-wrapping is enabled if the string is being escaped within
+ * context-specific handlers where the tokenization context might
+ * be different from what we use in this code.
* ---------------------------------------------------------------- */
WEHP.escapedText = function(state, sol, origText, fullWrap) {
try {
@@ -610,7 +610,7 @@
// If the reason for full wrap is that the text
contains non-quote
// escapable chars, it's still possible to minimize the
contents
// of the <nowiki> (T71950).
- return this.escapedText(state, sol, text,
!hasNonQuoteEscapableChars);
+ return this.escapedText(state, sol, text);
} else {
var quoteEscapedText = escapedIBSiblingNodeText(state,
text, opts);
if (quoteEscapedText) {
@@ -625,7 +625,7 @@
// of whether we are in template arg context or not.
if (text.match(/\{\{\{|\{\{|\}\}\}|\}\}/)) {
state.env.log("trace/wt-escape", "---Unconditional:
transclusion chars---");
- return this.escapedText(state, false, text, fullCheckNeeded);
+ return this.escapedText(state, false, text);
}
// Once we eliminate the possibility of multi-line tokens, split the
text
@@ -654,7 +654,7 @@
if (ret === text &&
this.hasWikitextTokens(state, sol,
this.serializer.options, text)) {
state.env.log("trace/wt-escape", "---Found multi-line
wt tokens---");
- ret = this.escapedText(state, sol, text,
fullCheckNeeded);
+ ret = this.escapedText(state, sol, text);
}
state.env.log("trace/wt-escape", "--
</multi-line-escaping-mode> --");
@@ -696,7 +696,7 @@
state.env.log("trace/wt-escape", "---SOL and pre---");
state.hasIndentPreNowikis = true;
- return this.escapedText(state, sol, text, fullCheckNeeded);
+ return this.escapedText(state, sol, text);
}
@@ -710,14 +710,14 @@
// hasWikitextTokens check
if (hasTildes) {
state.env.log("trace/wt-escape", "---Found tildes---");
- return this.escapedText(state, sol, text, fullCheckNeeded);
+ return this.escapedText(state, sol, text);
} else if (this.hasWikitextTokens(state, sol, this.serializer.options,
text)) {
state.env.log("trace/wt-escape", "---Found WT tokens---");
- return this.escapedText(state, sol, text, fullCheckNeeded);
+ return this.escapedText(state, sol, text);
} else if (sol) {
if (text.match(/(^|\n)=+[^\n=]+=+[ \t]*\n/)) {
state.env.log("trace/wt-escape", "---SOL: heading (easy
test)---");
- return this.escapedText(state, sol, text,
fullCheckNeeded);
+ return this.escapedText(state, sol, text);
} else if (text.match(/(^|\n)=+[^\n=]+=+[ \t]*$/)) {
/*
---------------------------------------------------------------
* '$' is only specific to 'text' and not the entire
line.
@@ -747,7 +747,7 @@
if (!nonSepSibling ||
DU.isText(nonSepSibling) &&
nonSepSibling.nodeValue.match(/^\s*\n/)) {
state.env.log("trace/wt-escape", "---SOL:
heading (complex single-line test) ---");
- return this.escapedText(state, sol, text,
fullCheckNeeded);
+ return this.escapedText(state, sol, text);
} else {
state.env.log("trace/wt-escape", "---SOL:
no-heading (complex single-line test)---");
return text;
@@ -800,7 +800,7 @@
cl.hasOpenBrackets && text.match(/^[^\[]*\]/) &&
this.hasWikitextTokens(state, sol,
this.serializer.options, cl.text + text, true)) {
state.env.log("trace/wt-escape", "---Wikilink chars:
complex single-line test---");
- return this.escapedText(state, sol, text,
fullCheckNeeded);
+ return this.escapedText(state, sol, text);
} else {
state.env.log("trace/wt-escape", "---All good!---");
return text;
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index f888dca..6aebadf 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -923,9 +923,9 @@
add("html2wt", "Magic Word: {{NUMBEROFFILES}}", "5\n");
add("html2wt", "Magic Word: {{PAGENAME}}", "Ævar Arnfjörð Bjarmason\n");
add("html2wt", "Magic Word: {{PAGENAME}} with metacharacters",
"''<nowiki/>'foo & bar = baz'''\n");
-add("html2wt", "Magic Word: {{PAGENAME}} with metacharacters (bug 26781)",
"<nowiki>*RFC 1234 http://example.com/</nowiki>\n");
+add("html2wt", "Magic Word: {{PAGENAME}} with metacharacters (bug 26781)",
"<nowiki>*</nowiki><nowiki>RFC 1234</nowiki>
<nowiki>http://example.com/</nowiki>\n");
add("html2wt", "Magic Word: {{PAGENAMEE}}",
"%C3%86var_Arnfj%C3%B6r%C3%B0_Bjarmason\n");
-add("html2wt", "Magic Word: {{PAGENAMEE}} with metacharacters (bug 26781)",
"<nowiki>*RFC_1234_http://example.com/</nowiki>\n");
+add("html2wt", "Magic Word: {{PAGENAMEE}} with metacharacters (bug 26781)",
"<nowiki>*</nowiki>RFC_1234_http://example.com/\n");
add("html2wt", "Magic Word: {{REVISIONID}}", "1337\n");
add("html2wt", "Magic Word: {{SCRIPTPATH}}", "/\n");
add("html2wt", "Magic Word: {{STYLEPATH}}", "/skins\n");
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 700032f..48fe5dc 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -23281,7 +23281,7 @@
<p>this is not a link: http://example.com
</p>
!! wikitext
-<nowiki>this is not a link: http://example.com</nowiki>
+this is not a link: <nowiki>http://example.com</nowiki>
!! end
!! test
@@ -25433,7 +25433,7 @@
!! html/parsoid
<p>http://example.com is not a link.</p>
!! wikitext
-<nowiki>http://example.com is not a link.</nowiki>
+<nowiki>http://example.com</nowiki> is not a link.
!! end
!! test
@@ -25443,7 +25443,7 @@
!! html/parsoid
<p><a rel="mw:ExtLink" href="http://example.com">http://example.com</a>
http://example.com is not a link.</p>
!! wikitext
-http://example.com<nowiki> http://example.com is not a link.</nowiki>
+http://example.com <nowiki>http://example.com</nowiki> is not a link.
!! end
!! test
--
To view, visit https://gerrit.wikimedia.org/r/240138
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Id5e9be29ffc327fb23ec5888fdc0636c17835ecc
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits