jenkins-bot has submitted this change and it was merged.
Change subject: Fixed unhandled scenarios in the wikitext escaping <td> handler
......................................................................
Fixed unhandled scenarios in the wikitext escaping <td> handler
* There was a missing check in the td-handler to see if the
text-node was the first child of the <td> node, since "-" in
other children doesn't need escaping.
* Generalized fix from cc14d8b90 -- that patch only checked for
<p> nodes that break td-content into multiple lines. This patch
handles this more generally by checking if the text content
being escaped is on the first line or on subsequent lines.
Updated existing parser tests to capture this generalization
(these tests would have failed without this fix). Marked one
of these tests as html2wt-only, since wt2html will not be able
to match the html output being serialized.
* Updated and re-enabled a td-escaping (parsoid-only) parser test.
The updated test would fail without this patch and passes with
this patch.
* Applied the same fix to the existing li-handler which was missing
the test for deleted node markers. The fix eliminates selser
failures on an updated li-escaping test (which would have failed
without this fix).
* Eliminates RT errors from fr:Manchester_City_Football_Club
reported in bug 56918.
Bug: 56918
Change-Id: I5acd594eaa44e869a3f06b7365a16aaf6a94cfb7
---
M js/lib/mediawiki.WikitextSerializer.js
M js/tests/parserTests-blacklist.js
M js/tests/parserTests.txt
3 files changed, 58 insertions(+), 25 deletions(-)
Approvals:
GWicke: Looks good to me, approved
jenkins-bot: Verified
diff --git a/js/lib/mediawiki.WikitextSerializer.js
b/js/lib/mediawiki.WikitextSerializer.js
index 84401f2..3dc0e9e 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -129,11 +129,27 @@
return buf.join('');
}
-var WikitextEscapeHandlers = function() { };
+// Empty constructor
+var WikitextEscapeHandlers = function() {};
var WEHP = WikitextEscapeHandlers.prototype;
WEHP.urlParser = new PegTokenizer();
+
+WEHP.isFirstContentNode = function(node) {
+ // Conservative but safe
+ if (!node) {
+ return true;
+ }
+
+ // Skip deleted-node markers
+ var prev = node.previousSibling;
+ while (prev && DU.isMarkerMeta(prev, "mw:DiffMarker")) {
+ prev = prev.previousSibling;
+ }
+
+ return prev === null;
+};
WEHP.headingHandler = function(headingNode, state, text, opts) {
// Only "=" at the extremities trigger escaping
@@ -150,10 +166,11 @@
};
WEHP.liHandler = function(liNode, state, text, opts) {
- // Only bullets at the beginning of the list trigger escaping
+ // For <dt> nodes, ":" anywhere trigger nowiki
+ // For first nodes of <li>'s, bullets in sol posn trigger escaping
if (liNode.nodeName === 'DT' && /:/.test(text)) {
return true;
- } else if (state.currLine.text === '' && opts.node ===
liNode.firstChild) {
+ } else if (state.currLine.text === '' &&
this.isFirstContentNode(opts.node)) {
return text.match(/^[#\*:;]/);
} else {
return false;
@@ -185,13 +202,21 @@
return text.match(/\]$/);
};
-WEHP.tdHandler = function(state, text, opts) {
- // As long as this is not a p-wrapped text node:
+WEHP.tdHandler = function(tdNode, state, text, opts) {
+ // If 'text' is on the same wikitext line as the "|" corresponding
+ // to the <td>,
// * | in a td should be escaped
// * +- in SOL position for the first node on the current line should
be escaped
- return (!opts.node || opts.node.parentNode.nodeName !== 'P') &&
- (text.match(/\|/) ||
- state.currLine.text === '' && text.match(/^[\-+]/) &&
!state.inWideTD);
+ return (!opts.node || state.currLine.firstNode === tdNode) &&
+ text.match(/\|/) || (
+ !state.inWideTD &&
+ state.currLine.text === '' &&
+ // Has to be the first content node in the <td>.
+ // In <td><a ..>..</a>-foo</td>, even though "-foo"
meets the other conditions,
+ // we don't need to escape it.
+ this.isFirstContentNode(opts.node) &&
+ text.match(/^[\-+]/)
+ );
};
WEHP.hasWikitextTokens = function ( state, onNewline, options, text, linksOnly
) {
@@ -2532,7 +2557,8 @@
state.inWideTD = true;
}
emitStartTag(res, node, state, cb);
- state.serializeChildren(node, cb,
state.serializer.wteHandlers.tdHandler);
+ state.serializeChildren(node, cb,
+
state.serializer.wteHandlers.tdHandler.bind(state.serializer.wteHandlers,
node));
// FIXME: bad state hack!
state.inWideTD = undefined;
},
diff --git a/js/tests/parserTests-blacklist.js
b/js/tests/parserTests-blacklist.js
index 94bd12d..496e0fb 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -2158,7 +2158,6 @@
add("html2wt", "Tables: 1b. No escaping needed");
add("html2wt", "Tables: 1c. No escaping needed");
add("html2wt", "Tables: 1d. No escaping needed");
-add("html2wt", "Tables: 4d. No escaping needed");
add("html2wt", "Links 2. WikiLinks: Escapes needed");
add("html2wt", "1. Leading whitespace in SOL context should be escaped");
add("html2wt", "HTML tag with 'unnecessary' entity encoding in attributes");
diff --git a/js/tests/parserTests.txt b/js/tests/parserTests.txt
index 1c196d8..c03058f 100644
--- a/js/tests/parserTests.txt
+++ b/js/tests/parserTests.txt
@@ -17263,6 +17263,8 @@
*''foo''*bar
*[[Foo]]: bar
+
+*[[Foo]]*bar
!! result
<ul>
<li>foo*bar
@@ -17274,6 +17276,10 @@
</ul>
<ul>
<li><a rel="mw:WikiLink" href="Foo">Foo</a>: bar
+</li>
+</ul>
+<ul>
+<li><a rel="mw:WikiLink" href="Foo">Foo</a>*bar
</li>
</ul>
!!end
@@ -17419,14 +17425,18 @@
!! test
Tables: 2a. Nested in td
!! options
-parsoid
+parsoid=html2wt
!! input
{|
|<nowiki>foo|bar</nowiki>
+|-
+|x<div><nowiki>a|b</nowiki></div>
|}
!! result
<table><tbody><tr>
-<td><span typeof="mw:Nowiki">foo|bar</span></td></tr></tbody></table>
+<td>foo|bar</td></tr>
+<tr><td>x<div>a|b</div></td>
+</tbody></table>
!! end
!! test
@@ -17549,6 +17559,9 @@
bar|baz
+bar
-bar
+|-
+|x
+<div>a|b</div>
|}
!! result
<table><tbody>
@@ -17558,32 +17571,27 @@
<p>bar|baz
+bar
-bar</p></td></tr>
+<tr><td>x
+<div>a|b</div></td>
</tbody></table>
!! end
-
-### SSS FIXME: Disabled right now because accurate html2wt
-### on this snippet requires data-parsoid flags that we've
-### stripped out of these tests. We should scheme how we
-### we want to handle these kind of tests that require
-### data-parsoid flags for accurate html2wt serialization
!! test
Tables: 4d. No escaping needed
!! options
-disabled
+parsoid
!! input
{|
+|[[Foo]]-bar
||+1
||-2
|}
!! result
<table>
-<tr>
-<td>+1
-</td>
-<td>-2
-</td></tr></table>
-
+<tbody><tr><td><a rel="mw:WikiLink" href="./Foo">Foo</a>-bar</td>
+<td data-parsoid='{"startTagSrc":"|","attrSepSrc":"|"}'>+1</td>
+<td data-parsoid='{"startTagSrc":"|","attrSepSrc":"|"}'>-2</td></tr>
+</tbody></table>
!! end
#### --------------- Links ----------------
--
To view, visit https://gerrit.wikimedia.org/r/94952
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5acd594eaa44e869a3f06b7365a16aaf6a94cfb7
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits