jenkins-bot has submitted this change and it was merged.
Change subject: Bug 50603, 50589: Handle |{{echo|{{!}} Foo}}
......................................................................
Bug 50603, 50589: Handle |{{echo|{{!}} Foo}}
* Add a DOM post-processor pass that fixes the handling of this test case:
{|
|{{echo|{{!}} Foo}}
|}
The first td is removed and the appropriate adjustments to the
transclusion-generated td are performed (DSR adjustment, data-mw updates).
* Also add two test cases to verify wt2html, wt2wt and html2wt modes.
* Change the serializer behavior for
<table><!-- --><tr><td>bar</td></tr></table> to default in
{|
<!-- -->|bar
|}
instead of
{|
|-
<!-- -->|bar
|}
This fixes a few selser tests, but adds some failures as well. Overall it
does seem to be a win though and leads to cleaner wikitext output.
Limitations:
* This does not work yet for content nested inside a template, as DOM
postprocessor passes we rely on (auto-inserted end tag detection, template
encapsulation) are not enabled for that content. We'll have to come up with
a better way to detect the auto-inserted end tag followed by template
generating a td situation. This means that bug 50589 is not fixed yet. The
chart table in http://en.wikipedia.org/wiki/5_O%27Clock_%28T-Pain_song%29
does not render correctly yet as that is in turn wrapped into a multi-column
template. A version without the multi-column wrapper template at
http://en.wikipedia.org/wiki/User:GWicke/ChartTableTest however works fine
now.
* This fixes the first (and easier) part of bug 50603 which covers most of bug
50589. The second part of bug 50603 is still TODO.
Bug: 50603
Bug: 50589
Change-Id: Ibb3fbfdaf71d2242d9b7c6ca0d9f1ab29f56e616
---
A js/lib/dom.t.TDFixups.js
M js/lib/dom.t.handleLIHack.js
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.DOMUtils.js
M js/lib/mediawiki.WikitextSerializer.js
M js/tests/parserTests-blacklist.js
M js/tests/parserTests.txt
7 files changed, 124 insertions(+), 56 deletions(-)
Approvals:
Subramanya Sastry: Looks good to me, approved
jenkins-bot: Verified
diff --git a/js/lib/dom.t.TDFixups.js b/js/lib/dom.t.TDFixups.js
new file mode 100644
index 0000000..5a09b44
--- /dev/null
+++ b/js/lib/dom.t.TDFixups.js
@@ -0,0 +1,70 @@
+var DU = require('./mediawiki.DOMUtils.js').DOMUtils;
+
+
+/**
+ * DOM visitor that strips the double td for this test case:
+ * |{{echo|{{!}} Foo}}
+ *
+ * See https://bugzilla.wikimedia.org/show_bug.cgi?id=50603
+ */
+function stripDoubleTDs (env, node) {
+ var nextNode = node.nextSibling;
+
+ if (!DU.isLiteralHTMLNode(node) &&
+ // FIXME: will not be set within template content!
+ node.data.parsoid.autoInsertedEnd === true &&
+ nextNode !== null &&
+ nextNode.nodeName === 'TD' &&
+ !DU.isLiteralHTMLNode(nextNode) &&
+ // FIXME: will not be set within template content!
+ DU.isTplElementNode(env, nextNode) &&
+ DU.nodeEssentiallyEmpty(node))
+ {
+ // Update the dsr. Since we are coalescing the first
+ // node with the second (or, more precisely, deleting
+ // the first node), we have to update the second DSR's
+ // starting point and start tag width.
+ var nodeDSR = node.data.parsoid.dsr,
+ nextNodeDSR = nextNode.data.parsoid.dsr;
+
+ if (nodeDSR && nextNodeDSR) {
+ nextNodeDSR[0] = nodeDSR[0];
+ }
+
+ // Now update data-mw
+ // XXX: use data.mw for data-mw as well!
+ var dataMW = DU.getJSONAttribute(nextNode, 'data-mw'),
+ nodeSrc = DU.getWTSource(env, node);
+ if (!dataMW.parts) {
+ dataMW.parts = [
+ nodeSrc,
+ {
+ // XXX: Should we always use parts or
at least the
+ // template wrapper? This will need to
be updated whenever
+ // we change the template info.
+ template: {
+ target: dataMW.target,
+ params: dataMW.params,
+ i: 0
+ }
+ }
+ ];
+ dataMW.target = undefined;
+ dataMW.params = undefined;
+ } else {
+ dataMW.unshift(nodeSrc);
+ }
+ DU.setJSONAttribute(nextNode, 'data-mw', dataMW);
+
+ // Delete the duplicated <td> node.
+ node.parentNode.removeChild(node);
+ // This node was deleted, so don't continue processing on it.
+ return nextNode;
+ }
+
+ return true;
+}
+
+if (typeof module === "object") {
+ module.exports.stripDoubleTDs = stripDoubleTDs;
+}
diff --git a/js/lib/dom.t.handleLIHack.js b/js/lib/dom.t.handleLIHack.js
index 48972f3..23b132a 100644
--- a/js/lib/dom.t.handleLIHack.js
+++ b/js/lib/dom.t.handleLIHack.js
@@ -20,25 +20,11 @@
function handleLIHack(env, node) {
var prevNode = node.previousSibling;
- /* Does `node` contain nothing or just non-newline whitespace? */
- function nodeEssentiallyEmpty(node) {
- var childNodes = node.childNodes;
- if (0 === childNodes.length) {
- return true;
- } else if (childNodes.length > 1) {
- return false;
- } else {
- var child = childNodes[0];
- return (child.nodeName === "#text" &&
- /^[ \t]*$/.test(child.nodeValue));
- }
- }
-
if (DU.isLiteralHTMLNode(node) &&
prevNode !== null &&
prevNode.nodeName === 'LI' &&
!DU.isLiteralHTMLNode(prevNode) &&
- nodeEssentiallyEmpty(prevNode)) {
+ DU.nodeEssentiallyEmpty(prevNode)) {
// We have to store the extra information in order to
// reconstruct the original source for roundtripping.
node.data.parsoid.liHackSrc = DU.getWTSource(env, prevNode);
diff --git a/js/lib/mediawiki.DOMPostProcessor.js
b/js/lib/mediawiki.DOMPostProcessor.js
index 1736c9f..f3c2ab1 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -22,7 +22,8 @@
migrateTrailingNLs =
require('./dom.migrateTrailingNLs.js').migrateTrailingNLs,
stripMarkerMetas = CleanUp.stripMarkerMetas,
unpackDOMFragments =
require('./dom.t.unpackDOMFragments.js').unpackDOMFragments,
- wrapTemplates = require('./dom.wrapTemplates.js').wrapTemplates;
+ wrapTemplates = require('./dom.wrapTemplates.js').wrapTemplates,
+ TDFixups = require('./dom.t.TDFixups.js');
// map from mediawiki metadata names to RDFa property names
var metadataMap = {
@@ -142,6 +143,7 @@
var domVisitor2 = new DOMTraverser();
domVisitor2.addHandler( 'meta', stripMarkerMetas.bind(null,
env.conf.parsoid.editMode) );
domVisitor2.addHandler( 'li', handleLIHack.bind( null, env ) );
+ domVisitor2.addHandler( 'td', TDFixups.stripDoubleTDs.bind( null, env )
);
domVisitor2.addHandler( null, cleanupAndSaveDataParsoid );
this.processors.push(domVisitor2.traverse.bind(domVisitor2));
}
diff --git a/js/lib/mediawiki.DOMUtils.js b/js/lib/mediawiki.DOMUtils.js
index 052dbec..c555f50 100644
--- a/js/lib/mediawiki.DOMUtils.js
+++ b/js/lib/mediawiki.DOMUtils.js
@@ -768,6 +768,22 @@
},
/**
+ * Does `node` contain nothing or just non-newline whitespace?
+ */
+ nodeEssentiallyEmpty: function (node) {
+ var childNodes = node.childNodes;
+ if (0 === childNodes.length) {
+ return true;
+ } else if (childNodes.length > 1) {
+ return false;
+ } else {
+ var child = childNodes[0];
+ return (child.nodeName === "#text" &&
+ /^[ \t]*$/.test(child.nodeValue));
+ }
+ },
+
+ /**
* Make a span element to wrap some bare text.
*
* @param {TextNode} node
diff --git a/js/lib/mediawiki.WikitextSerializer.js
b/js/lib/mediawiki.WikitextSerializer.js
index 20aa105..47edc1a 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -2259,7 +2259,8 @@
// If the token has 'startTagSrc' set, it means that
the tr was present
// in the source wikitext and we emit it -- if not, we
ignore it.
var dp = node.data.parsoid;
- if (node.previousSibling || dp.startTagSrc) {
+ // ignore comments and ws
+ if (DU.previousNonSepSibling(node) || dp.startTagSrc) {
var res =
state.serializer._serializeTableTag(dp.startTagSrc || "|-", '', state,
node, wrapperUnmodified
);
emitStartTag(res, node, state, cb);
diff --git a/js/tests/parserTests-blacklist.js
b/js/tests/parserTests-blacklist.js
index ce2fa66..bb5ec3e 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -2845,12 +2845,8 @@
add("selser", "Table td-cell syntax variations [[0,[1,4]]]");
add("selser", "Table td-cell syntax variations [[0,[[1,4,0,2,2,0,4,0,0],3]]]");
add("selser", "Table td-cell syntax variations [[0,2]]");
-add("selser", "Simple table [[0,[2,0,1,4]]]");
add("selser", "Multiplication table [[0,[2],2,2]]");
add("selser", "Multiplication table
[[4,1,4,[[0,[0,1,4],3,1,0],0,2,0,1,0,[0,2,2,2,2,[2]],0,3,0,[4,0,0,3,0,4],0]]]");
-add("selser", "Accept \"||\" in table headings [[4,[2,0]]]");
-add("selser", "Accept \"||\" in indented table headings [[[[2,[2,0]]]]]");
-add("selser", "Accept empty attributes in td/th cells (td/th cells starting
with leading ||) [[2,[2,0]]]");
add("selser", "Allow +/- in 2nd and later cells in a row, in 1st cell when
td-attrs are present, or in 1st cell when there is a space between \"|\" and
+/- [[4,[[0,0,4,3],4,[2,4,[2],[3],2,[3],[2],1],3,4,0]]]");
add("selser", "Allow +/- in 2nd and later cells in a row, in 1st cell when
td-attrs are present, or in 1st cell when there is a space between \"|\" and
+/- [1]");
add("selser", "Allow +/- in 2nd and later cells in a row, in 1st cell when
td-attrs are present, or in 1st cell when there is a space between \"|\" and
+/- [2]");
@@ -2900,12 +2896,7 @@
add("selser", "Invalid attributes in table cell (bug 1830) [[2,1]]");
add("selser", "Invalid attributes in table cell (bug 1830) [[3,[[2],0]]]");
add("selser", "Invalid attributes in table cell (bug 1830) [[0,[1,3]]]");
-add("selser", "Table security: embedded pipes
(http://lists.wikimedia.org/mailman/htdig/wikitech-l/2006-April/022293.html)
[[3,[2]]]");
add("selser", "Table security: embedded pipes
(http://lists.wikimedia.org/mailman/htdig/wikitech-l/2006-April/022293.html)
[2]");
-add("selser", "Template-generated table cell attributes and cell content
[[2,[2,0]]]");
-add("selser", "Template-generated table cell attributes and cell content
[[0,[2,4]]]");
-add("selser", "Template-generated table cell attributes and cell content (2)
[[2,[2,2]]]");
-add("selser", "Template-generated table cell attributes and cell content (3)
[[3,[2,0]]]");
add("selser", "Table attributes with empty value [2]");
add("selser", "Table attributes with empty value [[3,[[[3]],0]]]");
add("selser", "Table attributes with empty value [1]");
@@ -2914,13 +2905,7 @@
add("selser", "Table attributes with empty value [[2,0]]");
add("selser", "Table attributes with empty value [[2,1]]");
add("selser", "Table attributes with empty value [[0,[1,2]]]");
-add("selser", "Wikitext table with a lot of comments
[[0,4,0,[2,3,2,4,4,3,0,0]]]");
add("selser", "Wikitext table with a lot of comments
[[0,4,3,[0,0,0,0,2,0,3,0]]]");
-add("selser", "Wikitext table with a lot of comments
[[3,4,0,[2,2,3,0,[2,0,0,1],0,0,0]]]");
-add("selser", "Wikitext table with a lot of comments
[[0,0,4,[2,4,0,4,[4,0,0,1],2,2,2]]]");
-add("selser", "Wikitext table with double-line table cell [[3,[2,2]]]");
-add("selser", "Table cell with a single comment [[0,[2,3]]]");
-add("selser", "Table cell with a single comment [[4,[2,4]]]");
add("selser", "Implicit <td> after a |-\n(PHP parser relies on Tidy to add the
missing <td> tags) [[3,[2,0]]]");
add("selser", "Implicit <td> after a |-\n(PHP parser relies on Tidy to add the
missing <td> tags) [1]");
add("selser", "Implicit <td> after a |-\n(PHP parser relies on Tidy to add the
missing <td> tags) [[4,1]]");
@@ -3196,9 +3181,6 @@
add("selser", "Nonexistent template [[4]]");
add("selser", "Nonexistent template [[2]]");
add("selser", "Nonexistent template [[3]]");
-add("selser", "BUG 553: link with two variables in a piped link [[3,[2,3]]]");
-add("selser", "BUG 553: link with two variables in a piped link [[0,[2,2]]]");
-add("selser", "BUG 553: link with two variables in a piped link [[0,[2,0]]]");
add("selser", "Template with targets containing wikilinks [0,2,1,0,0]");
add("selser", "Template with targets containing wikilinks [0,0,0,0,[3]]");
add("selser", "Template with targets containing wikilinks [0,0,3,0,2]");
@@ -3213,7 +3195,6 @@
add("selser", "msgnw keyword [[4]]");
add("selser", "msgnw keyword [[2]]");
add("selser", "msgnw keyword [[3]]");
-add("selser", "Unbalanced includeonly and noinclude tags [[0,[2,0]]]");
add("selser", "2. includeonly in html attr value [[0,0,3,0,0]]");
add("selser", "2. includeonly in html attr value [[0,0,4,0,0]]");
add("selser", "2. includeonly in html attr value [2]");
@@ -3459,14 +3440,10 @@
add("selser", "Fuzz testing: Parser14-table [0,4,0]");
add("selser", "Fuzz testing: Parser16 [2]");
add("selser", "Fuzz testing: Parser16 [[0,[[0,2,0,0]]]]");
-add("selser", "Fuzz testing: Parser16 [[3,[2]]]");
add("selser", "Fuzz testing: Parser16 [[0,[[[3],0,2,4]]]]");
add("selser", "Fuzz testing: Parser16 [[0,[[0,2,4,2]]]]");
-add("selser", "Fuzz testing: Parser21 [[4,[2]]]");
add("selser", "Fuzz testing: Parser21 [[0,[[[4,[4],0],0,0]]]]");
add("selser", "Fuzz testing: Parser21 [2]");
-add("selser", "Fuzz testing: Parser21 [[2,[2]]]");
-add("selser", "Fuzz testing: Parser21 [[0,[2]]]");
add("selser", "Fuzz testing: Parser22 [[4],0,0]");
add("selser", "Fuzz testing: Parser22 [1,2,2]");
add("selser", "Fuzz testing: Parser22 [1,3,0]");
@@ -3879,15 +3856,14 @@
add("selser", "Tables: 1d. No escaping needed [2]");
add("selser", "Tables: 1d. No escaping needed [3]");
add("selser", "Tables: 1d. No escaping needed [4]");
-add("selser", "Tables: 2a. Nested in td [[3,[2,0]]]");
add("selser", "Tables: 2b. Nested in td [[2,[[2,0,2],2]]]");
-add("selser", "Tables: 3b. Nested in th [[3,[2,0]]]");
-add("selser", "Tables: 3c. Nested in th -- no escaping needed [[2,[2,0]]]");
add("selser", "Tables: 4a. Escape - [[3,[2,2,2,0]]]");
+add("selser", "Tables: 4b. Escape + [[2,[[2],3,2,0]]]");
+add("selser", "Tables: 4c. No escaping needed
[[4,[[2,0,[2]],0,[2,4,3,[[2],3]],2,2,2]]]");
+add("selser", "Tables: 4c. No escaping needed
[[0,[[2,3,[2]],3,2,0,[2,2],0]]]");
add("selser", "Tables: 4c. No escaping needed
[[0,[1,3,[0,2,4,[0,3]],0,2,4]]]");
-add("selser", "Tables: 4c. No escaping needed
[[4,[[0,3,0,[2]],4,2,0,[4,1],4]]]");
+add("selser", "Tables: 4c. No escaping needed
[[4,[[[3],0,[2]],4,2,0,[4,1],4]]]");
add("selser", "Tables: 4d. No escaping needed [[3,[[[3],3,2],4]]]");
-add("selser", "Tables: 4d. No escaping needed [[2,[2,0]]]");
add("selser", "Tables: 4d. No escaping needed [[2,[[0,4,2],0]]]");
add("selser", "Links 1. Quote marks in link text [[[4,3]]]");
add("selser", "1. Quotes inside <b> and <i>
[[3,2,1,3,0,3,4,0,2,3,0,1,4,1,3,[1,0,0],0,0,0,0,0,3,0,0,4,0,[3],3,3,[2],3,4,3,0,0,3,4,3]]");
@@ -3947,7 +3923,6 @@
add("selser", "Parsoid-only: Table with broken attribute value quoting on
consecutive lines [[0,2]]");
add("selser", "Parsoid-only: Table with broken attribute value quoting on
consecutive lines [[2,[2,0]]]");
add("selser", "Parsoid-only: Table with broken attribute value quoting on
consecutive lines [[0,[1,0]]]");
-add("selser", "Accept empty td cell attribute [[0,[2,2]]]");
add("selser", "Empty table rows go away [[0,[[[2],4,2],0,0,3,1,2]]]");
add("selser", "Empty table rows go away [[3,[[[2],0,[4]],0,0,0,2,4]]]");
add("selser", "RT-ed inter-element separators should be valid separators
[[2],[2,[[3],4]]]");
@@ -3970,7 +3945,6 @@
add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [[0,1],0,2]");
add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [[2,[[[[2,[2]]]],0]],0,[0,[[3],3]]]");
add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [[3,[2,3]],0,4]");
-add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [[2,4],4,[4,[2,0]]]");
add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [2,0,[0,2]]");
add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [2,0,[4,4]]");
add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for
correct output) [2,0,[2,4]]");
diff --git a/js/tests/parserTests.txt b/js/tests/parserTests.txt
index 146f867..71bf309 100644
--- a/js/tests/parserTests.txt
+++ b/js/tests/parserTests.txt
@@ -4321,22 +4321,18 @@
parsoid=html2wt
!! input
{|
-|-
|foo
|}
<nowiki> </nowiki>bar
{|
-|-
|baz
|}
'''quux'''
!! result
<table><tbody>
-<tr>
-<td>foo</td></tr></tbody></table> bar
+<tr><td>foo</td></tr></tbody></table> bar
<table><tbody>
-<tr>
-<td>baz</td></tr></tbody></table><b>quux</b>
+<tr><td>baz</td></tr></tbody></table><b>quux</b>
!! end
###
@@ -7190,6 +7186,32 @@
<p typeof="mw:Transclusion">a</p>
<table></table>
<p>b</p>
+!! end
+
+!! test
+Parsoid: Merge double tds (bug 50603)
+!! options
+parsoid=wt2html,wt2wt
+!! input
+{|
+|{{echo|{{!}} foo}}
+|}
+!! result
+<table>
+<tbody><td about="#mwt1" typeof="mw:Transclusion">foo</td></tr></tbody></table>
+!! end
+
+!! test
+Parsoid: Merge double tds, html2wt (bug 50603)
+!! options
+parsoid=html2wt
+!! input
+{|
+|{{echo|{{!}}foo}}
+|}
+!! result
+<table>
+<tbody><td about="#mwt1" typeof="mw:Transclusion"
data-mw='{"i":0,"parts":["|",{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"{{!}}foo"}},"i":0}}]}'>foo</td></tr></tbody></table>
!! end
###
@@ -16481,7 +16503,6 @@
parsoid
!! input
{|
-|-
!-bar
|-
|<nowiki>-bar</nowiki>
@@ -16499,7 +16520,6 @@
parsoid
!! input
{|
-|-
!+bar
|-
|<nowiki>+bar</nowiki>
@@ -16517,7 +16537,6 @@
parsoid
!! input
{|
-|-
|foo-bar
|foo+bar
|-
--
To view, visit https://gerrit.wikimedia.org/r/79430
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ibb3fbfdaf71d2242d9b7c6ca0d9f1ab29f56e616
Gerrit-PatchSet: 10
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: GWicke <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits