jenkins-bot has submitted this change and it was merged.

Change subject: Bug 50603, 50589: Handle |{{echo|{{!}} Foo}}
......................................................................


Bug 50603, 50589: Handle |{{echo|{{!}} Foo}}

* Add a DOM post-processor pass that fixes the handling of this test case:

  {|
  |{{echo|{{!}} Foo}}
  |}

  The first td is removed and the appropriate adjustments to the
  transclusion-generated td are performed (DSR adjustment, data-mw updates).

* Also add two test cases to verify wt2html, wt2wt and html2wt modes.

* Change the serializer behavior for
  <table><!-- --><tr><td>bar</td></tr></table> to default in

  {|
  <!-- -->|bar
  |}

  instead of

  {|
  |-
  <!-- -->|bar
  |}

  This fixes a few selser tests, but adds some failures as well. Overall it
  does seem to be a win though and leads to cleaner wikitext output.

Limitations:

* This does not work yet for content nested inside a template, as DOM
  postprocessor passes we rely on (auto-inserted end tag detection, template
  encapsulation) are not enabled for that content. We'll have to come up with
  a better way to detect the auto-inserted end tag followed by template
  generating a td situation.  This means that bug 50589 is not fixed yet. The
  chart table in http://en.wikipedia.org/wiki/5_O%27Clock_%28T-Pain_song%29
  does not render correctly yet as that is in turn wrapped into a multi-column
  template. A version without the multi-column wrapper template at
  http://en.wikipedia.org/wiki/User:GWicke/ChartTableTest however works fine 
now.

* This fixes the first (and easier) part of bug 50603 which covers most of bug
  50589. The second part of bug 50603 is still TODO.

Bug: 50603
Bug: 50589

Change-Id: Ibb3fbfdaf71d2242d9b7c6ca0d9f1ab29f56e616
---
A js/lib/dom.t.TDFixups.js
M js/lib/dom.t.handleLIHack.js
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.DOMUtils.js
M js/lib/mediawiki.WikitextSerializer.js
M js/tests/parserTests-blacklist.js
M js/tests/parserTests.txt
7 files changed, 124 insertions(+), 56 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/js/lib/dom.t.TDFixups.js b/js/lib/dom.t.TDFixups.js
new file mode 100644
index 0000000..5a09b44
--- /dev/null
+++ b/js/lib/dom.t.TDFixups.js
@@ -0,0 +1,70 @@
+var DU = require('./mediawiki.DOMUtils.js').DOMUtils;
+
+
+/**
+ * DOM visitor that strips the double td for this test case:
+ * |{{echo|{{!}} Foo}}
+ *
+ * See https://bugzilla.wikimedia.org/show_bug.cgi?id=50603
+ */
+function stripDoubleTDs (env, node) {
+       var nextNode = node.nextSibling;
+
+       if (!DU.isLiteralHTMLNode(node) &&
+               // FIXME: will not be set within template content!
+               node.data.parsoid.autoInsertedEnd === true &&
+               nextNode !== null &&
+           nextNode.nodeName === 'TD' &&
+           !DU.isLiteralHTMLNode(nextNode) &&
+               // FIXME: will not be set within template content!
+               DU.isTplElementNode(env, nextNode) &&
+           DU.nodeEssentiallyEmpty(node))
+       {
+               // Update the dsr. Since we are coalescing the first
+               // node with the second (or, more precisely, deleting
+               // the first node), we have to update the second DSR's
+               // starting point and start tag width.
+               var nodeDSR     = node.data.parsoid.dsr,
+                   nextNodeDSR = nextNode.data.parsoid.dsr;
+
+               if (nodeDSR && nextNodeDSR) {
+                       nextNodeDSR[0] = nodeDSR[0];
+               }
+
+               // Now update data-mw
+               // XXX: use data.mw for data-mw as well!
+               var dataMW = DU.getJSONAttribute(nextNode, 'data-mw'),
+                       nodeSrc = DU.getWTSource(env, node);
+               if (!dataMW.parts) {
+                       dataMW.parts = [
+                               nodeSrc,
+                               {
+                                       // XXX: Should we always use parts or 
at least the
+                                       // template wrapper? This will need to 
be updated whenever
+                                       // we change the template info.
+                                       template: {
+                                               target: dataMW.target,
+                                               params: dataMW.params,
+                                               i: 0
+                                       }
+                               }
+                       ];
+                       dataMW.target = undefined;
+                       dataMW.params = undefined;
+               } else {
+                       dataMW.unshift(nodeSrc);
+               }
+               DU.setJSONAttribute(nextNode, 'data-mw', dataMW);
+
+               // Delete the duplicated <td> node.
+               node.parentNode.removeChild(node);
+               // This node was deleted, so don't continue processing on it.
+               return nextNode;
+       }
+
+       return true;
+}
+
+if (typeof module === "object") {
+       module.exports.stripDoubleTDs = stripDoubleTDs;
+}
diff --git a/js/lib/dom.t.handleLIHack.js b/js/lib/dom.t.handleLIHack.js
index 48972f3..23b132a 100644
--- a/js/lib/dom.t.handleLIHack.js
+++ b/js/lib/dom.t.handleLIHack.js
@@ -20,25 +20,11 @@
 function handleLIHack(env, node) {
        var prevNode = node.previousSibling;
 
-       /* Does `node` contain nothing or just non-newline whitespace? */
-       function nodeEssentiallyEmpty(node) {
-               var childNodes = node.childNodes;
-               if (0 === childNodes.length) {
-                       return true;
-               } else if (childNodes.length > 1) {
-                       return false;
-               } else {
-                       var child = childNodes[0];
-                       return (child.nodeName === "#text" &&
-                               /^[ \t]*$/.test(child.nodeValue));
-               }
-       }
-
        if (DU.isLiteralHTMLNode(node) &&
            prevNode !== null &&
            prevNode.nodeName === 'LI' &&
            !DU.isLiteralHTMLNode(prevNode) &&
-           nodeEssentiallyEmpty(prevNode)) {
+           DU.nodeEssentiallyEmpty(prevNode)) {
                // We have to store the extra information in order to
                // reconstruct the original source for roundtripping.
                node.data.parsoid.liHackSrc = DU.getWTSource(env, prevNode);
diff --git a/js/lib/mediawiki.DOMPostProcessor.js 
b/js/lib/mediawiki.DOMPostProcessor.js
index 1736c9f..f3c2ab1 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -22,7 +22,8 @@
        migrateTrailingNLs = 
require('./dom.migrateTrailingNLs.js').migrateTrailingNLs,
        stripMarkerMetas = CleanUp.stripMarkerMetas,
        unpackDOMFragments = 
require('./dom.t.unpackDOMFragments.js').unpackDOMFragments,
-       wrapTemplates = require('./dom.wrapTemplates.js').wrapTemplates;
+       wrapTemplates = require('./dom.wrapTemplates.js').wrapTemplates,
+       TDFixups = require('./dom.t.TDFixups.js');
 
 // map from mediawiki metadata names to RDFa property names
 var metadataMap = {
@@ -142,6 +143,7 @@
        var domVisitor2 = new DOMTraverser();
        domVisitor2.addHandler( 'meta', stripMarkerMetas.bind(null, 
env.conf.parsoid.editMode) );
        domVisitor2.addHandler( 'li', handleLIHack.bind( null, env ) );
+       domVisitor2.addHandler( 'td', TDFixups.stripDoubleTDs.bind( null, env ) 
);
        domVisitor2.addHandler( null, cleanupAndSaveDataParsoid );
        this.processors.push(domVisitor2.traverse.bind(domVisitor2));
 }
diff --git a/js/lib/mediawiki.DOMUtils.js b/js/lib/mediawiki.DOMUtils.js
index 052dbec..c555f50 100644
--- a/js/lib/mediawiki.DOMUtils.js
+++ b/js/lib/mediawiki.DOMUtils.js
@@ -768,6 +768,22 @@
        },
 
        /**
+        * Does `node` contain nothing or just non-newline whitespace?
+        */
+       nodeEssentiallyEmpty: function (node) {
+               var childNodes = node.childNodes;
+               if (0 === childNodes.length) {
+                       return true;
+               } else if (childNodes.length > 1) {
+                       return false;
+               } else {
+                       var child = childNodes[0];
+                       return (child.nodeName === "#text" &&
+                               /^[ \t]*$/.test(child.nodeValue));
+               }
+       },
+
+       /**
         * Make a span element to wrap some bare text.
         *
         * @param {TextNode} node
diff --git a/js/lib/mediawiki.WikitextSerializer.js 
b/js/lib/mediawiki.WikitextSerializer.js
index 20aa105..47edc1a 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -2259,7 +2259,8 @@
                        // If the token has 'startTagSrc' set, it means that 
the tr was present
                        // in the source wikitext and we emit it -- if not, we 
ignore it.
                        var dp = node.data.parsoid;
-                       if (node.previousSibling || dp.startTagSrc) {
+                       // ignore comments and ws
+                       if (DU.previousNonSepSibling(node) || dp.startTagSrc) {
                                var res = 
state.serializer._serializeTableTag(dp.startTagSrc || "|-", '', state,
                                                        node, wrapperUnmodified 
);
                                emitStartTag(res, node, state, cb);
diff --git a/js/tests/parserTests-blacklist.js 
b/js/tests/parserTests-blacklist.js
index ce2fa66..bb5ec3e 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -2845,12 +2845,8 @@
 add("selser", "Table td-cell syntax variations [[0,[1,4]]]");
 add("selser", "Table td-cell syntax variations [[0,[[1,4,0,2,2,0,4,0,0],3]]]");
 add("selser", "Table td-cell syntax variations [[0,2]]");
-add("selser", "Simple table [[0,[2,0,1,4]]]");
 add("selser", "Multiplication table [[0,[2],2,2]]");
 add("selser", "Multiplication table 
[[4,1,4,[[0,[0,1,4],3,1,0],0,2,0,1,0,[0,2,2,2,2,[2]],0,3,0,[4,0,0,3,0,4],0]]]");
-add("selser", "Accept \"||\" in table headings [[4,[2,0]]]");
-add("selser", "Accept \"||\" in indented table headings [[[[2,[2,0]]]]]");
-add("selser", "Accept empty attributes in td/th cells (td/th cells starting 
with leading ||) [[2,[2,0]]]");
 add("selser", "Allow +/- in 2nd and later cells in a row, in 1st cell when 
td-attrs are present, or in 1st cell when there is a space between \"|\" and 
+/-  [[4,[[0,0,4,3],4,[2,4,[2],[3],2,[3],[2],1],3,4,0]]]");
 add("selser", "Allow +/- in 2nd and later cells in a row, in 1st cell when 
td-attrs are present, or in 1st cell when there is a space between \"|\" and 
+/-  [1]");
 add("selser", "Allow +/- in 2nd and later cells in a row, in 1st cell when 
td-attrs are present, or in 1st cell when there is a space between \"|\" and 
+/-  [2]");
@@ -2900,12 +2896,7 @@
 add("selser", "Invalid attributes in table cell (bug 1830) [[2,1]]");
 add("selser", "Invalid attributes in table cell (bug 1830) [[3,[[2],0]]]");
 add("selser", "Invalid attributes in table cell (bug 1830) [[0,[1,3]]]");
-add("selser", "Table security: embedded pipes 
(http://lists.wikimedia.org/mailman/htdig/wikitech-l/2006-April/022293.html) 
[[3,[2]]]");
 add("selser", "Table security: embedded pipes 
(http://lists.wikimedia.org/mailman/htdig/wikitech-l/2006-April/022293.html) 
[2]");
-add("selser", "Template-generated table cell attributes and cell content 
[[2,[2,0]]]");
-add("selser", "Template-generated table cell attributes and cell content 
[[0,[2,4]]]");
-add("selser", "Template-generated table cell attributes and cell content (2) 
[[2,[2,2]]]");
-add("selser", "Template-generated table cell attributes and cell content (3) 
[[3,[2,0]]]");
 add("selser", "Table attributes with empty value [2]");
 add("selser", "Table attributes with empty value [[3,[[[3]],0]]]");
 add("selser", "Table attributes with empty value [1]");
@@ -2914,13 +2905,7 @@
 add("selser", "Table attributes with empty value [[2,0]]");
 add("selser", "Table attributes with empty value [[2,1]]");
 add("selser", "Table attributes with empty value [[0,[1,2]]]");
-add("selser", "Wikitext table with a lot of comments 
[[0,4,0,[2,3,2,4,4,3,0,0]]]");
 add("selser", "Wikitext table with a lot of comments 
[[0,4,3,[0,0,0,0,2,0,3,0]]]");
-add("selser", "Wikitext table with a lot of comments 
[[3,4,0,[2,2,3,0,[2,0,0,1],0,0,0]]]");
-add("selser", "Wikitext table with a lot of comments 
[[0,0,4,[2,4,0,4,[4,0,0,1],2,2,2]]]");
-add("selser", "Wikitext table with double-line table cell [[3,[2,2]]]");
-add("selser", "Table cell with a single comment [[0,[2,3]]]");
-add("selser", "Table cell with a single comment [[4,[2,4]]]");
 add("selser", "Implicit <td> after a |-\n(PHP parser relies on Tidy to add the 
missing <td> tags) [[3,[2,0]]]");
 add("selser", "Implicit <td> after a |-\n(PHP parser relies on Tidy to add the 
missing <td> tags) [1]");
 add("selser", "Implicit <td> after a |-\n(PHP parser relies on Tidy to add the 
missing <td> tags) [[4,1]]");
@@ -3196,9 +3181,6 @@
 add("selser", "Nonexistent template [[4]]");
 add("selser", "Nonexistent template [[2]]");
 add("selser", "Nonexistent template [[3]]");
-add("selser", "BUG 553: link with two variables in a piped link [[3,[2,3]]]");
-add("selser", "BUG 553: link with two variables in a piped link [[0,[2,2]]]");
-add("selser", "BUG 553: link with two variables in a piped link [[0,[2,0]]]");
 add("selser", "Template with targets containing wikilinks [0,2,1,0,0]");
 add("selser", "Template with targets containing wikilinks [0,0,0,0,[3]]");
 add("selser", "Template with targets containing wikilinks [0,0,3,0,2]");
@@ -3213,7 +3195,6 @@
 add("selser", "msgnw keyword [[4]]");
 add("selser", "msgnw keyword [[2]]");
 add("selser", "msgnw keyword [[3]]");
-add("selser", "Unbalanced includeonly and noinclude tags [[0,[2,0]]]");
 add("selser", "2. includeonly in html attr value [[0,0,3,0,0]]");
 add("selser", "2. includeonly in html attr value [[0,0,4,0,0]]");
 add("selser", "2. includeonly in html attr value [2]");
@@ -3459,14 +3440,10 @@
 add("selser", "Fuzz testing: Parser14-table [0,4,0]");
 add("selser", "Fuzz testing: Parser16 [2]");
 add("selser", "Fuzz testing: Parser16 [[0,[[0,2,0,0]]]]");
-add("selser", "Fuzz testing: Parser16 [[3,[2]]]");
 add("selser", "Fuzz testing: Parser16 [[0,[[[3],0,2,4]]]]");
 add("selser", "Fuzz testing: Parser16 [[0,[[0,2,4,2]]]]");
-add("selser", "Fuzz testing: Parser21 [[4,[2]]]");
 add("selser", "Fuzz testing: Parser21 [[0,[[[4,[4],0],0,0]]]]");
 add("selser", "Fuzz testing: Parser21 [2]");
-add("selser", "Fuzz testing: Parser21 [[2,[2]]]");
-add("selser", "Fuzz testing: Parser21 [[0,[2]]]");
 add("selser", "Fuzz testing: Parser22 [[4],0,0]");
 add("selser", "Fuzz testing: Parser22 [1,2,2]");
 add("selser", "Fuzz testing: Parser22 [1,3,0]");
@@ -3879,15 +3856,14 @@
 add("selser", "Tables: 1d. No escaping needed [2]");
 add("selser", "Tables: 1d. No escaping needed [3]");
 add("selser", "Tables: 1d. No escaping needed [4]");
-add("selser", "Tables: 2a. Nested in td [[3,[2,0]]]");
 add("selser", "Tables: 2b. Nested in td [[2,[[2,0,2],2]]]");
-add("selser", "Tables: 3b. Nested in th [[3,[2,0]]]");
-add("selser", "Tables: 3c. Nested in th -- no escaping needed [[2,[2,0]]]");
 add("selser", "Tables: 4a. Escape - [[3,[2,2,2,0]]]");
+add("selser", "Tables: 4b. Escape + [[2,[[2],3,2,0]]]");
+add("selser", "Tables: 4c. No escaping needed 
[[4,[[2,0,[2]],0,[2,4,3,[[2],3]],2,2,2]]]");
+add("selser", "Tables: 4c. No escaping needed 
[[0,[[2,3,[2]],3,2,0,[2,2],0]]]");
 add("selser", "Tables: 4c. No escaping needed 
[[0,[1,3,[0,2,4,[0,3]],0,2,4]]]");
-add("selser", "Tables: 4c. No escaping needed 
[[4,[[0,3,0,[2]],4,2,0,[4,1],4]]]");
+add("selser", "Tables: 4c. No escaping needed 
[[4,[[[3],0,[2]],4,2,0,[4,1],4]]]");
 add("selser", "Tables: 4d. No escaping needed [[3,[[[3],3,2],4]]]");
-add("selser", "Tables: 4d. No escaping needed [[2,[2,0]]]");
 add("selser", "Tables: 4d. No escaping needed [[2,[[0,4,2],0]]]");
 add("selser", "Links 1. Quote marks in link text [[[4,3]]]");
 add("selser", "1. Quotes inside <b> and <i> 
[[3,2,1,3,0,3,4,0,2,3,0,1,4,1,3,[1,0,0],0,0,0,0,0,3,0,0,4,0,[3],3,3,[2],3,4,3,0,0,3,4,3]]");
@@ -3947,7 +3923,6 @@
 add("selser", "Parsoid-only: Table with broken attribute value quoting on 
consecutive lines [[0,2]]");
 add("selser", "Parsoid-only: Table with broken attribute value quoting on 
consecutive lines [[2,[2,0]]]");
 add("selser", "Parsoid-only: Table with broken attribute value quoting on 
consecutive lines [[0,[1,0]]]");
-add("selser", "Accept empty td cell attribute [[0,[2,2]]]");
 add("selser", "Empty table rows go away [[0,[[[2],4,2],0,0,3,1,2]]]");
 add("selser", "Empty table rows go away [[3,[[[2],0,[4]],0,0,0,2,4]]]");
 add("selser", "RT-ed inter-element separators should be valid separators 
[[2],[2,[[3],4]]]");
@@ -3970,7 +3945,6 @@
 add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext 
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for 
correct output) [[0,1],0,2]");
 add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext 
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for 
correct output) [[2,[[[[2,[2]]]],0]],0,[0,[[3],3]]]");
 add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext 
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for 
correct output) [[3,[2,3]],0,4]");
-add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext 
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for 
correct output) [[2,4],4,[4,[2,0]]]");
 add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext 
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for 
correct output) [2,0,[0,2]]");
 add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext 
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for 
correct output) [2,0,[4,4]]");
 add("selser", "Trailing newlines in a deep dom-subtree that ends a wikitext 
line should be migrated out\n(Parsoid-only since PHP parser relies on Tidy for 
correct output) [2,0,[2,4]]");
diff --git a/js/tests/parserTests.txt b/js/tests/parserTests.txt
index 146f867..71bf309 100644
--- a/js/tests/parserTests.txt
+++ b/js/tests/parserTests.txt
@@ -4321,22 +4321,18 @@
 parsoid=html2wt
 !! input
 {|
-|-
 |foo
 |}
 <nowiki> </nowiki>bar
 {|
-|-
 |baz
 |}
 '''quux'''
 !! result
 <table><tbody>
-<tr>
-<td>foo</td></tr></tbody></table> bar
+<tr><td>foo</td></tr></tbody></table> bar
 <table><tbody>
-<tr>
-<td>baz</td></tr></tbody></table><b>quux</b>
+<tr><td>baz</td></tr></tbody></table><b>quux</b>
 !! end
 
 ###
@@ -7190,6 +7186,32 @@
 <p typeof="mw:Transclusion">a</p>
 <table></table>
 <p>b</p>
+!! end
+
+!! test
+Parsoid: Merge double tds (bug 50603)
+!! options
+parsoid=wt2html,wt2wt
+!! input
+{|
+|{{echo|{{!}} foo}}
+|}
+!! result
+<table>
+<tbody><td about="#mwt1" typeof="mw:Transclusion">foo</td></tr></tbody></table>
+!! end
+
+!! test
+Parsoid: Merge double tds, html2wt (bug 50603)
+!! options
+parsoid=html2wt
+!! input
+{|
+|{{echo|{{!}}foo}}
+|}
+!! result
+<table>
+<tbody><td about="#mwt1" typeof="mw:Transclusion" 
data-mw='{"i":0,"parts":["|",{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"{{!}}foo"}},"i":0}}]}'>foo</td></tr></tbody></table>
 !! end
 
 ###
@@ -16481,7 +16503,6 @@
 parsoid
 !! input
 {|
-|-
 !-bar
 |-
 |<nowiki>-bar</nowiki>
@@ -16499,7 +16520,6 @@
 parsoid
 !! input
 {|
-|-
 !+bar
 |-
 |<nowiki>+bar</nowiki>
@@ -16517,7 +16537,6 @@
 parsoid
 !! input
 {|
-|-
 |foo-bar
 |foo+bar
 |-

-- 
To view, visit https://gerrit.wikimedia.org/r/79430
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ibb3fbfdaf71d2242d9b7c6ca0d9f1ab29f56e616
Gerrit-PatchSet: 10
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: GWicke <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to