jenkins-bot has submitted this change and it was merged.

Change subject: Normalizer: Tweaks to <td> escapable prefix normalization
......................................................................


Normalizer: Tweaks to <td> escapable prefix normalization

* Only the first <td> in a wikitext row needs the escape.
* +/- in html <td>s don't need escaping.
* Added several parser tests to spec different edit scenarios.
* This seems to be the last major source of rt-diffs (from normalizations).
* Ideally, we would move this out of the normalization phase
  to the core serialization phase, but that looks like a more complex
  undertaking. This fix could capture the majority of scenarios
  that need handling.

Change-Id: I2672a2d21ed767e2904fa7bb3d3a746f7b9006bf
---
M lib/wts.normalizeDOM.js
M tests/parserTests-blacklist.js
M tests/parserTests.txt
3 files changed, 83 insertions(+), 14 deletions(-)

Approvals:
  Arlolra: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/wts.normalizeDOM.js b/lib/wts.normalizeDOM.js
index 8c290b2..8526b54 100644
--- a/lib/wts.normalizeDOM.js
+++ b/lib/wts.normalizeDOM.js
@@ -272,6 +272,18 @@
 
        // Table cells
        } else if (node.nodeName === 'TD') {
+               var dp = DU.getDataParsoid(node);
+
+               // * HTML <td>s won't have escapable prefixes
+               // * First cell should always be checked for escapable prefixes
+               // * Second and later cells in a wikitext td row (with stx_v='row' flag)
+               //   won't have escapable prefixes.
+               if (dp.stx === 'html' ||
+                       (DU.firstNonSepChildNode(node.parentNode) !== node &&
+                       dp.stx_v === 'row')) {
+                       return node;
+               }
+
                var first = DU.firstNonDeletedChildNode(node);
                // Emit a space before escapable prefix
                // This is preferable to serializing with a nowiki.
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 0d6fc5a..31ac2e0 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -2549,17 +2549,7 @@
 add("selser", "Normalizations should be restricted to edited content 
[0,0,0,0,[2]]", "a\n= =\ngavtydem1ujl9pb9b");
 add("selser", "Normalizations should be restricted to edited content 
[1,2,0,0,0]", "a\n\n8uc2mhvuqc7kfbt9\n\n= =\nb");
 add("selser", "Normalizations should be restricted to edited content 
[1,2,2,3,0]", "a\n\n8bevm5ajbpo561or\n\nqjal98t96jgrdx6r\n\n= =\nb");
-add("selser", "3. Table cells with escapable prefixes after edits [1]", "{| 
data-foobar=\"k1uhbblupl5o2yb9\"\n|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits [2]", 
"r1fafyruyogk3xr\n{|\n|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits [[0,2]]", 
"{|\n<!--0dyo6t6c6ic6jemi-->|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits 
[[0,[[[2],0],0]]]", "{|\n|n6mhp6fok98jjora||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits [[4,1]]", 
"{|<!--zce235qk94zehfr-->\n|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits 
[[4,[[2,0],0]]]", "{|<!--7nnptl6yf8g833di-->\n|56rc04ftl7s98uxr\n|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits 
[[0,[1,0]]]", "{|\n|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits [[2,2]]", 
"{|<!--xri7xa8msub1q0k9-->\n<!--ior8ojhvj5gvte29-->|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits 
[[0,[[4,0],0]]]", "{|\n|wtbr3fxby1zto6r||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits [[4,2]]", 
"{|<!--foy4y1aouncj714i--><!--h3b9y1s65stt9-->\n|a||-\n|}");
-add("selser", "3b. Table cells without escapable prefixes after edits manual", 
"{|\n| id=\"x\" | -\n|}");
+add("selser", "4a. Table cells without escapable prefixes after edits manual", 
"{|\n| id=\"x\" | -\n|}");
 
 // ### DO NOT REMOVE THIS LINE ### (end of automatically-generated section)
 
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 254796f..0ca85c8 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -25493,7 +25493,7 @@
 !! end
 
 !! test
-3. Table cells with escapable prefixes after edits
+3a. Table cells with escapable prefixes after edits
 !! options
 parsoid={
   "modes": ["selser"],
@@ -25513,7 +25513,7 @@
 !! end
 
 !! test
-3a. Table cells with escapable prefixes after edits
+3b. Table cells with escapable prefixes after edits
 !! options
 parsoid={
   "modes": ["selser"],
@@ -25539,7 +25539,7 @@
 # normalization doesn't realize that the id attribute
 # will eliminate the escapable scenario
 !! test
-3b. Table cells without escapable prefixes after edits
+4a. Table cells without escapable prefixes after edits
 !! options
 parsoid={
   "modes": ["selser"],
@@ -25558,6 +25558,73 @@
 |}
 !! end
 
+## This tests normalizer's ability to discriminate between
+## cells having identical content.
+!! test
+4b. Table cells without escapable prefixes after edits
+!! options
+parsoid={
+  "modes": ["selser"],
+  "scrubWikitext": true,
+  "changes": [
+    [ "td", "html", "-" ]
+  ]
+}
+!! wikitext
+{|
+|a||b
+|}
+!! wikitext/edited
+{|
+| -||-
+|}
+!! end
+
+## This tests normalizer's ability to not be tripped by
+## comments (and whitespace)
+!! test
+4c. Table cells without escapable prefixes after edits
+!! options
+parsoid={
+  "modes": ["selser"],
+  "scrubWikitext": true,
+  "changes": [
+    [ "table tbody tr td:first-child", "remove" ]
+  ]
+}
+!! wikitext
+{|
+|-
+<!--foo--> |a||-
+|}
+!! wikitext/edited
+{|
+|-
+<!--foo--> | -
+|}
+!! end
+
+## This tests normalizer's ability to handle HTML cells
+!! test
+4d. Table cells without escapable prefixes after edits
+!! options
+parsoid={
+  "modes": ["selser"],
+  "scrubWikitext": true,
+  "changes": [
+    [ "td", "html", "-" ]
+  ]
+}
+!! wikitext
+<table>
+<tr><td>a</td></tr>
+</table>
+!! wikitext/edited
+<table>
+<tr><td>-</td></tr>
+</table>
+!! end
+
 # ---------------------------------------------------
 # End of tests spec'ing wikitext serialization norms |
 # ---------------------------------------------------

-- 
To view, visit https://gerrit.wikimedia.org/r/231008
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I2672a2d21ed767e2904fa7bb3d3a746f7b9006bf
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to