jenkins-bot has submitted this change and it was merged.
Change subject: Normalizer: Tweaks to <td> escapable prefix normalization
......................................................................
Normalizer: Tweaks to <td> escapable prefix normalization
* Only the first <td> in a wikitext row needs the escape.
* +/- in html <td>s don't need escaping.
* Added several parser tests to spec different edit scenarios.
* This seems to be the last major source of rt-diffs (from normalizations).
* Ideally, we would move this out of the normalization phase
to the core serialization phase, but that looks like a more complex
undertaking. This fix could capture the majority of scenarios
that need handling.
Change-Id: I2672a2d21ed767e2904fa7bb3d3a746f7b9006bf
---
M lib/wts.normalizeDOM.js
M tests/parserTests-blacklist.js
M tests/parserTests.txt
3 files changed, 83 insertions(+), 14 deletions(-)
Approvals:
Arlolra: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/wts.normalizeDOM.js b/lib/wts.normalizeDOM.js
index 8c290b2..8526b54 100644
--- a/lib/wts.normalizeDOM.js
+++ b/lib/wts.normalizeDOM.js
@@ -272,6 +272,18 @@
// Table cells
} else if (node.nodeName === 'TD') {
+ var dp = DU.getDataParsoid(node);
+
+ // * HTML <td>s won't have escapable prefixes
+ // * First cell should always be checked for escapable prefixes
+ // * Second and later cells in a wikitext td row (with stx_v='row' flag)
+ // won't have escapable prefixes.
+ if (dp.stx === 'html' ||
+ (DU.firstNonSepChildNode(node.parentNode) !== node &&
+ dp.stx_v === 'row')) {
+ return node;
+ }
+
var first = DU.firstNonDeletedChildNode(node);
// Emit a space before escapable prefix
// This is preferable to serializing with a nowiki.
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 0d6fc5a..31ac2e0 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -2549,17 +2549,7 @@
add("selser", "Normalizations should be restricted to edited content
[0,0,0,0,[2]]", "a\n= =\ngavtydem1ujl9pb9b");
add("selser", "Normalizations should be restricted to edited content
[1,2,0,0,0]", "a\n\n8uc2mhvuqc7kfbt9\n\n= =\nb");
add("selser", "Normalizations should be restricted to edited content
[1,2,2,3,0]", "a\n\n8bevm5ajbpo561or\n\nqjal98t96jgrdx6r\n\n= =\nb");
-add("selser", "3. Table cells with escapable prefixes after edits [1]", "{|
data-foobar=\"k1uhbblupl5o2yb9\"\n|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits [2]",
"r1fafyruyogk3xr\n{|\n|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits [[0,2]]",
"{|\n<!--0dyo6t6c6ic6jemi-->|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits
[[0,[[[2],0],0]]]", "{|\n|n6mhp6fok98jjora||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits [[4,1]]",
"{|<!--zce235qk94zehfr-->\n|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits
[[4,[[2,0],0]]]", "{|<!--7nnptl6yf8g833di-->\n|56rc04ftl7s98uxr\n|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits
[[0,[1,0]]]", "{|\n|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits [[2,2]]",
"{|<!--xri7xa8msub1q0k9-->\n<!--ior8ojhvj5gvte29-->|a||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits
[[0,[[4,0],0]]]", "{|\n|wtbr3fxby1zto6r||-\n|}");
-add("selser", "3. Table cells with escapable prefixes after edits [[4,2]]",
"{|<!--foy4y1aouncj714i--><!--h3b9y1s65stt9-->\n|a||-\n|}");
-add("selser", "3b. Table cells without escapable prefixes after edits manual",
"{|\n| id=\"x\" | -\n|}");
+add("selser", "4a. Table cells without escapable prefixes after edits manual",
"{|\n| id=\"x\" | -\n|}");
// ### DO NOT REMOVE THIS LINE ### (end of automatically-generated section)
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 254796f..0ca85c8 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -25493,7 +25493,7 @@
!! end
!! test
-3. Table cells with escapable prefixes after edits
+3a. Table cells with escapable prefixes after edits
!! options
parsoid={
"modes": ["selser"],
@@ -25513,7 +25513,7 @@
!! end
!! test
-3a. Table cells with escapable prefixes after edits
+3b. Table cells with escapable prefixes after edits
!! options
parsoid={
"modes": ["selser"],
@@ -25539,7 +25539,7 @@
# normalization doesn't realize that the id attribute
# will eliminate the escapable scenario
!! test
-3b. Table cells without escapable prefixes after edits
+4a. Table cells without escapable prefixes after edits
!! options
parsoid={
"modes": ["selser"],
@@ -25558,6 +25558,73 @@
|}
!! end
+## This tests normalizer's ability to discriminate between
+## cells having identical content.
+!! test
+4b. Table cells without escapable prefixes after edits
+!! options
+parsoid={
+ "modes": ["selser"],
+ "scrubWikitext": true,
+ "changes": [
+ [ "td", "html", "-" ]
+ ]
+}
+!! wikitext
+{|
+|a||b
+|}
+!! wikitext/edited
+{|
+| -||-
+|}
+!! end
+
+## This tests normalizer's ability to not be tripped by
+## comments (and whitespace)
+!! test
+4c. Table cells without escapable prefixes after edits
+!! options
+parsoid={
+ "modes": ["selser"],
+ "scrubWikitext": true,
+ "changes": [
+ [ "table tbody tr td:first-child", "remove" ]
+ ]
+}
+!! wikitext
+{|
+|-
+<!--foo--> |a||-
+|}
+!! wikitext/edited
+{|
+|-
+<!--foo--> | -
+|}
+!! end
+
+## This tests normalizer's ability to handle HTML cells
+!! test
+4d. Table cells without escapable prefixes after edits
+!! options
+parsoid={
+ "modes": ["selser"],
+ "scrubWikitext": true,
+ "changes": [
+ [ "td", "html", "-" ]
+ ]
+}
+!! wikitext
+<table>
+<tr><td>a</td></tr>
+</table>
+!! wikitext/edited
+<table>
+<tr><td>-</td></tr>
+</table>
+!! end
+
# ---------------------------------------------------
# End of tests spec'ing wikitext serialization norms |
# ---------------------------------------------------
--
To view, visit https://gerrit.wikimedia.org/r/231008
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I2672a2d21ed767e2904fa7bb3d3a746f7b9006bf
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits