Subramanya Sastry has uploaded a new change for review.
https://gerrit.wikimedia.org/r/76467
Change subject: Take #2: (Bug 52103) nowiki escaping when quotes surround i/b
tags
......................................................................
Take #2: (Bug 52103) nowiki escaping when quotes surround i/b tags
* The previous attempt (87759dea) overreached by applying the previously
existing checks for HTML nodes to text nodes as well -- this was buggy,
as shown by the failure on the example below.
$ echo "'<span><i>foo</i></span>'" | node parse --html2wt
'<span><nowiki/>''foo''<nowiki/></span>'
This output requires no <nowiki/> escapes, but 87759dea added them.
* Added a parser test to capture this scenario and regenerated the
selser changes.
Change-Id: I0ca644d565cf2790cca918fd7bc9c20dfbc6d29d
---
M js/lib/mediawiki.WikitextSerializer.js
M js/tests/parserTests.txt
M js/tests/selser.changes.json
3 files changed, 27 insertions(+), 34 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/67/76467/1
diff --git a/js/lib/mediawiki.WikitextSerializer.js
b/js/lib/mediawiki.WikitextSerializer.js
index 28febdf..bcf736b 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -2007,7 +2007,6 @@
return res + space;
};
-
/**
* Bold/italic helper: Get a preceding quote/italic element or a '-char
*/
@@ -2017,6 +2016,12 @@
// string, which means that we can't be directly preceded by
quotes.
return null;
}
+
+ var prev = node.previousSibling;
+ if (prev && DU.isText(prev) && prev.nodeValue.match(/'$/)) {
+ return prev;
+ }
+
// Move up first until we have a sibling
while (node && !node.previousSibling) {
node = node.parentNode;
@@ -2025,18 +2030,12 @@
node = node.previousSibling;
}
- if (node && DU.isText(node) && node.nodeValue.match(/'$/)) {
- return node;
- }
-
// Now move down the lastChilds to see if there are any italics / bolds
while (node && DU.isElt(node)) {
- if (node.nodeName in {I:1, B:1} && node.lastChild) {
- if (node.lastChild.nodeName in {I:1, B:1} ||
- DU.isText(node.lastChild) &&
node.lastChild.nodeValue.match(/'$/))
- {
- return state.sep.lastSourceNode === node ?
node.lastChild : null;
- }
+ if (node.nodeName in {I:1, B:1} &&
+ node.lastChild && node.lastChild.nodeName in {I:1, B:1})
+ {
+ return state.sep.lastSourceNode === node ?
node.lastChild : null;
} else if (state.sep.lastSourceNode === node) {
// If a separator was already emitted, or an
outstanding separator
// starts at another node that produced output, we are
not
@@ -2049,27 +2048,8 @@
};
WSP._quoteTextFollows = function(node, state) {
- // Move up first until we have a sibling
- while (node && !node.nextSibling) {
- node = node.parentNode;
- }
- if (node) {
- node = node.nextSibling;
- }
-
- if (node && DU.isText(node) && node.nodeValue[0] === "'") {
- return true;
- }
-
- // Now move down the firstChilds
- while (node && node.nodeName in {I:1, B:1} && node.firstChild) {
- if (DU.isText(node.firstChild) &&
node.firstChild.nodeValue.match(/'$/)) {
- return true;
- }
- node = node.firstChild;
- }
-
- return false;
+ var next = node.nextSibling;
+ return next && DU.isText(next) && next.nodeValue[0] === "'";
};
function wtEOL(node, otherNode) {
diff --git a/js/tests/parserTests.txt b/js/tests/parserTests.txt
index 461357e..aedbf45 100644
--- a/js/tests/parserTests.txt
+++ b/js/tests/parserTests.txt
@@ -15806,6 +15806,7 @@
#### 1. Quotes inside <b> and <i>
#### 2. Link fragments separated by <i> and <b> tags
#### 3. Link fragments inside <i> and <b>
+#### 4. No escaping needed
#### --------------------------------------
!! test
1. Quotes inside <b> and <i>
@@ -15858,7 +15859,7 @@
!! end
!! test
-2. Link fragments inside <i> and <b>
+3. Link fragments inside <i> and <b>
(FIXME: Escaping one or both of [[ and ]] is also acceptable --
this is one of the shortcomings of this format)
!! input
@@ -15871,6 +15872,17 @@
</p>
!! end
+!! test
+4. No escaping needed
+!! input
+'<span>''bar''</span>'
+'<span>'''bar'''</span>'
+!! result
+<p>'<span><i>bar</i></span>'
+'<span><b>bar</b></span>'
+</p>
+!! end
+
#### ----------- Paragraphs ---------------
#### 1. No unnecessary escapes
#### --------------------------------------
diff --git a/js/tests/selser.changes.json b/js/tests/selser.changes.json
index a5e7581..10ad527 100644
--- a/js/tests/selser.changes.json
+++ b/js/tests/selser.changes.json
@@ -1112,7 +1112,8 @@
"Links 5. ExtLinks: No escapes
needed":[0,[[2]],[1],[[3]],[2],[3],[[4]],[[[4]]],[4],[[[2]]],[[1]],[[[3]]]],
"1. Quotes inside <b> and <i>":[],
"2. Link fragments separated by <i> and <b>
tags":[[0,2,4],[3,0,[4,[3],0]],[[0,[4],2],0,[0,1,1]],[[4,[2],0],0,3],[[4,4,0],0,1],[2,0,[0,[4],4]],[4,2,1],[[3,3,2],2,1],[[0,2,2],3,[2,4,[2]]],[4,0,1],[[0,1,[2]],0,2],[2,2,2],[[2,0,1],2,2],[[3,3,[3]],4,2],[[4,1,[3]],0,2],[[0,2,0],0,2],[4,0,[4,[2],2]],[3,0,3],[[0,4,3],3,2],[[0,2,4],0,3]],
-"2. Link fragments inside <i> and <b>\n(FIXME: Escaping one or both of[[and ]]
is also acceptable --\n this is one of the shortcomings of this
format)":[[3,3,4],[4,0,2],[1,0,2],[2,0,3],[4,4,1],[[2,1],4,[0,1]],[1,0,0],[4,0,1],[[1,4],3,1],[1,0,3],[[4,[2]],4,3],[4,3,3],[1,4,4],[[0,4],4,[1,[4]]],[[[2],1],4,[2,1]],[3,0,[4,2]],[[3,[4]],4,[4,0]],[[2,[3]],0,[4,[4]]],[1,2,[4,1]],[[0,2],2,1]],
+"3. Link fragments inside <i> and <b>\n(FIXME: Escaping one or both of[[and ]]
is also acceptable --\n this is one of the shortcomings of this
format)":[[[[4],4],2,[2,0]],[[[4],[4]],0,[4,1]],[[3,1],3,[0,4]],[4,0,1],[[2,2],0,[4,[3]]],[[3,0],2,[4,3]],[[0,[2]],0,2],[[0,3],4,[0,2]],[4,0,2],[3,0,[2,0]],[[0,3],4,[0,[3]]],[2,2,1],[[0,3],2,1],[4,0,0],[1,0,[2,0]],[[[4],[4]],0,1],[[1,[2]],0,[0,[4]]],[1,0,1],[0,0,[1,0]],[[4,0],2,2]],
+"4. No escaping
needed":[[[2,[[2]],2,[[3]],0]],[4],[[0,[4],0,[2],2]],[3],[2],[[0,1,2,2,4]],[[0,2,0,[3],0]],[1],[[3,[4],2,[3],0]],[[0,[[2]],2,2,3]],[[2,[4],0,2,2]],[[0,[1],2,[4],0]],[[4,4,2,[[3]],2]],[[0,3,0,2,0]],[[0,3,0,0,0]],[[3,2,0,[2],4]],[[0,[3],0,4,0]],[[4,[1],2,2,0]],[[0,2,2,3,3]],[[0,2,4,2,2]]],
"1. No unnecessary
escapes":[[[2,[2]],0,2,0,[3,4],0,[0,4],0,[0,3,1]],[2,0,1,0,[3,1],0,1,0,[2,[2],0]],[[4,3],0,2,2,2,0,[3,3],3,[0,2,0]],[2,0,1,4,1,2,2,0,[4,[3],0]],[[0,4],0,[3,4],4,[0,2],4,[2,0],0,[0,3,[3]]],[[0,4],0,[4,2],0,3,2,[2,4],2,[3,[2],4]],[0,0,[0,[2]],0,1,3,3,3,2],[1,4,[0,3],3,[0,2],2,1,0,3],[2,0,2,4,1,2,0,4,[0,4,0]],[1,0,[0,1],4,[3,[[4]]],0,[4,4],2,3],[[0,[2]],0,2,0,[0,[[4]]],4,2,4,[0,0,3]],[[0,[3]],0,[3,0],3,2,4,[2,[1]],2,3],[[4,1],0,[0,2],3,2,0,[3,4],0,4],[4,4,3,3,3,2,[0,[4]],0,[0,1,4]],[4,2,[4,0],4,[0,3],3,[0,4],0,[0,[2],0]],[3,3,[0,4],4,3,3,3,0,[0,2,0]],[2,0,4,2,[0,3],0,1,0,[0,4,[3]]],[1,0,[0,3],3,[0,[[4]]],0,3,2,[2,[3],4]],[3,2,[0,3],0,[3,[4]],2,4,0,4],[[3,4],0,[2,4],2,[4,[4]],3,[0,4],3,[3,3,[4]]]],
"1. Leading whitespace in SOL context should be
escaped":[[3,0,[4,2],3,4,3,[1,4,2,0,4,0],0,[0,[2],2],0,[0,4,4],0,3],[2,0,[[2],4],3,3,0,1,0,[2,[4],0],2,4,0,[4,[2],0]],[[1,3],4,2,3,[[3],3],0,[3,2,0,0,4,2],0,4,0,4,0,[0,[2],2]],[[2,3],4,2,0,3,3,[1,0,2,3,1,2],3,[2,[3],4],2,[0,2,0],0,[0,[2],4]],[3,2,3,0,[2,3],0,[0,3,0,3,0,0],4,[3,4,3],2,1,0,[0,1,0]],[[[4],0],0,1,0,2,0,[1,3,0,0,3,0],0,[0,4,0],0,[0,[3],2],0,4],[[3,0],0,[[2],2],0,[[4],4],2,1,3,[0,0,4],4,[0,1,4],0,[0,1,3]],[[4,0],4,1,0,[1,0],0,[3,0,4,2,[4],3],0,[3,3,3],0,[0,0,3],0,1],[3,0,[[4],2],4,[2,0],2,[0,2,0,0,0,0],0,3,3,4,4,[3,3,0]],[1,0,4,0,4,3,1,3,4,2,[3,3,3],0,1],[[1,3],0,[4,2],3,2,4,[2,0,3,2,[4],0],0,2,4,[3,0,2],2,1],[4,0,[2,4],0,1,0,[1,0,4,2,0,0],4,[2,0,2],4,1,4,[0,0,4]],[[3,2],0,4,0,0,4,3,4,[3,[3],2],2,2,0,[2,[4],2]],[4,2,3,4,4,0,[0,0,0,2,4,3],0,0,0,4,0,[0,3,0]],[2,0,1,0,[1,0],0,2,4,3,0,[0,[3],4],0,[3,3,2]],[[[4],2],0,[2,3],0,[2,0],0,[1,2,2,0,[4],2],2,0,0,1,0,[3,4,0]],[[3,0],0,3,0,[1,0],0,4,4,3,3,3,0,4],[[0,3],3,0,0,[[2],4],0,[3,2,4,4,4,0],0,4,0,[0,[2],4],0,[0,3,0]],[[4,2],0,3,4,[[3],0],0,4,3,[4,1,0],3,[2,0,0],4,[2,0,0]],[1,3,[3,2],2,2,0,[3,0,0,0,4,0],0,2,0,[0,0,4],3,3]],
"1. a tags":[[3],[1],[4],[[4]],0,[2],[[3]],[[2]]],
--
To view, visit https://gerrit.wikimedia.org/r/76467
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I0ca644d565cf2790cca918fd7bc9c20dfbc6d29d
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits