jenkins-bot has submitted this change and it was merged.
Change subject: (Bug 52103) <nowiki/> escape when single quotes wrap new i/b
tags
......................................................................
(Bug 52103) <nowiki/> escape when single quotes wrap new i/b tags
* Updated i/b dom handlers to recognize when the i/b tags
are preceded/followed by single-quote chars and hence require
<nowiki/> separation to prevent them being reparsed as
a different i/b tag than intended.
* The fix in this patchset also covers the scenario fixed in
git sha 04e52daa3 for bug 50179, so the fix from that patchset
has been removed.
* Updated html2wt escaping tests for quotes. In addition, restricted
that test to html2wt and wt2wt modes only, since html2html and wt2html
cause failures because our normalizer cannot strip the mw:Placeholder
metas introduced for <nowiki/>.
* Regenerated selser.changes.json file. No change in blacklist.
Change-Id: I28f3cffa69be4e11462a68483cf3d3d85d380c85
---
M js/lib/mediawiki.WikitextSerializer.js
M js/tests/parserTests.txt
M js/tests/selser.changes.json
3 files changed, 69 insertions(+), 29 deletions(-)
Approvals:
Cscott: Looks good to me, approved
jenkins-bot: Verified
diff --git a/js/lib/mediawiki.WikitextSerializer.js
b/js/lib/mediawiki.WikitextSerializer.js
index 9009171..e975c78 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -741,16 +741,6 @@
return this.escapedText(state, false, text, fullCheckNeeded);
}
- // Escape quotes that come after I/B nodes that can be reparsed
- // differently since quote-parsing is context-sensitive.
- if (text.match(/^'[^']/)) {
- var prev = opts.node && opts.node.previousSibling ?
opts.node.previousSibling.nodeName : '';
- if (prev === 'I' || prev === 'B') {
- // console.warn("---EWT:F3b---");
- return this.escapedText(state, false, text, true);
- }
- }
-
var sol = state.onSOL && !state.inIndentPre && !state.inPHPBlock,
hasNewlines = text.match(/\n./),
hasTildes = text.match(/~{3,5}/);
@@ -2019,14 +2009,13 @@
/**
- * Bold/italic helper: Determine if an element was preceded by a
- * bold/italic combination
+ * Bold/italic helper: Get a preceding quote/italic element or a '-char
*/
-WSP._hasPrecedingQuoteElements = function(node, state) {
+WSP._getPrecedingQuoteElement = function(node, state) {
if (!state.sep.lastSourceNode) {
// A separator was emitted before some other non-empty wikitext
// string, which means that we can't be directly preceded by
quotes.
- return false;
+ return null;
}
// Move up first until we have a sibling
while (node && !node.previousSibling) {
@@ -2035,23 +2024,51 @@
if (node) {
node = node.previousSibling;
}
+
+ if (node && DU.isText(node) && node.nodeValue.match(/'$/)) {
+ return node;
+ }
+
// Now move down the lastChilds to see if there are any italics / bolds
- while(node && node.nodeType === node.ELEMENT_NODE) {
- if (node.nodeName in {I:1, B:1} &&
- node.lastChild && node.lastChild.nodeName in
{I:1, B:1}) {
- if (state.sep.lastSourceNode === node) {
- return true;
- } else {
- return false;
+ while (node && DU.isElt(node)) {
+ if (node.nodeName in {I:1, B:1} && node.lastChild) {
+ if (node.lastChild.nodeName in {I:1, B:1} ||
+ DU.isText(node.lastChild) &&
node.lastChild.nodeValue.match(/'$/))
+ {
+ return state.sep.lastSourceNode === node ?
node.lastChild : null;
}
} else if (state.sep.lastSourceNode === node) {
// If a separator was already emitted, or an
outstanding separator
// starts at another node that produced output, we are
not
// directly preceded by quotes in the wikitext.
- return false;
+ return null;
}
node = node.lastChild;
}
+ return null;
+};
+
+WSP._quoteTextFollows = function(node, state) {
+ // Move up first until we have a sibling
+ while (node && !node.nextSibling) {
+ node = node.parentNode;
+ }
+ if (node) {
+ node = node.nextSibling;
+ }
+
+ if (node && DU.isText(node) && node.nodeValue[0] === "'") {
+ return true;
+ }
+
+ // Now move down the firstChilds
+ while (node && node.nodeName in {I:1, B:1} && node.firstChild) {
+ if (DU.isText(node.firstChild) &&
node.firstChild.nodeValue.match(/'$/)) {
+ return true;
+ }
+ node = node.firstChild;
+ }
+
return false;
};
@@ -2697,22 +2714,32 @@
},
b: {
handle: function(node, state, cb) {
- if (state.serializer._hasPrecedingQuoteElements(node,
state)) {
+ var q1 =
state.serializer._getPrecedingQuoteElement(node, state);
+ var q2 = state.serializer._quoteTextFollows(node,
state);
+ if (q1 && (q2 || DU.isElt(q1))) {
emitStartTag('<nowiki/>', node, state, cb);
}
emitStartTag("'''", node, state, cb);
state.serializeChildren(node, cb,
state.serializer.wteHandlers.quoteHandler);
emitEndTag("'''", node, state, cb);
+ if (q2) {
+ emitEndTag('<nowiki/>', node, state, cb);
+ }
}
},
i: {
handle: function(node, state, cb) {
- if (state.serializer._hasPrecedingQuoteElements(node,
state)) {
+ var q1 =
state.serializer._getPrecedingQuoteElement(node, state);
+ var q2 = state.serializer._quoteTextFollows(node,
state);
+ if (q1 && (q2 || DU.isElt(q1))) {
emitStartTag('<nowiki/>', node, state, cb);
}
emitStartTag("''", node, state, cb);
state.serializeChildren(node, cb,
state.serializer.wteHandlers.quoteHandler);
emitEndTag("''", node, state, cb);
+ if (q2) {
+ emitEndTag('<nowiki/>', node, state, cb);
+ }
}
},
a: {
diff --git a/js/tests/parserTests.txt b/js/tests/parserTests.txt
index 6ee3ded..979ef97 100644
--- a/js/tests/parserTests.txt
+++ b/js/tests/parserTests.txt
@@ -15786,16 +15786,24 @@
#### --------------------------------------
!! test
1. Quotes inside <b> and <i>
+!! options
+parsoid=html2wt,wt2wt
!! input
''<nowiki>'foo'</nowiki>''
''<nowiki>''foo''</nowiki>''
''<nowiki>'''foo'''</nowiki>''
-''foo''<nowiki>'s</nowiki>
+''foo''<nowiki/>'s
'''<nowiki>'foo'</nowiki>'''
'''<nowiki>''foo''</nowiki>'''
'''<nowiki>'''foo'''</nowiki>'''
'''<nowiki>foo'</nowiki>''<nowiki>bar'</nowiki>''baz'''
-'''foo'''<nowiki>'s</nowiki>
+'''foo'''<nowiki/>'s
+'''foo''
+''foo''<nowiki/>'
+'<nowiki/>''foo''<nowiki/>'
+''''foo'''
+'''foo'''<nowiki/>'
+'<nowiki/>'''foo'''<nowiki/>'
!! result
<p><i>'foo'</i>
<i>''foo''</i>
@@ -15806,7 +15814,12 @@
<b>'''foo'''</b>
<b>foo'<i>bar'</i>baz</b>
<b>foo</b>'s
-</p>
+'<i>foo</i>
+<i>foo</i>'
+'<i>foo</i>'
+'<b>foo</b>
+<b>foo</b>'
+'<b>foo</b>'</p>
!! end
!! test
diff --git a/js/tests/selser.changes.json b/js/tests/selser.changes.json
index e351f22..b264e7e 100644
--- a/js/tests/selser.changes.json
+++ b/js/tests/selser.changes.json
@@ -1109,7 +1109,7 @@
"Links 3. WikiLinks: No escapes
needed":[[[[3],0,4]],[3],[4],[[[2],0,2]],[[4,4,[3]]],[1],[[0,3,4]],[[3,4,[2]]],[2],[[[2],0,3]],[[4,0,[3]]],[[3,0,2]],[[2,3,3]],[[[4],0,0]],[[0,2,2]],[[0,4,2]],[[[3],2,1]],[[0,4,1]],[[[3],4,0]],[[1,0,1]]],
"Links 4. ExtLinks: Escapes
needed":[[[0,3,2]],[[[1],0,[2]]],[[[1],2,[4]]],[[[[4]],0,3]],[4],[2],[1],[[4,0,[4]]],[[3,0,2]],[[[[2]],4,1]],[[0,4,1]],[[2,0,4]],[3],[[2,0,2]],[[[4],3,4]],[[4,4,[1]]],[[1,3,0]],[[3,2,1]],[[4,0,2]],[[2,0,1]]],
"Links 5. ExtLinks: No escapes
needed":[0,[[2]],[1],[[3]],[2],[3],[[4]],[[[4]]],[4],[[[2]]],[[1]],[[[3]]]],
-"1. Quotes inside <b> and
<i>":[[[3,2,1,3,0,3,4,[2],3,[1],4,1,3,[1],0,0,0,0,2]],[1],[[2,3,[1],4,0,0,[3],[2],3,[4],0,0,4,1,0,[0,[3],2],4,3,2]],[3],[[4,2,4,0,[[3]],3,0,3,3,[1],0,3,0,[3],4,[0,[[4]],4],0,2,1]],[[[[4]],0,3,2,2,2,1,1,0,2,3,[[4]],4,[[2]],0,3,3,3,[3]]],[[0,0,4,3,[4],4,2,[3],0,4,0,0,4,2,3,4,2,4,4]],[[[2],0,3,0,[[3]],4,4,1,0,2,4,0,3,1,3,[2,2,0],0,2,3]],[[3,0,[[3]],0,1,0,0,1,4,4,2,2,4,2,0,1,4,4,4]],[[3,0,[[4]],4,0,0,3,0,0,2,0,3,0,2,0,[4,0,0],2,2,3]],[[4,2,0,0,[2],0,1,[4],0,[[3]],2,0,2,[[3]],0,[[2],[3],0],0,[4],4]],[[1,2,2,3,0,0,0,0,4,3,0,1,4,2,2,2,0,0,1]],[[4,0,4,0,0,2,1,4,2,1,3,[3],3,[[2]],3,3,3,0,4]],[2],[[3,0,0,0,0,4,4,[3],2,3,0,[[3]],2,2,4,4,4,[3],4]],[[2,0,[4],0,[2],2,[2],3,0,1,3,1,0,1,0,[[3],[[4]],4],3,4,1]],[[[2],0,[1],0,[2],0,2,0,0,1,0,[2],0,4,0,4,0,0,1]],[4],[[[2],0,0,4,[4],0,2,0,4,1,0,4,2,0,0,4,0,0,[4]]],[[[1],0,4,2,4,0,0,[3],0,1,4,1,3,0,0,1,3,0,0]]],
+"1. Quotes inside <b> and <i>":[],
"2. Link fragments separated by <i> and <b>
tags":[[0,2,4],[3,0,[4,[3],0]],[[0,[4],2],0,[0,1,1]],[[4,[2],0],0,3],[[4,4,0],0,1],[2,0,[0,[4],4]],[4,2,1],[[3,3,2],2,1],[[0,2,2],3,[2,4,[2]]],[4,0,1],[[0,1,[2]],0,2],[2,2,2],[[2,0,1],2,2],[[3,3,[3]],4,2],[[4,1,[3]],0,2],[[0,2,0],0,2],[4,0,[4,[2],2]],[3,0,3],[[0,4,3],3,2],[[0,2,4],0,3]],
"2. Link fragments inside <i> and <b>\n(FIXME: Escaping one or both of[[and ]]
is also acceptable --\n this is one of the shortcomings of this
format)":[[3,3,4],[4,0,2],[1,0,2],[2,0,3],[4,4,1],[[2,1],4,[0,1]],[1,0,0],[4,0,1],[[1,4],3,1],[1,0,3],[[4,[2]],4,3],[4,3,3],[1,4,4],[[0,4],4,[1,[4]]],[[[2],1],4,[2,1]],[3,0,[4,2]],[[3,[4]],4,[4,0]],[[2,[3]],0,[4,[4]]],[1,2,[4,1]],[[0,2],2,1]],
"1. No unnecessary
escapes":[[[2,[2]],0,2,0,[3,4],0,[0,4],0,[0,3,1]],[2,0,1,0,[3,1],0,1,0,[2,[2],0]],[[4,3],0,2,2,2,0,[3,3],3,[0,2,0]],[2,0,1,4,1,2,2,0,[4,[3],0]],[[0,4],0,[3,4],4,[0,2],4,[2,0],0,[0,3,[3]]],[[0,4],0,[4,2],0,3,2,[2,4],2,[3,[2],4]],[0,0,[0,[2]],0,1,3,3,3,2],[1,4,[0,3],3,[0,2],2,1,0,3],[2,0,2,4,1,2,0,4,[0,4,0]],[1,0,[0,1],4,[3,[[4]]],0,[4,4],2,3],[[0,[2]],0,2,0,[0,[[4]]],4,2,4,[0,0,3]],[[0,[3]],0,[3,0],3,2,4,[2,[1]],2,3],[[4,1],0,[0,2],3,2,0,[3,4],0,4],[4,4,3,3,3,2,[0,[4]],0,[0,1,4]],[4,2,[4,0],4,[0,3],3,[0,4],0,[0,[2],0]],[3,3,[0,4],4,3,3,3,0,[0,2,0]],[2,0,4,2,[0,3],0,1,0,[0,4,[3]]],[1,0,[0,3],3,[0,[[4]]],0,3,2,[2,[3],4]],[3,2,[0,3],0,[3,[4]],2,4,0,4],[[3,4],0,[2,4],2,[4,[4]],3,[0,4],3,[3,3,[4]]]],
@@ -1147,4 +1147,4 @@
"Lists: Add space after bullets":[],
"Parsoid: Serialize positional parameters with = in them as named
parameter":[],
"_numchanges": 20
-}
\ No newline at end of file
+}
--
To view, visit https://gerrit.wikimedia.org/r/76162
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I28f3cffa69be4e11462a68483cf3d3d85d380c85
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits