Subramanya Sastry has uploaded a new change for review.
https://gerrit.wikimedia.org/r/181177
Change subject: Handle html/extension tags in quote nowiki stripping algo
......................................................................
Handle html/extension tags in quote nowiki stripping algo
* This is required because quote sequences and <ref>..</ref> tags
commonly appear on the same line.
Change-Id: I5e565dec59d4cadca327403f63be84258e88b632
---
M lib/mediawiki.WikitextSerializer.js
1 file changed, 11 insertions(+), 7 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/77/181177/1
diff --git a/lib/mediawiki.WikitextSerializer.js b/lib/mediawiki.WikitextSerializer.js
index 56e4584..461350a 100644
--- a/lib/mediawiki.WikitextSerializer.js
+++ b/lib/mediawiki.WikitextSerializer.js
@@ -1226,9 +1226,11 @@
// * Strip out nowiki-protected strings since we are only
interested in
// quote sequences that correspond to <i>/<b> tags.
- var simplifiedLine = line.replace(/<nowiki>.*?<\/nowiki>/g, '');
+ // * Strip out <ref>..</ref> strings as well since ref content
has been
+ // processed separately.
+ var simplifiedLine = line.replace(/<nowiki>.*?<\/nowiki>/g,
'').replace(/<ref[^<>]*>.*?<\/ref>/g, '');
- // * Split out all the [[ ]] {{ }} '' ''' '''''
+ // * Split out all the [[ ]] {{ }} '' ''' ''''' <..> </...>
// parens in the regexp mean that the split segments will
// be spliced into the result array as the odd elements.
// * If we match up the tags properly and we see opening
@@ -1239,7 +1241,7 @@
// <i> / <b> / <i><b> tags preceded by a '<nowiki/>, we
// can remove all those nowikis.
// Ex: ''foo'<nowiki/>'' bar '''baz'<nowiki/>'''
- var p =
simplifiedLine.split(/('''''|'''|''|\[\[|\]\]|\{\{|\}\})/);
+ var p =
simplifiedLine.split(/('''''|'''|''|\[\[|\]\]|\{\{|\}\}|<\/?[^<>\/]*>)/);
// Which of the two scenarios have we encountered?
// HTML: '<i>foo</i> wt: '<nowiki/>''foo''
@@ -1251,14 +1253,16 @@
var stack = [];
var n = p.length;
for (var j=1; j<n; j+=2) {
- // Don't allow literal html tags in this first pass.
- // We can opt for this later.
- if (/<(?!nowiki)/.test(p[j-1])) { return line; }
-
if (p[j]===']]') {
if (stack.pop()!=='[[') { return line; }
} else if (p[j]==='}}') {
if (stack.pop()!=='{{') { return line; }
+ } else if (/<\//.test(p[j])) {
+ // match html/ext tags
+ var tag = stack.pop();
+ if (tag.replace(/<(\w+)/, "$1") !==
p[j].replace(/<\/(\w+)/, "$1")) {
+ return line;
+ }
} else if (p[j][0]==="'" &&
stack[stack.length-1]===p[j]) {
if (/(^|[^'])'<nowiki\/>/.test(p[j-1])) {
nowiki_2_seen = true;
--
To view, visit https://gerrit.wikimedia.org/r/181177
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I5e565dec59d4cadca327403f63be84258e88b632
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits