Subramanya Sastry has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/181177

Change subject: Handle html/extension tags in quote nowiki stripping algo
......................................................................

Handle html/extension tags in quote nowiki stripping algo

* This is required because quotes and <ref>..</ref> tags commonly
  appear on the same line.

Change-Id: I5e565dec59d4cadca327403f63be84258e88b632
---
M lib/mediawiki.WikitextSerializer.js
1 file changed, 11 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/77/181177/1

diff --git a/lib/mediawiki.WikitextSerializer.js 
b/lib/mediawiki.WikitextSerializer.js
index 56e4584..461350a 100644
--- a/lib/mediawiki.WikitextSerializer.js
+++ b/lib/mediawiki.WikitextSerializer.js
@@ -1226,9 +1226,11 @@
 
                // * Strip out nowiki-protected strings since we are only 
interested in
                //   quote sequences that correspond to <i>/<b> tags.
-               var simplifiedLine = line.replace(/<nowiki>.*?<\/nowiki>/g, '');
+               // * Strip out <ref>..</ref> strings as well since ref content 
has been
+               //   processed separately.
+               var simplifiedLine = line.replace(/<nowiki>.*?<\/nowiki>/g, 
'').replace(/<ref[^<>]*>.*?<\/ref>/g, '');
 
-               // * Split out all the [[ ]] {{ }} '' ''' '''''
+               // * Split out all the [[ ]] {{ }} '' ''' ''''' <..> </...>
                //   parens in the regexp mean that the split segments will
                //   be spliced into the result array as the odd elements.
                // * If we match up the tags properly and we see opening
@@ -1239,7 +1241,7 @@
                //   <i> / <b> / <i><b> tags preceded by a '<nowiki/>, we
                //   can remove all those nowikis.
                //   Ex: ''foo'<nowiki/>'' bar '''baz'<nowiki/>'''
-               var p = 
simplifiedLine.split(/('''''|'''|''|\[\[|\]\]|\{\{|\}\})/);
+               var p = 
simplifiedLine.split(/('''''|'''|''|\[\[|\]\]|\{\{|\}\}|<\/?[^<>\/]*>)/);
 
                // Which of the two scenarios have we encountered?
                // HTML: '<i>foo</i>  wt: '<nowiki/>''foo''
@@ -1251,14 +1253,16 @@
                var stack = [];
                var n = p.length;
                for (var j=1; j<n; j+=2) {
-                       // Don't allow literal html tags in this first pass.
-                       // We can opt for this later.
-                       if (/<(?!nowiki)/.test(p[j-1])) { return line; }
-
                        if (p[j]===']]') {
                                if (stack.pop()!=='[[') { return line; }
                        } else if (p[j]==='}}') {
                                if (stack.pop()!=='{{') { return line; }
+                       } else if (/<\//.test(p[j])) {
+                               // match html/ext tags
+                               var tag = stack.pop();
+                               if (tag.replace(/<(\w+)/, "$1") !== 
p[j].replace(/<\/(\w+)/, "$1")) {
+                                       return line;
+                               }
                        } else if (p[j][0]==="'" && 
stack[stack.length-1]===p[j]) {
                                if (/(^|[^'])'<nowiki\/>/.test(p[j-1])) {
                                        nowiki_2_seen = true;

-- 
To view, visit https://gerrit.wikimedia.org/r/181177
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5e565dec59d4cadca327403f63be84258e88b632
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to