C. Scott Ananian has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/316235

Change subject: fixup! WIP: parse language converter markup.
......................................................................

fixup! WIP: parse language converter markup.

Change-Id: I0344d3b14c827defe0c14bbe91598a8cfadedee9
---
M lib/html2wt/LanguageVariantHandler.js
M lib/html2wt/escapeWikitext.js
M lib/wt2html/pegTokenizer.pegjs.txt
3 files changed, 6 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/35/316235/1

diff --git a/lib/html2wt/LanguageVariantHandler.js b/lib/html2wt/LanguageVariantHandler.js
index 2f3d3f4..cedbb3d 100644
--- a/lib/html2wt/LanguageVariantHandler.js
+++ b/lib/html2wt/LanguageVariantHandler.js
@@ -69,7 +69,7 @@
        };
        // Helper function: combine the three parts of the -{ }- string
        var combine = function(flagStr, bodyStr, useTrailingSemi) {
-               if (flagStr) { flagStr = flagStr + '|'; }
+               if (flagStr || /|/.test(bodyStr)) { flagStr = flagStr + '|'; }
                if (useTrailingSemi) { bodyStr = bodyStr + ';'; }
                return flagStr + bodyStr;
        };
diff --git a/lib/html2wt/escapeWikitext.js b/lib/html2wt/escapeWikitext.js
index 56cd406..0b2980a 100644
--- a/lib/html2wt/escapeWikitext.js
+++ b/lib/html2wt/escapeWikitext.js
@@ -653,6 +653,7 @@
        var hasMagicWord = /(^|\W)(RFC|ISBN|PMID)\s/.test(text);
        var hasAutolink = state.env.conf.wiki.findValidProtocol(text);
        var fullCheckNeeded = !state.inLink && (hasMagicWord || hasAutolink);
+       var hasLanguageConverter = false;
        var hasQuoteChar = false;
        var indentPreUnsafe = false;
        var hasNonQuoteEscapableChars = false;
@@ -662,6 +663,8 @@
                hasQuoteChar = /'/.test(text);
                indentPreUnsafe = (!indentPreSafeMode && (/\n +[^\r\n]*?[^\s]+/).test(text) || sol && (/^ +[^\r\n]*?[^\s]+/).test(text));
                hasNonQuoteEscapableChars = /[<>\[\]\-\+\|!=#\*:;~{}]|__[^_]*__/.test(text);
+               hasLanguageConverter = /-{|}-/.test(text);
+               if (hasLanguageConverter) { fullCheckNeeded = true; }
        }
 
        // Quick check for the common case (useful to kill a majority of requests)
diff --git a/lib/wt2html/pegTokenizer.pegjs.txt b/lib/wt2html/pegTokenizer.pegjs.txt
index f311e85..67286ae 100644
--- a/lib/wt2html/pegTokenizer.pegjs.txt
+++ b/lib/wt2html/pegTokenizer.pegjs.txt
@@ -852,11 +852,12 @@
     rest:(space_or_newline* ";" space_or_newline* ff:lang_variant_flag
           { return ff; })*
     { return [ f ].concat(rest); }
+  / "" { return []; }
 
 lang_variant_flag
   = f:[-+A-Z]           { return { flag: f }; }
   / v:lang_variant_name { return { variant: v }; }
-  / b:(!space_or_newline [^|;])+ { return { bogus: b.join('') }; /* bad flag */}
+  / b:(!space_or_newline [^{}|;])+ { return { bogus: b.join('') }; /* bad flag */}
 
 lang_variant_name // language variant name, like zh, zh-cn, etc.
   = h:[a-z] t:[-a-z]+ { return h + t.join(''); }

-- 
To view, visit https://gerrit.wikimedia.org/r/316235
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0344d3b14c827defe0c14bbe91598a8cfadedee9
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to