C. Scott Ananian has uploaded a new change for review. https://gerrit.wikimedia.org/r/316235
Change subject: fixup! WIP: parse language converter markup. ...................................................................... fixup! WIP: parse language converter markup. Change-Id: I0344d3b14c827defe0c14bbe91598a8cfadedee9 --- M lib/html2wt/LanguageVariantHandler.js M lib/html2wt/escapeWikitext.js M lib/wt2html/pegTokenizer.pegjs.txt 3 files changed, 6 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/35/316235/1 diff --git a/lib/html2wt/LanguageVariantHandler.js b/lib/html2wt/LanguageVariantHandler.js index 2f3d3f4..cedbb3d 100644 --- a/lib/html2wt/LanguageVariantHandler.js +++ b/lib/html2wt/LanguageVariantHandler.js @@ -69,7 +69,7 @@ }; // Helper function: combine the three parts of the -{ }- string var combine = function(flagStr, bodyStr, useTrailingSemi) { - if (flagStr) { flagStr = flagStr + '|'; } + if (flagStr || /|/.test(bodyStr)) { flagStr = flagStr + '|'; } if (useTrailingSemi) { bodyStr = bodyStr + ';'; } return flagStr + bodyStr; }; diff --git a/lib/html2wt/escapeWikitext.js b/lib/html2wt/escapeWikitext.js index 56cd406..0b2980a 100644 --- a/lib/html2wt/escapeWikitext.js +++ b/lib/html2wt/escapeWikitext.js @@ -653,6 +653,7 @@ var hasMagicWord = /(^|\W)(RFC|ISBN|PMID)\s/.test(text); var hasAutolink = state.env.conf.wiki.findValidProtocol(text); var fullCheckNeeded = !state.inLink && (hasMagicWord || hasAutolink); + var hasLanguageConverter = false; var hasQuoteChar = false; var indentPreUnsafe = false; var hasNonQuoteEscapableChars = false; @@ -662,6 +663,8 @@ hasQuoteChar = /'/.test(text); indentPreUnsafe = (!indentPreSafeMode && (/\n +[^\r\n]*?[^\s]+/).test(text) || sol && (/^ +[^\r\n]*?[^\s]+/).test(text)); hasNonQuoteEscapableChars = /[<>\[\]\-\+\|!=#\*:;~{}]|__[^_]*__/.test(text); + hasLanguageConverter = /-{|}-/.test(text); + if (hasLanguageConverter) { fullCheckNeeded = true; } } // Quick check for the common case (useful to kill a majority of requests) diff --git a/lib/wt2html/pegTokenizer.pegjs.txt b/lib/wt2html/pegTokenizer.pegjs.txt index f311e85..67286ae 100644 --- a/lib/wt2html/pegTokenizer.pegjs.txt +++ b/lib/wt2html/pegTokenizer.pegjs.txt @@ -852,11 +852,12 @@ rest:(space_or_newline* ";" space_or_newline* ff:lang_variant_flag { return ff; })* { return [ f ].concat(rest); } + / "" { return []; } lang_variant_flag = f:[-+A-Z] { return { flag: f }; } / v:lang_variant_name { return { variant: v }; } - / b:(!space_or_newline [^|;])+ { return { bogus: b.join('') }; /* bad flag */} + / b:(!space_or_newline [^{}|;])+ { return { bogus: b.join('') }; /* bad flag */} lang_variant_name // language variant name, like zh, zh-cn, etc. = h:[a-z] t:[-a-z]+ { return h + t.join(''); } -- To view, visit https://gerrit.wikimedia.org/r/316235 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I0344d3b14c827defe0c14bbe91598a8cfadedee9 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits