[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Support extension tags which shadows block level elements
jenkins-bot has submitted this change and it was merged. Change subject: Support extension tags which shadows block level elements .. Support extension tags which shadows block level elements Change-Id: Ieadcc21966dc30511fd9c56365b1abfcdadee3fe --- M lib/config/WikitextConstants.js M lib/html2wt/WikitextSerializer.js M lib/utils/Util.js M lib/wt2html/pegTokenizer.pegjs M tests/parserTests-blacklist.js 5 files changed, 158 insertions(+), 142 deletions(-) Approvals: Subramanya Sastry: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/config/WikitextConstants.js b/lib/config/WikitextConstants.js index 430cc28..b39a872 100644 --- a/lib/config/WikitextConstants.js +++ b/lib/config/WikitextConstants.js @@ -174,8 +174,7 @@ "METER", "NAV", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP", "OPTION", "OUTPUT", "P", "PARAM", "PRE", "PROGRESS", "Q", "RB", "RP", "RT", "RTC", "RUBY", "S", "SAMP", "SCRIPT", "SECTION", "SELECT", "SMALL", - // "SOURCE", Support the deprecated alias for syntaxhighlight - "SPAN", "STRONG", "STYLE", "SUB", "SUMMARY", "SUP", + "SOURCE", "SPAN", "STRONG", "STYLE", "SUB", "SUMMARY", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TIME", "TITLE", "TR", "TRACK", "U", "UL", "VAR", "VIDEO", "WBR", ]), diff --git a/lib/html2wt/WikitextSerializer.js b/lib/html2wt/WikitextSerializer.js index 3fd9f84..d8c2882 100644 --- a/lib/html2wt/WikitextSerializer.js +++ b/lib/html2wt/WikitextSerializer.js @@ -1128,6 +1128,8 @@ reqd = true; break; } else if (Consts.HTML.BlockTags.has(tagName)) { + // FIXME: Extension tags shadowing html5 tags might not + // have block semantics. 
// Block tags on a line suppress nowikis reqd = false; } diff --git a/lib/utils/Util.js b/lib/utils/Util.js index b285c15..d86a459 100644 --- a/lib/utils/Util.js +++ b/lib/utils/Util.js @@ -1383,11 +1383,6 @@ }).join(''); }; -Util.isHTMLElementName = function(name) { - name = name.toUpperCase(); - return Consts.HTML.HTML5Tags.has(name) || Consts.HTML.OlderHTMLTags.has(name); -}; - /** * Determine whether the protocol of a link is potentially valid. Use the * environment's per-wiki config to do so. diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs index 60d6e4f..68fd213 100644 --- a/lib/wt2html/pegTokenizer.pegjs +++ b/lib/wt2html/pegTokenizer.pegjs @@ -72,6 +72,148 @@ } }; +var isXMLTag = function(name, block) { +var lName = name.toLowerCase(); +var uName = name.toUpperCase(); + +// FIXME: These are installed extension tags which we, for some +// historical reason, are special casing in the grammar. Ignore them +// here, they have their own rules. +// +// For <pre>, see https://gerrit.wikimedia.org/r/#/c/281076/ +// where we'll clean this up. Notice how much we can remove! +// +// For <nowiki>, see https://gerrit.wikimedia.org/r/#/c/232313/ +// which has some relevant info for serialization. +var ignoredExtTag = lName === 'pre' || lName === 'nowiki'; + +var isInstalledExt = env.conf.wiki.extensionTags.has(lName) && !ignoredExtTag; +var isIncludeTag = lName === 'includeonly' || +lName === 'noinclude' || lName === 'onlyinclude'; + +var isHtmlTag = block ? +// We need to ignore them here too because block tags have +// higher precedence than our questionable rules. 
+constants.HTML.BlockTags.has(uName) && !ignoredExtTag : +constants.HTML.HTML5Tags.has(uName) || constants.HTML.OlderHTMLTags.has(uName); + +return isHtmlTag || isInstalledExt || isIncludeTag; +}; + +var maybeExtensionTag = function(t) { +var tagName = t.name.toLowerCase(); + +var isInstalledExt = env.conf.wiki.extensionTags.has(tagName); +var isIncludeTag = tagName === 'includeonly' || +tagName === 'noinclude' || tagName === 'onlyinclude'; + +// Extensions have higher precedence when they shadow html tags. +if (!(isInstalledExt || isIncludeTag)) { +return t; +} + +var dp = t.dataAttribs; +var skipLen = 0; + +switch (t.constructor) { +case EndTagTk: +return t; +case SelfclosingTagTk: +dp.src = input.substring(dp.tsr[0], dp.tsr[1]); +dp.tagWidths =
[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Support extension tags which shadows block level elements
Arlolra has uploaded a new change for review. https://gerrit.wikimedia.org/r/325507 Change subject: Support extension tags which shadows block level elements .. Support extension tags which shadows block level elements Change-Id: Ieadcc21966dc30511fd9c56365b1abfcdadee3fe --- M lib/utils/Util.js M lib/wt2html/pegTokenizer.pegjs M tests/parserTests-blacklist.js 3 files changed, 155 insertions(+), 140 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/07/325507/1 diff --git a/lib/utils/Util.js b/lib/utils/Util.js index 590406e..0e71410 100644 --- a/lib/utils/Util.js +++ b/lib/utils/Util.js @@ -1377,11 +1377,6 @@ }).join(''); }; -Util.isHTMLElementName = function(name) { - name = name.toUpperCase(); - return Consts.HTML.HTML5Tags.has(name) || Consts.HTML.OlderHTMLTags.has(name); -}; - /** * Determine whether the protocol of a link is potentially valid. Use the * environment's per-wiki config to do so. diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs index 60d6e4f..68fd213 100644 --- a/lib/wt2html/pegTokenizer.pegjs +++ b/lib/wt2html/pegTokenizer.pegjs @@ -72,6 +72,148 @@ } }; +var isXMLTag = function(name, block) { +var lName = name.toLowerCase(); +var uName = name.toUpperCase(); + +// FIXME: These are installed extension tags which we, for some +// historical reason, are special casing in the grammar. Ignore them +// here, they have their own rules. +// +// For <pre>, see https://gerrit.wikimedia.org/r/#/c/281076/ +// where we'll clean this up. Notice how much we can remove! +// +// For <nowiki>, see https://gerrit.wikimedia.org/r/#/c/232313/ +// which has some relevant info for serialization. +var ignoredExtTag = lName === 'pre' || lName === 'nowiki'; + +var isInstalledExt = env.conf.wiki.extensionTags.has(lName) && !ignoredExtTag; +var isIncludeTag = lName === 'includeonly' || +lName === 'noinclude' || lName === 'onlyinclude'; + +var isHtmlTag = block ? 
+// We need to ignore them here too because block tags have +// higher precedence than our questionable rules. +constants.HTML.BlockTags.has(uName) && !ignoredExtTag : +constants.HTML.HTML5Tags.has(uName) || constants.HTML.OlderHTMLTags.has(uName); + +return isHtmlTag || isInstalledExt || isIncludeTag; +}; + +var maybeExtensionTag = function(t) { +var tagName = t.name.toLowerCase(); + +var isInstalledExt = env.conf.wiki.extensionTags.has(tagName); +var isIncludeTag = tagName === 'includeonly' || +tagName === 'noinclude' || tagName === 'onlyinclude'; + +// Extensions have higher precedence when they shadow html tags. +if (!(isInstalledExt || isIncludeTag)) { +return t; +} + +var dp = t.dataAttribs; +var skipLen = 0; + +switch (t.constructor) { +case EndTagTk: +return t; +case SelfclosingTagTk: +dp.src = input.substring(dp.tsr[0], dp.tsr[1]); +dp.tagWidths = [dp.tsr[1] - dp.tsr[0], 0]; +if (isIncludeTag) { +return t; +} +break; +case TagTk: +var tsr0 = dp.tsr[0]; +var endTagRE = new RegExp("^[\\s\\S]*?(</\\s*" + tagName + "\\s*>)", "mi"); +var restOfInput = input.substring(tsr0); +var tagContent = restOfInput.match(endTagRE); + +if (!tagContent) { +dp.src = input.substring(dp.tsr[0], dp.tsr[1]); +dp.tagWidths = [dp.tsr[1] - dp.tsr[0], 0]; +if (isIncludeTag) { +return t; +} else { +// This is undefined behaviour. The php parser currently +// returns a tag here as well, which results in unclosed +// extension tags that shadow html tags falling back to +// their html equivalent. The sanitizer will take care +// of converting to text where necessary. We do this to +// simplify `hasWikitextTokens` when escaping wikitext, +// which wants these as tokens because it's otherwise +// lacking in context. +return t; // not text() +} +} + +var extSrc = tagContent[0]; +var endTagWidth = tagContent[1].length; + +// FIXME: This should be removed in favour of a native parser function +// for `tag`, which invokes the extension handler directly. 
+if (tagName === 'ref') { +// Support 1-level nesting of <ref> tags during tokenizing. +// <ref> tags are the exception