Arlolra has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/403319 )
Change subject: WIP: Permit extension tags in xmlish attribute values ...................................................................... WIP: Permit extension tags in xmlish attribute values Bug: T183515 Authored-by: Arlo Breault <[email protected]> Authored-by: Shannon Bailey <[email protected]> Change-Id: I311d0fbbd2cdcf2a0c0a3dbf698912e6df5f3356 --- M lib/wt2html/pegTokenizer.pegjs M lib/wt2html/tt/AttributeExpander.js M tests/mockAPI.js M tests/parserTests.txt 4 files changed, 41 insertions(+), 9 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/19/403319/1 diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs index 1a0f97c..65d10f9 100644 --- a/lib/wt2html/pegTokenizer.pegjs +++ b/lib/wt2html/pegTokenizer.pegjs @@ -1412,6 +1412,12 @@ tag_name_chars = [^\t\n\v />\0] tag_name = $([A-Za-z] tag_name_chars*) +extension_tag = + &{ return !stops.onStack('extTag'); } + extToken:xmlish_tag + &{ return extToken.name === 'extension'; } + { return extToken; } + xmlish_tag = & { // By the time we get to `doTableStuff` in the php parser, we've already @@ -2258,7 +2264,7 @@ = r:( $[^{}&<\-|/ \t\n\r\x0c>]+ / !inline_breaks !'/>' - s:( directive / [{}&<\-|/] ) { return s; } + s:( directive / extension_tag / [{}&<\-|/] ) { return s; } )+ { return tu.flattenString(r); } @@ -2268,7 +2274,7 @@ = r:( $[^{}&<\-|/'>]+ / !inline_breaks !'/>' - s:( directive / [{}&<\-|/] ) { return s; } + s:( directive / extension_tag / [{}&<\-|/] ) { return s; } )* { return tu.flattenString(r); } @@ -2278,7 +2284,7 @@ = r:( $[^{}&<\-|/">]+ / !inline_breaks !'/>' - s:( directive / [{}&<\-|/] ) { return s; } + s:( directive / extension_tag / [{}&<\-|/] ) { return s; } )* { return tu.flattenString(r); } @@ -2294,7 +2300,7 @@ // The stop set is space_or_newline and | which matches table_att_value. 
table_attribute_preprocessor_text = r:( $[^{}&<\-!\[ \t\n\r\x0c|]+ - / !inline_breaks s:( directive / [{}&<\-!\[] ) { return s; } + / !inline_breaks s:( directive / extension_tag / [{}&<\-!\[] ) { return s; } )+ { return tu.flattenString(r); } @@ -2302,7 +2308,7 @@ // The stop set is '\r\n| which matches table_att_value. table_attribute_preprocessor_text_single = r:( $[^{}&<\-!\['\r\n|]+ - / !inline_breaks s:( directive / [{}&<\-!\[] ) { return s; } + / !inline_breaks s:( directive / extension_tag / [{}&<\-!\[] ) { return s; } )* { return tu.flattenString(r); } @@ -2310,7 +2316,7 @@ // The stop set is "\r\n| which matches table_att_value. table_attribute_preprocessor_text_double = r:( $[^{}&<\-!\["\r\n|]+ - / !inline_breaks s:( directive / [{}&<\-!\[] ) { return s; } + / !inline_breaks s:( directive / extension_tag / [{}&<\-!\[] ) { return s; } )* { return tu.flattenString(r); } diff --git a/lib/wt2html/tt/AttributeExpander.js b/lib/wt2html/tt/AttributeExpander.js index 9a76502..59d88e8 100644 --- a/lib/wt2html/tt/AttributeExpander.js +++ b/lib/wt2html/tt/AttributeExpander.js @@ -122,7 +122,7 @@ * This helper method strips all meta tags introduced by * transclusions, etc. and returns the content. * ---------------------------------------------------------- */ -function stripMetaTags(tokens, wrapTemplates) { +function stripMetaTags(env, tokens, wrapTemplates) { var buf = []; var isPushed = false; var hasGeneratedContent = false; @@ -130,6 +130,20 @@ for (var i = 0, l = tokens.length; i < l; i++) { var t = tokens[i]; if ([TagTk, SelfclosingTagTk].indexOf(t.constructor) !== -1) { + // Reinsert expanded extension content that's been parsed to DOM + // as a string. This should match what the php parser does since + // extension content is html being placed in an attribute context. 
+ if (t.getAttribute('typeof') === 'mw:DOMFragment') { + var nodes = env.fragmentMap.get(t.dataAttribs.html); + var str = nodes.reduce(function(prev, next) { + // We strip tags since the sanitizer would normally drop + // tokens but we're already at html + return prev + next.textContent; + }, ''); + buf.push(str); + // TODO: Maybe cleanup the remaining about sibling wrappers + // but the sanitizer will drop them anyways + } isPushed = false; if (wrapTemplates) { // Strip all meta tags. @@ -348,7 +362,7 @@ metaTokens = updatedK.metaTokens; } else { // Scenario 2 from the documentation comment above. - updatedK = stripMetaTags(expandedK, wrapTemplates); + updatedK = stripMetaTags(env, expandedK, wrapTemplates); expandedK = updatedK.value; } @@ -438,7 +452,7 @@ metaTokens = updatedV.metaTokens; } else { // Scenario 2 from the documentation comment above. - updatedV = stripMetaTags(attrValTokens, wrapTemplates); + updatedV = stripMetaTags(env, attrValTokens, wrapTemplates); attrValTokens = updatedV.value; } expandedA.v = attrValTokens; diff --git a/tests/mockAPI.js b/tests/mockAPI.js index 1e16673..2cbfe55 100644 --- a/tests/mockAPI.js +++ b/tests/mockAPI.js @@ -431,6 +431,9 @@ }; var parse = function(text, onlypst) { + // TODO: Something that mocks this stuff more generally since there're + // a few tests that end up here + if (/<translate>/.test(text)) { return { text: text }; } var html = onlypst ? text.replace(/\{\{subst:echo\|([^}]+)\}\}/, "$1") : '\n'; return { text: html }; }; diff --git a/tests/parserTests.txt b/tests/parserTests.txt index 4f0023c..ec3b371 100644 --- a/tests/parserTests.txt +++ b/tests/parserTests.txt @@ -17386,6 +17386,15 @@ <div id="title=" data-parsoid='{"stx":"html"}'>HTML rocks</div> !! end +## QUESTION: Should this have ExpandedAttrs? +!! test +Extension tag in attribute value +!! wikitext +<span title="<translate>123</translate>">ok</span> +!! 
html/parsoid +<p><span title="123" data-parsoid='{"stx":"html","a":{"title":"123"},"sa":{"title":"<translate>123</translate>"}'>ok</span></p> +!! end + !! test table with multiple empty attribute values !! options -- To view, visit https://gerrit.wikimedia.org/r/403319 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I311d0fbbd2cdcf2a0c0a3dbf698912e6df5f3356 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
