Arlolra has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/406126 )
Change subject: Consolidate bailing out from ext/wiki links ...................................................................... Consolidate bailing out from ext/wiki links So that extlinks get the same expanded attribute treatment. Fixes the regression in, http://localhost:8000/ar.wikipedia.org/v3/page/html/%D8%B0%D8%B1%D8%A9_%D8%A7%D9%84%D9%87%D9%8A%D8%AF%D8%B1%D9%88%D8%AC%D9%8A%D9%86/25945469 Follows up to 217c68a by adding typeof "mw:ExpandedAttrs" to the nodes where xmlish attributes contain extension tags for expanding. Change-Id: I188f1c903619320052b0502ba13c7b3be7040e6f --- M lib/wt2html/tt/AttributeExpander.js M lib/wt2html/tt/LinkHandler.js M tests/parserTests.txt 3 files changed, 75 insertions(+), 102 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/26/406126/1 diff --git a/lib/wt2html/tt/AttributeExpander.js b/lib/wt2html/tt/AttributeExpander.js index 59d88e8..bb7a751 100644 --- a/lib/wt2html/tt/AttributeExpander.js +++ b/lib/wt2html/tt/AttributeExpander.js @@ -124,7 +124,6 @@ * ---------------------------------------------------------- */ function stripMetaTags(env, tokens, wrapTemplates) { var buf = []; - var isPushed = false; var hasGeneratedContent = false; for (var i = 0, l = tokens.length; i < l; i++) { @@ -141,10 +140,12 @@ return prev + next.textContent; }, ''); buf.push(str); + hasGeneratedContent = true; + continue; // TODO: Maybe cleanup the remaining about sibbling wrappers // but the sanitizer will drop them anyways } - isPushed = false; + if (wrapTemplates) { // Strip all meta tags. var type = t.getAttribute("typeof"); @@ -154,12 +155,12 @@ hasGeneratedContent = true; } } else { - isPushed = true; buf.push(t); + continue; } } - if (!isPushed && t.name !== "meta") { + if (t.name !== "meta") { // Dont strip token if it is not a meta-tag buf.push(t); } diff --git a/lib/wt2html/tt/LinkHandler.js b/lib/wt2html/tt/LinkHandler.js index 7397b3c..a9aa56c 100644 --- a/lib/wt2html/tt/LinkHandler.js +++ b/lib/wt2html/tt/LinkHandler.js @@ -210,13 +210,71 @@ }); }; +var bailTokens = function(env, token, isExtLink) { + var count = isExtLink ? 1 : 2; + var tokens = ["[".repeat(count)]; + + if (/mw:ExpandedAttrs/.test(token.getAttribute("typeof"))) { + var dataMW = JSON.parse(token.getAttribute("data-mw")).attribs; + var html; + for (var i = 0; i < dataMW.length; i++) { + if (dataMW[i][0].txt === "href") { + html = dataMW[i][1].html; + break; + } + } + + // Since we are splicing off '['s and ']'s from the incoming token, + // adjust TSR of the DOM-fragment by `count` each on both end. + var tsr = token.dataAttribs && token.dataAttribs.tsr; + if (tsr && typeof (tsr[0]) === 'number' && typeof (tsr[1]) === 'number') { + tsr = [tsr[0] + count, tsr[1] - count]; + } else { + tsr = null; + } + + var body = DU.ppToDOM(html); + var dft = DU.buildDOMFragmentTokens(env, token, body, null, { + noPWrapping: true, + tsr: tsr, + }); + + tokens = tokens.concat(dft); + } else { + // FIXME: Duplicate work + tokens = tokens.concat(token.getAttribute("href")); + } + + // Append rest of the attributes + if (isExtLink) { + var content = Util.lookup(token.attribs, 'mw:content'); + + // FIXME: Use this attribute in regular extline + // cases to rt spaces correctly maybe? Unsure + // it is worth it. + var spaces = token.getAttribute('spaces') || ''; + + tokens = tokens.concat(spaces, content); + } else { + token.attribs.forEach(function(a) { + if (a.k === "mw:maybeContent") { + tokens = tokens.concat("|", a.v); + } + }); + } + + tokens.push("]".repeat(count)); + + return tokens; +}; + /** * Handle a mw:WikiLink token. */ WikiLinkHandler.prototype.onWikiLink = function(token, frame, cb) { var env = this.manager.env; var hrefKV = Util.lookupKV(token.attribs, 'href'); - var target, tokens, tsr; + var target; try { target = this.getWikiLinkTargetInfo(token, hrefKV); @@ -226,47 +284,7 @@ } if (!target) { - tokens = ["[["]; - if (/mw:ExpandedAttrs/.test(token.getAttribute("typeof"))) { - var dataMW = JSON.parse(token.getAttribute("data-mw")).attribs; - var html; - for (var i = 0; i < dataMW.length; i++) { - if (dataMW[i][0].txt === "href") { - html = dataMW[i][1].html; - break; - } - } - - // Since we are splicing off '[[' and ']]' from the incoming token, - // adjust TSR of the DOM-fragment by 2 each on both end. - tsr = token.dataAttribs && token.dataAttribs.tsr; - if (tsr && typeof (tsr[0]) === 'number' && typeof (tsr[1]) === 'number') { - tsr = [tsr[0] + 2, tsr[1] - 2]; - } else { - tsr = null; - } - - var body = DU.ppToDOM(html); - var dft = DU.buildDOMFragmentTokens(env, token, body, null, { - noPWrapping: true, - tsr: tsr, - }); - - tokens = tokens.concat(dft); - } else { - // FIXME: Duplicate work - tokens = tokens.concat(token.getAttribute("href")); - } - - // Append rest of the attributes - token.attribs.forEach(function(a) { - if (a.k === "mw:maybeContent") { - tokens = tokens.concat("|", a.v); - } - }); - - tokens.push("]]"); - cb({ tokens: tokens }); + cb({ tokens: bailTokens(env, token, false) }); return; } @@ -293,13 +311,13 @@ var extToks = this.urlParser.tokenizeExtlink(src); if (extToks) { - tsr = token.dataAttribs && token.dataAttribs.tsr; + var tsr = token.dataAttribs && token.dataAttribs.tsr; Util.shiftTokenTSR(extToks, 1 + (tsr ? tsr[0] : 0)); } else { extToks = src; } - tokens = ['['].concat(extToks, ']'); + var tokens = ['['].concat(extToks, ']'); tokens.rank = this.rank - 0.002; // Magic rank, since extlink is -0.001 cb({ tokens: tokens }); return; @@ -308,7 +326,6 @@ // Ok, it looks like we have a sane href. Figure out which handler to use. var isRedirect = !!token.getAttribute('redirect'); var handler = this.getWikiLinkHandler(token, target, isRedirect); - // and call it. handler(token, frame, cb, target); }; @@ -2137,51 +2154,7 @@ }); } else { // Not a link, convert href to plain text. - var tokens = ['[']; - var closingTok = null; - var spaces = token.getAttribute('spaces') || ''; - - if ((token.getAttribute("typeof") || "").match(/mw:ExpandedAttrs/)) { - // The token 'non-url' came from a template. - // Introduce a span and capture the original source for RT purposes. - var da = token.dataAttribs; - // targetOff covers all spaces before content - // and we need src without those spaces. - var tsr0b = da.tsr[0] + 1; - var tsr1b = da.targetOff - spaces.length; - var span = Util.placeholder({}, { // {} !== null - tsr: [tsr0b, tsr1b], - src: env.page.src.substring(tsr0b, tsr1b), - })[0]; // We only want the first token. - tokens.push(span); - closingTok = new EndTagTk('span'); - } - - var hrefText = token.getAttribute("href"); - if (Array.isArray(hrefText)) { - tokens = tokens.concat(hrefText); - } else { - tokens.push(hrefText); - } - - if (closingTok) { - tokens.push(closingTok); - } - - // FIXME: Use this attribute in regular extline - // cases to rt spaces correctly maybe? Unsure - // it is worth it. - if (spaces) { - tokens.push(spaces); - } - - if (content.length) { - tokens = tokens.concat(content); - } - - tokens.push(']'); - - cb({ tokens: tokens }); + cb({ tokens: bailTokens(env, token, true) }); } }; diff --git a/tests/parserTests.txt b/tests/parserTests.txt index fd05393..7d87a78 100644 --- a/tests/parserTests.txt +++ b/tests/parserTests.txt @@ -5888,12 +5888,12 @@ [foo <i>bar</i>] [fool's] errand [fool's errand] -[<span typeof="mw:Placeholder" data-parsoid='{"src":"{{echo|foo}}"}'>foo</span>] -[<span typeof="mw:Placeholder" data-parsoid='{"src":"{{echo|foo}}"}'>foo</span> bar] -[<span typeof="mw:Placeholder" data-parsoid='{"src":"{{echo|foo}}"}'>foo</span> <i>bar</i>] -[<span typeof="mw:Placeholder" data-parsoid='{"src":"{{echo|foo}}l's"}'>fool's</span>] errand -[<span typeof="mw:Placeholder" data-parsoid='{"src":"{{echo|foo}}l's"}'>fool's</span> errand] -[<span typeof="mw:Placeholder" data-parsoid='{"src":"url={{echo|foo}}"}'>url=foo</span>] +[<span about="#mwt19" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>] +[<span about="#mwt20" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span> bar] +[<span about="#mwt21" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span> <i>bar</i>] +[<span about="#mwt22" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>l's] errand +[<span about="#mwt23" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>l's errand] +[url=<span about="#mwt24" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>] [url=<a rel="mw:ExtLink" href="http://example.com">http://example.com</a>] [http:// bare protocols don't count]</p> !! end @@ -17397,13 +17397,12 @@ <div id="title=" data-parsoid='{"stx":"html"}'>HTML rocks</div> !! end -## QUESTION: Should this have ExpandedAttrs? !! test Extension tag in attribute value !! wikitext <span title="<translate>123</translate>">ok</span> !! html/parsoid -<p><span title="123" data-parsoid='{"stx":"html","a":{"title":"123"},"sa":{"title":"<translate>123</translate>"}}'>ok</span></p> +<p><span title="123" about="#mwt4" typeof="mw:ExpandedAttrs" data-parsoid='{"stx":"html","a":{"title":"123"},"sa":{"title":"<translate>123</translate>"}}' data-mw='{"attribs":[[{"txt":"title"},{"html":"<translate typeof=\"mw:Extension/translate\" about=\"#mwt3\" data-parsoid='{\"dsr\":[13,39,2,2]}' data-mw='{\"name\":\"translate\",\"attrs\":{},\"body\":{\"extsrc\":\"123\"}}'>123</translate>"}]]}'>ok</span></p> !! end !! test -- To view, visit https://gerrit.wikimedia.org/r/406126 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I188f1c903619320052b0502ba13c7b3be7040e6f Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits