Marcoil has uploaded a new change for review. https://gerrit.wikimedia.org/r/130583
Change subject: WIP: Process template params in parallel with template expansion ...................................................................... WIP: Process template params in parallel with template expansion Change-Id: I47fc5839d59f3ddc3343df31d24bf8f5516eb1e6 --- M lib/ext.core.TemplateHandler.js M lib/mediawiki.TokenTransformManager.js 2 files changed, 117 insertions(+), 112 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/83/130583/1 diff --git a/lib/ext.core.TemplateHandler.js b/lib/ext.core.TemplateHandler.js index ddd6a78..0f07ef0 100644 --- a/lib/ext.core.TemplateHandler.js +++ b/lib/ext.core.TemplateHandler.js @@ -114,10 +114,7 @@ this.convertAttribsToString(state, token.attribs, cb); return; } else { - var templateName = tgt.target, - srcHandler = state.srcCB.bind( - this, state, frame, cb, - { name: templateName, attribs: [], cacheKey: text }); + var templateName = tgt.target; // Check if we have an expansion for this template in the cache // already if (env.transclusionCache[text]) { @@ -129,8 +126,24 @@ cb({ tokens: toks }); } else { + // Use a TokenAccumulator to divide the template processing + // in two parts: The child part will take care of the main + // template element (including parameters) and the sibling + // will process the returned template expansion + state.accum = new TokenAccumulator(this.manager, cb); + var accumReceiveToksFromSibling = state.accum.receiveToksFromSibling.bind(state.accum), + srcHandler = state.srcCB.bind( + this, state, frame, + accumReceiveToksFromSibling, + { name: templateName, attribs: [], cacheKey: text }) + + // Process the main template element + this._encapsulateTemplate(state, + state.accum.receiveToksFromChild.bind(state.accum)); + // Fetch and process the template expansion this.fetchExpandedTpl( env.page.name || '', - text, PreprocessorRequest, cb, srcHandler); + text, PreprocessorRequest, + accumReceiveToksFromSibling, srcHandler); } } } else { @@ -596,7 +609,7 @@ return tokens; }; -TemplateHandler.prototype.addEncapsulationInfo = function ( state, chunk ) { +TemplateHandler.prototype.getEncapsulationInfo = function (state) { // TODO // * only add this information for top-level includes, but track parameter // expansion in lower-level templates @@ -607,8 +620,7 @@ // * ref all tables to this (just add about) // * ref end token to this, add property="mw:Transclusion/End" - var done = false, - attrs = [ + var attrs = [ new KV('typeof', state.wrapperType), new KV('about', '#' + state.wrappedObjectId), new KV('id', state.wrappedObjectId) @@ -616,32 +628,12 @@ dataParsoid = { tsr: Util.clone(state.token.dataAttribs.tsr), src: state.token.dataAttribs.src - }; - - if ( chunk.length ) { - var firstToken = chunk[0]; - if ( firstToken.constructor === String ) { - // Also include following string tokens - var stringTokens = [ chunk.shift() ]; - while ( chunk.length && chunk[0].constructor === String ) { - stringTokens.push( chunk.shift() ); - } - // Wrap in span with info - var span = new TagTk( 'span', attrs, dataParsoid ); - chunk = [span].concat(stringTokens, [ new EndTagTk( 'span' ) ], chunk); - done = true; - } - } - - if (!done) { + }, // add meta tag - chunk = [new SelfclosingTagTk( 'meta', attrs, dataParsoid )].concat(chunk); - } + chunk = [new SelfclosingTagTk( 'meta', attrs, dataParsoid )]; chunk.rank = this.rank; - - // add about ref to all tables - return this.addAboutToTableElements( state, chunk ); + return chunk; }; TemplateHandler.prototype.getEncapsulationInfoEndTag = function ( state ) { @@ -726,10 +718,76 @@ }; /** + * Process the main template element, including the arguments + */ +TemplateHandler.prototype._encapsulateTemplate = function (state, cb) { + var i, n, + env = this.manager.env, + chunk = this.getEncapsulationInfo(state); + + if (!this.options.inTemplate && state.recordArgDict) { + // Get the arg dict + var argInfo = this.getArgInfo(state), + argDict = argInfo.dict; + + // Collect the parameters that need parsing into HTML, that is, + // those that are not simple strings. + // This optimizes for the common case where all are simple strings, + // in which we don't need to go async. + var params = []; + for (i = 0, n = argInfo.paramInfos.length; i < n; i++) { + var paramInfo = argInfo.paramInfos[i], + param = argDict.params[paramInfo.k], + paramTokens; + if (paramInfo.named) { + paramTokens = state.token.getAttribute(paramInfo.k); + } else { + paramTokens = state.token.attribs[paramInfo.k].v; + } + + // No need to pass through a whole sub-pipeline to get the + // html if the param is either a single string, or if it's + // just text, comments or newlines. + if (paramTokens && + (paramTokens.constructor === String || + isSimpleParam(paramTokens))) { + param.html = param.wt; + } else { + // Prepare the data needed to parse to HTML + params.push({ + param: param, + info: paramInfo, + tokens: paramTokens + }); + } + } + + if (params.length) { + // TODO: We could avoid going async by checking if all params are strings + // and, in that case returning them immediately. + async.each(params, getParamHTML.bind(this), function (err) { + // Use a data-attribute to prevent the sanitizer from stripping this + // attribute before it reaches the DOM pass where it is needed. + chunk[0].attribs.push(new KV("data-mw-arginfo", JSON.stringify(argInfo))); + env.dp( 'TemplateHandler._encapsulateTemplate', chunk ); + cb({tokens: chunk}); + }.bind(this)); + + cb({tokens: [], async: true}); + return; + } else { + chunk[0].attribs.push(new KV("data-mw-arginfo", JSON.stringify(argInfo))); + } + } + + env.dp( 'TemplateHandler._encapsulateTemplate', chunk ); + cb({tokens: chunk}); +}; + +/** * Handle chunk emitted from the input pipeline after feeding it a template */ TemplateHandler.prototype._onChunk = function( state, cb, chunk ) { - var env = this.manager.env; chunk = Util.stripEOFTkfromTokens( chunk ); var i, n; @@ -748,73 +806,22 @@ } } - if (this.options.wrapTemplates) { - if (!state.emittedFirstChunk && !this.options.inTemplate) { - chunk = this.addEncapsulationInfo(state, chunk ); - chunk.rank = this.rank; - state.emittedFirstChunk = true; - - /* TODO: Move param conversion to HTML to an independent method so - * it can be used by the onDocument path when we enable it. - */ - if (state.recordArgDict) { - // Get the arg dict - var argInfo = this.getArgInfo(state), - argDict = argInfo.dict; - - // Collect the parameters that need parsing into HTML, that is, - // those that are not simple strings. - // This optimizes for the common case where all are simple strings, - // in which we don't need to go async. - var params = []; - for (i = 0, n = argInfo.paramInfos.length; i < n; i++) { - var paramInfo = argInfo.paramInfos[i], - param = argDict.params[paramInfo.k], - paramTokens; - if (paramInfo.named) { - paramTokens = state.token.getAttribute(paramInfo.k); - } else { - paramTokens = state.token.attribs[paramInfo.k].v; - } - - // No need to pass through a whole sub-pipeline to get the - // html if the param is either a single string, or if it's - // just text, comments or newlines. - if (paramTokens && - (paramTokens.constructor === String || - isSimpleParam(paramTokens))) { - param.html = param.wt; - } else { - // Prepare the data needed to parse to HTML - params.push({ - param: param, - info: paramInfo, - tokens: paramTokens - }); - } - } - - if (params.length) { - state.accum = new TokenAccumulator(this.manager, cb); - // TODO: We could avoid going async by checking if all params are strings - // and, in that case returning them immediately. - async.each(params, getParamHTML.bind(this), function (err) { - // Use a data-attribute to prevent the sanitizer from stripping this - // attribute before it reaches the DOM pass where it is needed. - chunk[0].attribs.push(new KV("data-mw-arginfo", JSON.stringify(argInfo))); - env.dp( 'TemplateHandler._onChunk', chunk ); - state.accum.receiveToksFromChild({tokens: chunk}); - }.bind(this)); - - return; - } else { - chunk[0].attribs.push(new KV("data-mw-arginfo", JSON.stringify(argInfo))); - } + // Move all string tokens inside a span tag + if ( chunk.length ) { + var firstToken = chunk[0]; + if ( firstToken.constructor === String ) { + // Also include following string tokens + var stringTokens = [ chunk.shift() ]; + while ( chunk.length && chunk[0].constructor === String ) { + stringTokens.push( chunk.shift() ); } - } else { - chunk = this.addAboutToTableElements( state, chunk ); + // Wrap in span with info + var span = new TagTk( 'span' ); + chunk = [span].concat(stringTokens, [ new EndTagTk( 'span' ) ], chunk); } - } else { + } + + if (!this.options.wrapTemplates) { // Ignore comments in template transclusion mode var newChunk = []; for (i = 0, n = chunk.length; i < n; i++) { @@ -825,12 +832,12 @@ chunk = newChunk; } - env.dp( 'TemplateHandler._onChunk', chunk ); - if (state.accum) { - state.accum.receiveToksFromSibling({tokens: chunk, async: true}); - } else { - cb({tokens: chunk, async: true}); - } + // add about ref to all tables + chunk = this.addAboutToTableElements( state, chunk ); + + this.manager.env.dp( 'TemplateHandler._onChunk', chunk ); + chunk.rank = this.rank; + cb({tokens: chunk, async: true}); }; /** @@ -843,12 +850,7 @@ var endTag = this.getEncapsulationInfoEndTag(state), res = { tokens: [endTag] }; res.tokens.rank = this.rank; - state.emittedFirstChunk = false; - if (state.accum) { - state.accum.receiveToksFromSibling( res ); - } else { - cb(res); - } + cb(res); } else { cb( { tokens: [] } ); } @@ -1065,7 +1067,8 @@ // append request, process in document order env.requestQueue[text].once( 'src', cb ); - parentCB ( { async: true } ); + // Advise we're going async + parentCB({tokens: [], async: true}); } }; diff --git a/lib/mediawiki.TokenTransformManager.js b/lib/mediawiki.TokenTransformManager.js index f1302fc..48b2969 100644 --- a/lib/mediawiki.TokenTransformManager.js +++ b/lib/mediawiki.TokenTransformManager.js @@ -1179,7 +1179,9 @@ TokenAccumulator.prototype.receiveToksFromChild = function ( ret ) { ret = verifyTokensIntegrity(this.manager.env, ret, false); // console.warn("\nTA-" + this.uid + "; c: " + this.waitForChild + "; s: " + this.waitForSibling + " <-- from child: " + JSON.stringify(ret)); - if (!ret.tokens.rank) { + // Empty tokens are used to signal async, so they don't need to be in the + // same rank + if (ret.tokens.length && !ret.tokens.rank) { this.manager.env.log('error/tta/child/rank/none', ret.tokens); ret.tokens.rank = this.manager.phaseEndRank; } @@ -1228,7 +1230,7 @@ // allow the sibling to go direct, and call back parent with // tokens. The internal accumulator is empty at this stage, as its // tokens got passed to the parent when the child was done. - if (!ret.tokens.rank) { + if (ret.tokens.length && !ret.tokens.rank) { this.manager.env.dp('TokenAccumulator.receiveToksFromSibling without rank', ret.tokens); ret.tokens.rank = this.manager.phaseEndRank; } -- To view, visit https://gerrit.wikimedia.org/r/130583 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I47fc5839d59f3ddc3343df31d24bf8f5516eb1e6 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: dom_tpl_params_3 Gerrit-Owner: Marcoil <marc...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits