Marcoil has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/130583

Change subject: WIP: Process template params in parallel with template expansion
......................................................................

WIP: Process template params in parallel with template expansion

Change-Id: I47fc5839d59f3ddc3343df31d24bf8f5516eb1e6
---
M lib/ext.core.TemplateHandler.js
M lib/mediawiki.TokenTransformManager.js
2 files changed, 117 insertions(+), 112 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/83/130583/1

diff --git a/lib/ext.core.TemplateHandler.js b/lib/ext.core.TemplateHandler.js
index ddd6a78..0f07ef0 100644
--- a/lib/ext.core.TemplateHandler.js
+++ b/lib/ext.core.TemplateHandler.js
@@ -114,10 +114,7 @@
                                this.convertAttribsToString(state, 
token.attribs, cb);
                                return;
                        } else {
-                               var templateName = tgt.target,
-                                       srcHandler = state.srcCB.bind(
-                                               this, state, frame, cb,
-                                               { name: templateName, attribs: 
[], cacheKey: text });
+                               var templateName = tgt.target;
                                // Check if we have an expansion for this 
template in the cache
                                // already
                                if (env.transclusionCache[text]) {
@@ -129,8 +126,24 @@
 
                                        cb({ tokens: toks });
                                } else {
+                                       // Use a TokenAccumulator to divide the 
template processing
+                                       // in two parts: The child part will 
take care of the main
+                                       // template element (including 
parameters) and the sibling
+                                       // will process the returned template 
expansion
+                                       state.accum = new 
TokenAccumulator(this.manager, cb);
+                                       var accumReceiveToksFromSibling = 
state.accum.receiveToksFromSibling.bind(state.accum),
+                                               srcHandler = state.srcCB.bind(
+                                                       this, state, frame,
+                                                       
accumReceiveToksFromSibling,
+                                                       { name: templateName, 
attribs: [], cacheKey: text })
+
+                                       // Process the main template element
+                                       this._encapsulateTemplate(state,
+                                               
state.accum.receiveToksFromChild.bind(state.accum));
+                                       // Fetch and process the template 
expansion
                                        this.fetchExpandedTpl( env.page.name || 
'',
-                                                       text, 
PreprocessorRequest, cb, srcHandler);
+                                                       text, 
PreprocessorRequest,
+                                                       
accumReceiveToksFromSibling, srcHandler);
                                }
                        }
                } else {
@@ -596,7 +609,7 @@
        return tokens;
 };
 
-TemplateHandler.prototype.addEncapsulationInfo = function ( state, chunk ) {
+TemplateHandler.prototype.getEncapsulationInfo = function (state) {
        // TODO
        // * only add this information for top-level includes, but track 
parameter
        // expansion in lower-level templates
@@ -607,8 +620,7 @@
        // * ref all tables to this (just add about)
        // * ref end token to this, add property="mw:Transclusion/End"
 
-       var done = false,
-               attrs = [
+       var attrs = [
                        new KV('typeof', state.wrapperType),
                        new KV('about', '#' + state.wrappedObjectId),
                        new KV('id', state.wrappedObjectId)
@@ -616,32 +628,12 @@
                dataParsoid = {
                        tsr: Util.clone(state.token.dataAttribs.tsr),
                        src: state.token.dataAttribs.src
-               };
-
-       if ( chunk.length ) {
-               var firstToken = chunk[0];
-               if ( firstToken.constructor === String ) {
-                       // Also include following string tokens
-                       var stringTokens = [ chunk.shift() ];
-                       while ( chunk.length && chunk[0].constructor === String 
) {
-                               stringTokens.push( chunk.shift() );
-                       }
-                       // Wrap in span with info
-                       var span = new TagTk( 'span', attrs, dataParsoid );
-                       chunk = [span].concat(stringTokens, [ new EndTagTk( 
'span' ) ], chunk);
-                       done = true;
-               }
-       }
-
-       if (!done) {
+               },
                // add meta tag
-               chunk = [new SelfclosingTagTk( 'meta', attrs, dataParsoid 
)].concat(chunk);
-       }
+               chunk = [new SelfclosingTagTk( 'meta', attrs, dataParsoid )];
 
        chunk.rank = this.rank;
-
-       // add about ref to all tables
-       return this.addAboutToTableElements( state, chunk );
+       return chunk;
 };
 
 TemplateHandler.prototype.getEncapsulationInfoEndTag = function ( state ) {
@@ -726,10 +718,76 @@
 };
 
 /**
+ * Process the main template element, including the arguments
+ */
+TemplateHandler.prototype._encapsulateTemplate = function (state, cb) {
+       var i, n,
+               env = this.manager.env,
+               chunk = this.getEncapsulationInfo(state);
+
+       if (!this.options.inTemplate && state.recordArgDict) {
+               // Get the arg dict
+               var argInfo = this.getArgInfo(state),
+                       argDict = argInfo.dict;
+
+               // Collect the parameters that need parsing into HTML, that is,
+               // those that are not simple strings.
+               // This optimizes for the common case where all are simple 
strings,
+               // in which we don't need to go async.
+               var params = [];
+               for (i = 0, n = argInfo.paramInfos.length; i < n; i++) {
+                       var paramInfo = argInfo.paramInfos[i],
+                               param = argDict.params[paramInfo.k],
+                               paramTokens;
+                       if (paramInfo.named) {
+                               paramTokens = 
state.token.getAttribute(paramInfo.k);
+                       } else {
+                               paramTokens = 
state.token.attribs[paramInfo.k].v;
+                       }
+
+                       // No need to pass through a whole sub-pipeline to get 
the
+                       // html if the param is either a single string, or if 
it's
+                       // just text, comments or newlines.
+                       if (paramTokens &&
+                           (paramTokens.constructor === String ||
+                            isSimpleParam(paramTokens))) {
+                               param.html = param.wt;
+                       } else {
+                               // Prepare the data needed to parse to HTML
+                               params.push({
+                                       param: param,
+                                       info: paramInfo,
+                                       tokens: paramTokens
+                               });
+                       }
+               }
+
+               if (params.length) {
+                       // TODO: We could avoid going async by checking if all 
params are strings
+                       // and, in that case returning them immediately.
+                       async.each(params, getParamHTML.bind(this), function 
(err) {
+                               // Use a data-attribute to prevent the 
sanitizer from stripping this
+                               // attribute before it reaches the DOM pass 
where it is needed.
+                               chunk[0].attribs.push(new KV("data-mw-arginfo", 
JSON.stringify(argInfo)));
+                               env.dp( 'TemplateHandler._encapsulateTemplate', 
chunk );
+                               cb({tokens: chunk});
+                       }.bind(this));
+
+                       cb({tokens: [], async: true});
+                       return;
+               } else {
+                       chunk[0].attribs.push(new KV("data-mw-arginfo", 
JSON.stringify(argInfo)));
+               }
+       }
+
+       env.dp( 'TemplateHandler._encapsulateTemplate', chunk );
+       cb({tokens: chunk});
+};
+
+/**
  * Handle chunk emitted from the input pipeline after feeding it a template
  */
 TemplateHandler.prototype._onChunk = function( state, cb, chunk ) {
-       var env = this.manager.env;
        chunk = Util.stripEOFTkfromTokens( chunk );
 
        var i, n;
@@ -748,73 +806,22 @@
                }
        }
 
-       if (this.options.wrapTemplates) {
-               if (!state.emittedFirstChunk && !this.options.inTemplate) {
-                       chunk = this.addEncapsulationInfo(state, chunk );
-                       chunk.rank = this.rank;
-                       state.emittedFirstChunk = true;
-
-                       /* TODO: Move param conversion to HTML to an 
independent method so
-                        * it can be used by the onDocument path when we enable 
it.
-                        */
-                       if (state.recordArgDict) {
-                               // Get the arg dict
-                               var argInfo = this.getArgInfo(state),
-                                       argDict = argInfo.dict;
-
-                               // Collect the parameters that need parsing 
into HTML, that is,
-                               // those that are not simple strings.
-                               // This optimizes for the common case where all 
are simple strings,
-                               // in which we don't need to go async.
-                               var params = [];
-                               for (i = 0, n = argInfo.paramInfos.length; i < 
n; i++) {
-                                       var paramInfo = argInfo.paramInfos[i],
-                                               param = 
argDict.params[paramInfo.k],
-                                               paramTokens;
-                                       if (paramInfo.named) {
-                                               paramTokens = 
state.token.getAttribute(paramInfo.k);
-                                       } else {
-                                               paramTokens = 
state.token.attribs[paramInfo.k].v;
-                                       }
-
-                                       // No need to pass through a whole 
sub-pipeline to get the
-                                       // html if the param is either a single 
string, or if it's
-                                       // just text, comments or newlines.
-                                       if (paramTokens &&
-                                           (paramTokens.constructor === String 
||
-                                            isSimpleParam(paramTokens))) {
-                                               param.html = param.wt;
-                                       } else {
-                                               // Prepare the data needed to 
parse to HTML
-                                               params.push({
-                                                       param: param,
-                                                       info: paramInfo,
-                                                       tokens: paramTokens
-                                               });
-                                       }
-                               }
-
-                               if (params.length) {
-                                       state.accum = new 
TokenAccumulator(this.manager, cb);
-                                       // TODO: We could avoid going async by 
checking if all params are strings
-                                       // and, in that case returning them 
immediately.
-                                       async.each(params, 
getParamHTML.bind(this), function (err) {
-                                               // Use a data-attribute to 
prevent the sanitizer from stripping this
-                                               // attribute before it reaches 
the DOM pass where it is needed.
-                                               chunk[0].attribs.push(new 
KV("data-mw-arginfo", JSON.stringify(argInfo)));
-                                               env.dp( 
'TemplateHandler._onChunk', chunk );
-                                               
state.accum.receiveToksFromChild({tokens: chunk});
-                                       }.bind(this));
-
-                                       return;
-                               } else {
-                                       chunk[0].attribs.push(new 
KV("data-mw-arginfo", JSON.stringify(argInfo)));
-                               }
+       // Move all string tokens inside a span tag
+       if ( chunk.length ) {
+               var firstToken = chunk[0];
+               if ( firstToken.constructor === String ) {
+                       // Also include following string tokens
+                       var stringTokens = [ chunk.shift() ];
+                       while ( chunk.length && chunk[0].constructor === String 
) {
+                               stringTokens.push( chunk.shift() );
                        }
-               } else {
-                       chunk = this.addAboutToTableElements( state, chunk );
+                       // Wrap in span with info
+                       var span = new TagTk( 'span' );
+                       chunk = [span].concat(stringTokens, [ new EndTagTk( 
'span' ) ], chunk);
                }
-       } else {
+       }
+
+       if (!this.options.wrapTemplates) {
                // Ignore comments in template transclusion mode
                var newChunk = [];
                for (i = 0, n = chunk.length; i < n; i++) {
@@ -825,12 +832,12 @@
                chunk = newChunk;
        }
 
-       env.dp( 'TemplateHandler._onChunk', chunk );
-       if (state.accum) {
-               state.accum.receiveToksFromSibling({tokens: chunk, async: 
true});
-       } else {
-               cb({tokens: chunk, async: true});
-       }
+       // add about ref to all tables
+       chunk = this.addAboutToTableElements( state, chunk );
+
+       this.manager.env.dp( 'TemplateHandler._onChunk', chunk );
+       chunk.rank = this.rank;
+       cb({tokens: chunk, async: true});
 };
 
 /**
@@ -843,12 +850,7 @@
                var endTag = this.getEncapsulationInfoEndTag(state),
                        res = { tokens: [endTag] };
                res.tokens.rank = this.rank;
-               state.emittedFirstChunk = false;
-               if (state.accum) {
-                       state.accum.receiveToksFromSibling( res );
-               } else {
-                       cb(res);
-               }
+               cb(res);
        } else {
                cb( { tokens: [] } );
        }
@@ -1065,7 +1067,8 @@
                // append request, process in document order
                env.requestQueue[text].once( 'src', cb );
 
-               parentCB ( { async: true } );
+               // Advise we're going async
+               parentCB({tokens: [], async: true});
        }
 };
 
diff --git a/lib/mediawiki.TokenTransformManager.js b/lib/mediawiki.TokenTransformManager.js
index f1302fc..48b2969 100644
--- a/lib/mediawiki.TokenTransformManager.js
+++ b/lib/mediawiki.TokenTransformManager.js
@@ -1179,7 +1179,9 @@
 TokenAccumulator.prototype.receiveToksFromChild = function ( ret ) {
        ret = verifyTokensIntegrity(this.manager.env, ret, false);
        // console.warn("\nTA-" + this.uid + "; c: " + this.waitForChild + "; 
s: " + this.waitForSibling + " <-- from child: " + JSON.stringify(ret));
-       if (!ret.tokens.rank) {
+       // Empty tokens are used to signal async, so they don't need to be in 
the
+       // same rank
+       if (ret.tokens.length && !ret.tokens.rank) {
                this.manager.env.log('error/tta/child/rank/none', ret.tokens);
                ret.tokens.rank = this.manager.phaseEndRank;
        }
@@ -1228,7 +1230,7 @@
                // allow the sibling to go direct, and call back parent with
                // tokens. The internal accumulator is empty at this stage, as 
its
                // tokens got passed to the parent when the child was done.
-               if (!ret.tokens.rank) {
+               if (ret.tokens.length && !ret.tokens.rank) {
                        
this.manager.env.dp('TokenAccumulator.receiveToksFromSibling without rank', 
ret.tokens);
                        ret.tokens.rank = this.manager.phaseEndRank;
                }

-- 
To view, visit https://gerrit.wikimedia.org/r/130583
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I47fc5839d59f3ddc3343df31d24bf8f5516eb1e6
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: dom_tpl_params_3
Gerrit-Owner: Marcoil <marc...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to