jenkins-bot has submitted this change and it was merged.
Change subject: Batch MW parser and imageinfo API requests
......................................................................
Batch MW parser and imageinfo API requests
* Implement a system for mixed batches of parser, preprocessor and
imageinfo requests. This uses an MW extension specific to Parsoid
which provides the relevant API.
* Implement caching inside Batcher, replacing env.pageCache, except for
its original use case. parserTests.js uses env.pageCache to inject
template wikitext, which will still work -- it was never really
correct to allow parserTests.js to inject other API responses into the
cache.
* Remove Processor parameter from fetchExpandedTpl() since it was always
the same.
Bug: T45888
Change-Id: I2bd6f574bca8c64302810a9569f9390c4cf64626
---
M lib/ParsoidLogger.js
M lib/ext.core.ExtensionHandler.js
M lib/ext.core.LinkHandler.js
M lib/ext.core.TemplateHandler.js
M lib/mediawiki.ApiRequest.js
A lib/mediawiki.Batcher.js
M lib/mediawiki.ParsoidConfig.js
M lib/mediawiki.TokenTransformManager.js
M lib/mediawiki.Util.js
M lib/mediawiki.parser.environment.js
10 files changed, 580 insertions(+), 142 deletions(-)
Approvals:
Arlolra: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/ParsoidLogger.js b/lib/ParsoidLogger.js
index 5207f0a..b6df3e1 100644
--- a/lib/ParsoidLogger.js
+++ b/lib/ParsoidLogger.js
@@ -146,6 +146,7 @@
"trace/selser": "[SELSER]",
"trace/domdiff": "[DOM-DIFF]",
"trace/wt-escape": "[wt-esc]",
+ "trace/batcher": "[batcher]",
};
ParsoidLogger.prototype._defaultTracerBackend = function(logData, cb) {
diff --git a/lib/ext.core.ExtensionHandler.js b/lib/ext.core.ExtensionHandler.js
index 054edcf..ad86e54 100644
--- a/lib/ext.core.ExtensionHandler.js
+++ b/lib/ext.core.ExtensionHandler.js
@@ -4,7 +4,6 @@
var coreutil = require('util');
var Util = require('./mediawiki.Util.js').Util;
var DU = require('./mediawiki.DOMUtils.js').DOMUtils;
-var PHPParseRequest = require('./mediawiki.ApiRequest.js').PHPParseRequest;
var defines = require('./mediawiki.parser.defines.js');
// define some constructor shortcuts
@@ -83,16 +82,12 @@
var env = this.env;
// We are about to start an async request for an extension
env.dp('Note: trying to expand ', text);
-
- // Start a new request if none is outstanding
- if (env.requestQueue[text] === undefined) {
- env.tp('Note: Starting new request for ' + text);
- env.requestQueue[text] = new PHPParseRequest(env, title, text);
+ var cacheEntry = env.batcher.parse(title, text, cb);
+ if (cacheEntry !== undefined) {
+ cb(cacheEntry);
+ } else {
+ parentCB ({ async: true });
}
- // append request, process in document order
- env.requestQueue[text].once('src', cb);
-
- parentCB ({ async: true });
};
function normalizeExtOptions(options) {
diff --git a/lib/ext.core.LinkHandler.js b/lib/ext.core.LinkHandler.js
index cc5a1d6..4fc51f5 100644
--- a/lib/ext.core.LinkHandler.js
+++ b/lib/ext.core.LinkHandler.js
@@ -947,8 +947,17 @@
var containerClose = new EndTagTk(containerName);
if (!err && data) {
- var ns = data.imgns;
- image = data.pages[ns + ':' + title.key];
+ if (data.batchResponse !== undefined) {
+ info = data.batchResponse;
+ } else {
+ var ns = data.imgns;
+ image = data.pages[ns + ':' + title.key];
+ if (image && image.imageinfo && image.imageinfo[0]) {
+ info = image.imageinfo[0];
+ } else {
+ info = false;
+ }
+ }
}
// FIXME gwicke: Make sure our filename is never of the form
@@ -961,18 +970,14 @@
// full 'filename' does not match any of them, so image is then
// undefined here. So for now (as a workaround) check if we
// actually have an image to work with instead of crashing.
- if (!image || !image.imageinfo) {
+ if (!info) {
// Use sane defaults.
- image = {
- imageinfo: [
- {
- url: './Special:FilePath/' +
Util.sanitizeTitleURI(title.key),
- // Preserve width and height from the
wikitext options
- // even if the image is non-existent.
- width: opts.size.v.width || 220,
- height: opts.size.v.height ||
opts.size.v.width || 220,
- },
- ],
+ info = {
+ url: './Special:FilePath/' +
Util.sanitizeTitleURI(title.key),
+ // Preserve width and height from the wikitext options
+ // even if the image is non-existent.
+ width: opts.size.v.width || 220,
+ height: opts.size.v.height || opts.size.v.width || 220,
};
// Add mw:Error to the RDFa type.
@@ -1003,7 +1008,6 @@
errs.push({"key": "missing-image", "message": "This
image does not exist." });
}
}
- info = image.imageinfo[0];
var imageSrc = dataAttribs.src;
if (!dataAttribs.uneditable) {
@@ -1425,17 +1429,12 @@
}
}
- var queueKey = title.key + JSON.stringify(opts.size.v);
- if (queueKey in env.pageCache) {
- this.handleImageInfo(cb, token, title, opts, optSources, null,
env.pageCache[ queueKey ]);
+ var cacheEntry = env.batcher.imageinfo(title.key, opts.size.v,
+ this.handleImageInfo.bind(this, cb, token, title, opts,
optSources));
+ if (cacheEntry !== undefined) {
+ this.handleImageInfo(cb, token, title, opts, optSources, null,
cacheEntry);
} else {
cb({ async: true });
-
- if (!(queueKey in env.requestQueue)) {
- env.requestQueue[queueKey] = new ImageInfoRequest(env,
title.key, opts.size.v);
- }
-
- env.requestQueue[queueKey].once('src',
this.handleImageInfo.bind(this, cb, token, title, opts, optSources));
}
};
diff --git a/lib/ext.core.TemplateHandler.js b/lib/ext.core.TemplateHandler.js
index 2321252..f7febc3 100644
--- a/lib/ext.core.TemplateHandler.js
+++ b/lib/ext.core.TemplateHandler.js
@@ -17,7 +17,6 @@
var defines = require('./mediawiki.parser.defines.js');
var TemplateRequest = require('./mediawiki.ApiRequest.js').TemplateRequest;
var api = require('./mediawiki.ApiRequest.js');
-var PreprocessorRequest = api.PreprocessorRequest;
var Util = require('./mediawiki.Util.js').Util;
var DU = require('./mediawiki.DOMUtils.js').DOMUtils;
var async = require('async');
@@ -149,8 +148,7 @@
accumReceiveToksFromChild);
// Fetch and process the template expansion
this.fetchExpandedTpl(env.page.name || '',
- text, PreprocessorRequest,
- accumReceiveToksFromSibling,
srcHandler);
+ text,
accumReceiveToksFromSibling, srcHandler);
}
} else {
// We don't perform recursive template expansion-
something
@@ -1137,30 +1135,22 @@
/**
* Fetch the preprocessed wikitext for a template-like construct.
- * (The 'Processor' argument is a constructor, hence the capitalization.)
*/
-TemplateHandler.prototype.fetchExpandedTpl = function(title, text, Processor,
parentCB, cb) {
+TemplateHandler.prototype.fetchExpandedTpl = function(title, text, parentCB,
cb) {
var env = this.manager.env;
- if (text in env.pageCache) {
- // XXX: store type too (and cache tokens/x-mediawiki)
- cb(null, env.pageCache[text] /* , type */);
- } else if (!env.conf.parsoid.fetchTemplates) {
- parentCB({ tokens: [ 'Warning: Page/template fetching disabled,
and no cache for ' + text] });
+ if (!env.conf.parsoid.fetchTemplates) {
+ parentCB({ tokens: [ 'Warning: Page/template fetching disabled
cannot expand ' + text] });
} else {
-
// We are about to start an async request for a template
env.dp('Note: trying to expand ', text);
-
- // Start a new request if none is outstanding
- if (env.requestQueue[text] === undefined) {
- env.tp('Note: Starting new request for ' + text);
- env.requestQueue[text] = new Processor(env, title,
text);
+ var cacheEntry = env.batcher.preprocess(title, text, cb);
+ if (cacheEntry !== undefined) {
+ // XXX: store type too (and cache tokens/x-mediawiki)
+ cb(null, cacheEntry /* , type */);
+ } else {
+ // Advise we're going async
+ parentCB({tokens: [], async: true});
}
- // append request, process in document order
- env.requestQueue[text].once('src', cb);
-
- // Advise we're going async
- parentCB({tokens: [], async: true});
}
};
diff --git a/lib/mediawiki.ApiRequest.js b/lib/mediawiki.ApiRequest.js
index 01eeb37..62f865f 100644
--- a/lib/mediawiki.ApiRequest.js
+++ b/lib/mediawiki.ApiRequest.js
@@ -70,6 +70,84 @@
}
};
+var manglePreprocessorResponse = function(env, response) {
+ var src = '';
+ if (response.wikitext !== undefined) {
+ src = response.wikitext;
+ } else if (response["*"] !== undefined) {
+ // For backwards compatibility. Older wikis still put the data
here.
+ src = response["*"];
+ }
+
+ // Add the categories which were added by parser functions directly
+ // into the page and not as in-text links.
+ if (Array.isArray(response.categories)) {
+ for (var i in response.categories) {
+ var category = response.categories[i];
+ src += '\n[[Category:' + category['*'];
+ if (category.sortkey) {
+ src += "|" + category.sortkey;
+ }
+ src += ']]';
+ }
+ }
+ // Ditto for page properties (like DISPLAYTITLE and DEFAULTSORT)
+ if (Array.isArray(response.properties)) {
+ response.properties.forEach(function(prop) {
+ if (prop.name === 'displaytitle' || prop.name ===
'defaultsort') {
+ src += '\n{{' + prop.name.toUpperCase() + ':' +
prop['*'] + '}}';
+ }
+ });
+ }
+ // The same for ResourceLoader modules
+ setPageProperty(env, response.modules, "extensionModules");
+ setPageProperty(env, response.modulescripts, "extensionModuleScripts");
+ setPageProperty(env, response.modulestyles, "extensionModuleStyles");
+
+ return src;
+};
+
+var dummyDoc = domino.createDocument();
+var mangleParserResponse = function(env, response) {
+ var parsedHtml = '';
+ if (response.text['*'] !== undefined) {
+ parsedHtml = response.text['*'];
+ }
+
+ // Strip two trailing newlines that action=parse adds after any
+ // extension output
+ parsedHtml = parsedHtml.replace(/\n\n$/, '');
+
+ // Also strip a paragraph wrapper, if any
+ parsedHtml = parsedHtml.replace(/(^<p>)|(<\/p>$)/g, '');
+
+ // Add the modules to the page data
+ setPageProperty(env, response.modules, "extensionModules");
+ setPageProperty(env, response.modulescripts, "extensionModuleScripts");
+ setPageProperty(env, response.modulestyles, "extensionModuleStyles");
+
+ // Add the categories which were added by extensions directly into the
+ // page and not as in-text links
+ if (response.categories) {
+ for (var i in response.categories) {
+ var category = response.categories[i];
+
+ var link = dummyDoc.createElement("link");
+ link.setAttribute("rel", "mw:PageProp/Category");
+
+ var href = env.page.relativeLinkPrefix + "Category:" +
encodeURIComponent(category['*']);
+ if (category.sortkey) {
+ href += "#" +
encodeURIComponent(category.sortkey);
+ }
+ link.setAttribute("href", href);
+
+ parsedHtml += "\n" + link.outerHTML;
+ }
+ }
+
+ return parsedHtml;
+};
+
/**
* @class
* @extends Error
@@ -175,6 +253,7 @@
options.headers['X-Forwarded-Proto'] = 'https';
}
}
+ this.env.dp("Starting HTTP request", this.toString());
return request(options, callback);
};
@@ -444,11 +523,12 @@
* @param {MWParserEnvironment} env
* @param {string} title The title of the page to use as the context
* @param {string} text
+ * @param {string} hash The queue key
*/
-function PreprocessorRequest(env, title, text) {
+function PreprocessorRequest(env, title, text, hash) {
ApiRequest.call(this, env, title);
- this.queueKey = text;
+ this.queueKey = hash;
this.text = text;
this.reqType = "Template Expansion";
@@ -501,48 +581,10 @@
if (error) {
this.env.log("error", error);
this._processListeners(error, '');
- return;
+ } else {
+ this._processListeners(error,
+ manglePreprocessorResponse(this.env,
data.expandtemplates));
}
-
- var src = '';
- if (data.expandtemplates.wikitext !== undefined) {
- src = data.expandtemplates.wikitext;
- } else if (data.expandtemplates["*"] !== undefined) {
- // For backwards compatibility. Older wikis still put the data
here.
- src = data.expandtemplates["*"];
- }
-
- this.env.tp('Expanded ', this.text, src);
-
- // Add the categories which were added by parser functions directly
- // into the page and not as in-text links.
- if (Array.isArray(data.expandtemplates.categories)) {
- for (var i in data.expandtemplates.categories) {
- var category = data.expandtemplates.categories[i];
- src += '\n[[Category:' + category['*'];
- if (category.sortkey) {
- src += "|" + category.sortkey;
- }
- src += ']]';
- }
- }
- // Ditto for page properties (like DISPLAYTITLE and DEFAULTSORT)
- if (Array.isArray(data.expandtemplates.properties)) {
- data.expandtemplates.properties.forEach(function(prop) {
- if (prop.name === 'displaytitle' || prop.name ===
'defaultsort') {
- src += '\n{{' + prop.name.toUpperCase() + ':' +
prop['*'] + '}}';
- }
- });
- }
- // The same for ResourceLoader modules
- setPageProperty(this.env, data.expandtemplates.modules,
"extensionModules");
- setPageProperty(this.env, data.expandtemplates.modulescripts,
"extensionModuleScripts");
- setPageProperty(this.env, data.expandtemplates.modulestyles,
"extensionModuleStyles");
-
- // Add the source to the cache
- this.env.pageCache[this.text] = src;
-
- this._processListeners(error, src);
};
/**
@@ -558,12 +600,13 @@
* @param {string} title The title of the page to use as context
* @param {string} text
* @param {boolean} onlypst (optional) Pass onlypst to PHP parser
+ * @param {string} hash The queue key
*/
-function PHPParseRequest(env, name, text, onlypst) {
+function PHPParseRequest(env, name, text, onlypst, hash) {
ApiRequest.call(this, env, name);
this.text = text;
- this.queueKey = text;
+ this.queueKey = hash || text;
this.reqType = "Extension Parse";
var apiargs = {
@@ -608,7 +651,6 @@
// Function which returns a promise for the result of a parse request.
PHPParseRequest.promise = promiseFor(PHPParseRequest);
-var dummyDoc = domino.createDocument();
PHPParseRequest.prototype._handleJSON = function(error, data) {
logAPIWarnings(this, data);
@@ -620,49 +662,95 @@
if (error) {
this.env.log("error", error);
this._processListeners(error, '');
+ } else {
+ this._processListeners(error, mangleParserResponse(this.env,
data.parse));
+ }
+};
+
+/**
+ * @class
+ * @extends ApiRequest
+ *
+ * Do a mixed-action batch request using the ParsoidBatchAPI extension.
+ *
+ * @constructor
+ * @param {MWParserEnvironment} env
+ * @param {Array} batchParams An array of objects
+ * @param {string} key The queue key
+ */
+function BatchRequest(env, batchParams, key) {
+ ApiRequest.call(this, env);
+ this.queueKey = key;
+ this.batchParams = batchParams;
+ this.reqType = 'Batch request';
+
+ var apiargs = {
+ format: 'json',
+ formatversion: '2',
+ action: 'parsoid-batch',
+ batch: JSON.stringify(batchParams),
+ };
+
+ this.requestOptions = {
+ method: 'POST',
+ followRedirect: true,
+ uri: env.conf.wiki.apiURI,
+ timeout: env.conf.parsoid.timeouts.mwApi.batch,
+ };
+ var req = this.request(this.requestOptions, this._requestCB.bind(this));
+
+ // Use multipart form encoding to get more efficient transfer if the
gain
+ // will be larger than the typical overhead. In later versions of the
request
+ // library, this can easily be done with the formData option, but
coveralls
+ // depends on request 2.40.0.
+ if (encodeURIComponent(apiargs.batch).length - apiargs.batch.length >
600) {
+ var form = req.form();
+ for (var optName in apiargs) {
+ form.append(optName, apiargs[optName]);
+ }
+ } else {
+ req.form(apiargs);
+ }
+}
+
+util.inherits(BatchRequest, ApiRequest);
+
+BatchRequest.prototype._handleJSON = function(error, data) {
+ if (!error && !(data && data['parsoid-batch'] &&
Array.isArray(data['parsoid-batch']))) {
+ error = new Error('Invalid result when expanding template
batch');
+ }
+
+ if (error) {
+ this.env.log("error", error);
+ this.emit('batch', error, null);
return;
}
- var parsedHtml = '';
- if (data.parse.text['*'] !== undefined) {
- parsedHtml = data.parse.text['*'];
- }
-
- // Strip two trailing newlines that action=parse adds after any
- // extension output
- parsedHtml = parsedHtml.replace(/\n\n$/, '');
-
- // Also strip a paragraph wrapper, if any
- parsedHtml = parsedHtml.replace(/(^<p>)|(<\/p>$)/g, '');
-
- // Add the modules to the page data
- setPageProperty(this.env, data.parse.modules, "extensionModules");
- setPageProperty(this.env, data.parse.modulescripts,
"extensionModuleScripts");
- setPageProperty(this.env, data.parse.modulestyles,
"extensionModuleStyles");
-
- // Add the categories which were added by extensions directly into the
- // page and not as in-text links
- if (data.parse.categories) {
- for (var i in data.parse.categories) {
- var category = data.parse.categories[i];
-
- var link = dummyDoc.createElement("link");
- link.setAttribute("rel", "mw:PageProp/Category");
-
- var href = this.env.page.relativeLinkPrefix +
"Category:" + encodeURIComponent(category['*']);
- if (category.sortkey) {
- href += "#" +
encodeURIComponent(category.sortkey);
- }
- link.setAttribute("href", href);
-
- parsedHtml += "\n" + link.outerHTML;
+ var batchResponse = data['parsoid-batch'];
+ var callbackData = [];
+ var index, itemParams, itemResponse, j, mangled;
+ for (index = 0; index < batchResponse.length; index++) {
+ itemParams = this.batchParams[index];
+ itemResponse = batchResponse[index];
+ switch (itemParams.action) {
+ case 'parse':
+ mangled = mangleParserResponse(this.env,
itemResponse);
+ break;
+ case 'preprocess':
+ mangled = manglePreprocessorResponse(this.env,
itemResponse);
+ break;
+ case 'imageinfo':
+ mangled = {batchResponse: itemResponse};
+ break;
+ default:
+ error = new Error("BatchRequest._handleJSON:
Invalid action");
+ this.emit('batch', error, null);
+ return;
}
+ callbackData.push(mangled);
+
}
-
- // Add the source to the cache
- this.env.pageCache[this.text] = parsedHtml;
-
- this._processListeners(error, parsedHtml);
+ this.emit('batch', error, callbackData);
};
/**
@@ -838,10 +926,10 @@
* @param {number} [dims.width]
* @param {number} [dims.height]
*/
-function ImageInfoRequest(env, filename, dims) {
+function ImageInfoRequest(env, filename, dims, key) {
ApiRequest.call(this, env, null);
this.env = env;
- this.queueKey = filename + JSON.stringify(dims);
+ this.queueKey = key;
this.reqType = "Image Info Request";
var conf = env.conf.wiki;
@@ -905,7 +993,7 @@
}
if (data && data.query) {
- // The API indexes its response by page ID. That's stupid.
+ // The API indexes its response by page ID. That's inconvenient.
newpages = {};
pagenames = {};
pages = data.query.pages;
@@ -927,7 +1015,6 @@
data.query.pages = newpages;
data.query.imgns = this.ns;
- this.env.pageCache[ this.queueKey ] = data.query;
this._processListeners(null, data.query);
} else if (data && data.error) {
if (data.error.code === 'readapidenied') {
@@ -946,6 +1033,7 @@
module.exports.TemplateRequest = TemplateRequest;
module.exports.PreprocessorRequest = PreprocessorRequest;
module.exports.PHPParseRequest = PHPParseRequest;
+ module.exports.BatchRequest = BatchRequest;
module.exports.ParsoidCacheRequest = ParsoidCacheRequest;
module.exports.ImageInfoRequest = ImageInfoRequest;
module.exports.DoesNotExistError = DoesNotExistError;
diff --git a/lib/mediawiki.Batcher.js b/lib/mediawiki.Batcher.js
new file mode 100644
index 0000000..b99310d
--- /dev/null
+++ b/lib/mediawiki.Batcher.js
@@ -0,0 +1,315 @@
+'use strict';
+require('./core-upgrade.js');
+
+var Util = require('./mediawiki.Util.js').Util;
+var api = require('./mediawiki.ApiRequest.js');
+
+/**
+ * @class
+ *
+ * This class combines requests into batches for dispatch to the
+ * ParsoidBatchAPI extension, and calls the item callbacks when the batch
+ * result is returned. It handles scheduling and concurrency of batch requests.
+ * It also has a legacy mode which sends requests to the MW core API.
+ *
+ * @constructor
+ * @param {MWParserEnvironment} env
+ */
+function Batcher(env) {
+ this.env = env;
+ this.itemCallbacks = {};
+ this.currentBatch = [];
+ this.pendingBatches = [];
+ this.resultCache = {};
+ this.numOutstanding = 0;
+ this.idleTimer = false;
+
+ this.maxBatchSize = env.conf.parsoid.batchSize;
+ this.targetConcurrency = env.conf.parsoid.batchConcurrency;
+}
+
+/**
+ * Internal function for adding a generic work item.
+ *
+ * @param {Object} params The batch item parameters, including a unique hash
+ * @param {Function} cb The item completion callback
+ */
+Batcher.prototype.pushGeneric = function(params, cb) {
+ var hash = params.hash;
+ if (hash in this.itemCallbacks) {
+ this.trace("Appending callback for hash", hash);
+ this.itemCallbacks[hash].push(cb);
+ } else {
+ this.trace("Creating batch item:", params);
+ this.itemCallbacks[hash] = [cb];
+ this.currentBatch.push(params);
+ if (this.currentBatch.length >= this.maxBatchSize) {
+ this.sealBatch();
+ }
+ }
+};
+
+/**
+ * Declare a batch complete and move it to the queue ready for dispatch. Moving
+ * batches to a queue instead of dispatching them immediately allows for an
+ * upper limit on concurrency.
+ */
+Batcher.prototype.sealBatch = function() {
+ if (this.currentBatch.length > 0) {
+ this.pendingBatches.push(this.currentBatch);
+ this.currentBatch = [];
+ }
+};
+
+/**
+ * Dispatch batches from the pending queue, if it is currently possible.
+ */
+Batcher.prototype.dispatch = function() {
+ while (this.numOutstanding < this.targetConcurrency &&
this.pendingBatches.length) {
+ var batch = this.pendingBatches.shift();
+
+ this.trace("Dispatching batch with", batch.length, "items");
+ this.request(batch).once('batch',
+ this.onBatchResponse.bind(this, batch));
+
+ this.numOutstanding++;
+ if (this.idleTimer) {
+ clearTimeout(this.idleTimer);
+ this.idleTimer = false;
+ }
+ }
+};
+
+/**
+ * Schedule an idle event for the next tick. The idle event will dispatch
+ * batches if necessary to keep the job going. The idle event will be cancelled
+ * if a dispatch is done before returning to the event loop.
+ *
+ * This must be called after the completion of parsing work, and after any
+ * batch response is received, to avoid hanging the request by having an
+ * undispatched batch.
+ */
+Batcher.prototype.scheduleIdle = function() {
+ if (!this.idleTimer) {
+ this.idleTimer = setTimeout(this.onIdle.bind(this), 0);
+ }
+};
+
+/**
+ * Handler for the idle event. Dispatch batches if there is not enough work
+ * outstanding.
+ */
+Batcher.prototype.onIdle = function() {
+ this.idleTimer = false;
+
+ this.trace("Idle with outstanding =", this.numOutstanding,
+ ", pending =", this.pendingBatches.length, "x",
this.maxBatchSize,
+ ", current =", this.currentBatch.length);
+
+ if (this.numOutstanding < this.targetConcurrency) {
+ this.sealBatch();
+ this.dispatch();
+ }
+};
+
+/**
+ * Handle a batch response and call item callbacks, after the request is
+ * decoded by BatchRequest.
+ *
+ * @param {Array} batchParams The array of item parameters as in pushGeneric().
+ * @param {Error/null} error
+ * @param {Array} batchResult
+ */
+Batcher.prototype.onBatchResponse = function(batchParams, error, batchResult) {
+ var i, j, result, params, callbacks;
+ this.numOutstanding--;
+ if (error) {
+ this.trace("Received error in batch response:", error);
+ } else {
+ this.trace("Received batch response with", batchResult.length,
"items");
+ }
+ for (i = 0; i < batchParams.length; i++) {
+ params = batchParams[i];
+ callbacks = this.itemCallbacks[params.hash];
+ if (error) {
+ for (j = 0; j < callbacks.length; j++) {
+ callbacks[j](error, null);
+ }
+ } else {
+ result = batchResult[i];
+ this.resultCache[params.hash] = result;
+ delete this.itemCallbacks[params.hash];
+ for (j = 0; j < callbacks.length; j++) {
+ callbacks[j](null, result);
+ }
+ }
+ }
+ this.scheduleIdle();
+};
+
+/**
+ * Schedule a preprocess (expandtemplates) operation.
+ * @param {string} title The title of the page to use as the context
+ * @param {string} text
+ * @param {Function} cb The completion callback
+ */
+Batcher.prototype.preprocess = function(title, text, cb) {
+ var env = this.env;
+ var hash = Util.makeHash(["preprocess", text, title]);
+ if (hash in this.resultCache) {
+ this.trace("Result cache hit for hash", hash);
+ return this.resultCache[hash];
+ }
+ if (!env.conf.parsoid.useBatchAPI) {
+ this.trace("Non-batched preprocess request");
+ this.legacyRequest(api.PreprocessorRequest,
+ [env, title, text, hash], hash, cb);
+ return;
+ }
+
+ // Add the item to the batch
+ this.pushGeneric(
+ {
+ action: "preprocess",
+ title: title,
+ text: text,
+ hash: hash,
+ }, cb
+ );
+};
+
+/**
+ * Schedule an MW parse operation.
+ * @param {string} title The title of the page to use as the context
+ * @param {string} text
+ * @param {Function} cb The completion callback
+ */
+Batcher.prototype.parse = function(title, text, cb) {
+ var env = this.env;
+ var hash = Util.makeHash(["parse", text, title]);
+ if (hash in this.resultCache) {
+ return this.resultCache[hash];
+ }
+ if (!env.conf.parsoid.useBatchAPI) {
+ this.trace("Non-batched parse request");
+ this.legacyRequest(api.PHPParseRequest,
+ [env, title, text, false, hash], hash, cb);
+ return;
+ }
+
+ this.pushGeneric(
+ {
+ action: "parse",
+ title: title,
+ text: text,
+ hash: hash,
+ }, cb
+ );
+};
+
+/**
+ * Schedule fetching of image info.
+ * @param {string} filename
+ * @param {Object} dims
+ * @param {Function} cb The completion callback
+ */
+Batcher.prototype.imageinfo = function(filename, dims, cb) {
+ var env = this.env;
+ var hash = Util.makeHash(["imageinfo", filename, dims.width || "",
dims.height || ""]);
+ if (hash in this.resultCache) {
+ return this.resultCache[hash];
+ }
+ if (!env.conf.parsoid.useBatchAPI) {
+ this.trace("Non-batched imageinfo request");
+ this.legacyRequest(api.ImageInfoRequest,
+ [env, filename, dims, hash], hash, cb);
+ return;
+ }
+
+ var params = {
+ action: "imageinfo",
+ filename: filename,
+ hash: hash,
+ };
+ if (dims.width !== null || dims.height !== null) {
+ params.txopts = {};
+ if (dims.width !== null) {
+ params.txopts.width = dims.width;
+ }
+ if (dims.height !== null) {
+ params.txopts.height = dims.height;
+ }
+ }
+
+ this.pushGeneric(params, cb);
+};
+
+/**
+ * Helper for sending legacy requests when the extension is not available
+ * @param {Function} Constructor The ApiRequest subclass constructor
+ * @param {Array} args The constructor arguments
+ * @param {string} hash The request identifier hash
+ * @param {Function} cb The completion callback
+ */
+Batcher.prototype.legacyRequest = function(Constructor, args, hash, cb) {
+ var env = this.env;
+ if (env.requestQueue[hash] === undefined) {
+ var req = Object.create(Constructor.prototype);
+ Constructor.apply(req, args);
+ env.requestQueue[hash] = req;
+ }
+ env.requestQueue[hash].once('src', this.onLegacyResponse.bind(this,
hash, cb));
+};
+
+/**
+ * Helper for handling a legacy response
+ */
+Batcher.prototype.onLegacyResponse = function(hash, cb, error, src) {
+ if (!error) {
+ this.resultCache[hash] = src;
+ }
+ cb(error, src);
+};
+
+/**
+ * Actually send a single batch request with the specified parameters.
+ */
+Batcher.prototype.request = function(batchParams) {
+ var i;
+ var params;
+ var apiBatch = [];
+ var key = [];
+ var apiItemParams;
+ for (i = 0; i < batchParams.length; i++) {
+ params = batchParams[i];
+ if (params.action === 'imageinfo') {
+ apiItemParams = {
+ action: params.action,
+ filename: params.filename,
+ };
+ if ("txopts" in params) {
+ apiItemParams.txopts = params.txopts;
+ }
+ } else {
+ apiItemParams = {
+ action: params.action,
+ title: params.title,
+ text: params.text,
+ };
+ }
+ apiBatch.push(apiItemParams);
+ key.push(params.hash);
+ }
+ return new api.BatchRequest(this.env, apiBatch, key.join(':'));
+};
+
+/**
+ * Convenience helper for tracing
+ */
+Batcher.prototype.trace = function() {
+ this.env.log.apply(null,
["trace/batcher"].concat(Array.prototype.slice.call(arguments)));
+};
+
+module.exports = {
+ Batcher: Batcher,
+};
diff --git a/lib/mediawiki.ParsoidConfig.js b/lib/mediawiki.ParsoidConfig.js
index e8a150c..6972f86 100644
--- a/lib/mediawiki.ParsoidConfig.js
+++ b/lib/mediawiki.ParsoidConfig.js
@@ -37,6 +37,8 @@
preprocessor: 30 * 1000,
// action=parse
extParse: 30 * 1000,
+ // action=parsoid-batch
+ batch: 60 * 1000,
// action=query&prop=revisions
srcFetch: 40 * 1000,
// action=query&prop=imageinfo
@@ -269,6 +271,26 @@
ParsoidConfig.prototype.loadWMF = true;
/**
+ * Set to true to use the Parsoid-specific batch API from the ParsoidBatchAPI
+ * extension (action=parsoid-batch).
+ */
+ParsoidConfig.prototype.useBatchAPI = false;
+
+/**
+ * The batch size for parse/preprocess requests
+ */
+ParsoidConfig.prototype.batchSize = 50;
+
+/**
+ * The maximum number of concurrent requests that the API request batcher will
+ * allow to be active at any given time. Before this limit is reached, requests
+ * will be dispatched more aggressively, giving smaller batches on average.
+ * After the limit is reached, batches will be stored in a queue with
+ * batchSize items in each batch.
+ */
+ParsoidConfig.prototype.batchConcurrency = 4;
+
+/**
* @property {null} Settings for Performance timer.
*/
ParsoidConfig.prototype.performanceTimer = null;
diff --git a/lib/mediawiki.TokenTransformManager.js
b/lib/mediawiki.TokenTransformManager.js
index 273f3f7..e5f26ba 100644
--- a/lib/mediawiki.TokenTransformManager.js
+++ b/lib/mediawiki.TokenTransformManager.js
@@ -409,6 +409,7 @@
if (this.tailAccumulator) {
this.env.dp('AsyncTokenTransformManager.onEndEvent: calling
siblingDone',
this.frame.title);
+ this.env.batcher.scheduleIdle();
this.tailAccumulator.siblingDone();
} else {
// nothing was asynchronous, so we'll have to emit end here.
diff --git a/lib/mediawiki.Util.js b/lib/mediawiki.Util.js
index 6b662e9..80d90a7 100644
--- a/lib/mediawiki.Util.js
+++ b/lib/mediawiki.Util.js
@@ -6,6 +6,7 @@
require('./core-upgrade.js');
var async = require('async');
+var crypto = require('crypto');
var request = require('request');
var entities = require('entities');
var TXStatsD = require('node-txstatsd');
@@ -132,6 +133,7 @@
" * selser : trace actions of the selective
serializer",
" * domdiff : trace actions of the DOM diffing code",
" * wt-escape : debug wikitext-escaping",
+ " * batcher : trace API batch aggregation and
dispatch",
"",
"--debug enables tracing of all the above phases except
Token Transform Managers",
"",
@@ -1252,6 +1254,29 @@
typeof (dsr[0]) === 'number' && dsr[0] >= 0 &&
typeof (dsr[1]) === 'number' && dsr[1] >= 0;
},
+
+ /**
+ * Quickly hash an array or string.
+ *
+ * @param {Array/string} arr
+ */
+ makeHash: function(arr) {
+ var md5 = crypto.createHash('MD5');
+ var i;
+ if (Array.isArray(arr)) {
+ for (i = 0; i < arr.length; i++) {
+ if (arr[i] instanceof String) {
+ md5.update(arr[i]);
+ } else {
+ md5.update(arr[i].toString());
+ }
+ md5.update("\0");
+ }
+ } else {
+ md5.update(arr);
+ }
+ return md5.digest('hex');
+ },
};
// FIXME: There is also a DOMUtils.getJSONAttribute. Consolidate
diff --git a/lib/mediawiki.parser.environment.js
b/lib/mediawiki.parser.environment.js
index cb98592..c0c8e9a 100644
--- a/lib/mediawiki.parser.environment.js
+++ b/lib/mediawiki.parser.environment.js
@@ -4,6 +4,7 @@
var WikiConfig = require('./mediawiki.WikiConfig.js').WikiConfig;
var ParsoidConfig = require('./mediawiki.ParsoidConfig.js').ParsoidConfig;
var ConfigRequest = require('./mediawiki.ApiRequest.js').ConfigRequest;
+var Batcher = require('./mediawiki.Batcher.js').Batcher;
var Util = require('./mediawiki.Util.js').Util;
var JSUtils = require('./jsutils.js').JSUtils;
var Title = require('./mediawiki.Title.js').Title;
@@ -92,6 +93,7 @@
// Outstanding page requests (for templates etc)
this.requestQueue = {};
+ this.batcher = new Batcher(this);
};
MWParserEnvironment.prototype.configureLogging = function() {
--
To view, visit https://gerrit.wikimedia.org/r/227208
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I2bd6f574bca8c64302810a9569f9390c4cf64626
Gerrit-PatchSet: 7
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Tim Starling <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits