Pmiazga has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/387642 )
Change subject: Add timeout to requests in the queue ...................................................................... Add timeout to requests in the queue By default, tasks that are waiting in the queue are aborted after 90 seconds. In a follow up patch we'll limit the number of seconds a render can last and terminate process that last longer. Bug: T178501 Change-Id: Ib8b3ba4e609bfac457a3d1d21cef85c1da08bc20 --- M config.dev.yaml M lib/queue.js M lib/renderer.js M package.json M routes/html2pdf-v1.js M test/features/v1/html2pdf.js M test/lib/queue.js 7 files changed, 214 insertions(+), 48 deletions(-) Approvals: Pmiazga: Verified; Looks good to me, approved diff --git a/config.dev.yaml b/config.dev.yaml index 25306f2..ce24eb4 100644 --- a/config.dev.yaml +++ b/config.dev.yaml @@ -95,4 +95,6 @@ - '--no-sandbox' - '--disable-setuid-sandbox' # the maximum number of puppeteer instances that can be launched at a time - render_concurrency: 1 \ No newline at end of file + render_concurrency: 1 + # don't wait to render a PDF after this many seconds + render_queue_timeout: 90 \ No newline at end of file diff --git a/lib/queue.js b/lib/queue.js index 868f420..63ec493 100644 --- a/lib/queue.js +++ b/lib/queue.js @@ -1,23 +1,121 @@ 'use strict'; const asyncQueue = require('async/queue'); +const renderer = require('./renderer'); +const uuid = require('cassandra-uuid'); +const EventEmitter = require('events'); +// Errors used as the first argument of the callback passed to the queue +const callbackErrors = { + queueBusy: 0, + renderFailed: 1 +}; -module.exports = class Queue { +/** + * Wrapper around `async/queue` + * The class only exposes what's needed and takes care of rejecting + * requests upon timeout. + */ +class Queue extends EventEmitter { /** - * @param {Function} worker - * @param {number} concurrency + * @param {number} concurrency number of concurrent render instances + * @param {number} timeout number of seconds after which the + * yet-to-start renders are aborted + * @param {Object} puppeteerFlags flags used to in starting puppeteer + * @param {Object} pdfOptions pdf options passed to Chromium + * @param {Object} logger app logger */ - constructor(worker, concurrency) { - this._queueObject = asyncQueue(worker, concurrency); + constructor(concurrency, timeout, puppeteerFlags, pdfOptions, logger) { + super(); + this._queueObject = asyncQueue(this._worker.bind(this), concurrency); + this._puppeteerFlags = puppeteerFlags; + this._pdfOptions = pdfOptions; + this._timeout = timeout; + this._logger = logger; + this.on('onBeforePush', this._onBeforePush); + this.on('onBeforeRender', this._onBeforeRender); } /** - * Push data to the queue + * Sets a timeout to cancel the task + * `_timeoutID` is attached to `data` so that it can be cleared if + * the task starts within a predefined time (see `_clearTimeout`). + * When the task is removed from the queue after the time is up + * `callback` is called with an error. + * @param {Object} data that the worker needs + * @param {Function} callback called with `callbackErrors.queueBusy` + * as its first argument when the time is up. + */ + _onBeforePush(data, callback) { + const that = this; + data._id = uuid.TimeUuid.now().toString(); + data._timeoutID = setTimeout(() => { + that._queueObject.remove((worker) => { + if (worker.data._id === data._id) { + that._logger.log('trace/warning', { + msg: `Queue is still busy after waiting ` + + `for ${that._timeout} secs.` + }); + callback(callbackErrors.queueBusy, null); + return true; + } + return false; + }); + }, this._timeout * 1000); + } + + /** + * Clears timeout associated with data that was set using `_setTimeout` + * @param {Object} data that has a `_timeoutID` property + */ + _onBeforeRender(data) { + clearTimeout(data._timeoutID); + } + + /** + * Pushes `data` to the queue + * If the task doesn't start after a predefined time, it will be aborted. + * @param {Object} data that the worker needs + * @param {Function} callback called when the worker finishes. The + * first argument to callback will be one of the error codes from + * `callbackErrors` or `null` if there's no error. The second + * argument will be a promise that resolves with a PDF buffer. In case of + * error, the second argument will be `null`. + */ + push(data, callback) { + this.emit('onBeforePush', data, callback); + this._queueObject.push(data, callback); + } + + /** + * Worker that renders a PDF + * The timeout associated with `data` will be cleared before the + * render starts. In case of failure during the render phase, the + * callback is called with `callbackErrors.renderFailed` as its + * first argument. * @param {Object} data that the worker needs * @param {Function} callback called when the worker finishes */ - push(data, callback) { - this._queueObject.push(data, callback); + _worker(data, callback) { + this.emit('onBeforeRender', data); + + renderer + .articleToPdf(data.uri, data.format, this._puppeteerFlags, + this._pdfOptions) + .then((pdf) => { + callback(null, pdf); + }) + .catch((error) => { + this._logger.log('trace/error', { + msg: `Cannot convert page ${data.uri} to PDF.`, + error + }); + callback(callbackErrors.renderFailed, null); + }); } +} + +module.exports = { + callbackErrors, + Queue }; diff --git a/lib/renderer.js b/lib/renderer.js index 705a31e..dae34f1 100644 --- a/lib/renderer.js +++ b/lib/renderer.js @@ -6,14 +6,15 @@ * Renders content from `url` in PDF * @param {string} url URL to get content from * @param {string} format Page size, e.g. Letter or A4, passed to understands - * @param {Object} conf app configuration + * @param {Object} puppeteerFlags + * @param {Object} pdfOptions * @return {<Promise<Buffer>>} Promise which resolves with PDF buffer */ -exports.articleToPdf = function articleToPdf(url, format, conf) { +exports.articleToPdf = function articleToPdf(url, format, puppeteerFlags, pdfOptions) { let browser; let page; - return puppeteer.launch({ args: conf.puppeteer_flags }) + return puppeteer.launch({ args: puppeteerFlags }) .then((browser_) => { browser = browser_; return browser.newPage(); @@ -24,7 +25,7 @@ }) .then(() => { return page.pdf(Object.assign( - {}, conf.pdf_options, { format } + {}, pdfOptions, { format } )); }) .catch((error) => { diff --git a/package.json b/package.json index 0b8d57b..863fdd0 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,7 @@ }, "homepage": "https://github.com/wikimedia/mediawiki-services-chromium-render", "dependencies": { - "async": "^2.5.0", + "async": "^2.6.0", "bluebird": "^3.5.0", "body-parser": "^1.17.1", "bunyan": "^1.8.9", diff --git a/routes/html2pdf-v1.js b/routes/html2pdf-v1.js index 2aa0cdd..ac8af7c 100644 --- a/routes/html2pdf-v1.js +++ b/routes/html2pdf-v1.js @@ -1,8 +1,7 @@ 'use strict'; -const Queue = require('../lib/queue'); +const { callbackErrors, Queue } = require('../lib/queue'); const sUtil = require('../lib/util'); -const renderer = require('../lib/renderer'); /** * The main router object @@ -29,14 +28,20 @@ app.queue.push({ uri: restbaseRequest.uri, - format: req.params.format, - conf: app.conf + format: req.params.format }, ((error, pdf) => { if (error) { - const error_ = new sUtil.HTTPError( - `Cannot convert page ${restbaseRequest.uri} to PDF.` - ); - res.status(error_.status).send(error_); + let status; + + if (error === callbackErrors.queueBusy) { + status = 503; + } else if (error === callbackErrors.renderFailed) { + status = 500; + } + + const errorObject = new sUtil.HTTPError({ status }); + res.status(errorObject.status).send(errorObject); + return; } @@ -53,22 +58,14 @@ module.exports = function(appObj) { app = appObj; - const worker = (data, callback) => { - renderer - .articleToPdf(data.uri, data.format, data.conf) - .then((pdf) => { - callback(null, pdf); - }) - .catch((error) => { - app.logger.log('trace/error', { - msg: `Cannot convert page ${data.uri} to PDF.`, - error - }); - callback(error, null); - }); - }; - - app.queue = new Queue(worker, app.conf.render_concurrency); + const conf = app.conf; + app.queue = new Queue( + conf.render_concurrency, + conf.render_queue_timeout, + conf.puppeteer_flags, + conf.pdf_options, + app.logger + ); // the returned object mounts the routes on // /{domain}/vX/mount/path diff --git a/test/features/v1/html2pdf.js b/test/features/v1/html2pdf.js index 516e13b..38099ff 100644 --- a/test/features/v1/html2pdf.js +++ b/test/features/v1/html2pdf.js @@ -5,7 +5,7 @@ const server = require('../../utils/server.js'); describe('html2pdf', function() { - this.timeout(5000); + this.timeout(20000); before(function () { return server.start(); }); it('should return a letter-sized PDF', function() { @@ -14,7 +14,7 @@ ) .then(function(res) { assert.status(res, 200); - assert.contentType(res, 'application/PDF'); + assert.contentType(res, 'application/pdf'); assert.deepEqual( Buffer.isBuffer(res.body), true, 'Unexpected body!'); }); diff --git a/test/lib/queue.js b/test/lib/queue.js index 720ca60..19a1fa3 100644 --- a/test/lib/queue.js +++ b/test/lib/queue.js @@ -1,7 +1,27 @@ 'use strict'; const assert = require('../utils/assert.js'); -const Queue = require('../../lib/queue'); +const { callbackErrors, Queue } = require('../../lib/queue'); +const logger = { log: () => {} }; +const puppeteerFlags = [ + '--no-sandbox', + '--disable-setuid-sandbox' +]; +const pdfOptions = { + scale: 1, + displayHeaderFooter: false, + printBackground: false, + landscape: false, + pageRanges: '', + format: 'Letter', + margin: { + top: '0.5in', + right: '0.5in', + // some room for page numbers + bottom: '0.75in', + left: '0.5in' + } +}; describe('concurrency', function() { this.timeout(5000); @@ -10,13 +30,17 @@ let status, testsCompleted = 0; - const worker = (data, callback) => { - status = `done ${data.id}`; - setTimeout(() => { - callback(); - }, data.timeout); - }; - const q = new Queue(worker, 1); + class QueueTest extends Queue { + _worker (data, callback) { + this.emit('onBeforeRender', data); + status = `done ${data.id}`; + // simulate render + setTimeout(() => { + callback(); + }, data.timeout); + }; + } + const q = new QueueTest(1, 90, puppeteerFlags, pdfOptions, logger); // first worker must finish after 1 sec q.push({ @@ -46,4 +70,48 @@ done(); }); }); + + it('should reject timed out tasks', function(done) { + let tasksCompleted = 0, + tasksRejected = 0; + + class QueueTest extends Queue { + _worker (data, callback) { + this.emit('onBeforeRender', data); + // simulate render + setTimeout(() => { + callback(null, {}); + }, data.timeout); + }; + } + const q = new QueueTest(1, 1, puppeteerFlags, pdfOptions, logger); + + // first worker completes in 3 seconds + q.push({ + id: 1, + timeout: 3000 + }, (error, data) => { + assert.ok(error === null); + tasksCompleted += 1; + }); + + // the following two requests should be rejected + q.push({ + id: 2, + timeout: 10 + }, (error, data) => { + assert.ok(tasksCompleted === 0); + assert.ok(error === callbackErrors.queueBusy); + tasksRejected += 1; + }); + q.push({ + id: 3, + timeout: 20 + }, (error, data) => { + assert.ok(tasksCompleted === 0); + assert.ok(tasksRejected === 1); + assert.ok(error === callbackErrors.queueBusy); + done(); + }); + }); }); -- To view, visit https://gerrit.wikimedia.org/r/387642 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ib8b3ba4e609bfac457a3d1d21cef85c1da08bc20 Gerrit-PatchSet: 7 Gerrit-Project: mediawiki/services/chromium-render Gerrit-Branch: master Gerrit-Owner: Bmansurov <bmansu...@wikimedia.org> Gerrit-Reviewer: Bmansurov <bmansu...@wikimedia.org> Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org> Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org> Gerrit-Reviewer: Niedzielski <sniedziel...@wikimedia.org> Gerrit-Reviewer: Pmiazga <pmia...@wikimedia.org> Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits