Mobrovac has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/390144 )
Change subject: Terminate browser process after certain time
......................................................................
Terminate browser process after certain time
By default after the arbitrarily chosen 90 seconds puppeteer will
terminate the browser subprocess if it hasn't finished returning a PDF
yet.
Bug: T178501
Change-Id: I904fc15826b7de3a34e5c977b40bfd8518bb1679
---
M config.dev.yaml
M lib/queue.js
M lib/renderer.js
M routes/html2pdf-v1.js
M test/lib/queue.js
5 files changed, 162 insertions(+), 57 deletions(-)
Approvals:
Mobrovac: Verified; Looks good to me, approved
diff --git a/config.dev.yaml b/config.dev.yaml
index eae5f5b..7b91760 100644
--- a/config.dev.yaml
+++ b/config.dev.yaml
@@ -91,12 +91,17 @@
# some room for page numbers
bottom: '0.75in'
left: '0.5in'
- puppeteer_flags:
- - '--no-sandbox'
- - '--disable-setuid-sandbox'
+ #
https://github.com/GoogleChrome/puppeteer/blob/v0.11.0/docs/api.md#puppeteerlaunchoptions
+ puppeteer_options:
+ timeout: 30000
+ args:
+ - '--no-sandbox'
+ - '--disable-setuid-sandbox'
# the maximum number of puppeteer instances that can be launched at a
time
render_concurrency: 1
# don't wait to render a PDF after this many seconds
render_queue_timeout: 60
# maximum allowed number of pending jobs
max_render_queue_size: 3
+ # the number of seconds before puppeteer terminates the browser instance
+ render_execution_timout: 90
\ No newline at end of file
diff --git a/lib/queue.js b/lib/queue.js
index f66c584..ff437ac 100644
--- a/lib/queue.js
+++ b/lib/queue.js
@@ -1,7 +1,7 @@
'use strict';
const asyncQueue = require('async/queue');
-const renderer = require('./renderer');
+const asyncTimeout = require('async/timeout');
const uuid = require('cassandra-uuid');
// Errors used as the first argument of the callback passed to the queue
@@ -11,7 +11,9 @@
// something went wrong in the render phase
renderFailed: 1,
// the queue is already full, not waiting for it to have room
- queueFull: 2
+ queueFull: 2,
+ // when the render takes longer than allowed
+ renderTimeout: 3
};
/**
@@ -24,20 +26,22 @@
* @param {Object} queueOptions
* @param {number} queueOptions.concurrency number of concurrent
* render instances
- * @param {number} queueOptions.timeout number of seconds after
+ * @param {number} queueOptions.queueTimeout number of seconds after
* which the yet-to-start renders are aborted
+ * @param {number} queueOptions.executionTimeout number of seconds after
+ * which puppeteer is asked to abort the render
* @param {number} queueOptions.maxTaskCount number of tasks the queue
* should hold. New tasks will be rejected once the sum of the
* number of running tasks and the tasks in the queue is equal to
* this number.
- * @param {Object} puppeteerFlags flags used to in starting puppeteer
+ * @param {Object} puppeteerOptions options used to in starting puppeteer
* @param {Object} pdfOptions pdf options passed to Chromium
* @param {Object} logger app logger
*/
- constructor(queueOptions, puppeteerFlags, pdfOptions, logger) {
+ constructor(queueOptions, puppeteerOptions, pdfOptions, logger) {
this._queueObject = asyncQueue(this._worker.bind(this),
queueOptions.concurrency);
- this._puppeteerFlags = puppeteerFlags;
+ this._puppeteerOptions = puppeteerOptions;
this._pdfOptions = pdfOptions;
this._options = queueOptions;
this._logger = logger;
@@ -73,7 +77,7 @@
_setCancelTaskTimeout(data, callback) {
const logger = this._logger;
const queue = this._queueObject;
- const timeout = this._options.timeout;
+ const timeout = this._options.queueTimeout;
data._id = `${uuid.TimeUuid.now().toString()}|${data.uri}`;
data._timeoutID = setTimeout(() => {
@@ -140,8 +144,35 @@
_worker(data, callback) {
this._clearCancelTaskTimeout(data);
this._logger.log('info/queue', `Started rendering ${data._id}`);
- renderer
- .articleToPdf(data.uri, data.format, this._puppeteerFlags,
+
+ const timeout = this._options.executionTimeout;
+ const timedRender = asyncTimeout(this._render.bind(this),
+ timeout * 1000);
+ const renderer = data._renderer;
+ timedRender(data, (error, pdf) => {
+ // error returned by async timeout
+ if (error && error.code === 'ETIMEDOUT') {
+ this._logger.log(
+ 'error/render',
+ `Aborting. Render hasn't finished within ${timeout} ` +
+ `seconds. Data ID: ${data._id}.`
+ );
+ renderer.abortRender();
+ callback(callbackErrors.renderTimeout, null);
+ } else {
+ callback(error, pdf);
+ }
+ });
+ }
+
+ /**
+ * Render a PDF
+ * @param {Object} data used for rendering
+ * @param {Function} callback called on render success/failure
+ */
+ _render(data, callback) {
+ data._renderer
+ .articleToPdf(data.uri, data.format, this._puppeteerOptions,
this._pdfOptions)
.then((pdf) => {
this._logger.log(
diff --git a/lib/renderer.js b/lib/renderer.js
index dae34f1..68ef504 100644
--- a/lib/renderer.js
+++ b/lib/renderer.js
@@ -2,40 +2,63 @@
const puppeteer = require('puppeteer');
-/**
- * Renders content from `url` in PDF
- * @param {string} url URL to get content from
- * @param {string} format Page size, e.g. Letter or A4, passed to understands
- * @param {Object} puppeteerFlags
- * @param {Object} pdfOptions
- * @return {<Promise<Buffer>>} Promise which resolves with PDF buffer
- */
-exports.articleToPdf = function articleToPdf(url, format, puppeteerFlags,
pdfOptions) {
- let browser;
- let page;
+module.exports = class Renderer {
+ constructor() {
+ this._browser = null;
+ }
- return puppeteer.launch({ args: puppeteerFlags })
- .then((browser_) => {
- browser = browser_;
- return browser.newPage();
- })
- .then((page_) => {
- page = page_;
- return page.goto(url, { waitUntil: 'networkidle' });
- })
- .then(() => {
- return page.pdf(Object.assign(
- {}, pdfOptions, { format }
- ));
- })
- .catch((error) => {
- if (browser) {
- browser.close();
- }
- throw error;
- })
- .then((pdf) => {
- browser.close();
- return pdf;
- });
+ /**
+ * Closes any open browser instance
+ */
+ _closeBrowser() {
+ if (this._browser) {
+ this._browser.close();
+ this._browser = null;
+ }
+ }
+
+ /**
+ * Renders content from `url` in PDF
+ * @param {string} url URL to get content from
+ * TODO: merge format with pdfOptions
+ * @param {string} format Page size, e.g. Letter or A4, passed to
understands
+ * @param {Object} puppeteerOptions
+ * @param {Object} pdfOptions
+ * @return {<Promise<Buffer>>} Promise which resolves with PDF buffer
+ */
+ articleToPdf(url, format, puppeteerOptions, pdfOptions) {
+ let page;
+ const that = this;
+
+ return puppeteer.launch(puppeteerOptions)
+ .then((browser) => {
+ that._browser = browser;
+ return browser.newPage();
+ })
+ .then((page_) => {
+ page = page_;
+ return page.goto(url, { waitUntil: 'networkidle' });
+ })
+ .then(() => {
+ return page.pdf(Object.assign(
+ {}, pdfOptions, { format }
+ ));
+ })
+ .catch((error) => {
+ that._closeBrowser();
+ throw error;
+ })
+ .then((pdf) => {
+ that._closeBrowser();
+ return pdf;
+ });
+ }
+
+ /**
+ * Aborts the request to create a PDF.
+ * Should be called after calling articleToPdf
+ */
+ abortRender() {
+ this._closeBrowser();
+ }
};
diff --git a/routes/html2pdf-v1.js b/routes/html2pdf-v1.js
index cfeae4a..f1993e0 100644
--- a/routes/html2pdf-v1.js
+++ b/routes/html2pdf-v1.js
@@ -2,6 +2,7 @@
const { callbackErrors, Queue } = require('../lib/queue');
const sUtil = require('../lib/util');
+const Renderer = require('../lib/renderer');
/**
* The main router object
@@ -26,18 +27,23 @@
}
});
+ const renderer = new Renderer();
app.queue.push({
+ renderer,
uri: restbaseRequest.uri,
format: req.params.format
}, ((error, pdf) => {
if (error) {
let status;
+ const e = callbackErrors;
- if ([callbackErrors.queueBusy, callbackErrors.queueFull]
- .includes(error)) {
- status = 503;
- } else if (error === callbackErrors.renderFailed) {
- status = 500;
+ switch (error) {
+ case e.queueBusy:
+ case e.queueFull:
+ status = 503;
+ break;
+ default:
+ status = 500;
}
const errorObject = new sUtil.HTTPError({ status });
@@ -63,10 +69,11 @@
app.queue = new Queue(
{
concurrency: conf.render_concurrency,
- timeout: conf.render_queue_timeout,
+ queueTimeout: conf.render_queue_timeout,
+ executionTimeout: conf.render_execution_timout,
maxTaskCount: conf.max_render_queue_size
},
- conf.puppeteer_flags,
+ conf.puppeteer_options,
conf.pdf_options,
app.logger
);
diff --git a/test/lib/queue.js b/test/lib/queue.js
index f593b2b..3d63bad 100644
--- a/test/lib/queue.js
+++ b/test/lib/queue.js
@@ -3,6 +3,7 @@
const assert = require('../utils/assert.js');
const { callbackErrors, Queue } = require('../../lib/queue');
const logger = { log: () => {} };
+const renderer = { abortRender: () => {} };
const puppeteerFlags = [
'--no-sandbox',
'--disable-setuid-sandbox'
@@ -42,13 +43,15 @@
}
const q = new QueueTest({
concurrency: 1,
- timeout: 90,
+ queueTimeout: 90,
+ executionTimeout: 90,
maxTaskCount: 3
}, puppeteerFlags, pdfOptions, logger);
// first worker must finish after 1 sec
q.push({
id: 1,
+ renderer,
timeout: 1000
}, () => {
assert.ok(status === 'done 1');
@@ -58,6 +61,7 @@
// second worker must finish 0.5 sec after the first one
q.push({
id: 2,
+ renderer,
timeout: 500
}, () => {
assert.ok(status === 'done 2');
@@ -67,6 +71,7 @@
// the last worker must finish last, regardless of the timeout
q.push({
id: 3,
+ renderer,
timeout: 10
}, () => {
assert.ok(testsCompleted === 2);
@@ -89,13 +94,15 @@
}
const q = new QueueTest({
concurrency: 1,
- timeout: 5,
+ queueTimeout: 5,
+ executionTimeout: 90,
maxTaskCount: 1
}, puppeteerFlags, pdfOptions, logger);
// first worker completes in 3 seconds
q.push({
id: 1,
+ renderer,
timeout: 3000
}, (error, data) => {
assert.ok(error === null);
@@ -106,6 +113,7 @@
// even though it's allowed to wait 5 seconds.
q.push({
id: 2,
+ renderer,
timeout: 0
}, (error, data) => {
assert.ok(testsCompleted === 0);
@@ -130,13 +138,15 @@
}
const q = new QueueTest({
concurrency: 1,
- timeout: 1,
+ queueTimeout: 1,
+ executionTimeout: 90,
maxTaskCount: 3
}, puppeteerFlags, pdfOptions, logger);
// first worker completes in 3 seconds
q.push({
id: 1,
+ renderer,
timeout: 3000
}, (error, data) => {
assert.ok(error === null);
@@ -146,6 +156,7 @@
// the following two requests should be rejected
q.push({
id: 2,
+ renderer,
timeout: 10
}, (error, data) => {
assert.ok(tasksCompleted === 0);
@@ -154,6 +165,7 @@
});
q.push({
id: 3,
+ renderer,
timeout: 20
}, (error, data) => {
assert.ok(tasksCompleted === 0);
@@ -162,4 +174,31 @@
done();
});
});
+
+ it('should reject tasks when render takes long', function(done) {
+ class QueueTest extends Queue {
+ _render(data, callback) {
+ // simulate render
+ setTimeout(() => {
+ callback(null, {});
+ }, data.timeout);
+ }
+ }
+ const q = new QueueTest({
+ concurrency: 10,
+ queueTimeout: 30,
+ executionTimeout: 1,
+ maxTaskCount: 10
+ }, puppeteerFlags, pdfOptions, logger);
+
+ q.push({
+ id: 1,
+ renderer,
+ timeout: 3000
+ }, (error, data) => {
+ assert.ok(error === callbackErrors.renderTimeout,
+ 'Render took more than a second.');
+ done();
+ });
+ });
});
--
To view, visit https://gerrit.wikimedia.org/r/390144
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I904fc15826b7de3a34e5c977b40bfd8518bb1679
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/services/chromium-render
Gerrit-Branch: master
Gerrit-Owner: Bmansurov <[email protected]>
Gerrit-Reviewer: Bmansurov <[email protected]>
Gerrit-Reviewer: Mobrovac <[email protected]>
Gerrit-Reviewer: Pmiazga <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits