Pmiazga has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/387642 )

Change subject: Add timeout to requests in the queue
......................................................................


Add timeout to requests in the queue

By default, tasks that are waiting in the queue are aborted after 90
seconds. In a follow up patch we'll limit the number of seconds a render
can last and terminate process that last longer.

Bug: T178501
Change-Id: Ib8b3ba4e609bfac457a3d1d21cef85c1da08bc20
---
M config.dev.yaml
M lib/queue.js
M lib/renderer.js
M package.json
M routes/html2pdf-v1.js
M test/features/v1/html2pdf.js
M test/lib/queue.js
7 files changed, 214 insertions(+), 48 deletions(-)

Approvals:
  Pmiazga: Verified; Looks good to me, approved



diff --git a/config.dev.yaml b/config.dev.yaml
index 25306f2..ce24eb4 100644
--- a/config.dev.yaml
+++ b/config.dev.yaml
@@ -95,4 +95,6 @@
         - '--no-sandbox'
         - '--disable-setuid-sandbox'
       # the maximum number of puppeteer instances that can be launched at a 
time
-      render_concurrency: 1
\ No newline at end of file
+      render_concurrency: 1
+      # don't wait to render a PDF after this many seconds
+      render_queue_timeout: 90
\ No newline at end of file
diff --git a/lib/queue.js b/lib/queue.js
index 868f420..63ec493 100644
--- a/lib/queue.js
+++ b/lib/queue.js
@@ -1,23 +1,121 @@
 'use strict';
 
 const asyncQueue = require('async/queue');
+const renderer = require('./renderer');
+const uuid = require('cassandra-uuid');
+const EventEmitter = require('events');
 
+// Errors used as the first argument of the callback passed to the queue
+const callbackErrors = {
+    queueBusy: 0,
+    renderFailed: 1
+};
 
-module.exports = class Queue {
+/**
+ * Wrapper around `async/queue`
+ * The class only exposes what's needed and takes care of rejecting
+ * requests upon timeout.
+ */
+class Queue extends EventEmitter {
     /**
-      * @param {Function} worker
-      * @param {number} concurrency
+      * @param {number} concurrency number of concurrent render instances
+      * @param {number} timeout number of seconds after which the
+      *   yet-to-start renders are aborted
+      * @param {Object} puppeteerFlags flags used to in starting puppeteer
+      * @param {Object} pdfOptions pdf options passed to Chromium
+      * @param {Object} logger app logger
       */
-    constructor(worker, concurrency) {
-        this._queueObject = asyncQueue(worker, concurrency);
+    constructor(concurrency, timeout, puppeteerFlags, pdfOptions, logger) {
+        super();
+        this._queueObject = asyncQueue(this._worker.bind(this), concurrency);
+        this._puppeteerFlags = puppeteerFlags;
+        this._pdfOptions = pdfOptions;
+        this._timeout = timeout;
+        this._logger = logger;
+        this.on('onBeforePush', this._onBeforePush);
+        this.on('onBeforeRender', this._onBeforeRender);
     }
 
     /**
-      * Push data to the queue
+      * Sets a timeout to cancel the task
+      * `_timeoutID` is attached to `data` so that it can be cleared if
+      * the task starts within a predefined time (see `_clearTimeout`).
+      * When the task is removed from the queue after the time is up
+      * `callback` is called with an error.
+      * @param {Object} data that the worker needs
+      * @param {Function} callback called with `callbackErrors.queueBusy`
+      * as its first argument when the time is up.
+      */
+    _onBeforePush(data, callback) {
+        const that = this;
+        data._id = uuid.TimeUuid.now().toString();
+        data._timeoutID = setTimeout(() => {
+            that._queueObject.remove((worker) => {
+                if (worker.data._id === data._id) {
+                    that._logger.log('trace/warning', {
+                        msg: `Queue is still busy after waiting ` +
+                            `for ${that._timeout} secs.`
+                    });
+                    callback(callbackErrors.queueBusy, null);
+                    return true;
+                }
+                return false;
+            });
+        }, this._timeout * 1000);
+    }
+
+    /**
+      * Clears timeout associated with data that was set using `_setTimeout`
+      * @param {Object} data that has a `_timeoutID` property
+      */
+    _onBeforeRender(data) {
+        clearTimeout(data._timeoutID);
+    }
+
+    /**
+      * Pushes `data` to the queue
+      * If the task doesn't start after a predefined time, it will be aborted.
+      * @param {Object} data that the worker needs
+      * @param {Function} callback called when the worker finishes. The
+      * first argument to callback will be one of the error codes from
+      * `callbackErrors` or `null` if there's no error. The second
+      * argument will be a promise that resolves with a PDF buffer. In case of
+      * error, the second argument will be `null`.
+      */
+    push(data, callback) {
+        this.emit('onBeforePush', data, callback);
+        this._queueObject.push(data, callback);
+    }
+
+    /**
+      * Worker that renders a PDF
+      * The timeout associated with `data` will be cleared before the
+      * render starts. In case of failure during the render phase, the
+      * callback is called with `callbackErrors.renderFailed` as its
+      * first argument.
       * @param {Object} data that the worker needs
       * @param {Function} callback called when the worker finishes
       */
-    push(data, callback) {
-        this._queueObject.push(data, callback);
+    _worker(data, callback) {
+        this.emit('onBeforeRender', data);
+
+        renderer
+            .articleToPdf(data.uri, data.format, this._puppeteerFlags,
+                          this._pdfOptions)
+            .then((pdf) => {
+                callback(null, pdf);
+            })
+            .catch((error) => {
+                this._logger.log('trace/error', {
+                    msg: `Cannot convert page ${data.uri} to PDF.`,
+                    error
+                });
+                callback(callbackErrors.renderFailed, null);
+            });
     }
+}
+
+module.exports = {
+    callbackErrors,
+    Queue
 };
diff --git a/lib/renderer.js b/lib/renderer.js
index 705a31e..dae34f1 100644
--- a/lib/renderer.js
+++ b/lib/renderer.js
@@ -6,14 +6,15 @@
  * Renders content from `url` in PDF
  * @param {string} url URL to get content from
  * @param {string} format Page size, e.g. Letter or A4, passed to understands
- * @param {Object} conf app configuration
+ * @param {Object} puppeteerFlags
+ * @param {Object} pdfOptions
  * @return {<Promise<Buffer>>} Promise which resolves with PDF buffer
  */
-exports.articleToPdf = function articleToPdf(url, format, conf) {
+exports.articleToPdf = function articleToPdf(url, format, puppeteerFlags, 
pdfOptions) {
     let browser;
     let page;
 
-    return puppeteer.launch({ args: conf.puppeteer_flags })
+    return puppeteer.launch({ args: puppeteerFlags })
         .then((browser_) => {
             browser = browser_;
             return browser.newPage();
@@ -24,7 +25,7 @@
         })
         .then(() => {
             return page.pdf(Object.assign(
-                {}, conf.pdf_options, { format }
+                {}, pdfOptions, { format }
             ));
         })
         .catch((error) => {
diff --git a/package.json b/package.json
index 0b8d57b..863fdd0 100644
--- a/package.json
+++ b/package.json
@@ -29,7 +29,7 @@
   },
   "homepage": 
"https://github.com/wikimedia/mediawiki-services-chromium-render";,
   "dependencies": {
-    "async": "^2.5.0",
+    "async": "^2.6.0",
     "bluebird": "^3.5.0",
     "body-parser": "^1.17.1",
     "bunyan": "^1.8.9",
diff --git a/routes/html2pdf-v1.js b/routes/html2pdf-v1.js
index 2aa0cdd..ac8af7c 100644
--- a/routes/html2pdf-v1.js
+++ b/routes/html2pdf-v1.js
@@ -1,8 +1,7 @@
 'use strict';
 
-const Queue = require('../lib/queue');
+const { callbackErrors, Queue } = require('../lib/queue');
 const sUtil = require('../lib/util');
-const renderer = require('../lib/renderer');
 
 /**
  * The main router object
@@ -29,14 +28,20 @@
 
     app.queue.push({
         uri: restbaseRequest.uri,
-        format: req.params.format,
-        conf: app.conf
+        format: req.params.format
     }, ((error, pdf) => {
         if (error) {
-            const error_ = new sUtil.HTTPError(
-                `Cannot convert page ${restbaseRequest.uri} to PDF.`
-            );
-            res.status(error_.status).send(error_);
+            let status;
+
+            if (error === callbackErrors.queueBusy) {
+                status = 503;
+            } else if (error === callbackErrors.renderFailed) {
+                status = 500;
+            }
+
+            const errorObject = new sUtil.HTTPError({ status });
+            res.status(errorObject.status).send(errorObject);
+
             return;
         }
 
@@ -53,22 +58,14 @@
 module.exports = function(appObj) {
     app = appObj;
 
-    const worker = (data, callback) => {
-        renderer
-            .articleToPdf(data.uri, data.format, data.conf)
-            .then((pdf) => {
-                callback(null, pdf);
-            })
-            .catch((error) => {
-                app.logger.log('trace/error', {
-                    msg: `Cannot convert page ${data.uri} to PDF.`,
-                    error
-                });
-                callback(error, null);
-            });
-    };
-
-    app.queue = new Queue(worker, app.conf.render_concurrency);
+    const conf = app.conf;
+    app.queue = new Queue(
+        conf.render_concurrency,
+        conf.render_queue_timeout,
+        conf.puppeteer_flags,
+        conf.pdf_options,
+        app.logger
+    );
 
     // the returned object mounts the routes on
     // /{domain}/vX/mount/path
diff --git a/test/features/v1/html2pdf.js b/test/features/v1/html2pdf.js
index 516e13b..38099ff 100644
--- a/test/features/v1/html2pdf.js
+++ b/test/features/v1/html2pdf.js
@@ -5,7 +5,7 @@
 const server = require('../../utils/server.js');
 
 describe('html2pdf', function() {
-    this.timeout(5000);
+    this.timeout(20000);
     before(function () { return server.start(); });
 
     it('should return a letter-sized PDF', function() {
@@ -14,7 +14,7 @@
         )
         .then(function(res) {
             assert.status(res, 200);
-            assert.contentType(res, 'application/PDF');
+            assert.contentType(res, 'application/pdf');
             assert.deepEqual(
                 Buffer.isBuffer(res.body), true, 'Unexpected body!');
         });
diff --git a/test/lib/queue.js b/test/lib/queue.js
index 720ca60..19a1fa3 100644
--- a/test/lib/queue.js
+++ b/test/lib/queue.js
@@ -1,7 +1,27 @@
 'use strict';
 
 const assert = require('../utils/assert.js');
-const Queue = require('../../lib/queue');
+const { callbackErrors, Queue } = require('../../lib/queue');
+const logger = { log: () => {} };
+const puppeteerFlags = [
+    '--no-sandbox',
+    '--disable-setuid-sandbox'
+];
+const pdfOptions = {
+    scale: 1,
+    displayHeaderFooter: false,
+    printBackground: false,
+    landscape: false,
+    pageRanges: '',
+    format: 'Letter',
+    margin: {
+        top: '0.5in',
+        right: '0.5in',
+        // some room for page numbers
+        bottom: '0.75in',
+        left: '0.5in'
+    }
+};
 
 describe('concurrency', function() {
     this.timeout(5000);
@@ -10,13 +30,17 @@
         let status,
             testsCompleted = 0;
 
-        const worker = (data, callback) => {
-            status = `done ${data.id}`;
-            setTimeout(() => {
-                callback();
-            }, data.timeout);
-        };
-        const q = new Queue(worker, 1);
+        class QueueTest extends Queue {
+            _worker (data, callback) {
+                this.emit('onBeforeRender', data);
+                status = `done ${data.id}`;
+                // simulate render
+                setTimeout(() => {
+                    callback();
+                }, data.timeout);
+            };
+        }
+        const q = new QueueTest(1, 90, puppeteerFlags, pdfOptions, logger);
 
         // first worker must finish after 1 sec
         q.push({
@@ -46,4 +70,48 @@
             done();
         });
     });
+
+    it('should reject timed out tasks', function(done) {
+        let tasksCompleted = 0,
+            tasksRejected = 0;
+
+        class QueueTest extends Queue {
+            _worker (data, callback) {
+                this.emit('onBeforeRender', data);
+                // simulate render
+                setTimeout(() => {
+                    callback(null, {});
+                }, data.timeout);
+            };
+        }
+        const q = new QueueTest(1, 1, puppeteerFlags, pdfOptions, logger);
+
+        // first worker completes in 3 seconds
+        q.push({
+            id: 1,
+            timeout: 3000
+        }, (error, data) => {
+            assert.ok(error === null);
+            tasksCompleted += 1;
+        });
+
+        // the following two requests should be rejected
+        q.push({
+            id: 2,
+            timeout: 10
+        }, (error, data) => {
+            assert.ok(tasksCompleted === 0);
+            assert.ok(error === callbackErrors.queueBusy);
+            tasksRejected += 1;
+        });
+        q.push({
+            id: 3,
+            timeout: 20
+        }, (error, data) => {
+            assert.ok(tasksCompleted === 0);
+            assert.ok(tasksRejected === 1);
+            assert.ok(error === callbackErrors.queueBusy);
+            done();
+        });
+    });
 });

-- 
To view, visit https://gerrit.wikimedia.org/r/387642
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib8b3ba4e609bfac457a3d1d21cef85c1da08bc20
Gerrit-PatchSet: 7
Gerrit-Project: mediawiki/services/chromium-render
Gerrit-Branch: master
Gerrit-Owner: Bmansurov <bmansu...@wikimedia.org>
Gerrit-Reviewer: Bmansurov <bmansu...@wikimedia.org>
Gerrit-Reviewer: Jdlrobson <jrob...@wikimedia.org>
Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org>
Gerrit-Reviewer: Niedzielski <sniedziel...@wikimedia.org>
Gerrit-Reviewer: Pmiazga <pmia...@wikimedia.org>
Gerrit-Reviewer: Ppchelko <ppche...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to