Yurik has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/248284

Change subject: v2 post requests
......................................................................

v2 post requests

The new version handles POST requests in this format (only png is supported):
/<domain>/v2/<format>

Optionally, supports two more params (used for debugging only):
/<domain>/v2/<format>/<Title>
/<domain>/v2/<format>/<Title>/<RevId>

The body of the request must be valid JSON.

To test, use the Postman Chrome extension, and POST to this URL:
   http://localhost:6927/www.mediawiki.org/v2/png
 The request body must be set to RAW -- JSON format.

Use any graph spec from https://www.mediawiki.org/wiki/Extension:Graph/Demo

Change-Id: Ie1eb673d3ce6b036cff99f735c5c26ff2b1fc938
---
A lib/vega.js
M package.json
M routes/graphoid-v1.js
A routes/graphoid-v2.js
4 files changed, 345 insertions(+), 131 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/graphoid 
refs/changes/84/248284/1

diff --git a/lib/vega.js b/lib/vega.js
new file mode 100644
index 0000000..b7708d4
--- /dev/null
+++ b/lib/vega.js
@@ -0,0 +1,121 @@
+'use strict';
+
+var BBPromise = require('bluebird');
+var urllib = require('url');
+var vega = require('vega'); // Visualization grammar - 
https://github.com/trifacta/vega
+
+// Vega has its own renderAsync() version, but it does not return a promise
+var renderAsync = BBPromise.promisify(vega.headless.render, vega.headless);
+
+module.exports = {
+    /**
+     * For protocol-relative URLs  (they begin with //), which protocol should 
we use
+     */
+    defaultProtocol: 'https',
+
+    /**
+     * A set of 'oldDomain' => 'newDomain' mappings
+     */
+    domainMap: false,
+
+    /**
+     * Regex to validate domain parameter
+     */
+    serverRe: null
+};
+
+/**
+ * Init vega rendering
+ * @param log
+ * @param domains array of strings - which domains are valid
+ */
+module.exports.initVega = function (log, defaultProtocol, domains, domainMap) {
+    if (module.exports.serverRe) {
+        return; // avoid double-initialization
+    }
+
+    domains = domains || [];
+    module.exports.defaultProtocol = defaultProtocol || 
module.exports.defaultProtocol;
+
+    var validDomains = domains;
+    if (domainMap && Object.getOwnPropertyNames(domainMap).length > 0) {
+        module.exports.domainMap = domainMap;
+        validDomains = 
validDomains.concat(Object.getOwnPropertyNames(domainMap));
+    }
+
+    if (validDomains.length === 0) {
+        log('fatal/config', 'Config must have non-empty "domains" (list) 
and/or "domainMap" (dict)');
+        process.exit(1);
+    }
+
+    // TODO: handle other symbols (even though they shouldn't be in the domains
+    // TODO: implement per-host default protocol, e.g. wikipedia.org -> https, 
wmflabs.org -> http
+    //       per-demain default protocol will probably not be enabled for 
production
+    module.exports.serverRe = new RegExp('^([^@/:]*\.)?(' +
+        validDomains
+            .map(function (s) {
+                return s.replace('.', '\\.');
+            })
+            .join('|') + ')$');
+
+    vega.config.domainWhiteList = domains;
+    vega.config.defaultProtocol = module.exports.defaultProtocol + ':';
+    vega.config.safeMode = true;
+    vega.config.isNode = true; // Vega is flaky with its own detection, fails 
in tests and with IDE debug
+
+    // set up vega loggers to log to our device instead of stderr
+    vega.log = function (msg) {
+        log('debug/vega', msg);
+    };
+    vega.error = function (msg) {
+        log('warn/vega', msg);
+    };
+
+    //
+    // TODO/BUG:  In multithreaded env, we cannot set global vega.config var
+    // while handling multiple requests from multiple hosts.
+    // Until vega is capable of per-rendering context, we must bail on any
+    // relative (no hostname) data or image URLs.
+    //
+    // Do not set vega.config.baseURL. Current sanitizer implementation will 
fail
+    // because of the missing protocol (safeMode == true). Still, lets double 
check
+    // here, in case user has   'http:pathname', which for some strange reason 
is
+    // parsed as correct by url lib.
+    //
+    var originalSanitize = vega.data.load.sanitizeUrl.bind(vega.data.load);
+    vega.data.load.sanitizeUrl = function (urlOrig) {
+        var url = originalSanitize.call(vega.data.load, urlOrig);
+        if (url) {
+            var parts = urllib.parse(url);
+            if (!parts.protocol || !parts.hostname) {
+                url = null;
+            } else if (parts.protocol !== 'http:' && parts.protocol !== 
'https:') {
+                // load.sanitizeUrl() already does this, but double check to 
be safe
+                url = null;
+            }
+        }
+        if (url && module.exports.domainMap) {
+            url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match, 
prot, domain) {
+                var repl = module.exports.domainMap[domain];
+                return repl ? prot + repl : match;
+            });
+        }
+
+        if (!url) {
+            log('debug/url-deny', urlOrig);
+        } else if (urlOrig !== url) {
+            log('debug/url-fix', {'req': urlOrig, 'repl': url});
+        } else {
+            log('trace/url-ok', urlOrig);
+        }
+        return url;
+    };
+};
+
+module.exports.render = function (opts) {
+    // BUG: see comment above at vega.data.load.sanitizeUrl = ...
+    // In case of non-absolute URLs, use requesting domain as "local"
+    vega.config.baseURL = module.exports.defaultProtocol + '://' + opts.domain;
+
+    return renderAsync(opts.renderOpts);
+};
diff --git a/package.json b/package.json
index 1780e61..dbd9066 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "graphoid",
-  "version": "0.1.4",
+  "version": "0.1.5",
   "description": "Renders vega graphs from mediawiki pages",
   "main": "./app.js",
   "scripts": {
@@ -38,6 +38,7 @@
     "js-yaml": "^3.3.1",
     "preq": "^0.4.4",
     "service-runner": "^0.2.1",
+    "underscore": "^1.8.3",
     "vega": "git+http://[email protected]/nyurik/vega";
   },
   "devDependencies": {
diff --git a/routes/graphoid-v1.js b/routes/graphoid-v1.js
index 51cee89..276161c 100644
--- a/routes/graphoid-v1.js
+++ b/routes/graphoid-v1.js
@@ -3,8 +3,7 @@
 var BBPromise = require('bluebird');
 var preq = require('preq');
 var sUtil = require('../lib/util');
-var urllib = require('url');
-var vega = require('vega'); // Visualization grammar - 
https://github.com/trifacta/vega
+var vega = require('../lib/vega');
 
 
 /**
@@ -23,24 +22,9 @@
 var metrics;
 
 /**
- * A set of 'oldDomain' => 'newDomain' mappings
- */
-var domainMap = false;
-
-/**
- * For protocol-relative URLs  (they begin with //), which protocol should we 
use
- */
-var defaultProtocol = 'https';
-
-/**
  * Limit request to 10 seconds by default
  */
 var timeout = 10000;
-
-/**
- * Regex to validate domain parameter
- */
-var serverRe = null;
 
 
 /*
@@ -81,66 +65,6 @@
             throw 'timeout'; // we later compare on this value
         })]);
 }
-
-/**
- * Init vega rendering
- * @param domains array of strings - which domains are valid
- */
-function initVega(domains) {
-    vega.config.domainWhiteList = domains;
-    vega.config.defaultProtocol = defaultProtocol + ':';
-    vega.config.safeMode = true;
-    vega.config.isNode = true; // Vega is flaky with its own detection, fails 
in tests and with IDE debug
-
-    // set up vega loggers to log to our device instead of stderr
-    vega.log = function(msg) {
-        log('debug/vega', msg);
-    };
-    vega.error = function(msg) {
-        log('warn/vega', msg);
-    };
-
-    //
-    // TODO/BUG:  In multithreaded env, we cannot set global vega.config var
-    // while handling multiple requests from multiple hosts.
-    // Until vega is capable of per-rendering context, we must bail on any
-    // relative (no hostname) data or image URLs.
-    //
-    // Do not set vega.config.baseURL. Current sanitizer implementation will 
fail
-    // because of the missing protocol (safeMode == true). Still, lets double 
check
-    // here, in case user has   'http:pathname', which for some strange reason 
is
-    // parsed as correct by url lib.
-    //
-    var originalSanitize = vega.data.load.sanitizeUrl.bind(vega.data.load);
-    vega.data.load.sanitizeUrl = function (urlOrig) {
-        var url = originalSanitize.call(vega.data.load, urlOrig);
-        if (url) {
-            var parts = urllib.parse(url);
-            if (!parts.protocol || !parts.hostname) {
-                url = null;
-            } else if (parts.protocol !== 'http:' && parts.protocol !== 
'https:') {
-                // load.sanitizeUrl() already does this, but double check to 
be safe
-                url = null;
-            }
-        }
-        if (url && domainMap) {
-            url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match, 
prot, domain) {
-                var repl = domainMap[domain];
-                return repl ? prot + repl : match;
-            });
-        }
-
-        if (!url) {
-            log('debug/url-deny', urlOrig);
-        } else if (urlOrig !== url) {
-            log('debug/url-fix', {'req': urlOrig, 'repl': url});
-        } else {
-            log('trace/url-ok', urlOrig);
-        }
-        return url;
-    };
-}
-
 
 /**
  * Parse and validate request parameters
@@ -200,15 +124,15 @@
     }
     state.graphId = id;
 
-    if (!serverRe.test(domain)) {
+    if (!vega.serverRe.test(domain)) {
         throw new Err('info/param-domain', 'req.domain');
     }
 
     // TODO: Optimize 'en.m.wikipedia.org' -> 'en.wikipedia.org'
-    var domain2 = (domainMap && domainMap[domain]) || domain;
+    var domain2 = (vega.domainMap && vega.domainMap[domain]) || domain;
 
     state.domain = domain2;
-    state.apiUrl = defaultProtocol + '://' + domain2 + '/w/api.php';
+    state.apiUrl = vega.defaultProtocol + '://' + domain2 + '/w/api.php';
     if (domain !== domain2) {
         state.log.backend = domain2;
     }
@@ -316,34 +240,30 @@
 }
 
 function renderOnCanvas(state) {
-    return new BBPromise(function (fulfill, reject){
-        var start = Date.now();
-
-        // BUG: see comment above at vega.data.load.sanitizeUrl = ...
-        // In case of non-absolute URLs, use requesting domain as "local"
-        vega.config.baseURL = defaultProtocol + '://' + state.domain;
-
-        vega.headless.render({spec: state.graphData, renderer: 'canvas'}, 
function (err, result) {
-            if (err) {
-                state.log.vegaErr = err;
-                reject(new Err('error/vega', 'vega.error'));
-            } else {
-                var stream = result.canvas.pngStream();
-                state.response
-                    .status(200)
-                    .type('png')
-                    // For now, lets re-cache more frequently
-                    .header('Cache-Control', 'public, s-maxage=30, 
max-age=30');
-                stream.on('data', function (chunk) {
-                    state.response.write(chunk);
-                });
-                stream.on('end', function () {
-                    state.response.end();
-                    metrics.endTiming('total.vega', start);
-                    fulfill(state);
-                });
-            }
+    var start = Date.now();
+    return vega.render({
+        domain: state.domain,
+        renderOpts: {spec: state.graphData, renderer: 'canvas'}
+    }).then(function (result) {
+        var pendingPromise = BBPromise.pending();
+        var stream = result.canvas.pngStream();
+        state.response
+            .status(200)
+            .type('png')
+            // For now, lets re-cache more frequently
+            .header('Cache-Control', 'public, s-maxage=30, max-age=30');
+        stream.on('data', function (chunk) {
+            state.response.write(chunk);
         });
+        stream.on('end', function () {
+            state.response.end();
+            metrics.endTiming('total.vega', start);
+            pendingPromise.resolve(state);
+        });
+        return pendingPromise.promise;
+    }).catch(function (err) {
+        state.log.vegaErr = err;
+        throw new Err('error/vega', 'vega.error');
     });
 }
 
@@ -408,31 +328,9 @@
     metrics.increment('v1.init');
 
     var conf = app.conf;
-    var domains = conf.domains || [];
     timeout = conf.timeout || timeout;
-    defaultProtocol = conf.defaultProtocol || defaultProtocol;
 
-    var validDomains = domains;
-    if (conf.domainMap && Object.getOwnPropertyNames(conf.domainMap).length > 
0) {
-        domainMap = conf.domainMap;
-        validDomains = 
validDomains.concat(Object.getOwnPropertyNames(domainMap));
-    }
-
-    if (validDomains.length === 0) {
-        log('fatal/config', 'Config must have non-empty "domains" (list) 
and/or "domainMap" (dict)');
-        process.exit(1);
-    }
-
-    // TODO: handle other symbols (even though they shouldn't be in the domains
-    // TODO: implement per-host default protocol, e.g. wikipedia.org -> https, 
wmflabs.org -> http
-    //       per-demain default protocol will probably not be enabled for 
production
-    serverRe = new RegExp('^([^@/:]*\.)?(' +
-    validDomains
-        .map(function (s) {
-            return s.replace('.', '\\.');
-        })
-        .join('|') + ')$');
-    initVega(domains);
+    vega.initVega(log, conf.defaultProtocol, conf.domains, conf.domainMap);
 }
 
 
diff --git a/routes/graphoid-v2.js b/routes/graphoid-v2.js
new file mode 100644
index 0000000..c688e52
--- /dev/null
+++ b/routes/graphoid-v2.js
@@ -0,0 +1,194 @@
+'use strict';
+
+var _ = require('underscore');
+var BBPromise = require('bluebird');
+var preq = require('preq');
+var sUtil = require('../lib/util');
+var vega = require('../lib/vega');
+
+
+/**
+ * Main log function
+ */
+var log;
+
+/**
+ * Metrics object
+ */
+var metrics;
+
+/**
+ * Limit request to 10 seconds by default
+ */
+var timeout = 10000;
+
+
+/*
+ * Utility functions
+ */
+
+function Err(message, metrics) {
+    this.message = message;
+    this.metrics = metrics;
+}
+Err.prototype = Object.create(Error.prototype);
+Err.prototype.constructor = Err;
+
+// Adapted from https://www.promisejs.org/patterns/
+function delay(time) {
+    return new BBPromise(function (fulfill) {
+        setTimeout(fulfill, time);
+    });
+}
+
+function failOnTimeout(promise, time) {
+    return time <= 0 ? promise :
+        BBPromise.race([promise, delay(time).then(function () {
+            throw 'timeout'; // we later compare on this value
+        })]);
+}
+
+/**
+ * Parse and validate request parameters
+ */
+function validateRequest(state) {
+
+    var p = state.request.params,
+        format = p.format,
+        domain = p.domain,
+        body = state.request.body;
+
+    state.log = p; // log all parameters of the request
+
+    if (format !== 'png') {
+        throw new Err('info/param-format', 'req.format');
+    }
+
+    if (!vega.serverRe.test(domain)) {
+        throw new Err('info/param-domain', 'req.domain');
+    }
+
+    // TODO: Optimize 'en.m.wikipedia.org' -> 'en.wikipedia.org'
+    var domain2 = (vega.domainMap && vega.domainMap[domain]) || domain;
+
+    state.domain = domain2;
+    if (domain !== domain2) {
+        state.log.backend = domain2;
+    }
+
+    if (!body) {
+        throw new Err('info/param-body', 'req.body');
+    }
+    state.graphData = body;
+
+    // Log which wiki is actually requesting this
+    if (domain.endsWith('.org')) {
+        domain = domain.substr(0, domain.length - 4);
+    }
+    metrics.increment('req.' + domain.replace('.', '-'));
+
+    return state;
+}
+
+function renderOnCanvas(state) {
+    var start = Date.now();
+    return vega.render({
+        domain: state.domain,
+        renderOpts: {spec: state.graphData, renderer: 'canvas'}
+    }).then(function (result) {
+        var pendingPromise = BBPromise.pending();
+        var stream = result.canvas.pngStream();
+        state.response
+            .status(200)
+            .type('png')
+            // For now, lets re-cache more frequently
+            .header('Cache-Control', 'public, s-maxage=30, max-age=30');
+        stream.on('data', function (chunk) {
+            state.response.write(chunk);
+        });
+        stream.on('end', function () {
+            state.response.end();
+            metrics.endTiming('total.vega', start);
+            pendingPromise.resolve(state);
+        });
+        return pendingPromise.promise;
+    }).catch(function (err) {
+        state.log.vegaErr = err;
+        throw new Err('error/vega', 'vega.error');
+    });
+}
+
+/**
+ * Main entry point for graphoid
+ */
+function renderGraph(req, res) {
+
+    var start = Date.now();
+    var state = {request: req, response: res};
+
+    var render = BBPromise
+        .resolve(state)
+        .then(validateRequest)
+        .then(renderOnCanvas);
+
+    return failOnTimeout(render, timeout)
+        .then(function () {
+
+            // SUCCESS
+            metrics.endTiming('total.success', start);
+
+        }, function (reason) {
+
+            // FAILURE
+            var l = state.log;
+            var msg = 'error/unknown',
+                mx = 'error.unknown';
+
+            if (reason instanceof Err) {
+                l = _.extend(reason, l);
+                msg = reason.message;
+                mx = reason.metrics;
+                delete l.message;
+                delete l.metrics;
+            } else if (reason !== null && typeof reason === 'object') {
+                l = _.extend(reason, l);
+            } else {
+                l.msg = reason;
+            }
+
+            res
+                .status(400)
+                .header('Cache-Control', 'public, s-maxage=30, max-age=30')
+                .json(msg);
+            metrics.increment(mx);
+            req.logger.log(msg, l);
+        });
+}
+
+module.exports = function(app) {
+
+    // The very first operation should set up our logger
+    log = app.logger.log.bind(app.logger);
+    metrics = app.metrics;
+
+    log('info/init', 'starting v2');
+    metrics.increment('v2.init');
+
+    var conf = app.conf;
+    timeout = conf.timeout || timeout;
+
+    vega.initVega(log, conf.defaultProtocol, conf.domains, conf.domainMap);
+
+    var router = sUtil.router();
+    //var bodyParser = require('body-parser').json();
+
+    router.post('/:format', renderGraph);
+    router.post('/:format/:title', renderGraph);
+    router.post('/:format/:title/:revid', renderGraph);
+
+    return {
+        path: '/',
+        api_version: 2,
+        router: router
+    };
+};

-- 
To view, visit https://gerrit.wikimedia.org/r/248284
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie1eb673d3ce6b036cff99f735c5c26ff2b1fc938
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/graphoid
Gerrit-Branch: master
Gerrit-Owner: Yurik <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to