Yurik has uploaded a new change for review. https://gerrit.wikimedia.org/r/248284
Change subject: v2 post requests ...................................................................... v2 post requests The new version handles POST requests in this format (only png is supported): /<domain>/v2/<format> Optionally, it supports two more params (used for debugging only): /<domain>/v2/<format>/<Title> /<domain>/v2/<format>/<Title>/<RevId> The body of the request must be valid JSON. To test, use the Postman Chrome extension and POST to this URL: http://localhost:6927/www.mediawiki.org/v2/png The request body must be set to RAW -- JSON format. Use any graph spec from https://www.mediawiki.org/wiki/Extension:Graph/Demo Change-Id: Ie1eb673d3ce6b036cff99f735c5c26ff2b1fc938 --- A lib/vega.js M package.json M routes/graphoid-v1.js A routes/graphoid-v2.js 4 files changed, 345 insertions(+), 131 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/graphoid refs/changes/84/248284/1 diff --git a/lib/vega.js b/lib/vega.js new file mode 100644 index 0000000..b7708d4 --- /dev/null +++ b/lib/vega.js @@ -0,0 +1,121 @@ +'use strict'; + +var BBPromise = require('bluebird'); +var urllib = require('url'); +var vega = require('vega'); // Visualization grammar - https://github.com/trifacta/vega + +// Vega has its own renderAsync() version, but it does not return a promise +var renderAsync = BBPromise.promisify(vega.headless.render, vega.headless); + +module.exports = { + /** + * For protocol-relative URLs (they begin with //), which protocol should we use + */ + defaultProtocol: 'https', + + /** + * A set of 'oldDomain' => 'newDomain' mappings + */ + domainMap: false, + + /** + * Regex to validate domain parameter + */ + serverRe: null +}; + +/** + * Init vega rendering + * @param log + * @param domains array of strings - which domains are valid + */ +module.exports.initVega = function (log, defaultProtocol, domains, domainMap) { + if (module.exports.serverRe) { + return; // avoid double-initialization + } + + domains = domains || []; + 
module.exports.defaultProtocol = defaultProtocol || module.exports.defaultProtocol; + + var validDomains = domains; + if (domainMap && Object.getOwnPropertyNames(domainMap).length > 0) { + module.exports.domainMap = domainMap; + validDomains = validDomains.concat(Object.getOwnPropertyNames(domainMap)); + } + + if (validDomains.length === 0) { + log('fatal/config', 'Config must have non-empty "domains" (list) and/or "domainMap" (dict)'); + process.exit(1); + } + + // TODO: handle other symbols (even though they shouldn't be in the domains + // TODO: implement per-host default protocol, e.g. wikipedia.org -> https, wmflabs.org -> http + // per-demain default protocol will probably not be enabled for production + module.exports.serverRe = new RegExp('^([^@/:]*\.)?(' + + validDomains + .map(function (s) { + return s.replace('.', '\\.'); + }) + .join('|') + ')$'); + + vega.config.domainWhiteList = domains; + vega.config.defaultProtocol = module.exports.defaultProtocol + ':'; + vega.config.safeMode = true; + vega.config.isNode = true; // Vega is flaky with its own detection, fails in tests and with IDE debug + + // set up vega loggers to log to our device instead of stderr + vega.log = function (msg) { + log('debug/vega', msg); + }; + vega.error = function (msg) { + log('warn/vega', msg); + }; + + // + // TODO/BUG: In multithreaded env, we cannot set global vega.config var + // while handling multiple requests from multiple hosts. + // Until vega is capable of per-rendering context, we must bail on any + // relative (no hostname) data or image URLs. + // + // Do not set vega.config.baseURL. Current sanitizer implementation will fail + // because of the missing protocol (safeMode == true). Still, lets double check + // here, in case user has 'http:pathname', which for some strange reason is + // parsed as correct by url lib. 
+ // + var originalSanitize = vega.data.load.sanitizeUrl.bind(vega.data.load); + vega.data.load.sanitizeUrl = function (urlOrig) { + var url = originalSanitize.call(vega.data.load, urlOrig); + if (url) { + var parts = urllib.parse(url); + if (!parts.protocol || !parts.hostname) { + url = null; + } else if (parts.protocol !== 'http:' && parts.protocol !== 'https:') { + // load.sanitizeUrl() already does this, but double check to be safe + url = null; + } + } + if (url && module.exports.domainMap) { + url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match, prot, domain) { + var repl = module.exports.domainMap[domain]; + return repl ? prot + repl : match; + }); + } + + if (!url) { + log('debug/url-deny', urlOrig); + } else if (urlOrig !== url) { + log('debug/url-fix', {'req': urlOrig, 'repl': url}); + } else { + log('trace/url-ok', urlOrig); + } + return url; + }; +}; + +module.exports.render = function (opts) { + // BUG: see comment above at vega.data.load.sanitizeUrl = ... + // In case of non-absolute URLs, use requesting domain as "local" + vega.config.baseURL = module.exports.defaultProtocol + '://' + opts.domain; + + return renderAsync(opts.renderOpts); +}; diff --git a/package.json b/package.json index 1780e61..dbd9066 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "graphoid", - "version": "0.1.4", + "version": "0.1.5", "description": "Renders vega graphs from mediawiki pages", "main": "./app.js", "scripts": { @@ -38,6 +38,7 @@ "js-yaml": "^3.3.1", "preq": "^0.4.4", "service-runner": "^0.2.1", + "underscore": "^1.8.3", "vega": "git+http://[email protected]/nyurik/vega" }, "devDependencies": { diff --git a/routes/graphoid-v1.js b/routes/graphoid-v1.js index 51cee89..276161c 100644 --- a/routes/graphoid-v1.js +++ b/routes/graphoid-v1.js @@ -3,8 +3,7 @@ var BBPromise = require('bluebird'); var preq = require('preq'); var sUtil = require('../lib/util'); -var urllib = require('url'); -var vega = require('vega'); // Visualization grammar 
- https://github.com/trifacta/vega +var vega = require('../lib/vega'); /** @@ -23,24 +22,9 @@ var metrics; /** - * A set of 'oldDomain' => 'newDomain' mappings - */ -var domainMap = false; - -/** - * For protocol-relative URLs (they begin with //), which protocol should we use - */ -var defaultProtocol = 'https'; - -/** * Limit request to 10 seconds by default */ var timeout = 10000; - -/** - * Regex to validate domain parameter - */ -var serverRe = null; /* @@ -81,66 +65,6 @@ throw 'timeout'; // we later compare on this value })]); } - -/** - * Init vega rendering - * @param domains array of strings - which domains are valid - */ -function initVega(domains) { - vega.config.domainWhiteList = domains; - vega.config.defaultProtocol = defaultProtocol + ':'; - vega.config.safeMode = true; - vega.config.isNode = true; // Vega is flaky with its own detection, fails in tests and with IDE debug - - // set up vega loggers to log to our device instead of stderr - vega.log = function(msg) { - log('debug/vega', msg); - }; - vega.error = function(msg) { - log('warn/vega', msg); - }; - - // - // TODO/BUG: In multithreaded env, we cannot set global vega.config var - // while handling multiple requests from multiple hosts. - // Until vega is capable of per-rendering context, we must bail on any - // relative (no hostname) data or image URLs. - // - // Do not set vega.config.baseURL. Current sanitizer implementation will fail - // because of the missing protocol (safeMode == true). Still, lets double check - // here, in case user has 'http:pathname', which for some strange reason is - // parsed as correct by url lib. 
- // - var originalSanitize = vega.data.load.sanitizeUrl.bind(vega.data.load); - vega.data.load.sanitizeUrl = function (urlOrig) { - var url = originalSanitize.call(vega.data.load, urlOrig); - if (url) { - var parts = urllib.parse(url); - if (!parts.protocol || !parts.hostname) { - url = null; - } else if (parts.protocol !== 'http:' && parts.protocol !== 'https:') { - // load.sanitizeUrl() already does this, but double check to be safe - url = null; - } - } - if (url && domainMap) { - url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match, prot, domain) { - var repl = domainMap[domain]; - return repl ? prot + repl : match; - }); - } - - if (!url) { - log('debug/url-deny', urlOrig); - } else if (urlOrig !== url) { - log('debug/url-fix', {'req': urlOrig, 'repl': url}); - } else { - log('trace/url-ok', urlOrig); - } - return url; - }; -} - /** * Parse and validate request parameters @@ -200,15 +124,15 @@ } state.graphId = id; - if (!serverRe.test(domain)) { + if (!vega.serverRe.test(domain)) { throw new Err('info/param-domain', 'req.domain'); } // TODO: Optimize 'en.m.wikipedia.org' -> 'en.wikipedia.org' - var domain2 = (domainMap && domainMap[domain]) || domain; + var domain2 = (vega.domainMap && vega.domainMap[domain]) || domain; state.domain = domain2; - state.apiUrl = defaultProtocol + '://' + domain2 + '/w/api.php'; + state.apiUrl = vega.defaultProtocol + '://' + domain2 + '/w/api.php'; if (domain !== domain2) { state.log.backend = domain2; } @@ -316,34 +240,30 @@ } function renderOnCanvas(state) { - return new BBPromise(function (fulfill, reject){ - var start = Date.now(); - - // BUG: see comment above at vega.data.load.sanitizeUrl = ... 
- // In case of non-absolute URLs, use requesting domain as "local" - vega.config.baseURL = defaultProtocol + '://' + state.domain; - - vega.headless.render({spec: state.graphData, renderer: 'canvas'}, function (err, result) { - if (err) { - state.log.vegaErr = err; - reject(new Err('error/vega', 'vega.error')); - } else { - var stream = result.canvas.pngStream(); - state.response - .status(200) - .type('png') - // For now, lets re-cache more frequently - .header('Cache-Control', 'public, s-maxage=30, max-age=30'); - stream.on('data', function (chunk) { - state.response.write(chunk); - }); - stream.on('end', function () { - state.response.end(); - metrics.endTiming('total.vega', start); - fulfill(state); - }); - } + var start = Date.now(); + return vega.render({ + domain: state.domain, + renderOpts: {spec: state.graphData, renderer: 'canvas'} + }).then(function (result) { + var pendingPromise = BBPromise.pending(); + var stream = result.canvas.pngStream(); + state.response + .status(200) + .type('png') + // For now, lets re-cache more frequently + .header('Cache-Control', 'public, s-maxage=30, max-age=30'); + stream.on('data', function (chunk) { + state.response.write(chunk); }); + stream.on('end', function () { + state.response.end(); + metrics.endTiming('total.vega', start); + pendingPromise.resolve(state); + }); + return pendingPromise.promise; + }).catch(function (err) { + state.log.vegaErr = err; + throw new Err('error/vega', 'vega.error'); }); } @@ -408,31 +328,9 @@ metrics.increment('v1.init'); var conf = app.conf; - var domains = conf.domains || []; timeout = conf.timeout || timeout; - defaultProtocol = conf.defaultProtocol || defaultProtocol; - var validDomains = domains; - if (conf.domainMap && Object.getOwnPropertyNames(conf.domainMap).length > 0) { - domainMap = conf.domainMap; - validDomains = validDomains.concat(Object.getOwnPropertyNames(domainMap)); - } - - if (validDomains.length === 0) { - log('fatal/config', 'Config must have non-empty "domains" 
(list) and/or "domainMap" (dict)'); - process.exit(1); - } - - // TODO: handle other symbols (even though they shouldn't be in the domains - // TODO: implement per-host default protocol, e.g. wikipedia.org -> https, wmflabs.org -> http - // per-demain default protocol will probably not be enabled for production - serverRe = new RegExp('^([^@/:]*\.)?(' + - validDomains - .map(function (s) { - return s.replace('.', '\\.'); - }) - .join('|') + ')$'); - initVega(domains); + vega.initVega(log, conf.defaultProtocol, conf.domains, conf.domainMap); } diff --git a/routes/graphoid-v2.js b/routes/graphoid-v2.js new file mode 100644 index 0000000..c688e52 --- /dev/null +++ b/routes/graphoid-v2.js @@ -0,0 +1,194 @@ +'use strict'; + +var _ = require('underscore'); +var BBPromise = require('bluebird'); +var preq = require('preq'); +var sUtil = require('../lib/util'); +var vega = require('../lib/vega'); + + +/** + * Main log function + */ +var log; + +/** + * Metrics object + */ +var metrics; + +/** + * Limit request to 10 seconds by default + */ +var timeout = 10000; + + +/* + * Utility functions + */ + +function Err(message, metrics) { + this.message = message; + this.metrics = metrics; +} +Err.prototype = Object.create(Error.prototype); +Err.prototype.constructor = Err; + +// Adapted from https://www.promisejs.org/patterns/ +function delay(time) { + return new BBPromise(function (fulfill) { + setTimeout(fulfill, time); + }); +} + +function failOnTimeout(promise, time) { + return time <= 0 ? 
promise : + BBPromise.race([promise, delay(time).then(function () { + throw 'timeout'; // we later compare on this value + })]); +} + +/** + * Parse and validate request parameters + */ +function validateRequest(state) { + + var p = state.request.params, + format = p.format, + domain = p.domain, + body = state.request.body; + + state.log = p; // log all parameters of the request + + if (format !== 'png') { + throw new Err('info/param-format', 'req.format'); + } + + if (!vega.serverRe.test(domain)) { + throw new Err('info/param-domain', 'req.domain'); + } + + // TODO: Optimize 'en.m.wikipedia.org' -> 'en.wikipedia.org' + var domain2 = (vega.domainMap && vega.domainMap[domain]) || domain; + + state.domain = domain2; + if (domain !== domain2) { + state.log.backend = domain2; + } + + if (!body) { + throw new Err('info/param-body', 'req.body'); + } + state.graphData = body; + + // Log which wiki is actually requesting this + if (domain.endsWith('.org')) { + domain = domain.substr(0, domain.length - 4); + } + metrics.increment('req.' 
+ domain.replace('.', '-')); + + return state; +} + +function renderOnCanvas(state) { + var start = Date.now(); + return vega.render({ + domain: state.domain, + renderOpts: {spec: state.graphData, renderer: 'canvas'} + }).then(function (result) { + var pendingPromise = BBPromise.pending(); + var stream = result.canvas.pngStream(); + state.response + .status(200) + .type('png') + // For now, lets re-cache more frequently + .header('Cache-Control', 'public, s-maxage=30, max-age=30'); + stream.on('data', function (chunk) { + state.response.write(chunk); + }); + stream.on('end', function () { + state.response.end(); + metrics.endTiming('total.vega', start); + pendingPromise.resolve(state); + }); + return pendingPromise.promise; + }).catch(function (err) { + state.log.vegaErr = err; + throw new Err('error/vega', 'vega.error'); + }); +} + +/** + * Main entry point for graphoid + */ +function renderGraph(req, res) { + + var start = Date.now(); + var state = {request: req, response: res}; + + var render = BBPromise + .resolve(state) + .then(validateRequest) + .then(renderOnCanvas); + + return failOnTimeout(render, timeout) + .then(function () { + + // SUCCESS + metrics.endTiming('total.success', start); + + }, function (reason) { + + // FAILURE + var l = state.log; + var msg = 'error/unknown', + mx = 'error.unknown'; + + if (reason instanceof Err) { + l = _.extend(reason, l); + msg = reason.message; + mx = reason.metrics; + delete l.message; + delete l.metrics; + } else if (reason !== null && typeof reason === 'object') { + l = _.extend(reason, l); + } else { + l.msg = reason; + } + + res + .status(400) + .header('Cache-Control', 'public, s-maxage=30, max-age=30') + .json(msg); + metrics.increment(mx); + req.logger.log(msg, l); + }); +} + +module.exports = function(app) { + + // The very first operation should set up our logger + log = app.logger.log.bind(app.logger); + metrics = app.metrics; + + log('info/init', 'starting v2'); + metrics.increment('v2.init'); + + var conf 
= app.conf; + timeout = conf.timeout || timeout; + + vega.initVega(log, conf.defaultProtocol, conf.domains, conf.domainMap); + + var router = sUtil.router(); + //var bodyParser = require('body-parser').json(); + + router.post('/:format', renderGraph); + router.post('/:format/:title', renderGraph); + router.post('/:format/:title/:revid', renderGraph); + + return { + path: '/', + api_version: 2, + router: router + }; +}; -- To view, visit https://gerrit.wikimedia.org/r/248284 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ie1eb673d3ce6b036cff99f735c5c26ff2b1fc938 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/graphoid Gerrit-Branch: master Gerrit-Owner: Yurik <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
