Yurik has uploaded a new change for review. https://gerrit.wikimedia.org/r/249925
Change subject: Support multiple Vega versions ...................................................................... Support multiple Vega versions Supports Vega 2 in the v2 POST requests based on the VegaVersion header VegaVersion header could be: 1 == forces Vega v1 2 == forces Vega v2 0 == try Vega v2, if failed, try to remove "data." prefix for all fields and try again, and if failed, try v1 Change-Id: Iee2e6f320964000facedc179e8edd867b28c8591 --- M lib/vega.js M package.json M routes/graphoid-v1.js M routes/graphoid-v2.js M scripts/sqlToFiles.js 5 files changed, 233 insertions(+), 122 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/graphoid refs/changes/25/249925/1 diff --git a/lib/vega.js b/lib/vega.js index e1a4440..2800858 100644 --- a/lib/vega.js +++ b/lib/vega.js @@ -2,10 +2,12 @@ var BBPromise = require('bluebird'); var urllib = require('url'); -var vega = require('vega'); // Visualization grammar - https://github.com/trifacta/vega +var vega1x = require('vega-1x'); +var vega = require('vega'); -// Vega has its own renderAsync() version, but it does not return a promise -var renderAsync = BBPromise.promisify(vega.headless.render, vega.headless); +var canvas1ToBuffer, canvas2ToBuffer; +var vega1xRenderAsync = BBPromise.promisify(vega1x.headless.render, vega1x.headless); +var vegaSpecParseAsync = BBPromise.promisify(vega.parse.spec.parse, vega.parse.spec); module.exports = { /** @@ -58,17 +60,23 @@ }) .join('|') + ')$'); - vega.config.domainWhiteList = domains; - vega.config.defaultProtocol = module.exports.defaultProtocol + ':'; - vega.config.safeMode = true; - vega.config.isNode = true; // Vega is flaky with its own detection, fails in tests and with IDE debug + vega1x.config.domainWhiteList = domains; + vega1x.config.defaultProtocol = module.exports.defaultProtocol + ':'; + vega1x.config.safeMode = true; + vega1x.config.isNode = true; // Vega is flaky with its own detection, fails in tests and with IDE debug + + vega.config.load.domainWhiteList = domains; + vega.config.load.defaultProtocol = module.exports.defaultProtocol + ':'; // set up vega loggers to log to our device instead of stderr - vega.log = function (msg) { + vega1x.log = vega.logging.log = function (msg) { log('debug/vega', msg); }; - vega.error = function (msg) { + vega1x.error = function (msg) { log('warn/vega', msg); + }; + vega.logging.error = function (msg) { + throw new Error(msg); }; // @@ -77,47 +85,176 @@ // Until vega is capable of per-rendering context, we must bail on any // relative (no hostname) data or image URLs. // - // Do not set vega.config.baseURL. Current sanitizer implementation will fail + // Do not set vega.config.load.baseURL. Current sanitizer implementation will fail // because of the missing protocol (safeMode == true). Still, lets double check // here, in case user has 'http:pathname', which for some strange reason is // parsed as correct by url lib. // - var originalSanitize = vega.data.load.sanitizeUrl.bind(vega.data.load); - vega.data.load.sanitizeUrl = function (urlOrig) { - var url = originalSanitize.call(vega.data.load, urlOrig); - if (url) { - var parts = urllib.parse(url); - if (!parts.protocol || !parts.hostname) { - url = null; - } else if (parts.protocol !== 'http:' && parts.protocol !== 'https:') { - // load.sanitizeUrl() already does this, but double check to be safe - url = null; - } - } - if (url && module.exports.domainMap) { - url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match, prot, domain) { - var repl = module.exports.domainMap[domain]; - return repl ? prot + repl : match; - }); - } + function setSanitizeUrlFunc(isLegacy) { - if (!url) { - log('debug/url-deny', urlOrig); - } else if (urlOrig !== url) { - log('debug/url-fix', {'req': urlOrig, 'repl': url}); - } else { - log('trace/url-ok', urlOrig); - } - return url; - }; + var loadModule = isLegacy ? vega1x.data.load : vega.util.load; + var originalSanitize = loadModule.sanitizeUrl.bind(loadModule); + + loadModule.sanitizeUrl = function(sanitizeParam) { + var urlOrig = isLegacy ? sanitizeParam : sanitizeParam.url; + var url = originalSanitize.apply(loadModule, arguments); + if (url) { + var parts = urllib.parse(url); + if (!parts.protocol || !parts.hostname) { + url = null; + } else if (parts.protocol !== 'http:' && parts.protocol !== 'https:') { + // load.sanitizeUrl() already does this, but double check to be safe + url = null; + } + } + if (url && module.exports.domainMap) { + url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match, prot, domain) { + var repl = module.exports.domainMap[domain]; + return repl ? prot + repl : match; + }); + } + + if (!url) { + log('debug/url-deny', urlOrig); + } else if (urlOrig !== url) { + log('debug/url-fix', {'req': urlOrig, 'repl': url}); + } else { + log('trace/url-ok', urlOrig); + } + return url; + }; + } + + setSanitizeUrlFunc(true); + setSanitizeUrlFunc(false); }; -module.exports.render = function (opts) { - // BUG: see comment above at vega.data.load.sanitizeUrl = ... +/** + * + * @param domain domain context of the request + * @param spec graph specification + * @param format string format - png or svg + * @param version + * @param response object to send/stream the results to. If not given, returns [value, isLegacy] + * @param cache string to set 'Cache-Control' header + * @returns {*} promise + */ +module.exports.render = function(domain, spec, format, version, response, cache) { + var isLegacy = false, + isSvg = format === 'svg', + ver; + + var p = BBPromise.try(function () { + if (typeof spec === 'string') { + spec = JSON.parse(spec); + } + }); + + if (version == 1) { + // render using version 1 only + p = p.then(function () { + isLegacy = true; + ver = 'v1'; + return renderV1(domain, spec, isSvg); + }); + } else { + // try version 2 first + p = p.then(function () { + ver = 'v2'; + return renderV2(domain, spec, isSvg); + }); + // try auto-fixing and fallback to 1 + if (!version) { + p = p.catch(function (err) { + // Try to auto-correct some common mistakes + ver = 'v2+fix'; + var spec2 = JSON.parse(JSON.stringify(spec).replace(/"field":"data\./g, '"field":"')); + return renderV2(domain, spec2, isSvg); + }).catch(function () { + // ignore error + isLegacy = true; + ver = 'v1'; + return renderV1(domain, spec, isSvg); + }); + } + } + + return p.then(function (result) { + if (response || isSvg) { + // if response is given, attempt to stream to it + return result; + } + // Convert canvas content to a buffer + if (isLegacy) { + if (!canvas1ToBuffer) { + canvas1ToBuffer = BBPromise.promisify(result.toBuffer); + } + return canvas1ToBuffer.call(result); + } else { + if (!canvas2ToBuffer) { + canvas2ToBuffer = BBPromise.promisify(result.toBuffer); + } + return canvas2ToBuffer.call(result); + } + }).then(function (result) { + if (!response) { + return [result, ver]; + } + + response.status(200).type(format); + if (cache) { + response.header('Cache-Control', cache); + } + response.header('VegaVersion', ver); + if (isSvg) { + response.send(result); + return ver; + } + + // PNG stream copy + var pendingPromise = BBPromise.pending(); + var stream = result.pngStream(); + stream.on('data', function (chunk) { + response.write(chunk); + }); + stream.on('end', function () { + response.end(); + pendingPromise.resolve(ver); + }); + return pendingPromise.promise; + }); +}; + +function renderV1(domain, spec, isSvg) { + // BUG: see comment above at vega1x.data.load.sanitizeUrl = ... // In case of non-absolute URLs, use requesting domain as "local" - vega.config.baseURL = module.exports.defaultProtocol + '://' + opts.domain; + vega1x.config.baseURL = module.exports.defaultProtocol + '://' + domain; // TODO: BUG: possible async bug // need to call it without promises and than wrap it because vega doesn't have request state - return renderAsync(opts.renderOpts); -}; + var renderer = isSvg ? 'svg' : 'canvas'; + return vega1xRenderAsync({spec: spec, renderer: renderer}).get(renderer); +} + +function renderV2(domain, spec, isSvg) { + return BBPromise.try(function () { + var config = { + load: { + defaultProtocol: module.exports.defaultProtocol, + baseURL: module.exports.defaultProtocol + '://' + domain + } + }; + return vegaSpecParseAsync(spec, config).then(function (chart) { + var view = chart({renderer: (isSvg ? 'svg' : 'canvas')}).update(); + + if (isSvg) { + return view.svg(); + } + var pending = BBPromise.pending(); + view.canvasAsync(function (canvas) { + pending.resolve(canvas); + }); + return pending.promise; + }); + }); +} diff --git a/package.json b/package.json index c5ae1e4..031bc4e 100644 --- a/package.json +++ b/package.json @@ -29,6 +29,10 @@ }, "homepage": "https://www.mediawiki.org/wiki/Extension:Graph", "dependencies": { + "underscore": "^1.8.3", + "vega-1x": "git+http://g...@github.com/nyurik/vega#v1.x", + "vega": "git+http://g...@github.com/nyurik/vega", + "bluebird": "~2.8.2", "body-parser": "^1.14.1", "bunyan": "^1.5.1", @@ -39,10 +43,6 @@ "js-yaml": "^3.4.3", "preq": "^0.4.4", "service-runner": "^0.2.12" - - , - "underscore": "^1.8.3", - "vega": "git+http://g...@github.com/nyurik/vega" }, "devDependencies": { "mkdirp": "^0.5.1", diff --git a/routes/graphoid-v1.js b/routes/graphoid-v1.js index 276161c..9eead15 100644 --- a/routes/graphoid-v1.js +++ b/routes/graphoid-v1.js @@ -241,30 +241,15 @@ function renderOnCanvas(state) { var start = Date.now(); - return vega.render({ - domain: state.domain, - renderOpts: {spec: state.graphData, renderer: 'canvas'} - }).then(function (result) { - var pendingPromise = BBPromise.pending(); - var stream = result.canvas.pngStream(); - state.response - .status(200) - .type('png') - // For now, lets re-cache more frequently - .header('Cache-Control', 'public, s-maxage=30, max-age=30'); - stream.on('data', function (chunk) { - state.response.write(chunk); - }); - stream.on('end', function () { - state.response.end(); + return vega + .render(state.domain, state.graphData, 'png', 1, state.response, 'public, s-maxage=30, max-age=30') + .then(function () { metrics.endTiming('total.vega', start); - pendingPromise.resolve(state); + }) + .catch(function (err) { + state.log.vegaErr = err; + throw new Err('error/vega', 'vega.error'); }); - return pendingPromise.promise; - }).catch(function (err) { - state.log.vegaErr = err; - throw new Err('error/vega', 'vega.error'); - }); } /** diff --git a/routes/graphoid-v2.js b/routes/graphoid-v2.js index 98051d3..2ac2404 100644 --- a/routes/graphoid-v2.js +++ b/routes/graphoid-v2.js @@ -22,12 +22,11 @@ */ var timeout = 10000; - /** - * Async version of the can canvas.toBuffer() - * @type {Function} + * Cache header to set on the responses + * @type {string} */ -var canvasToBuffer; +var cacheControlHdr = 'public, s-maxage=30, max-age=30'; /* * Utility functions @@ -104,30 +103,28 @@ if (state.request.headers.revisionid) { state.response.header('RevisionId', state.request.headers.revisionid); } - - var isSvg = state.format === 'svg'; - - return vega.render({ - domain: state.domain, - renderOpts: {spec: state.graphData, renderer: isSvg ? 'svg' : 'canvas'} - }).then(isSvg ? function (result) { - return result.svg; - } : function (result) { - if (!canvasToBuffer) { - canvasToBuffer = BBPromise.promisify(result.canvas.toBuffer); - } - return canvasToBuffer.call(result.canvas); - }).then(function (result) { - state.response - .header('Cache-Control', 'public, s-maxage=30, max-age=30') - .type(state.format) - .send(result); - metrics.endTiming('total.vega', start); - }).catch(function (err) { - state.log.vegaErr = err.message; - state.log.vegaErrStack = err.stack; - throw new Err('error/vega', 'vega.error'); - }).return(state); + var ver; + switch (state.request.headers.vegaversion) { + default: + case '1': + ver = 1; + break; + case '2': + ver = 2; + break; + case '0': + ver = 0; + break; + } + return vega + .render(state.domain, state.graphData, state.format, ver, state.response, cacheControlHdr) + .then(function () { + metrics.endTiming('total.vega', start); + }).catch(function (err) { + state.log.vegaErr = err.message; + state.log.vegaErrStack = err.stack; + throw new Err('error/vega', 'vega.error'); + }).return(state); } /** @@ -170,7 +167,7 @@ res .status(400) - .header('Cache-Control', 'public, s-maxage=30, max-age=30') + .header('Cache-Control', cacheControlHdr) .json(msg); metrics.increment(mx); req.logger.log(msg, l); diff --git a/scripts/sqlToFiles.js b/scripts/sqlToFiles.js index 6333c33..63c8dab 100644 --- a/scripts/sqlToFiles.js +++ b/scripts/sqlToFiles.js @@ -11,6 +11,8 @@ // dump file with all graphs var dumpFile = '/home/yurik/wmf/graphoid/dumps/props_dump.tsv'; +// rendering approach. 1 = v1, 2 = v2, 0 = first try v2, than try to correct v2, than fallback to v1 +var renVer = 0; var fs = require('fs'); var BBPromise = require('bluebird'); @@ -41,25 +43,8 @@ '15': 'Category talk:' }; -var canvasToBuffer; -function renderImage(domain, graphData, isSvg) { - return vega.render({ - domain: domain, - renderOpts: {spec: graphData, renderer: isSvg ? 'svg' : 'canvas'} - }).then(isSvg ? function (result) { - return result.svg; - } : function (result) { - if (!canvasToBuffer) { - canvasToBuffer = BBPromise.promisify(result.canvas.toBuffer); - } - return canvasToBuffer.call(result.canvas); - } - ); -} - - vega.initVega(function (a, b) { - //console.log('\t', a, b); + console.log('\t', a, b); }, 'https', ['mediawiki.org', 'wikibooks.org', @@ -95,7 +80,9 @@ }); return fs.readFileAsync(dumpFile, 'utf8') }).then(function (v) { - return BBPromise.map(v.split('\n'), function (v) { + var graphList = v.split('\n'); + //var graphList = [v.split('\n')[23407]]; + return BBPromise.map(graphList, function (v) { if (v === '') { return; } @@ -106,6 +93,11 @@ var parts = v.split('\t'); var domain = wikimap[parts[0]]; var spec = parts[4].replace(/\\\\/g, '\\'); + + //spec = JSON.parse(fs.readFileSync('/home/yurik/wmf/graphoid/graphoid/node_modules/vega/test/spec/arc.json', 'utf8')); + //spec = JSON.stringify({"hash":spec}); + + var title = (parts[2] in namespaces ? namespaces[parts[2]] : (parts[2] + ':')) + parts[3]; try { var graphSpecMap = JSON.parse(spec); @@ -121,15 +113,15 @@ var graphSpec = graphSpecMap[hash]; var graphSpecStr = JSON.stringify(graphSpec); return BBPromise.all([ - fs.writeFileAsync(pathlib.resolve(jsonPath, domain + '_' + hash) + '.json', graphSpecStr, 'utf8'), - renderImage(domain, graphSpec, true).then(function (data) { - return fs.writeFileAsync(pathlib.resolve(svgPath, domain + '_' + hash) + '.svg', data, 'utf8'); + //fs.writeFileAsync(pathlib.resolve(jsonPath, domain + '_' + hash) + '.json', graphSpecStr, 'utf8'), + vega.render(domain, graphSpec, 'svg', renVer).spread(function (data, ver) { + return fs.writeFileAsync(pathlib.resolve(svgPath, (ver !== 'v2' ? ver + '_' : '') + domain + '_' + hash) + '.svg', data, 'utf8'); }).catch(function (err) { console.log('SVG err: ' + domain + '/wiki/' + title + ' -- ' + ind + ' ' + err); return fs.writeFileAsync(pathlib.resolve(svgErrPath, domain + '_' + hash) + '.json', graphSpecStr, 'utf8'); }), - renderImage(domain, graphSpec, false).then(function (data) { - return fs.writeFileAsync(pathlib.resolve(pngPath, domain + '_' + hash) + '.png', data); + vega.render(domain, graphSpec, 'png', renVer).spread(function (data, ver) { + return fs.writeFileAsync(pathlib.resolve(pngPath, (ver !== 'v2' ? ver + '_' : '') + domain + '_' + hash) + '.png', data); }).catch(function (err) { console.log('PNG err: ' + domain + '/wiki/' + title + ' -- ' + ind + ' ' + err); return fs.writeFileAsync(pathlib.resolve(pngErrPath, domain + '_' + hash) + '.json', graphSpecStr, 'utf8'); @@ -140,5 +132,5 @@ return fs.writeFileAsync(pathlib.resolve(errPath, domain + '_' + encodeURIComponent(title)) + '.txt', spec, 'utf8'); }); }); - }, {concurrency: 30}); + }, {concurrency: 50}); }); -- To view, visit https://gerrit.wikimedia.org/r/249925 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iee2e6f320964000facedc179e8edd867b28c8591 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/graphoid Gerrit-Branch: master Gerrit-Owner: Yurik <yu...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits