Yurik has uploaded a new change for review.
https://gerrit.wikimedia.org/r/249925
Change subject: Support multiple Vega versions
......................................................................
Support multiple Vega versions
Supports Vega 2 in the v2 POST requests based on the VegaVersion header
The VegaVersion header can be one of:
1 == forces Vega v1
2 == forces Vega v2
0 == try Vega v2 first; if that fails, strip the "data." prefix from all
fields and retry with v2; if that also fails, fall back to Vega v1
Change-Id: Iee2e6f320964000facedc179e8edd867b28c8591
---
M lib/vega.js
M package.json
M routes/graphoid-v1.js
M routes/graphoid-v2.js
M scripts/sqlToFiles.js
5 files changed, 233 insertions(+), 122 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/graphoid
refs/changes/25/249925/1
diff --git a/lib/vega.js b/lib/vega.js
index e1a4440..2800858 100644
--- a/lib/vega.js
+++ b/lib/vega.js
@@ -2,10 +2,12 @@
var BBPromise = require('bluebird');
var urllib = require('url');
-var vega = require('vega'); // Visualization grammar -
https://github.com/trifacta/vega
+var vega1x = require('vega-1x');
+var vega = require('vega');
-// Vega has its own renderAsync() version, but it does not return a promise
-var renderAsync = BBPromise.promisify(vega.headless.render, vega.headless);
+var canvas1ToBuffer, canvas2ToBuffer;
+var vega1xRenderAsync = BBPromise.promisify(vega1x.headless.render,
vega1x.headless);
+var vegaSpecParseAsync = BBPromise.promisify(vega.parse.spec.parse,
vega.parse.spec);
module.exports = {
/**
@@ -58,17 +60,23 @@
})
.join('|') + ')$');
- vega.config.domainWhiteList = domains;
- vega.config.defaultProtocol = module.exports.defaultProtocol + ':';
- vega.config.safeMode = true;
- vega.config.isNode = true; // Vega is flaky with its own detection, fails
in tests and with IDE debug
+ vega1x.config.domainWhiteList = domains;
+ vega1x.config.defaultProtocol = module.exports.defaultProtocol + ':';
+ vega1x.config.safeMode = true;
+ vega1x.config.isNode = true; // Vega is flaky with its own detection,
fails in tests and with IDE debug
+
+ vega.config.load.domainWhiteList = domains;
+ vega.config.load.defaultProtocol = module.exports.defaultProtocol + ':';
// set up vega loggers to log to our device instead of stderr
- vega.log = function (msg) {
+ vega1x.log = vega.logging.log = function (msg) {
log('debug/vega', msg);
};
- vega.error = function (msg) {
+ vega1x.error = function (msg) {
log('warn/vega', msg);
+ };
+ vega.logging.error = function (msg) {
+ throw new Error(msg);
};
//
@@ -77,47 +85,176 @@
// Until vega is capable of per-rendering context, we must bail on any
// relative (no hostname) data or image URLs.
//
- // Do not set vega.config.baseURL. Current sanitizer implementation will
fail
+ // Do not set vega.config.load.baseURL. Current sanitizer implementation
will fail
// because of the missing protocol (safeMode == true). Still, lets double
check
// here, in case user has 'http:pathname', which for some strange reason
is
// parsed as correct by url lib.
//
- var originalSanitize = vega.data.load.sanitizeUrl.bind(vega.data.load);
- vega.data.load.sanitizeUrl = function (urlOrig) {
- var url = originalSanitize.call(vega.data.load, urlOrig);
- if (url) {
- var parts = urllib.parse(url);
- if (!parts.protocol || !parts.hostname) {
- url = null;
- } else if (parts.protocol !== 'http:' && parts.protocol !==
'https:') {
- // load.sanitizeUrl() already does this, but double check to
be safe
- url = null;
- }
- }
- if (url && module.exports.domainMap) {
- url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match,
prot, domain) {
- var repl = module.exports.domainMap[domain];
- return repl ? prot + repl : match;
- });
- }
+ function setSanitizeUrlFunc(isLegacy) {
- if (!url) {
- log('debug/url-deny', urlOrig);
- } else if (urlOrig !== url) {
- log('debug/url-fix', {'req': urlOrig, 'repl': url});
- } else {
- log('trace/url-ok', urlOrig);
- }
- return url;
- };
+ var loadModule = isLegacy ? vega1x.data.load : vega.util.load;
+ var originalSanitize = loadModule.sanitizeUrl.bind(loadModule);
+
+ loadModule.sanitizeUrl = function(sanitizeParam) {
+ var urlOrig = isLegacy ? sanitizeParam : sanitizeParam.url;
+ var url = originalSanitize.apply(loadModule, arguments);
+ if (url) {
+ var parts = urllib.parse(url);
+ if (!parts.protocol || !parts.hostname) {
+ url = null;
+ } else if (parts.protocol !== 'http:' && parts.protocol !==
'https:') {
+ // load.sanitizeUrl() already does this, but double check
to be safe
+ url = null;
+ }
+ }
+ if (url && module.exports.domainMap) {
+ url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match,
prot, domain) {
+ var repl = module.exports.domainMap[domain];
+ return repl ? prot + repl : match;
+ });
+ }
+
+ if (!url) {
+ log('debug/url-deny', urlOrig);
+ } else if (urlOrig !== url) {
+ log('debug/url-fix', {'req': urlOrig, 'repl': url});
+ } else {
+ log('trace/url-ok', urlOrig);
+ }
+ return url;
+ };
+ }
+
+ setSanitizeUrlFunc(true);
+ setSanitizeUrlFunc(false);
};
-module.exports.render = function (opts) {
- // BUG: see comment above at vega.data.load.sanitizeUrl = ...
+/**
+ *
+ * @param domain domain context of the request
+ * @param spec graph specification
+ * @param format string format - png or svg
+ * @param version
+ * @param response object to send/stream the results to. If not given,
returns [value, isLegacy]
+ * @param cache string to set 'Cache-Control' header
+ * @returns {*} promise
+ */
+module.exports.render = function(domain, spec, format, version, response,
cache) {
+ var isLegacy = false,
+ isSvg = format === 'svg',
+ ver;
+
+ var p = BBPromise.try(function () {
+ if (typeof spec === 'string') {
+ spec = JSON.parse(spec);
+ }
+ });
+
+ if (version == 1) {
+ // render using version 1 only
+ p = p.then(function () {
+ isLegacy = true;
+ ver = 'v1';
+ return renderV1(domain, spec, isSvg);
+ });
+ } else {
+ // try version 2 first
+ p = p.then(function () {
+ ver = 'v2';
+ return renderV2(domain, spec, isSvg);
+ });
+ // try auto-fixing and fallback to 1
+ if (!version) {
+ p = p.catch(function (err) {
+ // Try to auto-correct some common mistakes
+ ver = 'v2+fix';
+ var spec2 =
JSON.parse(JSON.stringify(spec).replace(/"field":"data\./g, '"field":"'));
+ return renderV2(domain, spec2, isSvg);
+ }).catch(function () {
+ // ignore error
+ isLegacy = true;
+ ver = 'v1';
+ return renderV1(domain, spec, isSvg);
+ });
+ }
+ }
+
+ return p.then(function (result) {
+ if (response || isSvg) {
+ // if response is given, attempt to stream to it
+ return result;
+ }
+ // Convert canvas content to a buffer
+ if (isLegacy) {
+ if (!canvas1ToBuffer) {
+ canvas1ToBuffer = BBPromise.promisify(result.toBuffer);
+ }
+ return canvas1ToBuffer.call(result);
+ } else {
+ if (!canvas2ToBuffer) {
+ canvas2ToBuffer = BBPromise.promisify(result.toBuffer);
+ }
+ return canvas2ToBuffer.call(result);
+ }
+ }).then(function (result) {
+ if (!response) {
+ return [result, ver];
+ }
+
+ response.status(200).type(format);
+ if (cache) {
+ response.header('Cache-Control', cache);
+ }
+ response.header('VegaVersion', ver);
+ if (isSvg) {
+ response.send(result);
+ return ver;
+ }
+
+ // PNG stream copy
+ var pendingPromise = BBPromise.pending();
+ var stream = result.pngStream();
+ stream.on('data', function (chunk) {
+ response.write(chunk);
+ });
+ stream.on('end', function () {
+ response.end();
+ pendingPromise.resolve(ver);
+ });
+ return pendingPromise.promise;
+ });
+};
+
+function renderV1(domain, spec, isSvg) {
+ // BUG: see comment above at vega1x.data.load.sanitizeUrl = ...
// In case of non-absolute URLs, use requesting domain as "local"
- vega.config.baseURL = module.exports.defaultProtocol + '://' + opts.domain;
+ vega1x.config.baseURL = module.exports.defaultProtocol + '://' + domain;
// TODO: BUG: possible async bug
// need to call it without promises and than wrap it because vega doesn't
have request state
- return renderAsync(opts.renderOpts);
-};
+ var renderer = isSvg ? 'svg' : 'canvas';
+ return vega1xRenderAsync({spec: spec, renderer: renderer}).get(renderer);
+}
+
+function renderV2(domain, spec, isSvg) {
+ return BBPromise.try(function () {
+ var config = {
+ load: {
+ defaultProtocol: module.exports.defaultProtocol,
+ baseURL: module.exports.defaultProtocol + '://' + domain
+ }
+ };
+ return vegaSpecParseAsync(spec, config).then(function (chart) {
+ var view = chart({renderer: (isSvg ? 'svg' : 'canvas')}).update();
+
+ if (isSvg) {
+ return view.svg();
+ }
+ var pending = BBPromise.pending();
+ view.canvasAsync(function (canvas) {
+ pending.resolve(canvas);
+ });
+ return pending.promise;
+ });
+ });
+}
diff --git a/package.json b/package.json
index c5ae1e4..031bc4e 100644
--- a/package.json
+++ b/package.json
@@ -29,6 +29,10 @@
},
"homepage": "https://www.mediawiki.org/wiki/Extension:Graph",
"dependencies": {
+ "underscore": "^1.8.3",
+ "vega-1x": "git+http://[email protected]/nyurik/vega#v1.x",
+ "vega": "git+http://[email protected]/nyurik/vega",
+
"bluebird": "~2.8.2",
"body-parser": "^1.14.1",
"bunyan": "^1.5.1",
@@ -39,10 +43,6 @@
"js-yaml": "^3.4.3",
"preq": "^0.4.4",
"service-runner": "^0.2.12"
-
- ,
- "underscore": "^1.8.3",
- "vega": "git+http://[email protected]/nyurik/vega"
},
"devDependencies": {
"mkdirp": "^0.5.1",
diff --git a/routes/graphoid-v1.js b/routes/graphoid-v1.js
index 276161c..9eead15 100644
--- a/routes/graphoid-v1.js
+++ b/routes/graphoid-v1.js
@@ -241,30 +241,15 @@
function renderOnCanvas(state) {
var start = Date.now();
- return vega.render({
- domain: state.domain,
- renderOpts: {spec: state.graphData, renderer: 'canvas'}
- }).then(function (result) {
- var pendingPromise = BBPromise.pending();
- var stream = result.canvas.pngStream();
- state.response
- .status(200)
- .type('png')
- // For now, lets re-cache more frequently
- .header('Cache-Control', 'public, s-maxage=30, max-age=30');
- stream.on('data', function (chunk) {
- state.response.write(chunk);
- });
- stream.on('end', function () {
- state.response.end();
+ return vega
+ .render(state.domain, state.graphData, 'png', 1, state.response,
'public, s-maxage=30, max-age=30')
+ .then(function () {
metrics.endTiming('total.vega', start);
- pendingPromise.resolve(state);
+ })
+ .catch(function (err) {
+ state.log.vegaErr = err;
+ throw new Err('error/vega', 'vega.error');
});
- return pendingPromise.promise;
- }).catch(function (err) {
- state.log.vegaErr = err;
- throw new Err('error/vega', 'vega.error');
- });
}
/**
diff --git a/routes/graphoid-v2.js b/routes/graphoid-v2.js
index 98051d3..2ac2404 100644
--- a/routes/graphoid-v2.js
+++ b/routes/graphoid-v2.js
@@ -22,12 +22,11 @@
*/
var timeout = 10000;
-
/**
- * Async version of the can canvas.toBuffer()
- * @type {Function}
+ * Cache header to set on the responses
+ * @type {string}
*/
-var canvasToBuffer;
+var cacheControlHdr = 'public, s-maxage=30, max-age=30';
/*
* Utility functions
@@ -104,30 +103,28 @@
if (state.request.headers.revisionid) {
state.response.header('RevisionId', state.request.headers.revisionid);
}
-
- var isSvg = state.format === 'svg';
-
- return vega.render({
- domain: state.domain,
- renderOpts: {spec: state.graphData, renderer: isSvg ? 'svg' : 'canvas'}
- }).then(isSvg ? function (result) {
- return result.svg;
- } : function (result) {
- if (!canvasToBuffer) {
- canvasToBuffer = BBPromise.promisify(result.canvas.toBuffer);
- }
- return canvasToBuffer.call(result.canvas);
- }).then(function (result) {
- state.response
- .header('Cache-Control', 'public, s-maxage=30, max-age=30')
- .type(state.format)
- .send(result);
- metrics.endTiming('total.vega', start);
- }).catch(function (err) {
- state.log.vegaErr = err.message;
- state.log.vegaErrStack = err.stack;
- throw new Err('error/vega', 'vega.error');
- }).return(state);
+ var ver;
+ switch (state.request.headers.vegaversion) {
+ default:
+ case '1':
+ ver = 1;
+ break;
+ case '2':
+ ver = 2;
+ break;
+ case '0':
+ ver = 0;
+ break;
+ }
+ return vega
+ .render(state.domain, state.graphData, state.format, ver,
state.response, cacheControlHdr)
+ .then(function () {
+ metrics.endTiming('total.vega', start);
+ }).catch(function (err) {
+ state.log.vegaErr = err.message;
+ state.log.vegaErrStack = err.stack;
+ throw new Err('error/vega', 'vega.error');
+ }).return(state);
}
/**
@@ -170,7 +167,7 @@
res
.status(400)
- .header('Cache-Control', 'public, s-maxage=30, max-age=30')
+ .header('Cache-Control', cacheControlHdr)
.json(msg);
metrics.increment(mx);
req.logger.log(msg, l);
diff --git a/scripts/sqlToFiles.js b/scripts/sqlToFiles.js
index 6333c33..63c8dab 100644
--- a/scripts/sqlToFiles.js
+++ b/scripts/sqlToFiles.js
@@ -11,6 +11,8 @@
// dump file with all graphs
var dumpFile = '/home/yurik/wmf/graphoid/dumps/props_dump.tsv';
+// rendering approach. 1 = v1, 2 = v2, 0 = first try v2, than try to correct
v2, than fallback to v1
+var renVer = 0;
var fs = require('fs');
var BBPromise = require('bluebird');
@@ -41,25 +43,8 @@
'15': 'Category talk:'
};
-var canvasToBuffer;
-function renderImage(domain, graphData, isSvg) {
- return vega.render({
- domain: domain,
- renderOpts: {spec: graphData, renderer: isSvg ? 'svg' : 'canvas'}
- }).then(isSvg ? function (result) {
- return result.svg;
- } : function (result) {
- if (!canvasToBuffer) {
- canvasToBuffer = BBPromise.promisify(result.canvas.toBuffer);
- }
- return canvasToBuffer.call(result.canvas);
- }
- );
-}
-
-
vega.initVega(function (a, b) {
- //console.log('\t', a, b);
+ console.log('\t', a, b);
}, 'https',
['mediawiki.org',
'wikibooks.org',
@@ -95,7 +80,9 @@
});
return fs.readFileAsync(dumpFile, 'utf8')
}).then(function (v) {
- return BBPromise.map(v.split('\n'), function (v) {
+ var graphList = v.split('\n');
+ //var graphList = [v.split('\n')[23407]];
+ return BBPromise.map(graphList, function (v) {
if (v === '') {
return;
}
@@ -106,6 +93,11 @@
var parts = v.split('\t');
var domain = wikimap[parts[0]];
var spec = parts[4].replace(/\\\\/g, '\\');
+
+ //spec =
JSON.parse(fs.readFileSync('/home/yurik/wmf/graphoid/graphoid/node_modules/vega/test/spec/arc.json',
'utf8'));
+ //spec = JSON.stringify({"hash":spec});
+
+
var title = (parts[2] in namespaces ? namespaces[parts[2]] : (parts[2]
+ ':')) + parts[3];
try {
var graphSpecMap = JSON.parse(spec);
@@ -121,15 +113,15 @@
var graphSpec = graphSpecMap[hash];
var graphSpecStr = JSON.stringify(graphSpec);
return BBPromise.all([
- fs.writeFileAsync(pathlib.resolve(jsonPath, domain + '_' +
hash) + '.json', graphSpecStr, 'utf8'),
- renderImage(domain, graphSpec, true).then(function (data) {
- return fs.writeFileAsync(pathlib.resolve(svgPath,
domain + '_' + hash) + '.svg', data, 'utf8');
+ //fs.writeFileAsync(pathlib.resolve(jsonPath, domain + '_'
+ hash) + '.json', graphSpecStr, 'utf8'),
+ vega.render(domain, graphSpec, 'svg',
renVer).spread(function (data, ver) {
+ return fs.writeFileAsync(pathlib.resolve(svgPath, (ver
!== 'v2' ? ver + '_' : '') + domain + '_' + hash) + '.svg', data, 'utf8');
}).catch(function (err) {
console.log('SVG err: ' + domain + '/wiki/' + title +
' -- ' + ind + ' ' + err);
return fs.writeFileAsync(pathlib.resolve(svgErrPath,
domain + '_' + hash) + '.json', graphSpecStr, 'utf8');
}),
- renderImage(domain, graphSpec, false).then(function (data)
{
- return fs.writeFileAsync(pathlib.resolve(pngPath,
domain + '_' + hash) + '.png', data);
+ vega.render(domain, graphSpec, 'png',
renVer).spread(function (data, ver) {
+ return fs.writeFileAsync(pathlib.resolve(pngPath, (ver
!== 'v2' ? ver + '_' : '') + domain + '_' + hash) + '.png', data);
}).catch(function (err) {
console.log('PNG err: ' + domain + '/wiki/' + title +
' -- ' + ind + ' ' + err);
return fs.writeFileAsync(pathlib.resolve(pngErrPath,
domain + '_' + hash) + '.json', graphSpecStr, 'utf8');
@@ -140,5 +132,5 @@
return fs.writeFileAsync(pathlib.resolve(errPath, domain + '_'
+ encodeURIComponent(title)) + '.txt', spec, 'utf8');
});
});
- }, {concurrency: 30});
+ }, {concurrency: 50});
});
--
To view, visit https://gerrit.wikimedia.org/r/249925
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Iee2e6f320964000facedc179e8edd867b28c8591
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/graphoid
Gerrit-Branch: master
Gerrit-Owner: Yurik <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits