Yurik has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/249925

Change subject: Support multiple Vega versions
......................................................................

Support multiple Vega versions

Supports Vega 2 in the v2 POST requests based on the VegaVersion header

VegaVersion header values:
   1 == force Vega v1 (also used when the header is missing or unrecognized)
   2 == force Vega v2
   0 == try Vega v2; if that fails, strip the "data." prefix from all field
names and retry v2; if that also fails, fall back to Vega v1

Change-Id: Iee2e6f320964000facedc179e8edd867b28c8591
---
M lib/vega.js
M package.json
M routes/graphoid-v1.js
M routes/graphoid-v2.js
M scripts/sqlToFiles.js
5 files changed, 233 insertions(+), 122 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/graphoid 
refs/changes/25/249925/1

diff --git a/lib/vega.js b/lib/vega.js
index e1a4440..2800858 100644
--- a/lib/vega.js
+++ b/lib/vega.js
@@ -2,10 +2,12 @@
 
 var BBPromise = require('bluebird');
 var urllib = require('url');
-var vega = require('vega'); // Visualization grammar - 
https://github.com/trifacta/vega
+var vega1x = require('vega-1x');
+var vega = require('vega');
 
-// Vega has its own renderAsync() version, but it does not return a promise
-var renderAsync = BBPromise.promisify(vega.headless.render, vega.headless);
+var canvas1ToBuffer, canvas2ToBuffer;
+var vega1xRenderAsync = BBPromise.promisify(vega1x.headless.render, 
vega1x.headless);
+var vegaSpecParseAsync = BBPromise.promisify(vega.parse.spec.parse, 
vega.parse.spec);
 
 module.exports = {
     /**
@@ -58,17 +60,23 @@
             })
             .join('|') + ')$');
 
-    vega.config.domainWhiteList = domains;
-    vega.config.defaultProtocol = module.exports.defaultProtocol + ':';
-    vega.config.safeMode = true;
-    vega.config.isNode = true; // Vega is flaky with its own detection, fails 
in tests and with IDE debug
+    vega1x.config.domainWhiteList = domains;
+    vega1x.config.defaultProtocol = module.exports.defaultProtocol + ':';
+    vega1x.config.safeMode = true;
+    vega1x.config.isNode = true; // Vega is flaky with its own detection, 
fails in tests and with IDE debug
+
+    vega.config.load.domainWhiteList = domains;
+    vega.config.load.defaultProtocol = module.exports.defaultProtocol + ':';
 
     // set up vega loggers to log to our device instead of stderr
-    vega.log = function (msg) {
+    vega1x.log = vega.logging.log = function (msg) {
         log('debug/vega', msg);
     };
-    vega.error = function (msg) {
+    vega1x.error = function (msg) {
         log('warn/vega', msg);
+    };
+    vega.logging.error = function (msg) {
+        throw new Error(msg);
     };
 
     //
@@ -77,47 +85,176 @@
     // Until vega is capable of per-rendering context, we must bail on any
     // relative (no hostname) data or image URLs.
     //
-    // Do not set vega.config.baseURL. Current sanitizer implementation will 
fail
+    // Do not set vega.config.load.baseURL. Current sanitizer implementation 
will fail
     // because of the missing protocol (safeMode == true). Still, lets double 
check
     // here, in case user has   'http:pathname', which for some strange reason 
is
     // parsed as correct by url lib.
     //
-    var originalSanitize = vega.data.load.sanitizeUrl.bind(vega.data.load);
-    vega.data.load.sanitizeUrl = function (urlOrig) {
-        var url = originalSanitize.call(vega.data.load, urlOrig);
-        if (url) {
-            var parts = urllib.parse(url);
-            if (!parts.protocol || !parts.hostname) {
-                url = null;
-            } else if (parts.protocol !== 'http:' && parts.protocol !== 
'https:') {
-                // load.sanitizeUrl() already does this, but double check to 
be safe
-                url = null;
-            }
-        }
-        if (url && module.exports.domainMap) {
-            url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match, 
prot, domain) {
-                var repl = module.exports.domainMap[domain];
-                return repl ? prot + repl : match;
-            });
-        }
+    function setSanitizeUrlFunc(isLegacy) {
 
-        if (!url) {
-            log('debug/url-deny', urlOrig);
-        } else if (urlOrig !== url) {
-            log('debug/url-fix', {'req': urlOrig, 'repl': url});
-        } else {
-            log('trace/url-ok', urlOrig);
-        }
-        return url;
-    };
+        var loadModule = isLegacy ? vega1x.data.load : vega.util.load;
+        var originalSanitize = loadModule.sanitizeUrl.bind(loadModule);
+
+        loadModule.sanitizeUrl = function(sanitizeParam) {
+            var urlOrig = isLegacy ? sanitizeParam : sanitizeParam.url;
+            var url = originalSanitize.apply(loadModule, arguments);
+            if (url) {
+                var parts = urllib.parse(url);
+                if (!parts.protocol || !parts.hostname) {
+                    url = null;
+                } else if (parts.protocol !== 'http:' && parts.protocol !== 
'https:') {
+                    // load.sanitizeUrl() already does this, but double check 
to be safe
+                    url = null;
+                }
+            }
+            if (url && module.exports.domainMap) {
+                url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match, 
prot, domain) {
+                    var repl = module.exports.domainMap[domain];
+                    return repl ? prot + repl : match;
+                });
+            }
+
+            if (!url) {
+                log('debug/url-deny', urlOrig);
+            } else if (urlOrig !== url) {
+                log('debug/url-fix', {'req': urlOrig, 'repl': url});
+            } else {
+                log('trace/url-ok', urlOrig);
+            }
+            return url;
+        };
+    }
+
+    setSanitizeUrlFunc(true);
+    setSanitizeUrlFunc(false);
 };
 
-module.exports.render = function (opts) {
-    // BUG: see comment above at vega.data.load.sanitizeUrl = ...
+/**
+ *
+ * @param domain domain context of the request
+ * @param spec graph specification
+ * @param format string format - png or svg
+ * @param version
+ * @param response object to send/stream the results to.  If not given, 
returns [value, isLegacy]
+ * @param cache string to set 'Cache-Control' header
+ * @returns {*} promise
+ */
+module.exports.render = function(domain, spec, format, version, response, 
cache) {
+    var isLegacy = false,
+        isSvg = format === 'svg',
+        ver;
+
+    var p = BBPromise.try(function () {
+        if (typeof spec === 'string') {
+            spec = JSON.parse(spec);
+        }
+    });
+
+    if (version == 1) {
+        // render using version 1 only
+        p = p.then(function () {
+            isLegacy = true;
+            ver = 'v1';
+            return renderV1(domain, spec, isSvg);
+        });
+    } else {
+        // try version 2 first
+        p = p.then(function () {
+            ver = 'v2';
+            return renderV2(domain, spec, isSvg);
+        });
+        // try auto-fixing and fallback to 1
+        if (!version) {
+            p = p.catch(function (err) {
+                // Try to auto-correct some common mistakes
+                ver = 'v2+fix';
+                var spec2 = 
JSON.parse(JSON.stringify(spec).replace(/"field":"data\./g, '"field":"'));
+                return renderV2(domain, spec2, isSvg);
+            }).catch(function () {
+                // ignore error
+                isLegacy = true;
+                ver = 'v1';
+                return renderV1(domain, spec, isSvg);
+            });
+        }
+    }
+
+    return p.then(function (result) {
+        if (response || isSvg) {
+            // if response is given, attempt to stream to it
+            return result;
+        }
+        // Convert canvas content to a buffer
+        if (isLegacy) {
+            if (!canvas1ToBuffer) {
+                canvas1ToBuffer = BBPromise.promisify(result.toBuffer);
+            }
+            return canvas1ToBuffer.call(result);
+        } else {
+            if (!canvas2ToBuffer) {
+                canvas2ToBuffer = BBPromise.promisify(result.toBuffer);
+            }
+            return canvas2ToBuffer.call(result);
+        }
+    }).then(function (result) {
+        if (!response) {
+            return [result, ver];
+        }
+
+        response.status(200).type(format);
+        if (cache) {
+            response.header('Cache-Control', cache);
+        }
+        response.header('VegaVersion', ver);
+        if (isSvg) {
+            response.send(result);
+            return ver;
+        }
+
+        // PNG stream copy
+        var pendingPromise = BBPromise.pending();
+        var stream = result.pngStream();
+        stream.on('data', function (chunk) {
+            response.write(chunk);
+        });
+        stream.on('end', function () {
+            response.end();
+            pendingPromise.resolve(ver);
+        });
+        return pendingPromise.promise;
+    });
+};
+
+function renderV1(domain, spec, isSvg) {
+    // BUG: see comment above at vega1x.data.load.sanitizeUrl = ...
     // In case of non-absolute URLs, use requesting domain as "local"
-    vega.config.baseURL = module.exports.defaultProtocol + '://' + opts.domain;
+    vega1x.config.baseURL = module.exports.defaultProtocol + '://' + domain;
 
     // TODO: BUG: possible async bug
     // need to call it without promises and than wrap it because vega doesn't 
have request state
-    return renderAsync(opts.renderOpts);
-};
+    var renderer = isSvg ? 'svg' : 'canvas';
+    return vega1xRenderAsync({spec: spec, renderer: renderer}).get(renderer);
+}
+
+function renderV2(domain, spec, isSvg) {
+    return BBPromise.try(function () {
+        var config = {
+            load: {
+                defaultProtocol: module.exports.defaultProtocol,
+                baseURL: module.exports.defaultProtocol + '://' + domain
+            }
+        };
+        return vegaSpecParseAsync(spec, config).then(function (chart) {
+            var view = chart({renderer: (isSvg ? 'svg' : 'canvas')}).update();
+
+            if (isSvg) {
+                return view.svg();
+            }
+            var pending = BBPromise.pending();
+            view.canvasAsync(function (canvas) {
+                pending.resolve(canvas);
+            });
+            return pending.promise;
+        });
+    });
+}
diff --git a/package.json b/package.json
index c5ae1e4..031bc4e 100644
--- a/package.json
+++ b/package.json
@@ -29,6 +29,10 @@
   },
   "homepage": "https://www.mediawiki.org/wiki/Extension:Graph";,
   "dependencies": {
+    "underscore": "^1.8.3",
+    "vega-1x": "git+http://g...@github.com/nyurik/vega#v1.x";,
+    "vega": "git+http://g...@github.com/nyurik/vega";,
+
     "bluebird": "~2.8.2",
     "body-parser": "^1.14.1",
     "bunyan": "^1.5.1",
@@ -39,10 +43,6 @@
     "js-yaml": "^3.4.3",
     "preq": "^0.4.4",
     "service-runner": "^0.2.12"
-
-    ,
-    "underscore": "^1.8.3",
-    "vega": "git+http://g...@github.com/nyurik/vega";
   },
   "devDependencies": {
     "mkdirp": "^0.5.1",
diff --git a/routes/graphoid-v1.js b/routes/graphoid-v1.js
index 276161c..9eead15 100644
--- a/routes/graphoid-v1.js
+++ b/routes/graphoid-v1.js
@@ -241,30 +241,15 @@
 
 function renderOnCanvas(state) {
     var start = Date.now();
-    return vega.render({
-        domain: state.domain,
-        renderOpts: {spec: state.graphData, renderer: 'canvas'}
-    }).then(function (result) {
-        var pendingPromise = BBPromise.pending();
-        var stream = result.canvas.pngStream();
-        state.response
-            .status(200)
-            .type('png')
-            // For now, lets re-cache more frequently
-            .header('Cache-Control', 'public, s-maxage=30, max-age=30');
-        stream.on('data', function (chunk) {
-            state.response.write(chunk);
-        });
-        stream.on('end', function () {
-            state.response.end();
+    return vega
+        .render(state.domain, state.graphData, 'png', 1, state.response, 
'public, s-maxage=30, max-age=30')
+        .then(function () {
             metrics.endTiming('total.vega', start);
-            pendingPromise.resolve(state);
+        })
+        .catch(function (err) {
+            state.log.vegaErr = err;
+            throw new Err('error/vega', 'vega.error');
         });
-        return pendingPromise.promise;
-    }).catch(function (err) {
-        state.log.vegaErr = err;
-        throw new Err('error/vega', 'vega.error');
-    });
 }
 
 /**
diff --git a/routes/graphoid-v2.js b/routes/graphoid-v2.js
index 98051d3..2ac2404 100644
--- a/routes/graphoid-v2.js
+++ b/routes/graphoid-v2.js
@@ -22,12 +22,11 @@
  */
 var timeout = 10000;
 
-
 /**
- * Async version of the can canvas.toBuffer()
- * @type {Function}
+ * Cache header to set on the responses
+ * @type {string}
  */
-var canvasToBuffer;
+var cacheControlHdr = 'public, s-maxage=30, max-age=30';
 
 /*
  * Utility functions
@@ -104,30 +103,28 @@
     if (state.request.headers.revisionid) {
         state.response.header('RevisionId', state.request.headers.revisionid);
     }
-
-    var isSvg = state.format === 'svg';
-
-    return vega.render({
-        domain: state.domain,
-        renderOpts: {spec: state.graphData, renderer: isSvg ? 'svg' : 'canvas'}
-    }).then(isSvg ? function (result) {
-        return result.svg;
-    } : function (result) {
-        if (!canvasToBuffer) {
-            canvasToBuffer = BBPromise.promisify(result.canvas.toBuffer);
-        }
-        return canvasToBuffer.call(result.canvas);
-    }).then(function (result) {
-        state.response
-            .header('Cache-Control', 'public, s-maxage=30, max-age=30')
-            .type(state.format)
-            .send(result);
-        metrics.endTiming('total.vega', start);
-    }).catch(function (err) {
-        state.log.vegaErr = err.message;
-        state.log.vegaErrStack = err.stack;
-        throw new Err('error/vega', 'vega.error');
-    }).return(state);
+    var ver;
+    switch (state.request.headers.vegaversion) {
+        default:
+        case '1':
+            ver = 1;
+            break;
+        case '2':
+            ver = 2;
+            break;
+        case '0':
+            ver = 0;
+            break;
+    }
+    return vega
+        .render(state.domain, state.graphData, state.format, ver, 
state.response, cacheControlHdr)
+        .then(function () {
+            metrics.endTiming('total.vega', start);
+        }).catch(function (err) {
+            state.log.vegaErr = err.message;
+            state.log.vegaErrStack = err.stack;
+            throw new Err('error/vega', 'vega.error');
+        }).return(state);
 }
 
 /**
@@ -170,7 +167,7 @@
 
             res
                 .status(400)
-                .header('Cache-Control', 'public, s-maxage=30, max-age=30')
+                .header('Cache-Control', cacheControlHdr)
                 .json(msg);
             metrics.increment(mx);
             req.logger.log(msg, l);
diff --git a/scripts/sqlToFiles.js b/scripts/sqlToFiles.js
index 6333c33..63c8dab 100644
--- a/scripts/sqlToFiles.js
+++ b/scripts/sqlToFiles.js
@@ -11,6 +11,8 @@
 // dump file with all graphs
 var dumpFile = '/home/yurik/wmf/graphoid/dumps/props_dump.tsv';
 
+// rendering approach.  1 = v1, 2 = v2, 0 = first try v2, than try to correct 
v2, than fallback to v1
+var renVer = 0;
 
 var fs = require('fs');
 var BBPromise = require('bluebird');
@@ -41,25 +43,8 @@
     '15': 'Category talk:'
 };
 
-var canvasToBuffer;
-function renderImage(domain, graphData, isSvg) {
-    return vega.render({
-        domain: domain,
-        renderOpts: {spec: graphData, renderer: isSvg ? 'svg' : 'canvas'}
-    }).then(isSvg ? function (result) {
-            return result.svg;
-        } : function (result) {
-            if (!canvasToBuffer) {
-                canvasToBuffer = BBPromise.promisify(result.canvas.toBuffer);
-            }
-            return canvasToBuffer.call(result.canvas);
-        }
-    );
-}
-
-
 vega.initVega(function (a, b) {
-        //console.log('\t', a, b);
+        console.log('\t', a, b);
     }, 'https',
     ['mediawiki.org',
         'wikibooks.org',
@@ -95,7 +80,9 @@
     });
     return fs.readFileAsync(dumpFile, 'utf8')
 }).then(function (v) {
-    return BBPromise.map(v.split('\n'), function (v) {
+    var graphList = v.split('\n');
+    //var graphList = [v.split('\n')[23407]];
+    return BBPromise.map(graphList, function (v) {
         if (v === '') {
             return;
         }
@@ -106,6 +93,11 @@
         var parts = v.split('\t');
         var domain = wikimap[parts[0]];
         var spec = parts[4].replace(/\\\\/g, '\\');
+
+        //spec = 
JSON.parse(fs.readFileSync('/home/yurik/wmf/graphoid/graphoid/node_modules/vega/test/spec/arc.json',
 'utf8'));
+        //spec = JSON.stringify({"hash":spec});
+
+
         var title = (parts[2] in namespaces ? namespaces[parts[2]] : (parts[2] 
+ ':')) + parts[3];
         try {
             var graphSpecMap = JSON.parse(spec);
@@ -121,15 +113,15 @@
                 var graphSpec = graphSpecMap[hash];
                 var graphSpecStr = JSON.stringify(graphSpec);
                 return BBPromise.all([
-                    fs.writeFileAsync(pathlib.resolve(jsonPath, domain + '_' + 
hash) + '.json', graphSpecStr, 'utf8'),
-                    renderImage(domain, graphSpec, true).then(function (data) {
-                        return fs.writeFileAsync(pathlib.resolve(svgPath, 
domain + '_' + hash) + '.svg', data, 'utf8');
+                    //fs.writeFileAsync(pathlib.resolve(jsonPath, domain + '_' 
+ hash) + '.json', graphSpecStr, 'utf8'),
+                    vega.render(domain, graphSpec, 'svg', 
renVer).spread(function (data, ver) {
+                        return fs.writeFileAsync(pathlib.resolve(svgPath, (ver 
!== 'v2' ? ver + '_' : '') + domain + '_' + hash) + '.svg', data, 'utf8');
                     }).catch(function (err) {
                         console.log('SVG err: ' + domain + '/wiki/' + title + 
' -- ' + ind + ' ' + err);
                         return fs.writeFileAsync(pathlib.resolve(svgErrPath, 
domain + '_' + hash) + '.json', graphSpecStr, 'utf8');
                     }),
-                    renderImage(domain, graphSpec, false).then(function (data) 
{
-                        return fs.writeFileAsync(pathlib.resolve(pngPath, 
domain + '_' + hash) + '.png', data);
+                    vega.render(domain, graphSpec, 'png', 
renVer).spread(function (data, ver) {
+                        return fs.writeFileAsync(pathlib.resolve(pngPath, (ver 
!== 'v2' ? ver + '_' : '') + domain + '_' + hash) + '.png', data);
                     }).catch(function (err) {
                         console.log('PNG err: ' + domain + '/wiki/' + title + 
' -- ' + ind + ' ' + err);
                         return fs.writeFileAsync(pathlib.resolve(pngErrPath, 
domain + '_' + hash) + '.json', graphSpecStr, 'utf8');
@@ -140,5 +132,5 @@
                 return fs.writeFileAsync(pathlib.resolve(errPath, domain + '_' 
+ encodeURIComponent(title)) + '.txt', spec, 'utf8');
             });
         });
-    }, {concurrency: 30});
+    }, {concurrency: 50});
 });

-- 
To view, visit https://gerrit.wikimedia.org/r/249925
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iee2e6f320964000facedc179e8edd867b28c8591
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/graphoid
Gerrit-Branch: master
Gerrit-Owner: Yurik <yu...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to